diff --git a/devices/rtx/device/gpu/evalShading.h b/devices/rtx/device/gpu/evalShading.h
index 2c8558a01..367eb39f2 100644
--- a/devices/rtx/device/gpu/evalShading.h
+++ b/devices/rtx/device/gpu/evalShading.h
@@ -119,7 +119,7 @@ VISRTX_DEVICE NextRay materialNextRay(
 {
   if (shadingState.callableBaseIndex
       == ~DeviceObjectIndex(0)) // No next ray by default
-    return NextRay{vec3(0.0f), vec3(0.0f)};
+    return NextRay{vec3(0.0f), vec3(0.0f), 0.0f};
 
   return optixDirectCall<NextRay>(shadingState.callableBaseIndex
           + int(SurfaceShaderEntryPoints::EvaluateNextRay),
@@ -128,6 +128,24 @@ VISRTX_DEVICE NextRay materialNextRay(
       &rs);
 }
 
+// Solid-angle pdf the material's BSDF sampler would assign to direction `wi`
+// given outgoing direction `wo` (both world space), for environment MIS. 0 for
+// no material and for materials whose sampler cannot produce `wi` (e.g. Matte,
+// which has no continuation ray).
+VISRTX_DEVICE float materialEvalPdf(const MaterialShadingState &shadingState,
+    const vec3 &wo,
+    const vec3 &wi)
+{
+  // The all-ones base index is the "no material callables" sentinel.
+  const auto baseIndex = shadingState.callableBaseIndex;
+  const bool hasMaterial = baseIndex != ~DeviceObjectIndex(0);
+  if (!hasMaterial)
+    return 0.0f;
+
+  const auto pdfEntry = baseIndex + int(SurfaceShaderEntryPoints::EvaluatePdf);
+  return optixDirectCall<float>(pdfEntry, &shadingState.data, &wo, &wi);
+}
+
 VISRTX_DEVICE vec3 materialShadeSurface(
     const MaterialShadingState &shadingState,
     const SurfaceHit &hit,
diff --git a/devices/rtx/device/gpu/gpu_util.h b/devices/rtx/device/gpu/gpu_util.h
index a5e91c800..4fb230ef7 100644
--- a/devices/rtx/device/gpu/gpu_util.h
+++ b/devices/rtx/device/gpu/gpu_util.h
@@ -403,7 +403,10 @@ VISRTX_DEVICE bool getBackgroundLight(
       // For orthonormal matrices, inverse = transpose
       const mat3 xfmInv = glm::transpose(mat3(hdriLight.xfm));
       const vec3 localRayDir = xfmInv * rayDir;
-      outRadiance += sampleHDRI(light, localRayDir);
+      // sampleHDRI applies hdri.scale; tint by light.color to match the NEE
+      // radiance in sampleHDRILight (raw * hdri.scale * color), so env MIS
+      // deposits identical radiance on the NEE and BSDF-escape sides.
+      outRadiance += sampleHDRI(light, localRayDir) * light.color;
       hasVisibleHDRI = true;
     }
   }
@@ -411,6 +414,30 @@ VISRTX_DEVICE bool getBackgroundLight(
   return hasVisibleHDRI;
 }
 
+// Light-sampling density of the visible HDRI environment(s) at `rayDir`, used
+// by environment MIS on BOTH the NEE side and the BSDF-escape side. Per HDRI:
+// Rec.709 luminance of sampleHDRI(light, uv) times hdri.pdfWeight, summed over
+// visible instances (invisible HDRIs contribute 0). Meant to mirror the NEE
+// importance pdf in sampleHDRILight. NOTE(review): sampleHDRI's direction
+// overload applies hdri.scale; confirm this uv overload returns raw texels.
+VISRTX_DEVICE float envPdf(const FrameGPUData &fd, const vec3 &rayDir)
+{
+  float pdf = 0.0f;
+  for (size_t i = 0; i < fd.world.numHdriLightInstances; i++) {
+    const auto &hdriLight = fd.world.hdriLightInstances[i];
+    const auto &light = fd.registry.lights[hdriLight.lightIndex];
+    if (!light.hdri.visible)
+      continue;
+    const vec3 localRayDir = glm::transpose(mat3(hdriLight.xfm)) * rayDir;
+    const vec3 d = light.hdri.xfm * localRayDir;
+    const vec2 thetaPhi = sphericalCoordsFromDirection(d);
+    const vec2 uv = vec2(thetaPhi.y / kTwoPi, thetaPhi.x / kPi);
+    pdf += dot(sampleHDRI(light, uv), vec3(0.2126f, 0.7152f, 0.0722f))
+        * light.hdri.pdfWeight;
+  }
+  return pdf;
+}
+
 VISRTX_DEVICE uint32_t computeGeometryPrimId(const SurfaceHit &hit)
 {
   if (!hit.foundHit)
diff --git a/devices/rtx/device/gpu/sbt.h b/devices/rtx/device/gpu/sbt.h
index 1f0c92645..57802a94c 100644
--- a/devices/rtx/device/gpu/sbt.h
+++ b/devices/rtx/device/gpu/sbt.h
@@ -47,6 +47,7 @@ enum class SurfaceShaderEntryPoints
   EvaluateTransmission,
   EvaluateNormal,
   Shade,
+  EvaluatePdf,
   Count
 };
 
diff --git a/devices/rtx/device/gpu/shadingState.h b/devices/rtx/device/gpu/shadingState.h
index d299f6ccc..03f65be22 100644
--- a/devices/rtx/device/gpu/shadingState.h
+++ b/devices/rtx/device/gpu/shadingState.h
@@ -61,6 +61,12 @@ struct NextRay
 {
   vec3 direction;
   vec3 contributionWeight;
+  // Solid-angle pdf of `direction`, used for balance-heuristic environment MIS.
+  // +inf marks a lobe whose env contribution the escape estimator owns outright
+  // (primary ray, transmission); 0 marks a dead ray. Must equal the value the
+  // material's EvaluatePdf callable returns for the same direction on the
+  // reflection side, so NEE-side and escape-side MIS weights partition to 1.
+  float pdf{INFINITY};
   uint32_t flags{NEXT_RAY_NONE};
 };
 
diff --git a/devices/rtx/device/material/shaders/MDLShader_ptx.cu b/devices/rtx/device/material/shaders/MDLShader_ptx.cu
index 0380e4bbb..376187a33 100644
--- a/devices/rtx/device/material/shaders/MDLShader_ptx.cu
+++ b/devices/rtx/device/material/shaders/MDLShader_ptx.cu
@@ -220,10 +220,23 @@ NextRay __direct_callable__nextRay(
       ? NEXT_RAY_CONTINUES_THROUGH_SURFACE
       : NEXT_RAY_NONE;
 
+  // Env-MIS solid-angle pdf for the sampled direction, matching
+  // __direct_callable__evaluatePdf so the balance heuristic partitions to 1.
+  // A specular (delta) lobe can't be evaluated by NEE, and a through-surface
+  // continuation is past the NEE hemisphere gate — both report +inf so the BSDF
+  // escape owns the environment (w_bsdf = 1). Glossy/diffuse reflections report
+  // the finite sampling pdf and are MIS-combined with NEE.
+  const bool isSpecular =
+      (sample_data.event_type & mi::neuraylib::BSDF_EVENT_SPECULAR) != 0;
+  const float pdf =
+      (isSpecular || (flags & NEXT_RAY_CONTINUES_THROUGH_SURFACE))
+      ? INFINITY
+      : sample_data.pdf;
   return NextRay{direction,
       vec3(sample_data.bsdf_over_pdf.x,
           sample_data.bsdf_over_pdf.y,
           sample_data.bsdf_over_pdf.z),
+      pdf,
       flags};
 }
 
@@ -275,3 +288,31 @@ vec3 __direct_callable__evaluateNormal(const MDLShadingState *shadingState)
 {
   return make_vec3(shadingState->state.normal);
 }
+
+// Env-MIS BSDF density at `wi` given outgoing `wo` (both world space): the
+// balance-heuristic light-side weight. mdlBsdf_evaluate fills `eval_data.pdf`
+// with the solid-angle sampling pdf, matching NextRay.pdf in nextRay (MDL's
+// evaluate-pdf and sample-pdf are the same density). A pure specular lobe
+// evaluates to pdf 0 (NEE can't reach a delta) — consistent with the escape
+// owning it via +inf. Mirrors shadeSurface's ior/k1/k2 setup exactly.
+VISRTX_CALLABLE float __direct_callable__evaluatePdf(
+    const MDLShadingState *shadingState, const vec3 *wo, const vec3 *wi)
+{
+  // Zero-initialised query: outgoing direction in k1, incoming in k2.
+  BsdfEvaluateData query = {};
+  query.k1 = make_float3(normalize(*wo));
+  query.k2 = make_float3(normalize(*wi));
+
+  // Unit ior sits on the outer side of the surface; the material side only
+  // sets the .x marker that asks MDL to use the material's own ior.
+  const bool frontFacing = shadingState->isFrontFace;
+  auto &outerIor = frontFacing ? query.ior1 : query.ior2;
+  auto &materialIor = frontFacing ? query.ior2 : query.ior1;
+  outerIor = make_float3(1.0f, 1.0f, 1.0f);
+  materialIor.x = MI_NEURAYLIB_BSDF_USE_MATERIAL_IOR;
+  mdlBsdf_evaluate(&query,
+      &shadingState->state,
+      &shadingState->resData,
+      shadingState->argBlock);
+  return query.pdf;
+}
diff --git a/devices/rtx/device/material/shaders/MatteShader_ptx.cu b/devices/rtx/device/material/shaders/MatteShader_ptx.cu
index 6e9fee9ee..bd2876a0d 100644
--- a/devices/rtx/device/material/shaders/MatteShader_ptx.cu
+++ b/devices/rtx/device/material/shaders/MatteShader_ptx.cu
@@ -59,7 +59,7 @@ VISRTX_CALLABLE void __direct_callable__init(MatteShadingState *shadingState,
 VISRTX_CALLABLE NextRay __direct_callable__nextRay(
     const MatteShadingState *, const Ray *, RandState *)
 {
-  return NextRay{vec3(0.0f), vec3(0.0f)};
+  return NextRay{vec3(0.0f), vec3(0.0f), 0.0f};
 }
 
 VISRTX_CALLABLE
@@ -105,3 +105,12 @@ VISRTX_CALLABLE vec3 __direct_callable__shadeSurface(
   return shadingState->baseColor * kInvPi * NdotL * lightSample->radiance
       / lightSample->pdf;
 }
+
+// Env-MIS pdf callable for Matte. Its nextRay always returns a dead ray, so no
+// BSDF sample can ever reach the environment: report 0 for every (wo, wi) so
+// the NEE weight pLight / (pLight + 0) leaves NEE owning the env (w_nee = 1).
+VISRTX_CALLABLE float __direct_callable__evaluatePdf(
+    const MatteShadingState *, const vec3 * /*wo*/, const vec3 * /*wi*/)
+{
+  return 0.0f;
+}
diff --git a/devices/rtx/device/material/shaders/PhysicallyBasedShader_ptx.cu b/devices/rtx/device/material/shaders/PhysicallyBasedShader_ptx.cu
index 8ca80df8a..8176a8d91 100644
--- a/devices/rtx/device/material/shaders/PhysicallyBasedShader_ptx.cu
+++ b/devices/rtx/device/material/shaders/PhysicallyBasedShader_ptx.cu
@@ -482,10 +482,102 @@ VISRTX_CALLABLE vec3 __direct_callable__shadeSurface(
   return base * NdotL * lightSample->radiance / lightSample->pdf;
 }
 
+//-----------------------------------------------------------------------------
+// BSDF sampling pdf (solid angle) for environment MIS: the closed-form density
+// with which __direct_callable__nextRay picks reflection-side direction L for
+// view direction V (world space, assumed unit length, pointing off-surface):
+//   (1 - ccProb) * (pDiff * cos/pi + pSpec * P(reflect|H) * VNDF(H))
+//   + ccProb * VNDF_clearcoat(H)
+// nextRay fills NextRay.pdf from this same function, so the NEE-side and
+// escape-side MIS weights share one density and partition to 1.
+// Returns 0 unless V and L are both above state->normal's horizon, so
+// through-surface directions are never MIS-combined. NOTE(review): that
+// early-out also zeroes clearcoat directions above Nc but below N — confirm.
+//-----------------------------------------------------------------------------
+
+VISRTX_DEVICE float pbrBsdfPdf(
+    const PhysicallyBasedShadingState *state, const vec3 &V, const vec3 &L)
+{
+  const vec3 N = state->normal;
+  const float NdotV = dot(N, V);
+  const float NdotL = dot(N, L);
+  if (!(NdotV > 0.0f) || !(NdotL > 0.0f))
+    return 0.0f;
+
+  const vec3 F0 = computeF0(state);
+  const vec3 F90 = computeF90(state);
+  const vec3 Fv = evalFresnelWithIridescence(state, F0, F90, NdotV);
+  const vec3 transmissionFilter = computeTransmissionFilter(state);
+
+  // Diffuse-vs-specular pick weights use Fresnel at NdotV only (no L, no H).
+  const float specSelW = fmaxf(luminance(Fv), 0.0f)
+      + fmaxf(luminance(glm::max(vec3(1.0f) - Fv, vec3(0.0f)) * transmissionFilter),
+          0.0f);
+  const float diffSelW = fmaxf(luminance(glm::max(vec3(1.0f) - Fv, vec3(0.0f))
+                            * state->baseColor * (1.0f - state->metallic)
+                            * (1.0f - state->transmission) * state->occlusion),
+      0.0f);
+  const float baseSel = specSelW + diffSelW;
+
+  float pdf = 0.0f;
+  if (baseSel > 0.0f) {
+    const float pSpec = specSelW / baseSel;
+    const float pDiff = diffSelW / baseSel;
+
+    // Diffuse lobe: cosine-weighted hemisphere density cos(theta)/pi about N.
+    pdf += pDiff * NdotL * kInvPi;
+
+    // Specular reflection: VNDF pdf D*G1/(4*NdotV) times P(reflect | H).
+    const float alpha = fmaxf(pow2(state->roughness), 1e-4f);
+    const float alpha2 = alpha * alpha;
+    const vec3 H = normalize(V + L);
+    const float NdotH = fmaxf(dot(N, H), 0.0f);
+    const float VdotH = fmaxf(dot(V, H), 0.0f);
+    const vec3 Fh = evalFresnelWithIridescence(state, F0, F90, VdotH);
+    const vec3 Ltrans = glm::refract(-V, H, state->eta);
+    const bool tir = luminance(transmissionFilter) > 0.0f
+        && (glm::length(Ltrans) < 1e-6f || dot(Ltrans, N) >= 0.0f);
+    const float reflW = tir ? 1.0f : fmaxf(luminance(Fh), 0.0f);
+    const float transW = tir ? 0.0f
+                             : fmaxf(luminance(glm::max(vec3(1.0f) - Fh, vec3(0.0f))
+                                       * transmissionFilter),
+                                   0.0f);
+    const float reflectGivenSpec =
+        (reflW + transW) > 0.0f ? reflW / (reflW + transW) : 1.0f;
+    const float pdfReflVndf =
+        ggxD(NdotH, alpha2) * smithG1GGX(NdotV, alpha2) / (4.0f * NdotV);
+    pdf += pSpec * reflectGivenSpec * pdfReflVndf;
+  }
+
+  // Clearcoat is a top-level pick with probability ccProb; the base mixture
+  // gets (1 - ccProb). NOTE(review): must equal nextRay's clearcoatPick.
+  const vec3 Nc = state->clearcoatNormal;
+  const float NcDotV = fmaxf(dot(Nc, V), 0.0f);
+  const float FcV = CLEARCOAT_F0 + (1.0f - CLEARCOAT_F0) * pow5(1.0f - NcDotV);
+  const float ccProb = glm::clamp(state->clearcoat * FcV, 0.0f, 1.0f);
+  pdf *= (1.0f - ccProb);
+
+  if (ccProb > 0.0f && NcDotV > 0.0f && dot(Nc, L) > 0.0f) {
+    const vec3 Hc = normalize(V + L);
+    const float NcDotH = fmaxf(dot(Nc, Hc), 0.0f);
+    const float alphaC = fmaxf(pow2(state->clearcoatRoughness), 1e-4f);
+    const float alphaC2 = alphaC * alphaC;
+    pdf += ccProb * ggxD(NcDotH, alphaC2) * smithG1GGX(NcDotV, alphaC2)
+        / (4.0f * NcDotV);
+  }
+
+  return pdf;
+}
+
 //-----------------------------------------------------------------------------
 // Next-ray importance sampling: stochastic alpha, Fresnel-aware lobe pick,
 // GGX VNDF reflection/refraction, plus a clearcoat lobe sampled with
 // probability equal to its view-angle Fresnel weight. Sheen is NEE-only.
+//
+// Lobe selection is V-only at the diffuse/specular split (so the sampling
+// density is the closed form pbrBsdfPdf evaluates); the reflect/transmit split
+// within the specular lobe uses the microfacet Fresnel F(VdotH), which TIR
+// folds entirely into reflection. NextRay.pdf is filled from pbrBsdfPdf.
 //-----------------------------------------------------------------------------
 
 VISRTX_CALLABLE NextRay __direct_callable__nextRay(
@@ -508,14 +600,14 @@ VISRTX_CALLABLE NextRay __direct_callable__nextRay(
     const mat3 toWorldC = computeOrthonormalBasis(Nc);
     const vec3 VlocalC = glm::transpose(toWorldC) * V;
     if (VlocalC.z <= 0.0f)
-      return NextRay{Nc, vec3(0.0f)};
+      return NextRay{Nc, vec3(0.0f), 0.0f};
     const float alphaC = fmaxf(pow2(state->clearcoatRoughness), 1e-4f);
     const float alphaC2 = alphaC * alphaC;
     const vec3 HlocalC =
         sampleGGXVNDF(VlocalC, alphaC, pcg_uniform(rs), pcg_uniform(rs));
     const vec3 LlocalC = glm::reflect(-VlocalC, HlocalC);
     if (LlocalC.z <= 0.0f)
-      return NextRay{Nc, vec3(0.0f)};
+      return NextRay{Nc, vec3(0.0f), 0.0f};
     const float VdotHc = fmaxf(dot(VlocalC, HlocalC), 0.0f);
     const float Fc = CLEARCOAT_F0 + (1.0f - CLEARCOAT_F0) * pow5(1.0f - VdotHc);
     const float G1c = smithG1GGX(VlocalC.z, alphaC2);
@@ -524,7 +616,8 @@ VISRTX_CALLABLE NextRay __direct_callable__nextRay(
     // cancels against the matching factor in clearcoatPick.
     const vec3 weight = vec3(state->clearcoat * Fc * G2c / fmaxf(G1c, 1e-8f))
         / fmaxf(clearcoatPick, 1e-8f);
-    return NextRay{normalize(toWorldC * LlocalC), weight};
+    const vec3 Lworld = normalize(toWorldC * LlocalC);
+    return NextRay{Lworld, weight, pbrBsdfPdf(state, V, Lworld)};
   }
 
   // Exit-side clearcoat attenuation, applied to every base-path return.
@@ -543,89 +636,104 @@ VISRTX_CALLABLE NextRay __direct_callable__nextRay(
   const mat3 toLocal = glm::transpose(toWorld);
   const vec3 Vlocal = toLocal * V;
   if (Vlocal.z <= 0.0f)
-    return NextRay{N, vec3(0.0f)};
+    return NextRay{N, vec3(0.0f), 0.0f};
 
   const float alpha = fmaxf(pow2(state->roughness), 1e-4f);
   const float alpha2 = alpha * alpha;
-  const vec3 Hlocal =
-      sampleGGXVNDF(Vlocal, alpha, pcg_uniform(rs), pcg_uniform(rs));
-
   const float NdotV = Vlocal.z;
-  const float VdotH = fmaxf(dot(Vlocal, Hlocal), 0.0f);
 
-  // Fresnel at the sampled microfacet (specular/transmission split) and at
-  // NdotV (diffuse weight) — matches the convention in shadeSurface.
+  // V-only base split (diffuse vs specular), deterministic in V so the sampling
+  // density is the closed form pbrBsdfPdf evaluates. Fresnel at NdotV (Fv) sets
+  // the split; the per-lobe throughput below uses the microfacet Fresnel.
   const vec3 F0 = computeF0(state);
   const vec3 F90 = computeF90(state);
-  const vec3 F = evalFresnelWithIridescence(state, F0, F90, VdotH);
-  const vec3 Fdiff = evalFresnelWithIridescence(state, F0, F90, NdotV);
+  const vec3 Fv = evalFresnelWithIridescence(state, F0, F90, NdotV);
+  const vec3 transmissionFilter = computeTransmissionFilter(state);
+  const bool hasTransmission = luminance(transmissionFilter) > 0.0f;
+
+  const float specSelW = fmaxf(luminance(Fv), 0.0f)
+      + fmaxf(luminance(glm::max(vec3(1.0f) - Fv, vec3(0.0f)) * transmissionFilter),
+          0.0f);
+  // Lambertian throughput collapses to this energy when sampled cosine-weighted
+  // (cos / pdf cancels with 1/pi); mirrors shadeSurface's diffuseBRDF factors.
+  const vec3 diffuseEnergy = glm::max(vec3(1.0f) - Fv, vec3(0.0f))
+      * state->baseColor * (1.0f - state->metallic)
+      * (1.0f - state->transmission) * state->occlusion;
+  const float diffSelW = fmaxf(luminance(diffuseEnergy), 0.0f);
+  const float baseSel = specSelW + diffSelW;
+  if (baseSel <= 0.0f)
+    return NextRay{N, vec3(0.0f), 0.0f};
+  const float pSpec = specSelW / baseSel;
+  const float pDiff = diffSelW / baseSel;
+
+  // Diffuse lobe: sample around the shading normal so pdf=cos/pi matches the
+  // BRDF's NdotL (same axis as shadeSurface's diffuse term).
+  if (pcg_uniform(rs) >= pSpec) {
+    const vec3 wi = sampleHemisphere(*rs, N);
+    const vec3 weight =
+        diffuseEnergy * clearcoatExitAttn(wi) / fmaxf(pDiff, 1e-8f);
+    return NextRay{wi, weight, pbrBsdfPdf(state, V, wi)};
+  }
+
+  // Specular lobe: VNDF-sample the microfacet, then split reflect/transmit by
+  // the microfacet Fresnel F(VdotH). TIR (no valid refraction at this H) folds
+  // all energy into reflection, so reflect/transmit stays a clean binary split.
+  const vec3 Hlocal =
+      sampleGGXVNDF(Vlocal, alpha, pcg_uniform(rs), pcg_uniform(rs));
+  const float VdotH = fmaxf(dot(Vlocal, Hlocal), 0.0f);
+  const vec3 Fh = evalFresnelWithIridescence(state, F0, F90, VdotH);
 
   const vec3 Lrefl = glm::reflect(-Vlocal, Hlocal);
   const vec3 Ltrans = glm::refract(-Vlocal, Hlocal, state->eta);
-  const vec3 transmissionFilter = computeTransmissionFilter(state);
-  const bool hasTransmission = luminance(transmissionFilter) > 0.0f;
   const bool totalInternalReflection =
       hasTransmission && (glm::length(Ltrans) < 1e-6f || Ltrans.z >= 0.0f);
 
-  vec3 reflectEnergy = totalInternalReflection ? vec3(1.0f) : F;
-  vec3 transmitEnergy = totalInternalReflection
-      ? vec3(0.0f)
-      : glm::max(vec3(1.0f) - F, vec3(0.0f)) * transmissionFilter;
-
-  // Diffuse importance: the Lambertian throughput collapses to
-  //   (1-F) * baseColor * (1-metallic) * (1-transmission) * occlusion
-  // when sampled cosine-weighted (cos / pdf cancels with 1/pi). Mirror the
-  // factors used by shadeSurface's diffuseBRDF so the lobe split tracks the
-  // BRDF being estimated. TIR has no diffuse share (all energy is reflected).
-  const vec3 diffuseEnergy = totalInternalReflection
+  const vec3 reflectEnergy = totalInternalReflection ? vec3(1.0f) : Fh;
+  const vec3 transmitEnergy = totalInternalReflection
       ? vec3(0.0f)
-      : glm::max(vec3(1.0f) - Fdiff, vec3(0.0f)) * state->baseColor
-          * (1.0f - state->metallic) * (1.0f - state->transmission)
-          * state->occlusion;
-
-  const float reflectStrength =
-      fmaxf(luminance(glm::max(reflectEnergy, vec3(0.0f))), 0.0f);
-  const float transmitStrength =
-      fmaxf(luminance(glm::max(transmitEnergy, vec3(0.0f))), 0.0f);
-  const float diffuseStrength =
-      fmaxf(luminance(glm::max(diffuseEnergy, vec3(0.0f))), 0.0f);
-  const float combinedStrength =
-      reflectStrength + transmitStrength + diffuseStrength;
-  if (combinedStrength <= 0.0f)
-    return NextRay{N, vec3(0.0f)};
-
-  const float reflectProb = reflectStrength / combinedStrength;
-  const float transmitProb = transmitStrength / combinedStrength;
-  const float diffuseProb = diffuseStrength / combinedStrength;
-
-  const float u = pcg_uniform(rs);
-  if (u < reflectProb) {
+      : glm::max(vec3(1.0f) - Fh, vec3(0.0f)) * transmissionFilter;
+  const float reflW = fmaxf(luminance(reflectEnergy), 0.0f);
+  const float transW = fmaxf(luminance(transmitEnergy), 0.0f);
+  const float specTotal = reflW + transW;
+  const float reflectGivenSpec = specTotal > 0.0f ? reflW / specTotal : 1.0f;
+
+  if (pcg_uniform(rs) < reflectGivenSpec) {
     if (Lrefl.z <= 0.0f)
-      return NextRay{N, vec3(0.0f)};
+      return NextRay{N, vec3(0.0f), 0.0f};
     const float NdotL = Lrefl.z;
     const float G1 = smithG1GGX(NdotV, alpha2);
     const float G2 = smithG2GGX(NdotV, NdotL, alpha2);
     const vec3 Lworld = normalize(toWorld * Lrefl);
+    // VNDF: BRDF·cos/pdf = energy·G2/G1. Divide by the full reflect selection
+    // prob pSpec·reflectGivenSpec; the (1-clearcoatPick) factor cancels against
+    // the clearcoat entry attenuation, leaving only the exit attenuation.
     const vec3 weight = reflectEnergy * (G2 / fmaxf(G1, 1e-8f))
-        * clearcoatExitAttn(Lworld) / fmaxf(reflectProb, 1e-8f);
-    return NextRay{Lworld, weight};
+        * clearcoatExitAttn(Lworld) / fmaxf(pSpec * reflectGivenSpec, 1e-8f);
+    return NextRay{Lworld, weight, pbrBsdfPdf(state, V, Lworld)};
   }
 
-  if (u < reflectProb + transmitProb) {
-    const float NdotL = -Ltrans.z; // L points through the surface.
-    const float G1 = smithG1GGX(NdotV, alpha2);
-    const float G2 = smithG2GGX(NdotV, NdotL, alpha2);
-    const vec3 Lworld = normalize(toWorld * Ltrans);
-    const vec3 weight = transmitEnergy * (G2 / fmaxf(G1, 1e-8f))
-        * clearcoatExitAttn(Lworld) / fmaxf(transmitProb, 1e-8f);
-    return NextRay{Lworld, weight, NEXT_RAY_CONTINUES_THROUGH_SURFACE};
-  }
+  // Transmission lobe (through the surface).
+  const float NdotLt = -Ltrans.z;
+  const float G1t = smithG1GGX(NdotV, alpha2);
+  const float G2t = smithG2GGX(NdotV, NdotLt, alpha2);
+  const vec3 Ltworld = normalize(toWorld * Ltrans);
+  const vec3 weightT = transmitEnergy * (G2t / fmaxf(G1t, 1e-8f))
+      * clearcoatExitAttn(Ltworld) / fmaxf(pSpec * (1.0f - reflectGivenSpec), 1e-8f);
+  // Through-surface escape: NEE's shadeSurface early-outs at NdotL<=0, so the
+  // env behind glass can only be reached by this continuation. Report +inf so
+  // env MIS gives it w_bsdf=1 (the escape owns it), matching the pre-MIS flag.
+  return NextRay{
+      Ltworld, weightT, INFINITY, NEXT_RAY_CONTINUES_THROUGH_SURFACE};
+}
 
-  // Diffuse: sample around the shading normal so pdf=cos/pi matches the BRDF's
-  // NdotL (same axis as shadeSurface's diffuse term). Cos and pdf cancel,
-  // leaving only the energy term and the lobe-pick divisor.
-  const vec3 wi = sampleHemisphere(*rs, N);
-  const vec3 weight =
-      diffuseEnergy * clearcoatExitAttn(wi) / fmaxf(diffuseProb, 1e-8f);
-  return NextRay{wi, weight};
+//-----------------------------------------------------------------------------
+// EvaluatePdf callable: BSDF sampling pdf at (wo, wi) for environment MIS.
+// Forwards to pbrBsdfPdf with wo as the view direction V and wi as the
+// candidate light direction L — the same function nextRay uses for NextRay.pdf.
+//-----------------------------------------------------------------------------
+
+VISRTX_CALLABLE float __direct_callable__evaluatePdf(
+    const PhysicallyBasedShadingState *state, const vec3 *wo, const vec3 *wi)
+{
+  return pbrBsdfPdf(state, *wo, *wi);
 }
diff --git a/devices/rtx/device/renderer/Interactive_ptx.cu b/devices/rtx/device/renderer/Interactive_ptx.cu
index 52d9ff8c5..f8cdf5045 100644
--- a/devices/rtx/device/renderer/Interactive_ptx.cu
+++ b/devices/rtx/device/renderer/Interactive_ptx.cu
@@ -117,9 +117,21 @@ struct InteractiveShadingPolicy
               glm::lessThanEqual(attenuation, vec3(MIN_CONTRIBUTION_EPSILON))))
         continue;
 
-      const vec3 thisLightContrib =
+      vec3 thisLightContrib =
           materialShadeSurface(shadingState, hit, lightSample, -ray.dir);
 
+      // Environment MIS (balance heuristic): only a *visible* HDRI can also be
+      // reached by the bounce's escape, so its NEE sample is weighted instead
+      // of summed with it. All lights are looped (no pick): pLight = envPdf, no
+      // 1/numLights. Invisible HDRIs and other lights keep wNee = 1.
+      const auto &envCandidate = frameData.registry.lights[light.lightIndex];
+      if (envCandidate.type == LightType::HDRI && envCandidate.hdri.visible) {
+        const float pLight = envPdf(frameData, lightSample.dir);
+        const float pSum =
+            pLight + materialEvalPdf(shadingState, -ray.dir, lightSample.dir);
+        thisLightContrib *= pSum > 0.0f ? pLight / pSum : 1.0f;
+      }
+
       contrib += thisLightContrib * attenuation;
     }
 
@@ -151,8 +163,17 @@ struct InteractiveShadingPolicy
         contrib += color * nextRay.contributionWeight;
       } else {
         vec3 hdri;
-        if (getBackgroundLight(frameData, bounceRay.dir, hdri))
-          contrib += hdri * nextRay.contributionWeight;
+        if (getBackgroundLight(frameData, bounceRay.dir, hdri)) {
+          // Env MIS escape side: weight the BSDF-sampled escape by the same
+          // balance heuristic as the NEE loop (pLight = envPdf, no 1/numLights).
+          // +inf pdf (delta / through-surface) => wBsdf = 1; a zero pdf gets
+          // wBsdf = 0 explicitly, since 0 / (0 + 0) would otherwise be NaN.
+          const float pLight = envPdf(frameData, bounceRay.dir);
+          const float wBsdf = isinf(nextRay.pdf) ? 1.0f
+              : nextRay.pdf > 0.0f ? nextRay.pdf / (nextRay.pdf + pLight)
+                                   : 0.0f;
+          contrib += wBsdf * hdri * nextRay.contributionWeight;
+        }
       }
     }
 
diff --git a/devices/rtx/device/renderer/Quality_ptx.cu b/devices/rtx/device/renderer/Quality_ptx.cu
index f14a38491..eafa42dfb 100644
--- a/devices/rtx/device/renderer/Quality_ptx.cu
+++ b/devices/rtx/device/renderer/Quality_ptx.cu
@@ -126,7 +126,16 @@ VISRTX_DEVICE bool shouldTerminatePath(ScreenSample &ss,
   return false;
 }
 
-VISRTX_DEVICE LightSample sampleLights(ScreenSample &ss,
+// Result of sampleLights: the NEE light sample plus a flag telling the caller
+// whether to apply an environment-MIS weight. Only the HDRI environment can
+// also be reached by a BSDF ray that escapes the scene, so only it needs one.
+struct SurfaceLightSample
+{
+  LightSample ls; // sampled light; pdf already includes the light-pick pdf
+  bool isEnv; // true when sampleLights flags the pick for env MIS
+};
+
+VISRTX_DEVICE SurfaceLightSample sampleLights(ScreenSample &ss,
     const FrameGPUData &frameData,
     const vec3 &origin,
     const vec3 &normal)
@@ -154,18 +163,21 @@ VISRTX_DEVICE LightSample sampleLights(ScreenSample &ss,
     // Fold the hemisphere-sample pdf cos(theta)/pi with the uniform light pick.
     const vec3 dir = sampleHemisphere(ss.rs, normal);
     const float cosNs = fmaxf(0.f, dot(dir, normal));
-    return LightSample{
-        rendererParams.ambientColor * rendererParams.ambientIntensity,
-        dir,
-        std::numeric_limits<float>::max(),
-        lightPickPdf * cosNs * kInvPi,
-    };
+    return {LightSample{
+                rendererParams.ambientColor * rendererParams.ambientIntensity,
+                dir,
+                std::numeric_limits<float>::max(),
+                lightPickPdf * cosNs * kInvPi,
+            },
+        false};
   } else {
     const auto &lightInstance = world.lightInstances[selectedIdx];
     auto ls =
         sampleLight(ss, origin, lightInstance.lightIndex, lightInstance.xfm);
     ls.pdf *= lightPickPdf;
-    return ls;
+    // Env MIS only applies to HDRIs the BSDF escape can actually see.
+    const auto &picked = frameData.registry.lights[lightInstance.lightIndex];
+    return {ls, picked.type == LightType::HDRI && picked.hdri.visible};
   }
 }
 
@@ -319,6 +331,20 @@ VISRTX_GLOBAL void __raygen__()
 
     auto sampleContribution = vec3(1.0f);
 
+    // The environment (visible HDRI lights) is sampled both by NEE at every
+    // scatter vertex (HDRIs are in the light list) and by a BSDF ray that
+    // escapes to it. Balance-heuristic MIS combines the two: `bsdfPdf` carries
+    // the solid-angle pdf of the bounce that produced the current ray, so the
+    // miss can weight the escape estimator by bsdfPdf/(bsdfPdf + pLight). The
+    // primary ray is a delta event (the directly visible backdrop), so it
+    // starts at +inf => w_bsdf = 1.
+    float bsdfPdf = INFINITY;
+
+    // Number of NEE light strata (instances + ambient), matching sampleLights'
+    // uniform pick. Folded into the env light density on both MIS sides.
+    const float numLights = float(frameData.world.numLightInstances
+        + (frameData.renderer.ambientIntensity > 0.0f));
+
     // Coverage pass-throughs are not light-transport events, so they track a
     // separate, generous budget instead of spending bounceDepth — a deep stack
     // of alpha cutouts must not starve the indirect-bounce budget.
@@ -404,6 +430,10 @@ VISRTX_GLOBAL void __raygen__()
 
         const vec3 scatterDir = randomDir(ss.rs);
         ray = Ray{scatterPos + scatterDir * VOLUME_SCATTER_EPSILON, scatterDir};
+        // The volume NEE above already sampled the environment at this scatter
+        // point, so the continuation ray must not re-deposit it on a miss
+        // (bsdfPdf = 0 => w_bsdf = 0). Env MIS for volumes is left as-is.
+        bsdfPdf = 0.0f;
         ++bounceDepth;
         continue;
       }
@@ -439,8 +469,9 @@ VISRTX_GLOBAL void __raygen__()
         // bump-mapped surfaces.
         const vec3 shadowOrigin =
             shadingHitpoint(surfaceHit) + surfaceHit.Ng * surfaceHit.epsilon;
-        LightSample lightSample =
+        const SurfaceLightSample lightPick =
             sampleLights(ss, frameData, shadowOrigin, surfaceHit.Ns);
+        const LightSample &lightSample = lightPick.ls;
         if (lightSample.pdf >= ATTENUATION_EPSILON && lightSample.dist > 0.0f) {
           // Gate on the shading normal so the terminator follows the smooth
           // surface; gating on Ng would carve the per-triangle facet shape
@@ -449,8 +480,23 @@ VISRTX_GLOBAL void __raygen__()
           if (lightDotNs > 0.0f) {
             const vec3 directLight = materialShadeSurface(
                 shadingState, surfaceHit, lightSample, -ray.dir);
+            // Env MIS: only the HDRI environment can also be reached by the
+            // BSDF escape, so only it gets a balance-heuristic weight. The
+            // light density uses envPdf on BOTH sides (here and at the miss),
+            // not lightSample.pdf, so wNee and wBsdf use identical pdf
+            // functions and partition to 1. envPdf only counts visible HDRIs:
+            // pLight == 0 means the escape can never deposit this light, so
+            // NEE keeps full weight. Other light types: wNee = 1 (unchanged).
+            float wNee = 1.0f;
+            if (lightPick.isEnv) {
+              const float pBsdf =
+                  materialEvalPdf(shadingState, -ray.dir, lightSample.dir);
+              const float pLight = envPdf(frameData, lightSample.dir) / numLights;
+              if (pLight > 0.0f)
+                wNee = pLight / (pLight + pBsdf);
+            }
             const vec3 contribUpper =
-                sampleContribution * opacity * directLight;
+                wNee * sampleContribution * opacity * directLight;
             const float maxContrib = glm::max(
                 contribUpper.x, glm::max(contribUpper.y, contribUpper.z));
             constexpr float SHADOW_SKIP_EPSILON = 1.0e-5f;
@@ -481,6 +527,12 @@ VISRTX_GLOBAL void __raygen__()
         auto nextRay = materialNextRay(shadingState, ray, ss.rs);
         sampleContribution *= nextRay.contributionWeight;
 
+        // Carry the bounce's solid-angle pdf for the env-MIS weight at a miss.
+        // Reflection/diffuse lobes report a finite pdf (MIS-combined with NEE);
+        // a transmission lobe reports +inf (NEE can't reach the env behind the
+        // surface, so the escape owns it => w_bsdf = 1).
+        bsdfPdf = nextRay.pdf;
+
         if (!continuesThroughSurface(nextRay))
           accumulateValue(sample.opacity, 1.0f, sample.opacity);
 
@@ -494,9 +546,16 @@ VISRTX_GLOBAL void __raygen__()
       }
 
       if (!surfaceHit.foundHit && !volumeSample.didScatter) {
-        // Sample the environment as a final bounce
+        // Deposit the environment, MIS-weighted against NEE. pLight mirrors the
+        // NEE env density: the HDRI importance pdf (envPdf) folded with the same
+        // uniform 1/numLights light pick sampleLights applied. bsdfPdf == +inf
+        // (delta / transmission / primary ray) => w_bsdf = 1.
         if (vec3 hdri; getBackgroundLight(frameData, ray.dir, hdri)) {
-          sample.color += sampleContribution * hdri;
+          const float pLight =
+              numLights > 0.0f ? envPdf(frameData, ray.dir) / numLights : 0.0f;
+          const float wBsdf = isinf(bsdfPdf) ? 1.0f // zero pdf => 0, not 0/0
+              : bsdfPdf > 0.0f ? bsdfPdf / (bsdfPdf + pLight) : 0.0f;
+          sample.color += wBsdf * sampleContribution * hdri;
           accumulateValue(sample.opacity, 1.f, sample.opacity);
         }
 
diff --git a/devices/rtx/device/renderer/Renderer.cpp b/devices/rtx/device/renderer/Renderer.cpp
index 23b160d44..76d93163c 100644
--- a/devices/rtx/device/renderer/Renderer.cpp
+++ b/devices/rtx/device/renderer/Renderer.cpp
@@ -541,6 +541,11 @@ void Renderer::initOptixPipeline()
     callableDescs[SBT_CALLABLE_MATTE_OFFSET
         + int(SurfaceShaderEntryPoints::Shade)] = callableDesc;
 
+    callableDesc.callables.entryFunctionNameDC =
+        "__direct_callable__evaluatePdf";
+    callableDescs[SBT_CALLABLE_MATTE_OFFSET
+        + int(SurfaceShaderEntryPoints::EvaluatePdf)] = callableDesc;
+
     // Physically Based
     callableDesc.callables.moduleDC =
         deviceState()->materialShaders.physicallyBased;
@@ -583,6 +588,11 @@ void Renderer::initOptixPipeline()
     callableDescs[SBT_CALLABLE_PHYSICALLYBASED_OFFSET
         + int(SurfaceShaderEntryPoints::Shade)] = callableDesc;
 
+    callableDesc.callables.entryFunctionNameDC =
+        "__direct_callable__evaluatePdf";
+    callableDescs[SBT_CALLABLE_PHYSICALLYBASED_OFFSET
+        + int(SurfaceShaderEntryPoints::EvaluatePdf)] = callableDesc;
+
     // Spatial Field Samplers
     OptixProgramGroupDesc samplerDesc = {};
     samplerDesc.kind = OPTIX_PROGRAM_GROUP_KIND_CALLABLES;
@@ -998,6 +1008,10 @@ void Renderer::initOptixPipeline()
         callableDesc.callables.entryFunctionNameDC =
             "__direct_callable__shadeSurface";
         callableDescs.push_back(callableDesc);
+
+        callableDesc.callables.entryFunctionNameDC =
+            "__direct_callable__evaluatePdf";
+        callableDescs.push_back(callableDesc);
       }
 
       m_lastMDLMaterialLibraryUpdateCheck =