diff --git a/.github/buildomat/jobs/deploy.sh b/.github/buildomat/jobs/deploy.sh index 7a3ac17b054..5ae19512cc4 100755 --- a/.github/buildomat/jobs/deploy.sh +++ b/.github/buildomat/jobs/deploy.sh @@ -134,19 +134,6 @@ z_swadm () { pfexec zlogin oxz_switch /opt/oxide/dendrite/bin/swadm $@ } -# only set this if you want to override the version of opte/xde installed by the -# install_opte.sh script -OPTE_COMMIT="" -if [[ "x$OPTE_COMMIT" != "x" ]]; then - curl -sSfOL https://buildomat.eng.oxide.computer/public/file/oxidecomputer/opte/module/$OPTE_COMMIT/xde - pfexec rem_drv xde || true - pfexec mv xde /kernel/drv/amd64/xde - pfexec add_drv xde || true - curl -sSfOL https://buildomat.eng.oxide.computer/public/file/oxidecomputer/opte/release/$OPTE_COMMIT/opteadm - chmod +x opteadm - cp opteadm /tmp/opteadm - pfexec mv opteadm /opt/oxide/opte/bin/opteadm -fi # # XXX work around 14537 (UFS should not allow directories to be unlinked) which @@ -197,6 +184,24 @@ ptime -m tar xvzf /input/package/work/package.tar.gz # shellcheck source=/dev/null source .github/buildomat/ci-env.sh +# Source the OPTE override (if any) from the canonical location and apply it. +# +# When set, download the xde driver and opteadm directly from buildomat and +# swap them in. The deploy target is a ramdisk image without pkg(5), so we +# use rem_drv/add_drv instead of the p5p approach used by install_opte.sh +# and releng. +source tools/opte_version_override +if [[ "x$OPTE_COMMIT" != "x" ]]; then + curl -sSfOL "https://buildomat.eng.oxide.computer/public/file/oxidecomputer/opte/module/$OPTE_COMMIT/xde" + pfexec rem_drv xde || true + pfexec mv xde /kernel/drv/amd64/xde + pfexec add_drv xde || true + curl -sSfOL "https://buildomat.eng.oxide.computer/public/file/oxidecomputer/opte/release/$OPTE_COMMIT/opteadm" + chmod +x opteadm + cp opteadm /tmp/opteadm + pfexec mv opteadm /opt/oxide/opte/bin/opteadm +fi + # Ask buildomat for the range of extra addresses that we're allowed to use, and # break them up into the ranges we need. diff --git a/.github/buildomat/jobs/package.sh b/.github/buildomat/jobs/package.sh index b43b91e9ec4..78df41dc5f6 100755 --- a/.github/buildomat/jobs/package.sh +++ b/.github/buildomat/jobs/package.sh @@ -60,5 +60,7 @@ files=( target/release/xtask target/debug/bootstrap tests/* + tools/opte_version + tools/opte_version_override ) ptime -m tar cvzf $WORK/package.tar.gz "${files[@]}" "${packages[@]}" diff --git a/.github/workflows/check-opte-ver.yml b/.github/workflows/check-opte-ver.yml index e516eeacbe6..65a3b23c121 100644 --- a/.github/workflows/check-opte-ver.yml +++ b/.github/workflows/check-opte-ver.yml @@ -1,10 +1,7 @@ name: check-opte-ver on: pull_request: - paths: - - '.github/workflows/check-opte-ver.yml' - - 'Cargo.toml' - - 'tools/opte_version' + branches: [main] jobs: check-opte-ver: runs-on: ubuntu-22.04 @@ -18,3 +15,22 @@ jobs: run: cargo install toml-cli@0.2.3 - name: Check OPTE version and rev match run: ./tools/ci_check_opte_ver.sh + + # Runs on every PR regardless of paths changed, since the override + # file could have been set in an earlier commit and slip through on + # an unrelated PR otherwise. 
+ check-opte-override: + if: github.base_ref == 'main' + runs-on: ubuntu-22.04 + steps: + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + with: + ref: ${{ github.event.pull_request.head.sha }} # see omicron#4461 + - name: Reject OPTE override on main + run: | + source tools/opte_version_override + if [[ "x$OPTE_COMMIT" != "x" ]]; then + echo "::error::OPTE_COMMIT is set in tools/opte_version_override." + echo "::error::The OPTE override must be cleared before merging to main." + exit 1 + fi diff --git a/Cargo.lock b/Cargo.lock index 8b7a39695cf..5f2f24a9239 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -275,9 +275,9 @@ dependencies = [ [[package]] name = "assert_cmd" -version = "2.1.2" +version = "2.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c5bcfa8749ac45dd12cb11055aeeb6b27a3895560d60d71e3c23bf979e60514" +checksum = "39bae1d3fa576f7c6519514180a72559268dd7d1fe104070956cb687bc6673bd" dependencies = [ "anstyle", "bstr", @@ -1239,9 +1239,9 @@ dependencies = [ [[package]] name = "cfg-expr" -version = "0.20.6" +version = "0.20.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "78cef5b5a1a6827c7322ae2a636368a573006b27cfa76c7ebd53e834daeaab6a" +checksum = "3c6b04e07d8080154ed4ac03546d9a2b303cc2fe1901ba0b35b301516e289368" dependencies = [ "smallvec 1.15.1", "target-lexicon", @@ -1741,10 +1741,11 @@ dependencies = [ [[package]] name = "common" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/dendrite?rev=1ddaa5d6b101fbaa2c29eca847111cbef1a272ad#1ddaa5d6b101fbaa2c29eca847111cbef1a272ad" +source = "git+https://github.com/oxidecomputer/dendrite?rev=cc8e02a0800034c431c8cf96b889ea638da3d194#cc8e02a0800034c431c8cf96b889ea638da3d194" dependencies = [ "anyhow", "chrono", + "oximeter 0.1.0 (git+https://github.com/oxidecomputer/omicron?branch=main)", "oxnet", "rand 0.9.2", "schemars 0.8.22", @@ -1762,11 +1763,10 @@ dependencies = [ [[package]] name = "common" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/dendrite?rev=cc8e02a0800034c431c8cf96b889ea638da3d194#cc8e02a0800034c431c8cf96b889ea638da3d194" +source = "git+https://github.com/oxidecomputer/dendrite?rev=e10e4f5a993fe950ab1b478abb5dcbfa7aa92091#e10e4f5a993fe950ab1b478abb5dcbfa7aa92091" dependencies = [ "anyhow", "chrono", - "oximeter 0.1.0 (git+https://github.com/oxidecomputer/omicron?branch=main)", "oxnet", "rand 0.9.2", "schemars 0.8.22", @@ -2525,7 +2525,7 @@ dependencies = [ [[package]] name = "ddm-admin-client" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/maghemite?rev=4d1f20f793da102b29b914569725ebc9fdf746dd#4d1f20f793da102b29b914569725ebc9fdf746dd" +source = "git+https://github.com/oxidecomputer/maghemite?rev=c3c3032f8bdc91d6faf2b36e05b8375a0980765c#c3c3032f8bdc91d6faf2b36e05b8375a0980765c" dependencies = [ "oxnet", "progenitor 0.13.0", @@ -3108,18 +3108,18 @@ dependencies = [ [[package]] name = "dpd-client" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/dendrite?rev=1ddaa5d6b101fbaa2c29eca847111cbef1a272ad#1ddaa5d6b101fbaa2c29eca847111cbef1a272ad" +source = "git+https://github.com/oxidecomputer/dendrite?rev=cc8e02a0800034c431c8cf96b889ea638da3d194#cc8e02a0800034c431c8cf96b889ea638da3d194" dependencies = [ "async-trait", "chrono", - "common 0.1.0 (git+https://github.com/oxidecomputer/dendrite?rev=1ddaa5d6b101fbaa2c29eca847111cbef1a272ad)", + "common 0.1.0 (git+https://github.com/oxidecomputer/dendrite?rev=cc8e02a0800034c431c8cf96b889ea638da3d194)", "crc8", "futures", "http", 
"oxnet", - "progenitor 0.13.0", + "progenitor 0.11.2", "regress", - "reqwest 0.13.2", + "reqwest 0.12.28", "schemars 0.8.22", "serde", "serde_json", @@ -3132,18 +3132,18 @@ dependencies = [ [[package]] name = "dpd-client" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/dendrite?rev=cc8e02a0800034c431c8cf96b889ea638da3d194#cc8e02a0800034c431c8cf96b889ea638da3d194" +source = "git+https://github.com/oxidecomputer/dendrite?rev=e10e4f5a993fe950ab1b478abb5dcbfa7aa92091#e10e4f5a993fe950ab1b478abb5dcbfa7aa92091" dependencies = [ "async-trait", "chrono", - "common 0.1.0 (git+https://github.com/oxidecomputer/dendrite?rev=cc8e02a0800034c431c8cf96b889ea638da3d194)", + "common 0.1.0 (git+https://github.com/oxidecomputer/dendrite?rev=e10e4f5a993fe950ab1b478abb5dcbfa7aa92091)", "crc8", "futures", "http", "oxnet", - "progenitor 0.11.2", + "progenitor 0.13.0", "regress", - "reqwest 0.12.28", + "reqwest 0.13.2", "schemars 0.8.22", "serde", "serde_json", @@ -6200,11 +6200,11 @@ dependencies = [ [[package]] name = "libtest-mimic" -version = "0.8.1" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5297962ef19edda4ce33aaa484386e0a5b3d7f2f4e037cbeee00503ef6b29d33" +checksum = "14e6ba06f0ade6e504aff834d7c34298e5155c6baca353cc6a4aaff2f9fd7f33" dependencies = [ - "anstream 0.6.21", + "anstream 1.0.0", "anstyle", "clap", "escape8259", @@ -6247,18 +6247,18 @@ checksum = "0717cef1bc8b636c6e1c1bbdefc09e6322da8a9321966e8928ef80d20f7f770f" [[package]] name = "linkme" -version = "0.3.35" +version = "0.3.36" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e3283ed2d0e50c06dd8602e0ab319bb048b6325d0bba739db64ed8205179898" +checksum = "e83272d46373fb8decca684579ac3e7c8f3d71d4cc3aa693df8759e260ae41cf" dependencies = [ "linkme-impl", ] [[package]] name = "linkme-impl" -version = "0.3.35" +version = "0.3.36" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5cec0ec4228b4853bb129c84dbf093a27e6c7a20526da046defc334a1b017f7" +checksum = "32d59e20403c7d08fe62b4376edfe5c7fb2ef1e6b1465379686d0f21c8df444b" dependencies = [ "proc-macro2", "quote", @@ -6503,7 +6503,7 @@ dependencies = [ [[package]] name = "mg-admin-client" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/maghemite?rev=4d1f20f793da102b29b914569725ebc9fdf746dd#4d1f20f793da102b29b914569725ebc9fdf746dd" +source = "git+https://github.com/oxidecomputer/maghemite?rev=c3c3032f8bdc91d6faf2b36e05b8375a0980765c#c3c3032f8bdc91d6faf2b36e05b8375a0980765c" dependencies = [ "chrono", "colored 3.1.1", @@ -7648,7 +7648,7 @@ dependencies = [ "chrono", "crucible-agent-client", "dns-service-client", - "dpd-client 0.1.0 (git+https://github.com/oxidecomputer/dendrite?rev=1ddaa5d6b101fbaa2c29eca847111cbef1a272ad)", + "dpd-client 0.1.0 (git+https://github.com/oxidecomputer/dendrite?rev=e10e4f5a993fe950ab1b478abb5dcbfa7aa92091)", "dropshot 0.16.7", "futures", "gateway-messages", @@ -8594,7 +8594,7 @@ dependencies = [ "display-error-chain", "dns-server", "dns-service-client", - "dpd-client 0.1.0 (git+https://github.com/oxidecomputer/dendrite?rev=1ddaa5d6b101fbaa2c29eca847111cbef1a272ad)", + "dpd-client 0.1.0 (git+https://github.com/oxidecomputer/dendrite?rev=e10e4f5a993fe950ab1b478abb5dcbfa7aa92091)", "dropshot 0.16.7", "ereport-types", "expectorate", @@ -8660,6 +8660,7 @@ dependencies = [ "num-integer", "omicron-cockroach-metrics", "omicron-common", + "omicron-ddm-admin-client", "omicron-passwords", "omicron-rpaths", "omicron-sled-agent", @@ -9231,6 +9232,7 
@@ dependencies = [ "reqwest 0.13.2", "ring", "rustls 0.22.4", + "schemars 0.8.22", "serde", "sha2", "slog", @@ -11698,7 +11700,7 @@ dependencies = [ [[package]] name = "rdb-types" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/maghemite?rev=4d1f20f793da102b29b914569725ebc9fdf746dd#4d1f20f793da102b29b914569725ebc9fdf746dd" +source = "git+https://github.com/oxidecomputer/maghemite?rev=c3c3032f8bdc91d6faf2b36e05b8375a0980765c#c3c3032f8bdc91d6faf2b36e05b8375a0980765c" dependencies = [ "oxnet", "schemars 0.8.22", @@ -14505,9 +14507,9 @@ checksum = "df7f62577c25e07834649fc3b39fafdc597c0a3527dc1c60129201ccfcbaa50c" [[package]] name = "target-spec" -version = "3.5.7" +version = "3.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "585c173ce474b6257cfb2a107949e48eb1ab9cae21cecbdf13401ae3be4a411a" +checksum = "b00e973676af5497c2a69cc9787e2205c00f3b6f4f70e7d7b0112e28aa84b501" dependencies = [ "cfg-expr", "guppy-workspace-hack", @@ -16697,7 +16699,7 @@ name = "wicket-common" version = "0.1.0" dependencies = [ "anyhow", - "dpd-client 0.1.0 (git+https://github.com/oxidecomputer/dendrite?rev=1ddaa5d6b101fbaa2c29eca847111cbef1a272ad)", + "dpd-client 0.1.0 (git+https://github.com/oxidecomputer/dendrite?rev=e10e4f5a993fe950ab1b478abb5dcbfa7aa92091)", "dropshot 0.16.7", "gateway-client", "gateway-types", @@ -16762,7 +16764,7 @@ dependencies = [ "clap", "debug-ignore", "display-error-chain", - "dpd-client 0.1.0 (git+https://github.com/oxidecomputer/dendrite?rev=1ddaa5d6b101fbaa2c29eca847111cbef1a272ad)", + "dpd-client 0.1.0 (git+https://github.com/oxidecomputer/dendrite?rev=e10e4f5a993fe950ab1b478abb5dcbfa7aa92091)", "dropshot 0.16.7", "either", "expectorate", diff --git a/Cargo.toml b/Cargo.toml index fa16fa91492..d4cd540a36a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -493,7 +493,7 @@ digest = "0.10.7" dns-server = { path = "dns-server" } dns-server-api = { path = "dns-server-api" } dns-service-client = { path = "clients/dns-service-client" } -dpd-client = { git = "https://github.com/oxidecomputer/dendrite", rev = "1ddaa5d6b101fbaa2c29eca847111cbef1a272ad" } +dpd-client = { git = "https://github.com/oxidecomputer/dendrite", rev = "e10e4f5a993fe950ab1b478abb5dcbfa7aa92091" } dropshot = { version = "0.16.6", features = [ "usdt-probes" ] } dropshot-api-manager = "0.6.0" dropshot-api-manager-types = "0.6.0" @@ -599,8 +599,8 @@ ntp-admin-api = { path = "ntp-admin/api" } ntp-admin-client = { path = "clients/ntp-admin-client" } ntp-admin-types = { path = "ntp-admin/types" } ntp-admin-types-versions = { path = "ntp-admin/types/versions" } -mg-admin-client = { git = "https://github.com/oxidecomputer/maghemite", rev = "4d1f20f793da102b29b914569725ebc9fdf746dd" } -ddm-admin-client = { git = "https://github.com/oxidecomputer/maghemite", rev = "4d1f20f793da102b29b914569725ebc9fdf746dd" } +mg-admin-client = { git = "https://github.com/oxidecomputer/maghemite", rev = "c3c3032f8bdc91d6faf2b36e05b8375a0980765c" } +ddm-admin-client = { git = "https://github.com/oxidecomputer/maghemite", rev = "c3c3032f8bdc91d6faf2b36e05b8375a0980765c" } multimap = "0.10.1" nexus-auth = { path = "nexus/auth" } nexus-background-task-interface = { path = "nexus/background-task-interface" } @@ -737,7 +737,7 @@ rats-corim = { git = "https://github.com/oxidecomputer/rats-corim.git", rev = "f raw-cpuid = { git = "https://github.com/oxidecomputer/rust-cpuid.git", rev = "a4cf01df76f35430ff5d39dc2fe470bcb953503b" } rayon = "1.10" rcgen = "0.12.1" -rdb-types = { git = 
"https://github.com/oxidecomputer/maghemite", rev = "4d1f20f793da102b29b914569725ebc9fdf746dd" } +rdb-types = { git = "https://github.com/oxidecomputer/maghemite", rev = "c3c3032f8bdc91d6faf2b36e05b8375a0980765c" } reconfigurator-cli = { path = "dev-tools/reconfigurator-cli" } reedline = "0.40.0" ref-cast = "1.0" diff --git a/clients/ddm-admin-client/src/lib.rs b/clients/ddm-admin-client/src/lib.rs index 7a8b56d499d..b8815d17473 100644 --- a/clients/ddm-admin-client/src/lib.rs +++ b/clients/ddm-admin-client/src/lib.rs @@ -2,7 +2,7 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -// Copyright 2023 Oxide Computer Company +// Copyright 2026 Oxide Computer Company #![allow(clippy::redundant_closure_call)] #![allow(clippy::needless_lifetimes)] @@ -107,6 +107,40 @@ impl Client { self.inner.enable_stats(request).await.map(|resp| resp.into_inner()) } + /// Returns DDM peer information including interface names. + /// + /// The `if_name` field on each peer provides a live sled-to-port + /// mapping, identifying which switch port a peer sled is connected + /// through (e.g., `"tfportrear0_0"`). + pub async fn get_peers( + &self, + ) -> Result< + std::collections::HashMap, + Error, + > { + self.inner.get_peers().await.map(|resp| resp.into_inner()) + } + + /// Returns multicast routes learned from DDM peers. + /// + /// Each route includes the origin (overlay/underlay mapping), + /// the nexthop peer that advertised it, and the path vector. + pub async fn get_multicast_groups( + &self, + ) -> Result, Error> { + self.inner.get_multicast_groups().await.map(|resp| resp.into_inner()) + } + + /// Returns multicast origins that this DDM instance is advertising. + pub async fn get_originated_multicast_groups( + &self, + ) -> Result, Error> { + self.inner + .get_originated_multicast_groups() + .await + .map(|resp| resp.into_inner()) + } + /// Returns the addresses of connected sleds. /// /// Note: These sleds have not yet been verified. diff --git a/common/src/api/external/mod.rs b/common/src/api/external/mod.rs index d2c47534b3a..aa2fde293fd 100644 --- a/common/src/api/external/mod.rs +++ b/common/src/api/external/mod.rs @@ -2543,6 +2543,8 @@ impl Vni { /// /// This is a low-numbered VNI to avoid colliding with user VNIs. /// However, it is not in the Oxide-reserved range yet. + /// + /// Should match `oxide_vpc::api::DEFAULT_MULTICAST_VNI`. pub const DEFAULT_MULTICAST_VNI: Self = Self(77); /// Oxide reserves a slice of initial VNIs for its own use. 
diff --git a/dev-tools/ls-apis/tests/api_dependencies.out b/dev-tools/ls-apis/tests/api_dependencies.out index e2274ddd2d5..7b08511f2e5 100644 --- a/dev-tools/ls-apis/tests/api_dependencies.out +++ b/dev-tools/ls-apis/tests/api_dependencies.out @@ -29,6 +29,7 @@ Crucible Pantry (client: crucible-pantry-client) Maghemite DDM Admin (client: ddm-admin-client) consumed by: installinator (omicron/installinator) via 1 path consumed by: mgd (maghemite/mgd) via 1 path + consumed by: omicron-nexus (omicron/nexus) via 1 path consumed by: omicron-sled-agent (omicron/sled-agent) via 1 path consumed by: wicketd (omicron/wicketd) via 1 path diff --git a/dev-tools/omdb/tests/successes.out b/dev-tools/omdb/tests/successes.out index 30eeb5158d6..437765d19d2 100644 --- a/dev-tools/omdb/tests/successes.out +++ b/dev-tools/omdb/tests/successes.out @@ -656,7 +656,7 @@ task: "bfd_manager" configured period: every s last completed activation: , triggered by started at (s ago) and ran for ms - last completion reported error: failed to resolve addresses for Dendrite services: proto error: no records found for Query { name: Name("_dendrite._tcp.control-plane.oxide.internal."), query_type: SRV, query_class: IN } + last completion reported error: failed to resolve addresses for Dendrite services: proto error: no records found for Query { name: Name("_mgs._tcp.control-plane.oxide.internal."), query_type: SRV, query_class: IN } task: "blueprint_planner" configured period: every m @@ -1342,7 +1342,7 @@ task: "bfd_manager" configured period: every s last completed activation: , triggered by started at (s ago) and ran for ms - last completion reported error: failed to resolve addresses for Dendrite services: proto error: no records found for Query { name: Name("_dendrite._tcp.control-plane.oxide.internal."), query_type: SRV, query_class: IN } + last completion reported error: failed to resolve addresses for Dendrite services: proto error: no records found for Query { name: Name("_mgs._tcp.control-plane.oxide.internal."), query_type: SRV, query_class: IN } task: "blueprint_planner" configured period: every m diff --git a/dev-tools/releng/src/main.rs b/dev-tools/releng/src/main.rs index 874f25e3fe7..2569884a13a 100644 --- a/dev-tools/releng/src/main.rs +++ b/dev-tools/releng/src/main.rs @@ -270,6 +270,18 @@ async fn main() -> Result<()> { let opte_version = fs::read_to_string(WORKSPACE_DIR.join("tools/opte_version")).await?; + // Parse tools/opte_version_override for OPTE_COMMIT. When set, we + // download the override p5p from buildomat and use it as a package + // source during image build instead of the helios pkg repo version. 
+    let opte_override = parse_opte_version_override(
+        &WORKSPACE_DIR.join("tools/opte_version_override"),
+    )
+    .await?;
+    if let Some(ov) = &opte_override {
+        info!(logger, "OPTE override active: commit={}", ov.commit);
+    }
+    let opte_version = opte_version.trim();
+
     let client = reqwest::ClientBuilder::new()
         .connect_timeout(Duration::from_secs(15))
         .timeout(Duration::from_secs(120))
@@ -617,7 +629,7 @@ async fn main() -> Result<()> {
                 .arg("-o") // output directory for image
                 .arg(args.output_dir.join(format!("os-{}", target)))
                 .arg("-F") // pass extra image builder features
-                .arg(format!("optever={}", opte_version.trim()))
+                .arg(format!("optever={opte_version}"))
                 .arg("-P") // include all files from extra proto area
                 .arg(proto_dir.join("root"))
                 .arg("-N") // image name
@@ -675,11 +687,33 @@ async fn main() -> Result<()> {
                 .arg(format!("helios-dev={HELIOS_PKGREPO}"))
         }
 
-        // helios-build experiment-image
-        jobs.push_command(format!("{}-image", target), image_cmd)
+        // When OPTE_COMMIT is set, download the override p5p from buildomat
+        // and add it as a package source for the image build.
+        if let Some(ov) = &opte_override {
+            let p5p_path = tempdir.path().join(format!("opte-{target}.p5p"));
+            let commit = ov.commit.clone();
+            let dest = p5p_path.clone();
+            let cl = client.clone();
+            let log = logger.clone();
+            jobs.push(
+                format!("{target}-opte-p5p"),
+                download_opte_p5p(log, cl, commit, dest),
+            );
+
+            image_cmd = image_cmd
+                .arg("-p")
+                .arg(format!("helios-dev=file://{p5p_path}"));
+        }
+
+        let image_job = jobs
+            .push_command(format!("{target}-image"), image_cmd)
             .after("helios-setup")
             .after("helios-incorp")
-            .after(format!("{}-proto", target));
+            .after(format!("{target}-proto"));
+
+        if opte_override.is_some() {
+            image_job.after(format!("{target}-opte-p5p"));
+        }
     }
 
     // Build the recovery target after we build the host target. Only one
     // of these will build at a time since Cargo locks its target directory;
@@ -887,6 +921,73 @@ async fn build_proto_area(
     Ok(())
 }
 
+/// Parsed contents of `tools/opte_version_override` when an override is active.
+struct OpteOverride {
+    commit: String,
+}
+
+/// Parse `tools/opte_version_override` for `OPTE_COMMIT`. Returns `None` if
+/// `OPTE_COMMIT` is unset or empty.
+async fn parse_opte_version_override(
+    path: &Utf8PathBuf,
+) -> Result<Option<OpteOverride>> {
+    let contents = fs::read_to_string(path)
+        .await
+        .context("failed to read tools/opte_version_override")?;
+
+    for line in contents.lines() {
+        let line = line.trim();
+        if let Some(val) = line.strip_prefix("OPTE_COMMIT=") {
+            let val = val.trim_matches('"');
+            if !val.is_empty() {
+                return Ok(Some(OpteOverride { commit: val.to_string() }));
+            }
+        }
+    }
+
+    Ok(None)
+}
+
+const OPTE_BUILDOMAT_BASE: &str =
+    "https://buildomat.eng.oxide.computer/public/file/oxidecomputer/opte";
+
+/// Download the OPTE override p5p archive from buildomat.
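+///
+/// Retried up to `RETRY_ATTEMPTS` times; only the final failure is
+/// propagated, earlier attempts are logged and retried.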
+async fn download_opte_p5p( + logger: Logger, + client: reqwest::Client, + commit: String, + dest: Utf8PathBuf, +) -> Result<()> { + let url = format!("{OPTE_BUILDOMAT_BASE}/repo/{commit}/opte.p5p"); + info!(logger, "downloading OPTE override p5p from {url}"); + for attempt in 1..=RETRY_ATTEMPTS { + let result = async { + let response = client.get(&url).send().await?.error_for_status()?; + let bytes = response.bytes().await?; + fs::write(&dest, &bytes).await?; + Ok::<_, anyhow::Error>(()) + } + .await; + + match result { + Ok(()) => { + info!(logger, "downloaded OPTE p5p to {dest}"); + return Ok(()); + } + Err(err) => { + if attempt == RETRY_ATTEMPTS { + return Err(err).with_context(|| { + format!("failed to download OPTE p5p from {url}") + }); + } + info!(logger, "retrying OPTE p5p download (attempt {attempt})"); + } + } + } + + bail!("failed to download OPTE p5p after {RETRY_ATTEMPTS} attempts") +} + async fn host_add_root_profile(host_proto_root: Utf8PathBuf) -> Result<()> { fs::create_dir_all(&host_proto_root).await?; fs::write( diff --git a/illumos-utils/src/opte/illumos.rs b/illumos-utils/src/opte/illumos.rs index 28ca9f85566..f17adacf52a 100644 --- a/illumos-utils/src/opte/illumos.rs +++ b/illumos-utils/src/opte/illumos.rs @@ -13,6 +13,7 @@ use sled_agent_types::inventory::NetworkInterfaceKind; use slog::Logger; use slog::info; use std::net::IpAddr; +use std::net::Ipv6Addr; #[derive(thiserror::Error, Debug)] pub enum Error { @@ -70,6 +71,11 @@ pub enum Error { "Tried to update attached subnets on non-existent port ({0}, {1:?})" )] AttachedSubnetUpdateMissingPort(uuid::Uuid, NetworkInterfaceKind), + + #[error( + "address {0} is not within the underlay multicast subnet (ff04::/64)" + )] + InvalidMcastUnderlay(Ipv6Addr), } /// Delete all xde devices on the system. diff --git a/illumos-utils/src/opte/mod.rs b/illumos-utils/src/opte/mod.rs index e9e2546cb0a..780d63f44e9 100644 --- a/illumos-utils/src/opte/mod.rs +++ b/illumos-utils/src/opte/mod.rs @@ -33,14 +33,25 @@ use oxnet::IpNet; use oxnet::Ipv4Net; use oxnet::Ipv6Net; pub use port::Port; -pub use port_manager::MulticastGroupCfg; pub use port_manager::PortCreateParams; pub use port_manager::PortManager; pub use port_manager::PortTicket; +pub use sled_agent_types::multicast::MulticastGroupCfg; use std::net::IpAddr; use std::net::Ipv4Addr; use std::net::Ipv6Addr; +// `oxide_vpc::api::DEFAULT_MULTICAST_VNI` and +// `omicron_common::api::external::Vni::DEFAULT_MULTICAST_VNI` live in sibling +// crates that cannot reference each other's constant. They must stay +// numerically equal: the MRIB, M2P mappings, and OPTE all route on this +// value, so any divergence would black-hole multicast traffic. +const _: () = assert!( + oxide_vpc::api::DEFAULT_MULTICAST_VNI + == omicron_common::api::external::Vni::DEFAULT_MULTICAST_VNI.as_u32(), + "oxide_vpc::api::DEFAULT_MULTICAST_VNI must equal omicron_common Vni::DEFAULT_MULTICAST_VNI", +); + /// Information about the gateway for an OPTE port #[derive(Debug, Clone, Copy)] #[allow(dead_code)] diff --git a/illumos-utils/src/opte/non_illumos.rs b/illumos-utils/src/opte/non_illumos.rs index 42487cde09c..2170f1ace6f 100644 --- a/illumos-utils/src/opte/non_illumos.rs +++ b/illumos-utils/src/opte/non_illumos.rs @@ -2,25 +2,38 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -//! Mock / dummy versions of the OPTE module, for non-illumos platforms +//! Mock / dummy versions of the OPTE module, for non-illumos platforms. 
+//!
+//! Most methods are either `unimplemented!()` or silent no-ops.
+//! Multicast subscribe/unsubscribe are an exception: they maintain real
+//! in-memory state because port manager tests assert on subscription contents.
 
 use crate::addrobj::AddrObject;
 use oxide_vpc::api::AddRouterEntryReq;
+use oxide_vpc::api::ClearMcast2PhysReq;
+use oxide_vpc::api::ClearMcastForwardingReq;
 use oxide_vpc::api::ClearVirt2PhysReq;
 use oxide_vpc::api::DelRouterEntryReq;
 use oxide_vpc::api::DetachSubnetResp;
-use oxide_vpc::api::Direction;
+use oxide_vpc::api::DumpMcast2PhysResp;
+use oxide_vpc::api::DumpMcastForwardingResp;
 use oxide_vpc::api::DumpVirt2PhysResp;
 use oxide_vpc::api::IpCfg;
 use oxide_vpc::api::IpCidr;
 use oxide_vpc::api::ListPortsResp;
+use oxide_vpc::api::McastSubscribeReq;
+use oxide_vpc::api::McastUnsubscribeReq;
+use oxide_vpc::api::MulticastUnderlay;
 use oxide_vpc::api::NoResp;
 use oxide_vpc::api::PortInfo;
 use oxide_vpc::api::RouterClass;
 use oxide_vpc::api::RouterTarget;
 use oxide_vpc::api::SetExternalIpsReq;
 use oxide_vpc::api::SetFwRulesReq;
+use oxide_vpc::api::SetMcast2PhysReq;
+use oxide_vpc::api::SetMcastForwardingReq;
 use oxide_vpc::api::SetVirt2PhysReq;
+use oxide_vpc::api::SourceFilter;
 use oxide_vpc::api::VpcCfg;
 use sled_agent_types::inventory::NetworkInterfaceKind;
 use slog::Logger;
@@ -76,6 +89,11 @@ pub enum Error {
         "Tried to update attached subnets on non-existent port ({0}, {1:?})"
     )]
     AttachedSubnetUpdateMissingPort(uuid::Uuid, NetworkInterfaceKind),
+
+    #[error(
+        "address {0} is not within the underlay multicast subnet (ff04::/64)"
+    )]
+    InvalidMcastUnderlay(std::net::Ipv6Addr),
 }
 
 pub fn initialize_xde_driver(
@@ -172,12 +190,19 @@ pub(crate) struct PortData {
     pub port: PortInfo,
     /// The routes for this port. This simulates the router layer.
     pub routes: Vec<AddRouterEntryReq>,
+    /// Multicast group subscriptions: group IP → source filter.
+    pub mcast_subscriptions: HashMap<IpAddr, SourceFilter>,
 }
 
 #[derive(Debug)]
 pub(crate) struct State {
     pub ports: HashMap<String, PortData>,
     pub underlay_initialized: bool,
+    /// Multicast-to-physical mappings, keyed on (group, underlay).
+    ///
+    /// Persisted across [`Handle`] lifetimes to simulate xde kernel state
+    /// surviving sled-agent restarts.
+    pub m2p: Vec<(oxide_vpc::api::IpAddr, MulticastUnderlay)>,
 }
 
 const NO_RESPONSE: NoResp = NoResp { unused: 99 };
@@ -185,7 +210,11 @@
 static OPTE_STATE: OnceLock<Mutex<State>> = OnceLock::new();
 fn opte_state() -> &'static Mutex<State> {
     OPTE_STATE.get_or_init(|| {
-        Mutex::new(State { ports: HashMap::new(), underlay_initialized: false })
+        Mutex::new(State {
+            ports: HashMap::new(),
+            underlay_initialized: false,
+            m2p: Vec::new(),
+        })
     })
 }
 
@@ -237,7 +266,11 @@ impl Handle {
                 return Err(OpteError::DuplicatePort(entry.key().to_string()));
             }
             Entry::Vacant(entry) => {
-                entry.insert(PortData { port, routes: Vec::new() });
+                entry.insert(PortData {
+                    port,
+                    routes: Vec::new(),
+                    mcast_subscriptions: HashMap::new(),
+                });
             }
         }
         Ok(NO_RESPONSE)
@@ -270,14 +303,46 @@ impl Handle {
         Ok(NO_RESPONSE)
     }
 
-    /// Allow traffic to / from a CIDR block on a port.
-    pub fn allow_cidr(
+    /// Subscribe a port to a multicast group.
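+    ///
+    /// Unlike most of this mock, this records real state: the (group,
+    /// filter) pair is stored so port manager tests can assert on
+    /// subscription contents.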
+    pub fn mcast_subscribe(
         &self,
-        _: &str,
-        _: IpCidr,
-        _: Direction,
+        req: &McastSubscribeReq,
     ) -> Result<NoResp, OpteError> {
-        unimplemented!("Not yet used in tests")
+        let mut inner = opte_state().lock().unwrap();
+        let Some(port_data) = inner.ports.get_mut(&req.port_name) else {
+            return Err(OpteError::NoPort(req.port_name.clone()));
+        };
+        let group_ip: IpAddr = match req.group {
+            oxide_vpc::api::IpAddr::Ip4(v4) => {
+                std::net::Ipv4Addr::from(v4).into()
+            }
+            oxide_vpc::api::IpAddr::Ip6(v6) => {
+                std::net::Ipv6Addr::from(v6).into()
+            }
+        };
+        port_data.mcast_subscriptions.insert(group_ip, req.filter.clone());
+        Ok(NO_RESPONSE)
+    }
+
+    /// Unsubscribe a port from a multicast group.
+    pub fn mcast_unsubscribe(
+        &self,
+        req: &McastUnsubscribeReq,
+    ) -> Result<NoResp, OpteError> {
+        let mut inner = opte_state().lock().unwrap();
+        let Some(port_data) = inner.ports.get_mut(&req.port_name) else {
+            return Err(OpteError::NoPort(req.port_name.clone()));
+        };
+        let group_ip: IpAddr = match req.group {
+            oxide_vpc::api::IpAddr::Ip4(v4) => {
+                std::net::Ipv4Addr::from(v4).into()
+            }
+            oxide_vpc::api::IpAddr::Ip6(v6) => {
+                std::net::Ipv6Addr::from(v6).into()
+            }
+        };
+        port_data.mcast_subscriptions.remove(&group_ip);
+        Ok(NO_RESPONSE)
     }
 
     /// Delete a router entry from a port.
@@ -323,6 +388,64 @@ impl Handle {
         unimplemented!("Not yet used in tests")
     }
 
+    /// Set a multicast-to-physical mapping.
+    pub fn set_m2p(&self, req: &SetMcast2PhysReq) -> Result<NoResp, OpteError> {
+        let mut state = opte_state().lock().unwrap();
+        // Deduplicate by replacing existing entry for the same group.
+        state.m2p.retain(|(g, _)| *g != req.group);
+        state.m2p.push((req.group, req.underlay));
+        Ok(NO_RESPONSE)
+    }
+
+    /// Clear a multicast-to-physical mapping.
+    pub fn clear_m2p(
+        &self,
+        req: &ClearMcast2PhysReq,
+    ) -> Result<NoResp, OpteError> {
+        let mut state = opte_state().lock().unwrap();
+        state.m2p.retain(|(g, u)| !(*g == req.group && *u == req.underlay));
+        Ok(NO_RESPONSE)
+    }
+
+    /// Set multicast forwarding for a port.
+    pub fn set_mcast_fwd(
+        &self,
+        _: &SetMcastForwardingReq,
+    ) -> Result<NoResp, OpteError> {
+        Ok(NO_RESPONSE)
+    }
+
+    /// Clear multicast forwarding for a port.
+    pub fn clear_mcast_fwd(
+        &self,
+        _: &ClearMcastForwardingReq,
+    ) -> Result<NoResp, OpteError> {
+        Ok(NO_RESPONSE)
+    }
+
+    /// Dump all multicast-to-physical mappings.
+    pub fn dump_m2p(&self) -> Result<DumpMcast2PhysResp, OpteError> {
+        let state = opte_state().lock().unwrap();
+        let mut ip4 = Vec::new();
+        let mut ip6 = Vec::new();
+        for (group, underlay) in &state.m2p {
+            match group {
+                oxide_vpc::api::IpAddr::Ip4(v4) => {
+                    ip4.push((*v4, *underlay));
+                }
+                oxide_vpc::api::IpAddr::Ip6(v6) => {
+                    ip6.push((*v6, *underlay));
+                }
+            }
+        }
+        Ok(DumpMcast2PhysResp { ip4, ip6 })
+    }
+
+    /// Dump all multicast forwarding entries.
+    pub fn dump_mcast_fwd(&self) -> Result<DumpMcastForwardingResp, OpteError> {
+        Ok(DumpMcastForwardingResp { entries: Vec::new() })
+    }
+
     /// List ports on the current system.
     #[allow(dead_code)]
     pub(crate) fn list_ports(&self) -> Result<ListPortsResp, OpteError> {
diff --git a/illumos-utils/src/opte/port_manager.rs b/illumos-utils/src/opte/port_manager.rs
index 638dd52de3d..464e1aa3709 100644
--- a/illumos-utils/src/opte/port_manager.rs
+++ b/illumos-utils/src/opte/port_manager.rs
@@ -4,6 +4,7 @@
 //! Manager for all OPTE ports on a Helios system
 
+use crate::addrobj::AddrObject;
 use crate::dladm::OPTE_LINK_PREFIX;
 use crate::opte::AttachedSubnet;
 use crate::opte::EnsureAttachedSubnetResult;
@@ -17,8 +18,6 @@ use crate::opte::port::PortData;
 use ipnetwork::Ipv4Network;
 use ipnetwork::Ipv6Network;
 use macaddr::MacAddr6;
-use omicron_common::address::IPV4_MULTICAST_RANGE;
-use omicron_common::address::IPV6_MULTICAST_RANGE;
 use omicron_common::api::external;
 use omicron_common::api::internal::shared::ExternalIpGatewayMap;
 use omicron_common::api::internal::shared::InternetGatewayRouterTarget;
@@ -35,10 +34,13 @@ use omicron_common::api::internal::shared::RouterVersion;
 use omicron_common::api::internal::shared::VirtualNetworkInterfaceHost;
 use oxide_vpc::api::AddRouterEntryReq;
 use oxide_vpc::api::AttachedSubnetConfig;
+use oxide_vpc::api::ClearMcast2PhysReq;
+use oxide_vpc::api::ClearMcastForwardingReq;
 use oxide_vpc::api::DelRouterEntryReq;
 use oxide_vpc::api::DetachSubnetResp;
 use oxide_vpc::api::DhcpCfg;
 use oxide_vpc::api::ExternalIpCfg;
+use oxide_vpc::api::FilterMode;
 use oxide_vpc::api::IpCfg;
 use oxide_vpc::api::IpCidr;
 use oxide_vpc::api::Ipv4Cfg;
@@ -46,10 +48,16 @@ use oxide_vpc::api::Ipv4Cidr;
 use oxide_vpc::api::Ipv6Cfg;
 use oxide_vpc::api::Ipv6Cidr;
 use oxide_vpc::api::MacAddr;
+use oxide_vpc::api::McastSubscribeReq;
+use oxide_vpc::api::McastUnsubscribeReq;
+use oxide_vpc::api::MulticastUnderlay;
 use oxide_vpc::api::RouterClass;
 use oxide_vpc::api::SNat4Cfg;
 use oxide_vpc::api::SNat6Cfg;
 use oxide_vpc::api::SetExternalIpsReq;
+use oxide_vpc::api::SetMcast2PhysReq;
+use oxide_vpc::api::SetMcastForwardingReq;
+use oxide_vpc::api::SourceFilter;
 use oxide_vpc::api::TransitIpConfig;
 use oxide_vpc::api::VpcCfg;
 use oxnet::IpNet;
@@ -61,6 +69,15 @@ use sled_agent_types::instance::ExternalIpv6Config;
 use sled_agent_types::instance::ResolvedVpcFirewallRule;
 use sled_agent_types::inventory::NetworkInterface;
 use sled_agent_types::inventory::NetworkInterfaceKind;
+use sled_agent_types::multicast::ClearMcast2Phys;
+use sled_agent_types::multicast::ClearMcastForwarding;
+use sled_agent_types::multicast::Mcast2PhysMapping;
+use sled_agent_types::multicast::McastFilterMode;
+use sled_agent_types::multicast::McastForwardingEntry;
+use sled_agent_types::multicast::McastForwardingNextHop;
+use sled_agent_types::multicast::McastReplication;
+use sled_agent_types::multicast::McastSourceFilter;
+use sled_agent_types::multicast::MulticastGroupCfg;
 use slog::Logger;
 use slog::debug;
 use slog::error;
@@ -73,6 +90,7 @@ use std::collections::HashSet;
 use std::net::IpAddr;
 use std::net::Ipv4Addr;
 use std::net::Ipv6Addr;
+use std::net::UdpSocket;
 use std::sync::Arc;
 use std::sync::Mutex;
 use std::sync::atomic::AtomicU64;
@@ -89,20 +107,36 @@ struct RouteSet {
     active_ports: usize,
 }
 
-/// Configuration for multicast groups on an OPTE port.
-///
-/// TODO: This type should be moved to [oxide_vpc::api] when OPTE dependencies
-/// are updated, following the same pattern as other VPC configuration types
-/// like [ExternalIpCfg], [IpCfg], etc.
+/// Mutable per-port state tracked alongside the immutable `Port`.
+#[derive(Debug)]
+struct PortState {
+    port: Port,
+    /// Active multicast subscriptions, mapping group IP to source filter.
+    mcast_subscriptions: HashMap<IpAddr, SourceFilter>,
+}
+
+impl PortState {
+    fn new(port: Port) -> Self {
+        Self { port, mcast_subscriptions: HashMap::new() }
+    }
+}
+
+/// Convert a `MulticastGroupCfg` into OPTE's `SourceFilter`.
 ///
-/// TODO: Eventually remove.
-#[derive(Debug, Clone, PartialEq)]
-pub struct MulticastGroupCfg {
-    /// The multicast group IP address (IPv4 or IPv6).
-    pub group_ip: IpAddr,
-    /// Source addresses for source-filtered multicast (optional for ASM,
-    /// required for SSM).
-    pub sources: Vec<IpAddr>,
+/// Empty sources maps to ASM (EXCLUDE with no entries, accepting all
+/// sources). Non-empty sources maps to SSM (INCLUDE with the listed
+/// sources).
+fn multicast_cfg_to_source_filter(cfg: &MulticastGroupCfg) -> SourceFilter {
+    if cfg.sources.is_empty() {
+        SourceFilter::default()
+    } else {
+        SourceFilter::Include(
+            cfg.sources
+                .iter()
+                .map(|s| oxide_vpc::api::IpAddr::from(*s))
+                .collect(),
+        )
+    }
 }
 
 #[derive(Debug)]
@@ -115,9 +149,10 @@ struct PortManagerInner {
     /// IP address of the hosting sled on the underlay.
     underlay_ip: Ipv6Addr,
 
-    /// Map of all ports, keyed on the interface Uuid and its kind
-    /// (which includes the Uuid of the parent instance or service)
-    ports: Mutex<BTreeMap<(Uuid, NetworkInterfaceKind), Port>>,
+    /// Map of all ports and their mutable state, keyed on the interface
+    /// Uuid and its kind (which includes the Uuid of the parent instance
+    /// or service).
+    ports: Mutex<BTreeMap<(Uuid, NetworkInterfaceKind), PortState>>,
 
     /// Map of all current resolved routes.
     routes: Mutex<HashMap<RouterId, RouteSet>>,
@@ -127,6 +162,32 @@ struct PortManagerInner {
     ///
     /// IGW IDs are specific to the VPC of each NIC.
     eip_gateways: Mutex<HashMap<Uuid, HashMap<IpAddr, HashSet<Uuid>>>>,
+
+    /// Underlay NIC interface names (e.g., "cxgbe0", "cxgbe1").
+    ///
+    /// Used to program NIC multicast MAC filters via
+    /// `UdpSocket::join_multicast_v6`.
+    // Empty in tests where no real underlay NICs exist.
+    underlay_nics: Vec<String>,
+
+    /// UDP sockets held open to maintain NIC multicast MAC filters.
+    ///
+    /// On T6 hardware the NIC will not deliver multicast frames to
+    /// xde unless the corresponding multicast MAC filter is programmed.
+    /// Joining an IPv6 multicast group on a UDP socket causes the
+    /// kernel to call `mac_multicast_add` on the interface, which
+    /// programs the filter. The socket receives no data (xde's
+    /// siphon/flow hook intercepts first) and exists solely to hold
+    /// the filter entry.
+    ///
+    /// Dropping the socket removes the filter.
+    ///
+    /// See <https://github.com/oxidecomputer/opte/issues/908>.
+    //
+    // Leaf lock: this must be the last `PortManagerInner` lock acquired;
+    // taking any other inner lock while holding it would break the
+    // acyclic lock-order graph. The locked region must not call back
+    // into `PortManager`, as `std::sync::Mutex` is non-reentrant.
+    mcast_underlay_sockets: Mutex<HashMap<Ipv6Addr, UdpSocket>>,
 }
 
 impl PortManagerInner {
@@ -147,6 +208,7 @@ pub struct PortCreateParams<'a> {
     pub firewall_rules: &'a [ResolvedVpcFirewallRule],
     pub dhcp_config: DhcpCfg,
     pub attached_subnets: Vec<AttachedSubnet>,
+    pub multicast_groups: &'a [MulticastGroupCfg],
 }
 
 impl<'a> TryFrom<&PortCreateParams<'a>> for IpCfg {
@@ -341,8 +403,17 @@ pub struct PortManager {
 }
 
 impl PortManager {
-    /// Create a new manager, for creating OPTE ports
-    pub fn new(log: Logger, underlay_ip: Ipv6Addr) -> Self {
+    /// Create a new manager, for creating OPTE ports.
+    ///
+    /// When `underlay_nics` is non-empty, the constructor performs kernel
+    /// I/O: one ioctl to list existing M2P mappings, then one
+    /// `setsockopt(IPV6_JOIN_GROUP)` per mapping per NIC to rehydrate
+    /// multicast MAC filters.
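+    ///
+    /// A hypothetical call site (NIC names illustrative):
+    ///
+    /// ```ignore
+    /// let mgr = PortManager::new(log, underlay_ip, &underlay_nics);
+    /// ```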
+    pub fn new(
+        log: Logger,
+        underlay_ip: Ipv6Addr,
+        underlay_nics: &[AddrObject],
+    ) -> Self {
         let inner = Arc::new(PortManagerInner {
             log,
             next_port_id: AtomicU64::new(0),
@@ -350,9 +421,108 @@ impl PortManager {
             ports: Mutex::new(BTreeMap::new()),
             routes: Mutex::new(Default::default()),
             eip_gateways: Mutex::new(Default::default()),
+            underlay_nics: underlay_nics
+                .iter()
+                .map(|n| n.interface().to_string())
+                .collect(),
+            mcast_underlay_sockets: Mutex::new(HashMap::new()),
         });
 
-        Self { inner }
+        let mgr = Self { inner };
+
+        // Rehydrate MAC filter sockets for any M2P mappings that
+        // survived in the xde kernel module across a sled-agent
+        // restart. Without this, the NIC's multicast MAC filters
+        // are lost when the old process exits.
+        //
+        // We rehydrate eagerly here rather than lazily: the Nexus
+        // convergence loop's `converge_m2p` treats an M2P present on both
+        // DB and sled as already converged and never re-applies
+        // `set_mcast_m2p`, so a missing MAC filter would never be healed
+        // by convergence alone.
+        //
+        // Cost: one `dump_m2p` ioctl plus one `setsockopt(IPV6_JOIN_GROUP)`
+        // per surviving group per underlay NIC. Bounded by active groups on
+        // this sled and runs only at sled-agent startup.
+        mgr.rehydrate_underlay_multicast_filters();
+
+        mgr
+    }
+
+    /// Re-open underlay multicast filter sockets for M2P mappings
+    /// that already exist in the xde kernel module.
+    ///
+    /// Called at startup to cover the sled-agent restart case where
+    /// OPTE kernel state persists but userspace socket state is lost.
+    ///
+    /// On a cold boot (no prior xde state), `list_mcast_m2p` returns
+    /// an error or an empty list.
+    fn rehydrate_underlay_multicast_filters(&self) {
+        if self.inner.underlay_nics.is_empty() {
+            return;
+        }
+
+        let mappings = match self.list_mcast_m2p() {
+            Ok(m) => m,
+            Err(e) => {
+                // Expected on cold boot when xde has no prior state.
+                debug!(
+                    self.inner.log,
+                    "No M2P mappings to rehydrate";
+                    "error" => InlineErrorChain::new(&e),
+                );
+                return;
+            }
+        };
+
+        let mut failed: Vec<String> = Vec::new();
+        for mapping in &mappings {
+            if self.join_underlay_multicast_group(mapping.underlay) {
+                continue;
+            }
+            // Clear the surviving xde M2P entry so `converge_m2p` sees
+            // the gap on its next pass and re-issues `set_mcast_m2p`,
+            // which retries the underlay join. Without this, the entry
+            // stays in xde and convergence treats it as already
+            // converged, leaving the group black-holed until cycled
+            // inactive→active.
+ let clear_req = ClearMcast2Phys { + group: mapping.group, + underlay: mapping.underlay, + }; + if let Err(e) = self.clear_mcast_m2p(&clear_req) { + warn!( + self.inner.log, + "Failed to clear M2P after rehydration join failure, \ + group will stay black-holed until convergence retries"; + "group" => %mapping.group, + "underlay" => %mapping.underlay, + "error" => InlineErrorChain::new(&e), + ); + } + failed.push(mapping.underlay.to_string()); + } + + let total = mappings.len(); + let succeeded = total - failed.len(); + if !mappings.is_empty() { + info!( + self.inner.log, + "Rehydrated underlay multicast filter sockets"; + "succeeded" => succeeded, + "total" => total, + ); + } + if !failed.is_empty() { + warn!( + self.inner.log, + "Some underlay multicast filter sockets failed to \ + rehydrate; M2P entries cleared so convergence will \ + reissue on the next pass"; + "failed_count" => failed.len(), + "total" => total, + "failed_underlay_addrs" => ?failed, + ); + } } pub fn underlay_ip(&self) -> &Ipv6Addr { @@ -371,6 +541,7 @@ impl PortManager { firewall_rules, dhcp_config, attached_subnets: _, + multicast_groups, } = params; let is_service = matches!(nic.kind, NetworkInterfaceKind::Service { .. }); @@ -434,7 +605,7 @@ impl PortManager { .ports .lock() .unwrap() - .insert((nic.id, nic.kind), port.clone()); + .insert((nic.id, nic.kind), PortState::new(port.clone())); assert!( old.is_none(), "Duplicate OPTE port detected: interface_id = {}, kind = {:?}", @@ -553,6 +724,12 @@ impl PortManager { } drop(route_map); + // Configure multicast group subscriptions if any were + // provided at instance start. + if !multicast_groups.is_empty() { + self.multicast_groups_ensure(nic.id, nic.kind, multicast_groups)?; + } + info!( self.inner.log, "Created OPTE port"; @@ -620,13 +797,14 @@ impl PortManager { } // Note: We're deliberately holding both locks here - // to prevent several nexuses computng and applying deltas + // to prevent several nexuses computing and applying deltas // out of order. let ports = self.inner.ports.lock().unwrap(); let hdl = Handle::new()?; // Propagate deltas out to all ports. - for port in ports.values() { + for port_state in ports.values() { + let port = &port_state.port; // Fetch deltas for all router keys: system, IPv4 subnet, and IPv6 // subnet. let system_delta = deltas.get(&port.system_router_key()); @@ -714,11 +892,11 @@ impl PortManager { external_ips: &ExternalIpConfig, ) -> Result<(), Error> { let ports = self.inner.ports.lock().unwrap(); - let port = ports.get(&(nic_id, nic_kind)).ok_or_else(|| { + let port_state = ports.get(&(nic_id, nic_kind)).ok_or_else(|| { Error::ExternalIpUpdateMissingPort(nic_id, nic_kind) })?; - self.external_ips_ensure_port(port, nic_id, external_ips) + self.external_ips_ensure_port(&port_state.port, nic_id, external_ips) } /// Ensure external IPs for an OPTE port are up to date. @@ -772,73 +950,478 @@ impl PortManager { Ok(()) } - /// Validate multicast group memberships for an OPTE port. - /// - /// This method validates multicast group configurations but does not yet - /// configure OPTE port-level multicast group membership. The actual - /// multicast forwarding is currently handled by the reconciler + DPD - /// at the dataplane switch level. - /// - /// TODO: Once OPTE kernel module supports multicast group APIs, this - /// method should be updated to configure OPTE port-level multicast - /// group membership. Note: multicast groups are fleet-scoped and can span - /// across VPCs. 
+    /// Ensure multicast group subscriptions for an OPTE port match the
+    /// requested set. This diffs current vs new state and issues
+    /// subscribe/unsubscribe ioctls as needed.
     pub fn multicast_groups_ensure(
         &self,
         nic_id: Uuid,
         nic_kind: NetworkInterfaceKind,
         multicast_groups: &[MulticastGroupCfg],
     ) -> Result<(), Error> {
-        let ports = self.inner.ports.lock().unwrap();
-        let port = ports.get(&(nic_id, nic_kind)).ok_or_else(|| {
-            Error::MulticastUpdateMissingPort(nic_id, nic_kind)
-        })?;
+        // Validate and build the new subscription set before acquiring locks.
+        let mut new_subs: HashMap<IpAddr, SourceFilter> = HashMap::new();
+        for group in multicast_groups {
+            if !group.group_ip.is_multicast() {
+                return Err(Error::InvalidPortIpConfig(format!(
+                    "not a multicast address: {}",
+                    group.group_ip,
+                )));
+            }
+            new_subs
+                .insert(group.group_ip, multicast_cfg_to_source_filter(group));
+        }
 
-        debug!(
+        let hdl = Handle::new()?;
+
+        let mut ports = self.inner.ports.lock().unwrap();
+        let port_state =
+            ports.get_mut(&(nic_id, nic_kind)).ok_or_else(|| {
+                Error::MulticastUpdateMissingPort(nic_id, nic_kind)
+            })?;
+        let port_name = port_state.port.name().to_string();
+
+        // Unsubscribe groups that are no longer requested.
+        let to_remove: Vec<IpAddr> = port_state
+            .mcast_subscriptions
+            .keys()
+            .filter(|g| !new_subs.contains_key(g))
+            .copied()
+            .collect();
+
+        let removed = to_remove.len();
+        for group_ip in &to_remove {
+            debug!(
+                self.inner.log,
+                "unsubscribing from multicast group";
+                "port" => &port_name,
+                "group" => %group_ip,
+            );
+
+            // Effectively infallible, as the IPs are verified as multicast,
+            // the operation is idempotent, and the port exists.
+            hdl.mcast_unsubscribe(&McastUnsubscribeReq {
+                port_name: port_name.clone(),
+                group: (*group_ip).into(),
+            })?;
+
+            port_state.mcast_subscriptions.remove(group_ip);
+        }
+
+        // Subscribe to new groups or update changed filters.
+        let mut added = 0usize;
+        for (group_ip, filter) in &new_subs {
+            let needs_subscribe =
+                match port_state.mcast_subscriptions.get(group_ip) {
+                    None => true,
+                    Some(current) => current != filter,
+                };
+
+            if needs_subscribe {
+                added += 1;
+                debug!(
+                    self.inner.log,
+                    "subscribing to multicast group";
+                    "port" => &port_name,
+                    "group" => %group_ip,
+                    "filter" => ?filter,
+                );
+
+                // Effectively infallible as the IPs are verified as multicast,
+                // the operation is idempotent, and the port exists.
+                hdl.mcast_subscribe(&McastSubscribeReq {
+                    port_name: port_name.clone(),
+                    group: (*group_ip).into(),
+                    filter: filter.clone(),
+                })?;
+
+                port_state
+                    .mcast_subscriptions
+                    .insert(*group_ip, filter.clone());
+            }
+        }
+
+        if added > 0 || removed > 0 {
+            info!(
+                self.inner.log,
+                "multicast subscriptions updated";
+                "port" => &port_name,
+                "added" => added,
+                "removed" => removed,
+                "active_groups" => port_state.mcast_subscriptions.len(),
+            );
+        } else {
+            debug!(
+                self.inner.log,
+                "multicast subscriptions reconciled, no change";
+                "port" => &port_name,
+                "active_groups" => port_state.mcast_subscriptions.len(),
+            );
+        }
+
+        Ok(())
+    }
+
+    /// Install a multicast overlay-to-underlay (M2P) mapping in OPTE.
+    ///
+    /// This setter also programs the underlay NIC multicast MAC filters by
+    /// joining the underlay IPv6 multicast group on a UDP socket, ensuring the
+    /// NIC delivers frames to xde. See `mcast_underlay_sockets` docs.
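+    ///
+    /// A sketch of a call site (the addresses are hypothetical; the
+    /// underlay address must fall within ff04::/64):
+    ///
+    /// ```ignore
+    /// mgr.set_mcast_m2p(&Mcast2PhysMapping {
+    ///     group: "239.1.2.3".parse().unwrap(),
+    ///     underlay: "ff04::1:2:3".parse().unwrap(),
+    /// })?;
+    /// ```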
+ pub fn set_mcast_m2p(&self, req: &Mcast2PhysMapping) -> Result<(), Error> { + let addr: Ipv6Addr = req.underlay; + + info!( self.inner.log, - "Validating multicast group configuration for OPTE port"; - "port_name" => port.name(), - "nic_id" => ?nic_id, - "groups" => ?multicast_groups, + "Setting multicast overlay-to-underlay mapping"; + "group" => %req.group, + "underlay" => %addr, ); - // Validate multicast group configurations - for group in multicast_groups { - if !group.group_ip.is_multicast() { - error!( + let underlay = MulticastUnderlay::new(addr.into()) + .map_err(|_| Error::InvalidMcastUnderlay(addr))?; + let hdl = Handle::new()?; + hdl.set_m2p(&SetMcast2PhysReq { group: req.group.into(), underlay })?; + + self.join_underlay_multicast_group(addr); + + Ok(()) + } + + /// Remove a multicast overlay-to-underlay (M2P) mapping from OPTE. + /// + /// Drops the corresponding underlay MAC filter socket, removing the + /// NIC multicast MAC filter entry. + pub fn clear_mcast_m2p(&self, req: &ClearMcast2Phys) -> Result<(), Error> { + let addr: Ipv6Addr = req.underlay; + + info!( + self.inner.log, + "Clearing multicast overlay-to-underlay mapping"; + "group" => %req.group, + "underlay" => %addr, + ); + + let underlay = MulticastUnderlay::new(addr.into()) + .map_err(|_| Error::InvalidMcastUnderlay(addr))?; + let hdl = Handle::new()?; + hdl.clear_m2p(&ClearMcast2PhysReq { + group: req.group.into(), + underlay, + })?; + + self.leave_underlay_multicast_group(addr); + + Ok(()) + } + + /// Join an underlay IPv6 multicast group on all underlay NICs via a + /// UDP socket, programming the NIC's multicast MAC filters. + /// + /// On T6 hardware the NIC drops multicast frames unless the + /// corresponding MAC filter is installed. Joining the group on a + /// socket triggers `mac_multicast_add` in the kernel. The socket + /// receives no data, as xde intercepts first. + /// + /// The cxgbe driver supports [at most 336 multicast filter + /// entries][cxgbe-mcast-limit] per interface. Beyond that, joins + /// will fail and the NIC will not deliver frames for those groups. + /// + /// Failures are logged but not propagated. The M2P mapping in OPTE + /// is the primary requirement, and MAC filter programming is + /// best-effort (e.g., NIC transiently unplumbed at boot, cxgbe + /// multicast filter table exhausted). See [opte#908] for context. + /// + /// [cxgbe-mcast-limit]: https://github.com/oxidecomputer/illumos-gate/blob/c43b3b549678498219f87d7bb5882e9a9a904ade/usr/src/uts/common/io/cxgbe/t4nex/t4_mac.c#L759-L765 + /// [opte#908]: https://github.com/oxidecomputer/opte/issues/908 + fn join_underlay_multicast_group(&self, addr: Ipv6Addr) -> bool { + if self.inner.underlay_nics.is_empty() { + return false; + } + + let mut sockets = self.inner.mcast_underlay_sockets.lock().unwrap(); + if sockets.contains_key(&addr) { + return true; + } + + let sock = match UdpSocket::bind("[::]:0") { + Ok(s) => s, + Err(e) => { + warn!( self.inner.log, - "Invalid multicast IP address"; - "group_ip" => %group.group_ip, - "port_name" => port.name(), + "Failed to bind UDP socket for underlay multicast filter"; + "addr" => %addr, + "error" => %e, ); - return Err(Error::InvalidPortIpConfig(String::from( - "invalid multicast IP address", - ))); + return false; } + }; + + // Minimize the receive buffer. This socket exists solely to + // trigger MAC filter programming. xde intercepts packets before + // they reach the socket. The small buffer limits resource waste + // if that invariant is ever violated. 
+        if let Err(e) = sock.set_nonblocking(true) {
+            warn!(
+                self.inner.log,
+                "Failed to set underlay multicast socket non-blocking";
+                "addr" => %addr,
+                "error" => %e,
+            );
+        }
+        // The kernel may round up from 1 to its own minimum.
+        let _ = unsafe {
+            libc::setsockopt(
+                std::os::unix::io::AsRawFd::as_raw_fd(&sock),
+                libc::SOL_SOCKET,
+                libc::SO_RCVBUF,
+                &1i32 as *const i32 as *const libc::c_void,
+                std::mem::size_of::<i32>() as libc::socklen_t,
+            )
+        };
+
+        let joined_any = self
+            .inner
+            .underlay_nics
+            .iter()
+            .filter_map(|nic_name| {
+                let if_index = nix::net::if_::if_nametoindex(nic_name.as_str())
+                    .map_err(|e| {
+                        warn!(
+                            self.inner.log,
+                            "Failed to resolve underlay NIC index";
+                            "nic" => nic_name,
+                            "error" => %e,
+                        );
+                    })
+                    .ok()?;
+
+                sock.join_multicast_v6(&addr, if_index)
+                    .map_err(|e| {
+                        warn!(
+                            self.inner.log,
+                            "Failed to join underlay multicast group on NIC";
+                            "addr" => %addr,
+                            "nic" => nic_name,
+                            "if_index" => if_index,
+                            "error" => %e,
+                        );
+                    })
+                    .ok()?;
+
+                debug!(
+                    self.inner.log,
+                    "Joined underlay multicast group on NIC";
+                    "addr" => %addr,
+                    "nic" => nic_name,
+                    "if_index" => if_index,
+                );
+                Some(())
+            })
+            .count()
+            > 0;
+
+        if joined_any {
+            sockets.insert(addr, sock);
+            true
+        } else {
+            warn!(
+                self.inner.log,
+                "no NIC joins succeeded for underlay multicast group, \
+                 will retry on next call";
+                "addr" => %addr,
+            );
+            false
+        }
+    }
+
+    /// Drop the UDP socket for an underlay multicast address, removing
+    /// the NIC MAC filter entries.
+    fn leave_underlay_multicast_group(&self, addr: Ipv6Addr) {
+        let mut sockets = self.inner.mcast_underlay_sockets.lock().unwrap();
+        if sockets.remove(&addr).is_some() {
+            debug!(
+                self.inner.log,
+                "Removed underlay multicast filter socket";
+                "addr" => %addr,
+            );
         }
     }
 
-        // TODO: Configure firewall rules to allow multicast traffic.
-        // Add exceptions in source/dest MAC/L3 addr checking for multicast
-        // addresses matching known groups, only doing cidr-checking on the
-        // multicasst destination side.
+    /// Set multicast forwarding next hops for an underlay group address.
+    pub fn set_mcast_fwd(
+        &self,
+        req: &McastForwardingEntry,
+    ) -> Result<(), Error> {
+        // Safe to unwrap: 77 is well within the 24-bit VNI range.
+        let mcast_vni =
+            Vni::new(oxide_vpc::api::DEFAULT_MULTICAST_VNI).unwrap();
+        let addr: Ipv6Addr = req.underlay;
         info!(
             self.inner.log,
-            "OPTE port configured for multicast traffic";
-            "port_name" => port.name(),
-            "ipv4_range" => %IPV4_MULTICAST_RANGE,
-            "ipv6_range" => %IPV6_MULTICAST_RANGE,
-            "multicast_groups" => multicast_groups.len(),
+            "Setting multicast forwarding";
+            "underlay" => %addr,
+            "next_hops" => req.next_hops.len(),
         );
 
-        // TODO: Configure OPTE port for specific multicast group membership
-        // once OPTE kernel module APIs are available. This is distinct from
-        // zone vNIC underlay configuration (see instance.rs
-        // `join_multicast_group_inner`).
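+        // Translate the sled-agent multicast types into their oxide_vpc
+        // counterparts: validate the underlay address (ff04::/64), then
+        // map each next hop's replication mode and source filter.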
+        let underlay = MulticastUnderlay::new(addr.into())
+            .map_err(|_| Error::InvalidMcastUnderlay(addr))?;
+        let next_hops = req
+            .next_hops
+            .iter()
+            .map(|nexthop| oxide_vpc::api::McastForwardingNextHop {
+                next_hop: oxide_vpc::api::NextHopV6 {
+                    addr: nexthop.next_hop.into(),
+                    vni: mcast_vni,
+                },
+                replication: match nexthop.replication {
+                    McastReplication::External => {
+                        oxide_vpc::api::Replication::External
+                    }
+                    McastReplication::Underlay => {
+                        oxide_vpc::api::Replication::Underlay
+                    }
+                    McastReplication::Both => oxide_vpc::api::Replication::Both,
+                },
+                source_filter: match nexthop.filter.mode {
+                    McastFilterMode::Include => SourceFilter::Include(
+                        nexthop
+                            .filter
+                            .sources
+                            .iter()
+                            .copied()
+                            .map(Into::into)
+                            .collect(),
+                    ),
+                    McastFilterMode::Exclude => SourceFilter::Exclude(
+                        nexthop
+                            .filter
+                            .sources
+                            .iter()
+                            .copied()
+                            .map(Into::into)
+                            .collect(),
+                    ),
+                },
+            })
+            .collect();
+        let hdl = Handle::new()?;
+        hdl.set_mcast_fwd(&SetMcastForwardingReq { underlay, next_hops })?;
+        Ok(())
+    }
+
+    /// Remove all multicast forwarding entries for an underlay group address.
+    pub fn clear_mcast_fwd(
+        &self,
+        req: &ClearMcastForwarding,
+    ) -> Result<(), Error> {
+        let addr: Ipv6Addr = req.underlay;
+        info!(
+            self.inner.log,
+            "Clearing multicast forwarding";
+            "underlay" => %addr,
+        );
+
+        let underlay = MulticastUnderlay::new(addr.into())
+            .map_err(|_| Error::InvalidMcastUnderlay(addr))?;
+        let hdl = Handle::new()?;
+        hdl.clear_mcast_fwd(&ClearMcastForwardingReq { underlay })?;
         Ok(())
     }
 
+    /// Dump all multicast overlay-to-underlay (M2P) mappings from OPTE.
+    pub fn list_mcast_m2p(&self) -> Result<Vec<Mcast2PhysMapping>, Error> {
+        let hdl = Handle::new()?;
+        let resp = hdl.dump_m2p()?;
+        let mappings = resp
+            .ip4
+            .into_iter()
+            .map(|(group, underlay)| Mcast2PhysMapping {
+                group: IpAddr::V4(group.into()),
+                underlay: Ipv6Addr::from(underlay.addr()),
+            })
+            .chain(resp.ip6.into_iter().map(|(group, underlay)| {
+                Mcast2PhysMapping {
+                    group: IpAddr::V6(group.into()),
+                    underlay: Ipv6Addr::from(underlay.addr()),
+                }
+            }))
+            .collect();
+        Ok(mappings)
+    }
+
+    /// Dump all multicast forwarding entries from OPTE.
+    pub fn list_mcast_fwd(&self) -> Result<Vec<McastForwardingEntry>, Error> {
+        let hdl = Handle::new()?;
+        let resp = hdl.dump_mcast_fwd()?;
+        resp.entries
+            .into_iter()
+            .map(|entry| {
+                let next_hops = entry
+                    .next_hops
+                    .into_iter()
+                    .filter_map(|nexthop| {
+                        let replication = match nexthop.replication {
+                            oxide_vpc::api::Replication::External => {
+                                McastReplication::External
+                            }
+                            oxide_vpc::api::Replication::Underlay => {
+                                McastReplication::Underlay
+                            }
+                            oxide_vpc::api::Replication::Both => {
+                                McastReplication::Both
+                            }
+                            oxide_vpc::api::Replication::Reserved => {
+                                // Reserved is a 2-bit padding value with
+                                // no valid semantic meaning. Its presence
+                                // in the forwarding table indicates a bug
+                                // or manual opteadm intervention. Skip
+                                // this hop rather than failing the entire
+                                // list so the reconciler can still program
+                                // valid next-hops.
+                                warn!(
+                                    self.inner.log,
+                                    "skipping next hop with Reserved \
+                                     replication mode";
+                                    "next_hop" => %nexthop.next_hop.addr
+                                );
+                                return None;
+                            }
+                        };
+
+                        Some(McastForwardingNextHop {
+                            next_hop: nexthop.next_hop.addr.into(),
+                            replication,
+                            filter: McastSourceFilter {
+                                mode: match nexthop.source_filter.mode() {
+                                    FilterMode::Include => {
+                                        McastFilterMode::Include
+                                    }
+                                    FilterMode::Exclude => {
+                                        McastFilterMode::Exclude
+                                    }
+                                },
+                                sources: nexthop
+                                    .source_filter
+                                    .sources()
+                                    .iter()
+                                    .copied()
+                                    .map(Into::into)
+                                    .collect(),
+                            },
+                        })
+                    })
+                    .collect();
+
+                Ok(McastForwardingEntry {
+                    underlay: Ipv6Addr::from(entry.underlay.addr()),
+                    next_hops,
+                })
+            })
+            .collect()
+    }
+
     pub fn firewall_rules_ensure(
         &self,
         vni: external::Vni,
@@ -856,10 +1439,11 @@ impl PortManager {

         // We update VPC rules as a set so grab only
         // the relevant ports using the VPC's VNI.
-        let vpc_ports = ports
-            .iter()
-            .filter(|((_, _), port)| u32::from(vni) == u32::from(*port.vni()));
-        for ((_, _), port) in vpc_ports {
+        let vpc_ports = ports.iter().filter(|((_, _), port_state)| {
+            u32::from(vni) == u32::from(*port_state.port.vni())
+        });
+        for ((_, _), port_state) in vpc_ports {
+            let port = &port_state.port;
             let rules = opte_firewall_rules(rules, port.vni(), port.mac());
             let port_name = port.name().to_string();
             info!(
@@ -969,7 +1553,7 @@ impl PortManager {
         ensure_added: Vec<AttachedSubnet>,
     ) -> EnsureAttachedSubnetResult {
         let ports = self.inner.ports.lock().unwrap();
-        let Some(port) = ports.get(&(nic_id, nic_kind)) else {
+        let Some(port_state) = ports.get(&(nic_id, nic_kind)) else {
             return EnsureAttachedSubnetResult {
                 diff: Default::default(),
                 error: Some(Error::AttachedSubnetUpdateMissingPort(
@@ -977,7 +1561,11 @@ impl PortManager {
                 )),
             };
         };
-        self.attached_subnets_ensure_port(port, ensure_removed, ensure_added)
+        self.attached_subnets_ensure_port(
+            &port_state.port,
+            ensure_removed,
+            ensure_added,
+        )
     }

     fn attached_subnets_ensure_port(
@@ -1029,10 +1617,10 @@ impl PortManager {
         subnet: AttachedSubnet,
     ) -> Result<(), Error> {
         let ports = self.inner.ports.lock().unwrap();
-        let port = ports.get(&(nic_id, nic_kind)).ok_or_else(|| {
+        let port_state = ports.get(&(nic_id, nic_kind)).ok_or_else(|| {
             Error::AttachedSubnetUpdateMissingPort(nic_id, nic_kind)
         })?;
-        self.attach_subnet_port(port, subnet)
+        self.attach_subnet_port(&port_state.port, subnet)
     }

     fn attach_subnet_port(
@@ -1078,10 +1666,10 @@ impl PortManager {
         subnet: IpCidr,
     ) -> Result<(), Error> {
         let ports = self.inner.ports.lock().unwrap();
-        let port = ports.get(&(nic_id, nic_kind)).ok_or_else(|| {
+        let port_state = ports.get(&(nic_id, nic_kind)).ok_or_else(|| {
             Error::AttachedSubnetUpdateMissingPort(nic_id, nic_kind)
         })?;
-        self.detach_subnet_port(port, subnet)
+        self.detach_subnet_port(&port_state.port, subnet)
     }

     fn detach_subnet_port(
@@ -1154,7 +1742,7 @@ impl PortTicket {

     fn release_inner(&mut self) -> Result<(), Error> {
         let mut ports = self.manager.ports.lock().unwrap();
-        let Some(port) = ports.remove(&(self.id, self.kind)) else {
+        let Some(port_state) = ports.remove(&(self.id, self.kind)) else {
             error!(
                 self.manager.log,
                 "Tried to release non-existent port";
@@ -1163,6 +1751,7 @@ impl PortTicket {
             );
             return Err(Error::ReleaseMissingPort(self.id, self.kind));
         };
+        let port = &port_state.port;
         drop(ports);

         // Cleanup the set of subnets we want to receive routes for.
@@ -1199,7 +1788,7 @@ impl PortTicket { "Removed OPTE port from manager"; "id" => ?&self.id, "kind" => ?&self.kind, - "port" => ?&port, + "port" => ?&port_state, ); Ok(()) } @@ -1229,6 +1818,9 @@ impl Drop for PortTicket { mod tests { use super::PortCreateParams; use super::PortManager; + #[cfg(target_os = "illumos")] + use crate::addrobj::AddrObject; + use crate::opte::Error; use crate::opte::Handle; use macaddr::MacAddr6; use omicron_common::api::external::{MacAddr, Vni}; @@ -1242,9 +1834,11 @@ mod tests { use omicron_common::api::internal::shared::RouterVersion; use omicron_test_utils::dev::test_setup_log; use oxide_vpc::api::DhcpCfg; + use oxide_vpc::api::FilterMode; use oxide_vpc::api::IpCfg; use oxide_vpc::api::Ipv4Cidr; use oxide_vpc::api::Ipv6Cidr; + use oxide_vpc::api::SourceFilter; use oxnet::IpNet; use oxnet::Ipv4Net; use oxnet::Ipv6Net; @@ -1255,17 +1849,75 @@ mod tests { use sled_agent_types::inventory::NetworkInterfaceKind; use sled_agent_types::inventory::SourceNatConfigV4; use sled_agent_types::inventory::SourceNatConfigV6; + use sled_agent_types::multicast::MulticastGroupCfg; use std::collections::HashSet; + use std::net::IpAddr; use std::net::Ipv4Addr; use std::net::Ipv6Addr; + #[cfg(target_os = "illumos")] + use std::time::Duration; + #[cfg(target_os = "illumos")] + use std::time::Instant; use uuid::Uuid; + // Maximum ephemeral port number for source NAT (14-bit range). + const MAX_PORT: u16 = (1 << 14) - 1; + + /// Loopback interface name on illumos. Tests that verify kernel + /// IPv6 multicast membership are illumos-only because they shell + /// out to illumos's `netstat -g -f inet6`. + #[cfg(target_os = "illumos")] + const LOOPBACK_IF: &str = "lo0"; + + /// Returns `true` iff `netstat -g -f inet6` reports `group` as a + /// membership on `interface`. + /// + /// Used to verify that `join_multicast_v6`/leave on the filter + /// socket actually reached the kernel's IP layer for the named + /// underlay NIC, rather than just updating the in-process + /// `mcast_underlay_sockets` map. + #[cfg(target_os = "illumos")] + fn netstat_v6_has_membership(interface: &str, group: &Ipv6Addr) -> bool { + let out = std::process::Command::new("netstat") + .args(["-g", "-n", "-f", "inet6"]) + .output() + .expect("netstat -g invocation failed"); + let group_str = group.to_string(); + String::from_utf8_lossy(&out.stdout).lines().any(|line| { + let mut fields = line.split_whitespace(); + if let (Some(iface), Some(grp)) = (fields.next(), fields.next()) { + iface == interface && grp == group_str + } else { + false + } + }) + } + + /// Poll `netstat -g` until membership matches `expected`, panicking + /// on timeout. The kernel should update synchronously on the join + /// or leave syscall, but polling absorbs possible transient delay. + #[cfg(target_os = "illumos")] + fn poll_v6_membership(interface: &str, group: &Ipv6Addr, expected: bool) { + let deadline = Instant::now() + Duration::from_secs(5); + while Instant::now() < deadline { + if netstat_v6_has_membership(interface, group) == expected { + return; + } + std::thread::sleep(Duration::from_millis(100)); + } + panic!( + "timeout: membership for {group} on {interface} expected {}", + if expected { "present" } else { "absent" } + ); + } + // Regression for https://github.com/oxidecomputer/omicron/issues/7541. 
#[test] fn multiple_ports_does_not_destroy_default_route() { let logctx = test_setup_log("multiple_ports_does_not_destroy_default_route"); - let manager = PortManager::new(logctx.log.clone(), Ipv6Addr::LOCALHOST); + let manager = + PortManager::new(logctx.log.clone(), Ipv6Addr::LOCALHOST, &[]); let default_ipv4_route = IpNet::V4(Ipv4Net::new(Ipv4Addr::UNSPECIFIED, 0).unwrap()); let default_ipv6_route = @@ -1310,7 +1962,6 @@ mod tests { }), v6: None, }; - const MAX_PORT: u16 = (1 << 14) - 1; let (port0, _ticket0) = manager .create_port(PortCreateParams { nic: &NetworkInterface { @@ -1335,6 +1986,7 @@ mod tests { dns6_servers: Vec::new(), }, attached_subnets: vec![], + multicast_groups: &[], }) .unwrap(); @@ -1514,6 +2166,7 @@ mod tests { dns6_servers: Vec::new(), }, attached_subnets: vec![], + multicast_groups: &[], }) .unwrap(); @@ -1685,6 +2338,7 @@ mod tests { dns6_servers: vec![], }, attached_subnets: vec![], + multicast_groups: &[], }; let IpCfg::Ipv4(oxide_vpc::api::Ipv4Cfg { vpc_subnet, @@ -1758,6 +2412,7 @@ mod tests { dns6_servers: vec![], }, attached_subnets: vec![], + multicast_groups: &[], }; let IpCfg::Ipv6(oxide_vpc::api::Ipv6Cfg { vpc_subnet, @@ -1842,6 +2497,7 @@ mod tests { dns6_servers: vec![], }, attached_subnets: vec![], + multicast_groups: &[], }; let IpCfg::DualStack { ipv4, ipv6 } = IpCfg::try_from(&prs).unwrap() else { @@ -1932,6 +2588,7 @@ mod tests { dns6_servers: vec![], }, attached_subnets: vec![], + multicast_groups: &[], }; let _ = IpCfg::try_from(&prs).expect_err( "Should fail to convert with public IPv6 and private IPv4", @@ -1978,9 +2635,453 @@ mod tests { dns6_servers: vec![], }, attached_subnets: vec![], + multicast_groups: &[], }; let _ = IpCfg::try_from(&prs).expect_err( "Should fail to convert with public IPv4 and private IPv6", ); } + + #[test] + fn multicast_groups_ensure_diffing() { + let logctx = test_setup_log("multicast_groups_ensure_diffing"); + let manager = + PortManager::new(logctx.log.clone(), Ipv6Addr::LOCALHOST, &[]); + + let handle = Handle::new().unwrap(); + handle.set_xde_underlay("underlay0", "underlay1").unwrap(); + + let nic_id = Uuid::new_v4(); + let nic_kind = NetworkInterfaceKind::Service { id: Uuid::new_v4() }; + + let private_subnet = + Ipv4Net::new(Ipv4Addr::new(172, 20, 0, 0), 24).unwrap(); + let private_ip = Ipv4Addr::new(172, 20, 0, 4); + let ip_config = + PrivateIpConfig::new_ipv4(private_ip, private_subnet).unwrap(); + let public_ip = Ipv4Addr::new(10, 0, 0, 4); + + let external_ips = ExternalIpConfig { + v4: Some(ExternalIpv4Config { + source_nat: Some( + SourceNatConfigV4::new(public_ip, 0, MAX_PORT).unwrap(), + ), + ..Default::default() + }), + v6: None, + }; + + // Bindings keep the port registered in the manager for this scope. + let (_port, _ticket) = manager + .create_port(PortCreateParams { + nic: &NetworkInterface { + id: nic_id, + kind: nic_kind, + name: "opte0".parse().unwrap(), + ip_config, + mac: MacAddr(MacAddr6::new( + 0xa8, 0x40, 0x25, 0x00, 0x00, 0x01, + )), + vni: Vni::SERVICES_VNI, + primary: true, + slot: 0, + }, + external_ips: &external_ips, + firewall_rules: &[], + dhcp_config: DhcpCfg { + hostname: None, + host_domain: None, + domain_search_list: Vec::new(), + dns4_servers: Vec::new(), + dns6_servers: Vec::new(), + }, + attached_subnets: vec![], + multicast_groups: &[], + }) + .unwrap(); + + let group1: IpAddr = "239.1.1.1".parse().unwrap(); + let group2: IpAddr = "239.1.1.2".parse().unwrap(); + let source_a: IpAddr = "10.0.0.1".parse().unwrap(); + + // Subscribe to two groups: one ASM, one SSM. 
+ manager + .multicast_groups_ensure( + nic_id, + nic_kind, + &[ + MulticastGroupCfg { group_ip: group1, sources: vec![] }, + MulticastGroupCfg { + group_ip: group2, + sources: vec![source_a], + }, + ], + ) + .unwrap(); + + // Verify port manager tracking. + { + let ports = manager.inner.ports.lock().unwrap(); + let port_state = ports.get(&(nic_id, nic_kind)).unwrap(); + assert_eq!(port_state.mcast_subscriptions.len(), 2); + assert_eq!( + *port_state.mcast_subscriptions.get(&group1).unwrap(), + SourceFilter::default(), + ); + assert_eq!( + port_state.mcast_subscriptions.get(&group2).unwrap().mode(), + FilterMode::Include, + ); + } + + // Verify mock OPTE state matches. + { + let opte = handle.state().lock().unwrap(); + let port = opte.ports.get("opte0").unwrap(); + assert_eq!(port.mcast_subscriptions.len(), 2); + assert!(port.mcast_subscriptions.contains_key(&group1)); + assert!(port.mcast_subscriptions.contains_key(&group2)); + } + + // Remove group2, keep group1. + manager + .multicast_groups_ensure( + nic_id, + nic_kind, + &[MulticastGroupCfg { group_ip: group1, sources: vec![] }], + ) + .unwrap(); + + { + let ports = manager.inner.ports.lock().unwrap(); + let port_state = ports.get(&(nic_id, nic_kind)).unwrap(); + assert_eq!(port_state.mcast_subscriptions.len(), 1); + assert!(port_state.mcast_subscriptions.contains_key(&group1)); + assert!(!port_state.mcast_subscriptions.contains_key(&group2)); + } + + { + let opte = handle.state().lock().unwrap(); + let port = opte.ports.get("opte0").unwrap(); + assert_eq!(port.mcast_subscriptions.len(), 1); + assert!(!port.mcast_subscriptions.contains_key(&group2)); + } + + // Remove all groups. + manager.multicast_groups_ensure(nic_id, nic_kind, &[]).unwrap(); + + { + let ports = manager.inner.ports.lock().unwrap(); + let port_state = ports.get(&(nic_id, nic_kind)).unwrap(); + assert!(port_state.mcast_subscriptions.is_empty()); + } + + { + let opte = handle.state().lock().unwrap(); + let port = opte.ports.get("opte0").unwrap(); + assert!(port.mcast_subscriptions.is_empty()); + } + + logctx.cleanup_successful(); + } + + #[test] + fn multicast_port_deletion_cleanup() { + let logctx = test_setup_log("multicast_port_deletion_cleanup"); + let manager = + PortManager::new(logctx.log.clone(), Ipv6Addr::LOCALHOST, &[]); + + let handle = Handle::new().unwrap(); + handle.set_xde_underlay("underlay0", "underlay1").unwrap(); + + let nic_id = Uuid::new_v4(); + let nic_kind = NetworkInterfaceKind::Service { id: Uuid::new_v4() }; + + let private_subnet = + Ipv4Net::new(Ipv4Addr::new(172, 20, 0, 0), 24).unwrap(); + let private_ip = Ipv4Addr::new(172, 20, 0, 4); + let ip_config = + PrivateIpConfig::new_ipv4(private_ip, private_subnet).unwrap(); + let public_ip = Ipv4Addr::new(10, 0, 0, 4); + + let external_ips = ExternalIpConfig { + v4: Some(ExternalIpv4Config { + source_nat: Some( + SourceNatConfigV4::new(public_ip, 0, MAX_PORT).unwrap(), + ), + ..Default::default() + }), + v6: None, + }; + + let (_port, ticket) = manager + .create_port(PortCreateParams { + nic: &NetworkInterface { + id: nic_id, + kind: nic_kind, + name: "opte0".parse().unwrap(), + ip_config, + mac: MacAddr(MacAddr6::new( + 0xa8, 0x40, 0x25, 0x00, 0x00, 0x01, + )), + vni: Vni::SERVICES_VNI, + primary: true, + slot: 0, + }, + external_ips: &external_ips, + firewall_rules: &[], + dhcp_config: DhcpCfg { + hostname: None, + host_domain: None, + domain_search_list: Vec::new(), + dns4_servers: Vec::new(), + dns6_servers: Vec::new(), + }, + attached_subnets: vec![], + multicast_groups: &[], + }) + 
.unwrap();
+
+        let group1: IpAddr = "239.2.2.1".parse().unwrap();
+
+        // Subscribe to a multicast group.
+        manager
+            .multicast_groups_ensure(
+                nic_id,
+                nic_kind,
+                &[MulticastGroupCfg { group_ip: group1, sources: vec![] }],
+            )
+            .unwrap();
+
+        // Verify subscription tracking exists.
+        {
+            let ports = manager.inner.ports.lock().unwrap();
+            let port_state = ports.get(&(nic_id, nic_kind)).unwrap();
+            assert_eq!(
+                port_state.mcast_subscriptions.len(),
+                1,
+                "subscription tracking should exist before release"
+            );
+        }
+
+        // Release the port ticket, which should clean up the port
+        // and its subscription tracking.
+        ticket.release();
+
+        // Verify port is removed entirely.
+        {
+            let ports = manager.inner.ports.lock().unwrap();
+            assert!(
+                !ports.contains_key(&(nic_id, nic_kind)),
+                "port should be removed after release"
+            );
+        }
+
+        logctx.cleanup_successful();
+    }
+
+    #[test]
+    fn multicast_ensure_missing_port_error() {
+        let logctx = test_setup_log("multicast_ensure_missing_port_error");
+        let manager =
+            PortManager::new(logctx.log.clone(), Ipv6Addr::LOCALHOST, &[]);
+
+        let nic_id = Uuid::new_v4();
+        let nic_kind = NetworkInterfaceKind::Instance { id: Uuid::new_v4() };
+        let group: IpAddr = "239.3.3.1".parse().unwrap();
+
+        let res = manager.multicast_groups_ensure(
+            nic_id,
+            nic_kind,
+            &[MulticastGroupCfg { group_ip: group, sources: vec![] }],
+        );
+
+        match res {
+            Err(Error::MulticastUpdateMissingPort(id, kind)) => {
+                assert_eq!(id, nic_id);
+                assert_eq!(kind, nic_kind);
+            }
+            other => {
+                panic!("expected MulticastUpdateMissingPort, got {other:?}")
+            }
+        }
+
+        logctx.cleanup_successful();
+    }
+
+    /// Verify that `set_mcast_m2p` programs underlay NIC multicast MAC
+    /// filters via UDP socket join and that `clear_mcast_m2p` removes them.
+    ///
+    /// Asserts both the in-process `mcast_underlay_sockets` bookkeeping
+    /// and kernel-level IPv6 group membership on the underlay interface
+    /// (observable via `netstat -g -f inet6`). Kernel-level verification
+    /// is what ensures `join_multicast_v6` actually reached IP and, on
+    /// actual hardware, would drive `mac_multicast_add` to program the
+    /// NIC filter.
+    #[cfg(target_os = "illumos")]
+    #[test]
+    fn underlay_multicast_mac_filter_lifecycle() {
+        let logctx = test_setup_log("underlay_multicast_mac_filter_lifecycle");
+        let nics = vec![AddrObject::new_control(LOOPBACK_IF).unwrap()];
+        let manager =
+            PortManager::new(logctx.log.clone(), Ipv6Addr::LOCALHOST, &nics);
+
+        let handle = Handle::new().unwrap();
+        handle.set_xde_underlay("underlay0", "underlay1").unwrap();
+
+        // ff04::1 is within the underlay multicast subnet.
+        let underlay: Ipv6Addr = "ff04::1".parse().unwrap();
+        let group: IpAddr = "239.10.10.1".parse().unwrap();
+
+        let req =
+            sled_agent_types::multicast::Mcast2PhysMapping { group, underlay };
+
+        // Preflight check: the group must not already be joined on the
+        // underlay interface.
+        assert!(
+            !netstat_v6_has_membership(LOOPBACK_IF, &underlay),
+            "unexpected pre-existing membership {underlay} on {LOOPBACK_IF}",
+        );
+
+        // Set M2P -> socket should be created and kernel should show join.
+        manager.set_mcast_m2p(&req).unwrap();
+        {
+            let sockets = manager.inner.mcast_underlay_sockets.lock().unwrap();
+            assert!(
+                sockets.contains_key(&underlay),
+                "Socket should exist after set_mcast_m2p"
+            );
+        }
+        poll_v6_membership(LOOPBACK_IF, &underlay, true);
+
+        // Setting the same M2P again should be idempotent.
+ manager.set_mcast_m2p(&req).unwrap(); + { + let sockets = manager.inner.mcast_underlay_sockets.lock().unwrap(); + assert_eq!( + sockets.len(), + 1, + "Duplicate set_mcast_m2p should not create extra sockets" + ); + } + assert!( + netstat_v6_has_membership(LOOPBACK_IF, &underlay), + "membership should still be present after idempotent re-set" + ); + + // Clear M2P -> socket should be removed and kernel membership gone. + let clear_req = + sled_agent_types::multicast::ClearMcast2Phys { group, underlay }; + manager.clear_mcast_m2p(&clear_req).unwrap(); + { + let sockets = manager.inner.mcast_underlay_sockets.lock().unwrap(); + assert!( + !sockets.contains_key(&underlay), + "Socket should be removed after clear_mcast_m2p" + ); + } + poll_v6_membership(LOOPBACK_IF, &underlay, false); + + logctx.cleanup_successful(); + } + + /// Verify that rehydration at startup reopens filter sockets for + /// M2P mappings that survived in mock xde state across a + /// PortManager drop (simulating sled-agent restart). + #[cfg(target_os = "illumos")] + #[test] + fn underlay_multicast_mac_filter_rehydration() { + let logctx = + test_setup_log("underlay_multicast_mac_filter_rehydration"); + let nics = vec![AddrObject::new_control(LOOPBACK_IF).unwrap()]; + + let handle = Handle::new().unwrap(); + handle.set_xde_underlay("underlay0", "underlay1").unwrap(); + + // Use a distinct underlay address to avoid collisions with + // other tests sharing the static OPTE_STATE. + let underlay: Ipv6Addr = "ff04::99".parse().unwrap(); + let group: IpAddr = "239.10.10.99".parse().unwrap(); + + let req = + sled_agent_types::multicast::Mcast2PhysMapping { group, underlay }; + + // Phase 1: first PortManager sets M2P (populates mock xde state). + { + let mgr1 = PortManager::new( + logctx.log.clone(), + Ipv6Addr::LOCALHOST, + &nics, + ); + mgr1.set_mcast_m2p(&req).unwrap(); + { + let sockets = mgr1.inner.mcast_underlay_sockets.lock().unwrap(); + assert!(sockets.contains_key(&underlay)); + } + poll_v6_membership(LOOPBACK_IF, &underlay, true); + } + + // mgr1 dropped: socket closed, kernel membership removed. + poll_v6_membership(LOOPBACK_IF, &underlay, false); + + // Mock xde state (static) still has the M2P entry, simulating + // xde kernel state surviving a sled-agent restart. + { + let hdl = Handle::new().unwrap(); + let dump = hdl.dump_m2p().unwrap(); + assert!( + !dump.ip4.is_empty() || !dump.ip6.is_empty(), + "Mock xde should still hold the M2P mapping after drop" + ); + } + + // Phase 2: new PortManager rehydrates from surviving xde state. + let mgr2 = + PortManager::new(logctx.log.clone(), Ipv6Addr::LOCALHOST, &nics); + { + let sockets = mgr2.inner.mcast_underlay_sockets.lock().unwrap(); + assert!( + sockets.contains_key(&underlay), + "Rehydration should reopen socket for surviving M2P" + ); + } + poll_v6_membership(LOOPBACK_IF, &underlay, true); + + // Cleanup and clear the M2P. + let clear_req = + sled_agent_types::multicast::ClearMcast2Phys { group, underlay }; + mgr2.clear_mcast_m2p(&clear_req).unwrap(); + poll_v6_membership(LOOPBACK_IF, &underlay, false); + + logctx.cleanup_successful(); + } + + /// Verify that no sockets are created when no underlay NICs are + /// configured (test/sim mode). 
+    #[test]
+    fn underlay_multicast_mac_filter_no_nics() {
+        let logctx = test_setup_log("underlay_multicast_mac_filter_no_nics");
+        let manager =
+            PortManager::new(logctx.log.clone(), Ipv6Addr::LOCALHOST, &[]);
+
+        let handle = Handle::new().unwrap();
+        handle.set_xde_underlay("underlay0", "underlay1").unwrap();
+
+        let underlay: Ipv6Addr = "ff04::2".parse().unwrap();
+        let group: IpAddr = "239.10.10.2".parse().unwrap();
+
+        let req =
+            sled_agent_types::multicast::Mcast2PhysMapping { group, underlay };
+
+        manager.set_mcast_m2p(&req).unwrap();
+        {
+            let sockets = manager.inner.mcast_underlay_sockets.lock().unwrap();
+            assert!(
+                sockets.is_empty(),
+                "No sockets should be created without underlay NICs"
+            );
+        }
+
+        logctx.cleanup_successful();
+    }
 }
diff --git a/internal-dns/resolver/src/resolver.rs b/internal-dns/resolver/src/resolver.rs
index 9ce3d6aa48d..a69e48b1cb3 100644
--- a/internal-dns/resolver/src/resolver.rs
+++ b/internal-dns/resolver/src/resolver.rs
@@ -345,6 +345,75 @@ impl Resolver {
         }
     }

+    /// Returns the SRV targets paired with their resolved IPv6 sockets.
+    ///
+    /// Like [`Resolver::lookup_all_socket_v6`], but preserves the SRV
+    /// target name so callers can correlate sockets back to their
+    /// source. Per-target IPv6 lookups are best-effort: failures for a
+    /// given target are logged and the entry is dropped from the result.
+    pub async fn lookup_all_socket_v6_by_target(
+        &self,
+        service: ServiceName,
+    ) -> Result<Vec<(String, SocketAddrV6)>, ResolveError> {
+        let name = service.srv_name();
+        trace!(self.log, "lookup_all_socket_v6_by_target srv"; "dns_name" => &name);
+        let response = self.resolver.srv_lookup(&name).await?;
+        debug!(
+            self.log,
+            "lookup_all_socket_v6_by_target srv";
+            "dns_name" => &name,
+            "response" => ?response
+        );
+
+        let futs = std::iter::repeat((self.log.clone(), self.resolver.clone()))
+            .zip(response.into_iter())
+            .map(|((log, resolver), srv)| async move {
+                let target = srv.target().to_string();
+                let port = srv.port();
+                trace!(
+                    log,
+                    "lookup_all_socket_v6_by_target: looking up SRV target";
+                    "name" => &target,
+                );
+                resolver
+                    .ipv6_lookup(target.clone())
+                    .await
+                    .map(|ips| (target.clone(), ips, port))
+                    .map_err(|err| (target, err))
+            });
+
+        let log = self.log.clone();
+        let pairs: Vec<(String, SocketAddrV6)> = futures::future::join_all(
+            futs,
+        )
+        .await
+        .into_iter()
+        .flat_map(move |res| match res {
+            Ok((target, ips, port)) => ips
+                .into_iter()
+                .map(|ip| {
+                    (target.clone(), SocketAddrV6::new(ip.into(), port, 0, 0))
+                })
+                .collect::<Vec<_>>(),
+            Err((target, err)) => {
+                error!(
+                    log,
+                    "lookup_all_socket_v6_by_target: failed looking up target";
+                    "name" => %target,
+                    "error" => ?err,
+                );
+                Vec::new()
+            }
+        })
+        .collect();
+
+        if pairs.is_empty() {
+            Err(ResolveError::NotFound(service))
+        } else {
+            Ok(pairs)
+        }
+    }
+
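For illustration, a consumer that wants one client per switch zone can key on the returned target name so multiple AAAA records for the same zone collapse to one entry. This is a sketch under assumptions, not code from this change: `ddm_clients` is hypothetical, and the crate paths for `Resolver`/`ServiceName` are assumed from this repo's layout.

    use internal_dns_resolver::Resolver;
    use internal_dns_types::names::ServiceName;
    use std::collections::BTreeMap;
    use std::net::SocketAddrV6;

    async fn ddm_clients(
        resolver: &Resolver,
    ) -> anyhow::Result<BTreeMap<String, SocketAddrV6>> {
        let pairs =
            resolver.lookup_all_socket_v6_by_target(ServiceName::Ddm).await?;
        let mut by_zone = BTreeMap::new();
        for (target, sockaddr) in pairs {
            // `target` is the SRV target hostname (one per switch zone);
            // `sockaddr` is one resolved [IPv6]:port for that target.
            // Keeping the first socket per target dedupes repeated records.
            by_zone.entry(target).or_insert(sockaddr);
        }
        Ok(by_zone)
    }

     // Returns an iterator of SocketAddrs for the specified SRV name.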
// // Acts on a raw string for compatibility with the reqwest::dns::Resolve diff --git a/internal-dns/types/src/config.rs b/internal-dns/types/src/config.rs index d5bef144343..f9d51051366 100644 --- a/internal-dns/types/src/config.rs +++ b/internal-dns/types/src/config.rs @@ -399,6 +399,7 @@ impl DnsConfigBuilder { dendrite_port: u16, mgs_port: u16, mgd_port: u16, + ddm_port: u16, ) -> anyhow::Result<()> { let zone = self.host_dendrite(sled_id, switch_zone_ip)?; self.service_backend_zone(ServiceName::Dendrite, &zone, dendrite_port)?; @@ -407,7 +408,8 @@ impl DnsConfigBuilder { &zone, mgs_port, )?; - self.service_backend_zone(ServiceName::Mgd, &zone, mgd_port) + self.service_backend_zone(ServiceName::Mgd, &zone, mgd_port)?; + self.service_backend_zone(ServiceName::Ddm, &zone, ddm_port) } /// Higher-level shorthand for adding a Nexus zone with both its internal @@ -779,6 +781,8 @@ mod test { "_oximeter-reader._tcp", ); assert_eq!(ServiceName::Dendrite.dns_name(), "_dendrite._tcp",); + assert_eq!(ServiceName::Mgd.dns_name(), "_mgd._tcp",); + assert_eq!(ServiceName::Ddm.dns_name(), "_ddm._tcp",); assert_eq!( ServiceName::CruciblePantry.dns_name(), "_crucible-pantry._tcp", @@ -796,6 +800,33 @@ mod test { ); } + #[test] + fn host_zone_switch_publishes_all_services() { + let sled_uuid: SledUuid = + "001de000-51ed-4000-8000-000000000001".parse().unwrap(); + let switch_zone_ip = Ipv6Addr::new(0, 0, 0, 0, 0, 0, 0, 1); + + let mut b = DnsConfigBuilder::new(); + b.host_zone_switch(sled_uuid, switch_zone_ip, 1, 2, 3, 4).unwrap(); + let config = b.build_full_config_for_initial_generation(); + + let services: std::collections::BTreeSet<_> = config + .zones + .iter() + .flat_map(|z| z.records.iter()) + .map(|(name, _)| name.as_str()) + .collect(); + for expected in + ["_dendrite._tcp", "_mgs._tcp", "_mgd._tcp", "_ddm._tcp"] + { + assert!( + services.contains(expected), + "expected {expected} in published switch-zone services; \ + got {services:?}" + ); + } + } + #[test] fn display_hosts() { let sled_uuid = SledUuid::nil(); diff --git a/internal-dns/types/src/names.rs b/internal-dns/types/src/names.rs index 73b2439e48e..105d0222f3c 100644 --- a/internal-dns/types/src/names.rs +++ b/internal-dns/types/src/names.rs @@ -75,6 +75,7 @@ pub enum ServiceName { BoundaryNtp, InternalNtp, Mgd, + Ddm, } impl ServiceName { @@ -116,6 +117,7 @@ impl ServiceName { ServiceName::BoundaryNtp => "boundary-ntp", ServiceName::InternalNtp => "internal-ntp", ServiceName::Mgd => "mgd", + ServiceName::Ddm => "ddm", } } @@ -144,7 +146,8 @@ impl ServiceName { | ServiceName::CruciblePantry | ServiceName::BoundaryNtp | ServiceName::InternalNtp - | ServiceName::Mgd => { + | ServiceName::Mgd + | ServiceName::Ddm => { format!("_{}._tcp", self.service_kind()) } ServiceName::SledAgent(id) => { diff --git a/nexus-config/src/nexus_config.rs b/nexus-config/src/nexus_config.rs index c877645a239..e3fb881f972 100644 --- a/nexus-config/src/nexus_config.rs +++ b/nexus-config/src/nexus_config.rs @@ -923,18 +923,6 @@ pub struct MulticastGroupReconcilerConfig { #[serde_as(as = "DurationSeconds")] pub period_secs: Duration, - /// TTL (in seconds) for the sled-to-switch-port mapping cache. - /// - /// This cache maps sled IDs to their physical switch ports. It changes when - /// sleds are added/removed or inventory is updated. 
- /// - /// Default: 3600 seconds (1 hour) - #[serde( - default = "MulticastGroupReconcilerConfig::default_sled_cache_ttl_secs" - )] - #[serde_as(as = "DurationSeconds")] - pub sled_cache_ttl_secs: Duration, - /// TTL (in seconds) for the backplane hardware topology cache. /// /// This cache stores the hardware platform's port mapping. It effectively @@ -949,10 +937,6 @@ pub struct MulticastGroupReconcilerConfig { } impl MulticastGroupReconcilerConfig { - const fn default_sled_cache_ttl_secs() -> Duration { - Duration::from_secs(3600) // 1 hour - } - const fn default_backplane_cache_ttl_secs() -> Duration { Duration::from_secs(86400) // 24 hours } @@ -962,7 +946,6 @@ impl Default for MulticastGroupReconcilerConfig { fn default() -> Self { Self { period_secs: Duration::from_secs(60), - sled_cache_ttl_secs: Self::default_sled_cache_ttl_secs(), backplane_cache_ttl_secs: Self::default_backplane_cache_ttl_secs(), } } @@ -1585,7 +1568,6 @@ mod test { }, multicast_reconciler: MulticastGroupReconcilerConfig { period_secs: Duration::from_secs(60), - sled_cache_ttl_secs: MulticastGroupReconcilerConfig::default_sled_cache_ttl_secs(), backplane_cache_ttl_secs: MulticastGroupReconcilerConfig::default_backplane_cache_ttl_secs(), }, trust_quorum: TrustQuorumConfig { diff --git a/nexus/Cargo.toml b/nexus/Cargo.toml index ba6a93b22bf..688dded79c1 100644 --- a/nexus/Cargo.toml +++ b/nexus/Cargo.toml @@ -150,6 +150,7 @@ nexus-reconfigurator-rendezvous.workspace = true nexus-types.workspace = true nexus-types-versions.workspace = true omicron-common.workspace = true +omicron-ddm-admin-client.workspace = true omicron-passwords.workspace = true oxide-tokio-rt.workspace = true oximeter.workspace = true diff --git a/nexus/db-queries/src/db/datastore/multicast/groups.rs b/nexus/db-queries/src/db/datastore/multicast/groups.rs index 0fb1b6e1e2b..4f78463b849 100644 --- a/nexus/db-queries/src/db/datastore/multicast/groups.rs +++ b/nexus/db-queries/src/db/datastore/multicast/groups.rs @@ -408,7 +408,7 @@ impl DataStore { use nexus_db_schema::schema::multicast_group_member; let now = Utc::now(); - // Atomic: only mark `Deleting` if no active members exist. + // Atomically mark "Deleting" only if no active members exist. 
let rows = diesel::update(multicast_group::table) .filter(multicast_group::id.eq(group_id.into_untyped_uuid())) .filter( diff --git a/nexus/db-queries/src/db/datastore/multicast/members.rs b/nexus/db-queries/src/db/datastore/multicast/members.rs index 1c2d25a703b..e95e924a6cf 100644 --- a/nexus/db-queries/src/db/datastore/multicast/members.rs +++ b/nexus/db-queries/src/db/datastore/multicast/members.rs @@ -2285,7 +2285,7 @@ mod tests { assert_eq!(unchanged_member.state, MulticastGroupMemberState::Joined); assert_eq!(unchanged_member.time_modified, before_modification); - // Test starting instance that has no multicast memberships (should be no-op) + // Test starting instance that has no multicast memberships (should be noop) let non_member_instance = InstanceUuid::new_v4(); datastore .multicast_group_member_set_instance_sled( @@ -2450,7 +2450,7 @@ mod tests { .await .expect("Should handle duplicate mark for removal"); - // Test marking instance with no memberships (should be no-op) + // Test marking instance with no memberships (should be noop) let non_member_instance = InstanceUuid::new_v4(); datastore .multicast_group_members_mark_for_removal( @@ -2668,7 +2668,7 @@ mod tests { .expect("Should list group2 members"); assert_eq!(group2_members.len(), 2); - // Test deleting from group with no members (should be no-op) + // Test deleting from group with no members (should be noop) datastore .multicast_group_members_delete_by_group( &opctx, @@ -2677,7 +2677,7 @@ mod tests { .await .expect("Should handle deleting from empty group"); - // Test deleting from nonexistent group (should be no-op) + // Test deleting from nonexistent group (should be noop) let fake_group_id = Uuid::new_v4(); datastore .multicast_group_members_delete_by_group( diff --git a/nexus/db-queries/src/db/datastore/multicast/ops/member_attach.rs b/nexus/db-queries/src/db/datastore/multicast/ops/member_attach.rs index 254a2485bd7..69474ac8055 100644 --- a/nexus/db-queries/src/db/datastore/multicast/ops/member_attach.rs +++ b/nexus/db-queries/src/db/datastore/multicast/ops/member_attach.rs @@ -137,7 +137,7 @@ impl From for external::Error { /// - **Reactivate**: Member in "Left" (time_deleted=NULL) → transition to /// "Joining", update `sled_id` /// - **Insert new**: Member in "Left" (time_deleted set) → create new row -/// - **Idempotent**: Member already "Joining" or "Joined" → no-op +/// - **Idempotent**: Member already "Joining" or "Joined" → noop /// /// Atomically validates group and instance exist, retrieves instance's current /// sled_id, and performs member upsert. Returns member ID. 
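The upsert described in the doc comment above is effectively a three-way decision over the member's current row. A compact sketch of that dispatch, where `MemberRow`, `MemberState`, and `AttachDecision` are illustrative stand-ins rather than the datastore's actual types:

    use chrono::{DateTime, Utc};

    enum MemberState { Joining, Joined, Left }

    struct MemberRow {
        state: MemberState,
        time_deleted: Option<DateTime<Utc>>,
    }

    enum AttachDecision {
        Reactivate, // "Left" with time_deleted=NULL: back to "Joining", refresh sled_id
        InsertNew,  // no row, or "Left" with time_deleted set (tombstoned)
        Noop,       // already "Joining"/"Joined": attach is idempotent
    }

    fn decide(existing: Option<&MemberRow>) -> AttachDecision {
        match existing {
            None => AttachDecision::InsertNew,
            Some(row) => match (&row.state, row.time_deleted) {
                (MemberState::Left, None) => AttachDecision::Reactivate,
                (MemberState::Left, Some(_)) => AttachDecision::InsertNew,
                _ => AttachDecision::Noop,
            },
        }
    }

In the real query this decision is made atomically inside the database, alongside the group/instance validation, rather than in Rust.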
diff --git a/nexus/examples/config-second.toml b/nexus/examples/config-second.toml
index 3c1a1a3700a..11c01c98f1f 100644
--- a/nexus/examples/config-second.toml
+++ b/nexus/examples/config-second.toml
@@ -187,9 +187,6 @@ fm.sitrep_gc_period_secs = 600
 fm.rendezvous_period_secs = 300
 probe_distributor.period_secs = 60
 multicast_reconciler.period_secs = 60
-# TTL for sled-to-backplane-port mapping cache
-# Default: 3600 seconds (1 hour) - detects new sleds and inventory changes
-# multicast_reconciler.sled_cache_ttl_secs = 3600
 # TTL for backplane topology cache (static platform configuration)
 # Default: 86400 seconds (24 hours) - refreshed on-demand when validation fails
 # multicast_reconciler.backplane_cache_ttl_secs = 86400
diff --git a/nexus/examples/config.toml b/nexus/examples/config.toml
index b4026bfb1de..530b4c67f59 100644
--- a/nexus/examples/config.toml
+++ b/nexus/examples/config.toml
@@ -171,9 +171,6 @@ fm.sitrep_gc_period_secs = 600
 fm.rendezvous_period_secs = 300
 probe_distributor.period_secs = 60
 multicast_reconciler.period_secs = 60
-# TTL for sled-to-backplane-port mapping cache
-# Default: 3600 seconds (1 hour) - detects new sleds and inventory changes
-# multicast_reconciler.sled_cache_ttl_secs = 3600
 # TTL for backplane topology cache (static platform configuration)
 # Default: 86400 seconds (24 hours) - refreshed on-demand when validation fails
 # multicast_reconciler.backplane_cache_ttl_secs = 86400
diff --git a/nexus/mgs-updates/src/test_util/host_phase_2_test_state.rs b/nexus/mgs-updates/src/test_util/host_phase_2_test_state.rs
index f228f68d961..bce88c3eb32 100644
--- a/nexus/mgs-updates/src/test_util/host_phase_2_test_state.rs
+++ b/nexus/mgs-updates/src/test_util/host_phase_2_test_state.rs
@@ -224,7 +224,8 @@ mod api_impl {
     use sled_agent_types::firewall_rules::VpcFirewallRulesEnsureBody;
     use sled_agent_types::instance::InstanceEnsureBody;
     use sled_agent_types::instance::InstanceExternalIpBody;
-    use sled_agent_types::instance::InstanceMulticastBody;
+    use sled_agent_types::instance::InstanceMulticastMembership;
+    use sled_agent_types::instance::InstancePathParam;
     use sled_agent_types::instance::SledVmmState;
     use sled_agent_types::instance::VmmIssueDiskSnapshotRequestBody;
     use sled_agent_types::instance::VmmIssueDiskSnapshotRequestPathParam;
@@ -249,6 +250,10 @@ mod api_impl {
     use sled_agent_types::inventory::SledCpuFamily;
     use sled_agent_types::inventory::SledRole;
     use sled_agent_types::inventory::SvcsEnabledNotOnlineResult;
+    use sled_agent_types::multicast::{
+        ClearMcast2Phys, ClearMcastForwarding, Mcast2PhysMapping,
+        McastForwardingEntry,
+    };
     use sled_agent_types::probes::ProbeSet;
     use sled_agent_types::sled::AddSledRequest;
     use sled_agent_types::support_bundle::RangeRequestHeaders;
@@ -268,6 +273,7 @@ mod api_impl {
     use sled_agent_types::zone_bundle::ZoneBundleMetadata;
     use sled_agent_types::zone_bundle::ZonePathParam;
     use sled_agent_types_versions::v1;
+    use sled_agent_types_versions::v7;
     use sled_agent_types_versions::v20;
     use sled_agent_types_versions::v25;
     use sled_agent_types_versions::v26;
@@ -625,48 +631,36 @@ mod api_impl {
         unimplemented!()
     }

-    async fn vmm_join_multicast_group(
+    async fn instance_join_multicast_group(
+        _rqctx: RequestContext<Self::Context>,
+        _path_params: Path<InstancePathParam>,
+        _body: TypedBody<InstanceMulticastMembership>,
+    ) -> Result<HttpResponseUpdatedNoContent, HttpError> {
+        unimplemented!()
+    }
+
+    async fn instance_leave_multicast_group(
+        _rqctx: RequestContext<Self::Context>,
+        _path_params: Path<InstancePathParam>,
+        _body: TypedBody<InstanceMulticastMembership>,
+    ) -> Result<HttpResponseUpdatedNoContent, HttpError> {
+        unimplemented!()
+    }
+
+    async fn vmm_join_multicast_group_v7(
         _rqctx: RequestContext<Self::Context>,
        _path_params: Path<VmmPathParam>,
-        body: TypedBody<InstanceMulticastBody>,
+        _body: TypedBody<v7::InstanceMulticastBody>,
     ) -> Result<HttpResponseUpdatedNoContent, HttpError> {
-        let body_args = body.into_inner();
-        match body_args {
-            InstanceMulticastBody::Join(_) => {
-                // MGS test utility - just return success for test compatibility
-                Ok(HttpResponseUpdatedNoContent())
-            }
-            InstanceMulticastBody::Leave(_) => {
-                // This endpoint is for joining - reject leave operations
-                Err(HttpError::for_bad_request(
-                    None,
-                    "Join endpoint cannot process Leave operations"
-                        .to_string(),
-                ))
-            }
-        }
-    }
-
-    async fn vmm_leave_multicast_group(
+        unimplemented!()
+    }
+
+    async fn vmm_leave_multicast_group_v7(
         _rqctx: RequestContext<Self::Context>,
         _path_params: Path<VmmPathParam>,
-        body: TypedBody<InstanceMulticastBody>,
+        _body: TypedBody<v7::InstanceMulticastBody>,
     ) -> Result<HttpResponseUpdatedNoContent, HttpError> {
-        let body_args = body.into_inner();
-        match body_args {
-            InstanceMulticastBody::Leave(_) => {
-                // MGS test utility - just return success for test compatibility
-                Ok(HttpResponseUpdatedNoContent())
-            }
-            InstanceMulticastBody::Join(_) => {
-                // This endpoint is for leaving - reject join operations
-                Err(HttpError::for_bad_request(
-                    None,
-                    "Leave endpoint cannot process Join operations"
-                        .to_string(),
-                ))
-            }
-        }
+        unimplemented!()
     }

     async fn disk_put(
@@ -757,6 +751,47 @@ mod api_impl {
         unimplemented!()
     }

+    async fn set_mcast_m2p(
+        _rqctx: RequestContext<Self::Context>,
+        _body: TypedBody<Mcast2PhysMapping>,
+    ) -> Result<HttpResponseUpdatedNoContent, HttpError> {
+        unimplemented!()
+    }
+
+    async fn clear_mcast_m2p(
+        _rqctx: RequestContext<Self::Context>,
+        _body: TypedBody<ClearMcast2Phys>,
+    ) -> Result<HttpResponseUpdatedNoContent, HttpError> {
+        unimplemented!()
+    }
+
+    async fn set_mcast_fwd(
+        _rqctx: RequestContext<Self::Context>,
+        _body: TypedBody<McastForwardingEntry>,
+    ) -> Result<HttpResponseUpdatedNoContent, HttpError> {
+        unimplemented!()
+    }
+
+    async fn clear_mcast_fwd(
+        _rqctx: RequestContext<Self::Context>,
+        _body: TypedBody<ClearMcastForwarding>,
+    ) -> Result<HttpResponseUpdatedNoContent, HttpError> {
+        unimplemented!()
+    }
+
+    async fn list_mcast_m2p(
+        _rqctx: RequestContext<Self::Context>,
+    ) -> Result<HttpResponseOk<Vec<Mcast2PhysMapping>>, HttpError> {
+        unimplemented!()
+    }
+
+    async fn list_mcast_fwd(
+        _rqctx: RequestContext<Self::Context>,
+    ) -> Result<HttpResponseOk<Vec<McastForwardingEntry>>, HttpError>
+    {
+        unimplemented!()
+    }
+
     async fn uplink_ensure(
         _rqctx: RequestContext<Self::Context>,
         _body: TypedBody,
diff --git a/nexus/reconfigurator/execution/src/test_utils.rs b/nexus/reconfigurator/execution/src/test_utils.rs
index cd46adacd0b..fdb17289225 100644
--- a/nexus/reconfigurator/execution/src/test_utils.rs
+++ b/nexus/reconfigurator/execution/src/test_utils.rs
@@ -113,10 +113,12 @@ pub fn overridables_for_test(
         let dendrite_port =
             cptestctx.dendrite.read().unwrap().get(&switch_slot).unwrap().port;
         let mgd_port = cptestctx.mgd.get(&switch_slot).unwrap().port;
+        let ddm_port = cptestctx.ddm.get(&switch_slot).unwrap().port;
         overrides.override_switch_zone_ip(sled_id, ip);
         overrides.override_dendrite_port(sled_id, dendrite_port);
         overrides.override_mgs_port(sled_id, mgs_port);
         overrides.override_mgd_port(sled_id, mgd_port);
+        overrides.override_ddm_port(sled_id, ddm_port);
     }
     overrides
 }
diff --git a/nexus/reconfigurator/planning/src/example.rs b/nexus/reconfigurator/planning/src/example.rs
index d2f5b129258..3f0a3c1e8fa 100644
--- a/nexus/reconfigurator/planning/src/example.rs
+++ b/nexus/reconfigurator/planning/src/example.rs
@@ -1844,7 +1844,8 @@ mod tests {
                 | ServiceName::RepoDepot
                 | ServiceName::ManagementGatewayService
                 | ServiceName::Dendrite
-                | ServiceName::Mgd => {
+                | ServiceName::Mgd
+                | ServiceName::Ddm => {
                     out.insert(service, Ok(()));
                 }
                 // InternalNtp is too large to fit in a single DNS packet and
diff --git a/nexus/src/app/background/init.rs b/nexus/src/app/background/init.rs
index 5f7e9a07da8..58228b81123 100644
--- a/nexus/src/app/background/init.rs
+++ b/nexus/src/app/background/init.rs
@@ -1095,12 +1095,14 @@ impl BackgroundTasksInitializer {
datastore.clone(), resolver.clone(), sagas.clone(), - inventory_load_watcher.clone(), args.multicast_enabled, - config.multicast_reconciler.sled_cache_ttl_secs, config.multicast_reconciler.backplane_cache_ttl_secs, )), opctx: opctx.child(BTreeMap::new()), + // Wake the reconciler whenever the inventory loader publishes a + // fresh collection so newly-discovered sleds become resolvable + // (DDM-peer fallback / inventory mapping) within the same tick + // instead of waiting for the periodic timer. watchers: vec![Box::new(inventory_load_watcher.clone())], activator: task_multicast_reconciler, }); diff --git a/nexus/src/app/background/tasks/multicast/groups.rs b/nexus/src/app/background/tasks/multicast/groups.rs index db2c51938a5..a8be531d92a 100644 --- a/nexus/src/app/background/tasks/multicast/groups.rs +++ b/nexus/src/app/background/tasks/multicast/groups.rs @@ -19,7 +19,13 @@ //! ## Operations Handled //! - **"Creating" state**: Initiate DPD "ensure" to apply configuration //! - **"Active" state**: Detect DPD drift and sync directly -//! - **"Deleting" state**: Switch cleanup and database removal +//! - **MRIB programming**: For Active groups, reconcile switch MRIB +//! routes against a per-pass snapshot (see [`super::mrib`]) +//! - **"Deleting" state**: Switch cleanup, MRIB route withdrawal, and +//! database removal +//! - **M2P/forwarding propagation**: Convergent per-sled propagation of +//! M2P mappings and forwarding entries via sled-agent after member +//! state changes //! - **Extensible processing**: Support for different group types //! //! # Group State Transition Matrix @@ -75,9 +81,11 @@ use anyhow::Context; use chrono::Utc; +use futures::future::try_join_all; use futures::stream::{self, StreamExt}; use slog::{debug, error, info, trace, warn}; +use dpd_client::types::IpSrc; use nexus_db_model::{MulticastGroup, MulticastGroupState, SqlU8}; use nexus_db_queries::context::OpContext; use nexus_db_queries::db::datastore::multicast::EnsureUnderlayResult; @@ -87,12 +95,15 @@ use omicron_common::address::is_ssm_address; use omicron_common::api::external::{self, DataPageParams}; use omicron_uuid_kinds::{GenericUuid, MulticastGroupUuid}; -use super::{ - MulticastGroupReconciler, StateTransition, map_external_to_underlay_ip, -}; +use super::{MulticastGroupReconciler, StateTransition}; use crate::app::multicast::dataplane::{ GroupUpdateParams, MulticastDataplaneClient, }; +use crate::app::multicast::map_external_to_underlay_ip; +use crate::app::multicast::sled::MulticastSledClient; +use crate::app::multicast::switch_zone::{ + MribRouteIndex, MulticastSwitchZoneClient, +}; use crate::app::saga::create_saga_dag; use crate::app::sagas; @@ -100,7 +111,7 @@ use crate::app::sagas; /// /// This grace period avoids racing with in-progress member attachment operations /// that occur immediately after group creation. -const ORPHAN_GROUP_MIN_AGE: chrono::Duration = chrono::Duration::seconds(10); +const ORPHAN_GROUP_MIN_AGE: chrono::TimeDelta = chrono::TimeDelta::seconds(10); /// Check if DPD tag matches the database group's tag. 
/// @@ -130,39 +141,59 @@ fn dpd_state_matches_sources( let dpd_sources = dpd_group.sources.clone(); let group_ip = group.multicast_ip.ip(); - // Expected DPD state based on source filter logic (RFC 4607) - let expected_sources = if is_ssm_address(group_ip) { - Some(&source_filter.specific_sources) + if is_ssm_address(group_ip) { + // SSM: always expect specific sources + match dpd_sources { + None => false, + Some(dpd_srcs) => { + let mut dpd_ips: Vec<_> = dpd_srcs + .into_iter() + .filter_map(|src| match src { + IpSrc::Exact(ip) => Some(ip), + _ => None, + }) + .collect(); + dpd_ips.sort(); + + let mut expected: Vec<_> = + source_filter.specific_sources.iter().copied().collect(); + expected.sort(); + + dpd_ips == expected + } + } } else if source_filter.has_any_source_member { - None + dpd_sources.is_none() } else { - Some(&source_filter.specific_sources) - }; - - match (dpd_sources, expected_sources) { - (None, None) => true, - (Some(_), None) => false, // DPD has sources but shouldn't - (None, Some(_)) => false, // DPD missing sources - (Some(dpd_srcs), Some(expected)) => { - // Extract exact IPs from DPD sources - let mut dpd_ips: Vec<_> = dpd_srcs - .into_iter() - .filter_map(|src| match src { - dpd_client::types::IpSrc::Exact(ip) => Some(ip), - _ => None, - }) - .collect(); - dpd_ips.sort(); - - let mut expected_sorted: Vec<_> = - expected.iter().copied().collect(); - expected_sorted.sort(); - - dpd_ips == expected_sorted + match dpd_sources { + None => source_filter.specific_sources.is_empty(), + Some(dpd_srcs) => { + let mut dpd_ips: Vec<_> = dpd_srcs + .into_iter() + .filter_map(|src| match src { + IpSrc::Exact(ip) => Some(ip), + _ => None, + }) + .collect(); + dpd_ips.sort(); + + let mut expected: Vec<_> = + source_filter.specific_sources.iter().copied().collect(); + expected.sort(); + + dpd_ips == expected + } } } } +/// Switch-side clients threaded through group state processors. +struct GroupReconcileClients<'a> { + dataplane: &'a MulticastDataplaneClient, + sled: &'a MulticastSledClient, + switch_zone: &'a MulticastSwitchZoneClient, +} + /// Trait for processing different types of multicast groups trait GroupStateProcessor { /// Process a group in "Creating" state. @@ -179,7 +210,7 @@ trait GroupStateProcessor { reconciler: &MulticastGroupReconciler, opctx: &OpContext, group: &MulticastGroup, - dataplane_client: &MulticastDataplaneClient, + clients: &GroupReconcileClients<'_>, ) -> Result; /// Process a group in "Active" state (check DPD sync status). @@ -188,7 +219,8 @@ trait GroupStateProcessor { reconciler: &MulticastGroupReconciler, opctx: &OpContext, group: &MulticastGroup, - dataplane_client: &MulticastDataplaneClient, + clients: &GroupReconcileClients<'_>, + mrib_route_index: Option<&MribRouteIndex>, ) -> Result; } @@ -212,23 +244,36 @@ impl GroupStateProcessor for ExternalGroupProcessor { reconciler: &MulticastGroupReconciler, opctx: &OpContext, group: &MulticastGroup, - dataplane_client: &MulticastDataplaneClient, + clients: &GroupReconcileClients<'_>, ) -> Result { reconciler - .handle_deleting_external_group(opctx, group, dataplane_client) + .handle_deleting_external_group( + opctx, + group, + clients.dataplane, + clients.sled, + clients.switch_zone, + ) .await } - /// Handle groups in "Active" state (check DPD sync status). 
async fn process_active( &self, reconciler: &MulticastGroupReconciler, opctx: &OpContext, group: &MulticastGroup, - dataplane_client: &MulticastDataplaneClient, + clients: &GroupReconcileClients<'_>, + mrib_route_index: Option<&MribRouteIndex>, ) -> Result { reconciler - .handle_active_external_group(opctx, group, dataplane_client) + .handle_active_external_group( + opctx, + group, + clients.dataplane, + clients.sled, + clients.switch_zone, + mrib_route_index, + ) .await } } @@ -336,6 +381,8 @@ impl MulticastGroupReconciler { opctx: &OpContext, state: MulticastGroupState, dataplane_client: Option<&MulticastDataplaneClient>, + sled_client: Option<&MulticastSledClient>, + switch_zone_client: Option<&MulticastSwitchZoneClient>, ) -> Result { trace!(opctx.log, "searching for multicast groups"; "state" => %state); @@ -359,11 +406,34 @@ impl MulticastGroupReconciler { trace!(opctx.log, "found multicast groups"; "count" => groups.len(), "state" => %state); + let mrib_route_index = match (state, switch_zone_client) { + (MulticastGroupState::Active, Some(client)) => client + .list_routes_indexed() + .await + .inspect_err(|e| { + warn!( + opctx.log, + "failed to build per-pass MRIB route snapshot"; + "error" => %e, + ) + }) + .ok(), + _ => None, + }; + let mrib_route_index = mrib_route_index.as_ref(); + // Process groups concurrently with configurable parallelism - let results = stream::iter(groups) + let group_outcomes = stream::iter(groups) .map(|group| async move { let result = self - .process_group_state(opctx, &group, dataplane_client) + .process_group_state( + opctx, + &group, + dataplane_client, + sled_client, + switch_zone_client, + mrib_route_index, + ) .await; (group, result) }) @@ -373,8 +443,8 @@ impl MulticastGroupReconciler { // Handle results with state-appropriate logging and counting let mut processed = 0; - let total_results = results.len(); - for (group, result) in results { + let total = group_outcomes.len(); + for (group, result) in group_outcomes { match result { Ok(transition) => { // Count successful transitions based on state expectations @@ -404,7 +474,7 @@ impl MulticastGroupReconciler { processed += 1; } - debug!( + trace!( opctx.log, "processed multicast group"; "state" => %state, @@ -424,13 +494,13 @@ impl MulticastGroupReconciler { } } - if total_results > 0 { + if total > 0 { debug!( opctx.log, "group reconciliation completed"; "state" => %state, "processed" => processed, - "total" => total_results + "total" => total ); } @@ -446,6 +516,8 @@ impl MulticastGroupReconciler { opctx, MulticastGroupState::Creating, None, + None, + None, ) .await } @@ -455,11 +527,15 @@ impl MulticastGroupReconciler { &self, opctx: &OpContext, dataplane_client: &MulticastDataplaneClient, + sled_client: &MulticastSledClient, + switch_zone_client: &MulticastSwitchZoneClient, ) -> Result { self.reconcile_groups_by_state( opctx, MulticastGroupState::Deleting, Some(dataplane_client), + Some(sled_client), + Some(switch_zone_client), ) .await } @@ -469,11 +545,15 @@ impl MulticastGroupReconciler { &self, opctx: &OpContext, dataplane_client: &MulticastDataplaneClient, + sled_client: &MulticastSledClient, + switch_zone_client: &MulticastSwitchZoneClient, ) -> Result { self.reconcile_groups_by_state( opctx, MulticastGroupState::Active, Some(dataplane_client), + Some(sled_client), + Some(switch_zone_client), ) .await } @@ -485,6 +565,9 @@ impl MulticastGroupReconciler { opctx: &OpContext, group: &MulticastGroup, dataplane_client: Option<&MulticastDataplaneClient>, + sled_client: 
Option<&MulticastSledClient>, + switch_zone_client: Option<&MulticastSwitchZoneClient>, + mrib_route_index: Option<&MribRouteIndex>, ) -> Result { // Future: Match on group type to select different processors if // we add more nuanced group types @@ -495,17 +578,37 @@ impl MulticastGroupReconciler { processor.process_creating(self, opctx, group).await } MulticastGroupState::Deleting => { - let dataplane_client = dataplane_client - .context("dataplane client required for deleting state")?; - processor - .process_deleting(self, opctx, group, dataplane_client) - .await + let clients = GroupReconcileClients { + dataplane: dataplane_client.context( + "dataplane client required for deleting state", + )?, + sled: sled_client + .context("sled client required for deleting state")?, + switch_zone: switch_zone_client.context( + "switch zone client required for deleting state", + )?, + }; + processor.process_deleting(self, opctx, group, &clients).await } MulticastGroupState::Active => { - let dataplane_client = dataplane_client - .context("dataplane client required for active state")?; + let clients = GroupReconcileClients { + dataplane: dataplane_client.context( + "dataplane client required for active state", + )?, + sled: sled_client + .context("sled client required for active state")?, + switch_zone: switch_zone_client.context( + "switch zone client required for active state", + )?, + }; processor - .process_active(self, opctx, group, dataplane_client) + .process_active( + self, + opctx, + group, + &clients, + mrib_route_index, + ) .await } MulticastGroupState::Deleted => { @@ -602,7 +705,7 @@ impl MulticastGroupReconciler { // `backplane_map` validation for rear ports). These uplink members use // `Direction::External` and follow a different lifecycle - added when // first instance joins, removed when last instance leaves. - // Should integrate with `switch_ports_with_uplinks()` or + // Should integrate with `switch_ports_with_uplinks` or // equivalent front port discovery mechanism, which would be // configurable, and later learned (i.e., via `mcastd`/IGMP). @@ -623,6 +726,8 @@ impl MulticastGroupReconciler { opctx: &OpContext, group: &MulticastGroup, dataplane_client: &MulticastDataplaneClient, + sled_client: &MulticastSledClient, + switch_zone_client: &MulticastSwitchZoneClient, ) -> Result { debug!( opctx.log, @@ -635,8 +740,53 @@ impl MulticastGroupReconciler { "dpd_cleanup_required" => true ); - self.process_deleting_group_inner(opctx, group, dataplane_client) + // Remove MRIB routes so `mg-lower` withdraws DDM advertisements + // before cleaning up DPD and DB state. Bail on failure so the + // next pass can retry. Proceeding would delete DB rows and + // leave stale DDM advertisements. + let group_ip = group.multicast_ip.ip(); + let group_id = MulticastGroupUuid::from_untyped_uuid(group.id()); + + // Remove (*,G) route. + switch_zone_client + .remove_route(group_ip, None) + .await + .context("failed to remove MRIB (*,G) route for deleting group")?; + + // Remove (S,G) routes for any sources. Bail on failure + // to preserve DB state for retry on the next pass. + let source_filter = self + .datastore + .multicast_groups_source_filter_state(opctx, &[group_id]) + .await + .context( + "failed to load source filter for MRIB cleanup; \ + bailing to preserve DB state for retry", + )?; + + if let Some(filter) = source_filter.get(&group.id()) { + // Per-source removals target distinct (S,G) keys. We fan out so + // a group with N sources doesn't pay N round-trips serially. 
+ try_join_all(filter.specific_sources.iter().map( + |source| async move { + switch_zone_client + .remove_route(group_ip, Some(*source)) + .await + .with_context(|| format!( + "failed to remove MRIB (S,G) route for source {source}" + )) + }), + ) .await?; + } + + self.process_deleting_group_inner( + opctx, + group, + dataplane_client, + sled_client, + ) + .await?; Ok(StateTransition::StateChanged) } @@ -649,6 +799,9 @@ impl MulticastGroupReconciler { opctx: &OpContext, group: &MulticastGroup, dataplane_client: &MulticastDataplaneClient, + sled_client: &MulticastSledClient, + switch_zone_client: &MulticastSwitchZoneClient, + mrib_route_index: Option<&MribRouteIndex>, ) -> Result { let underlay_group_id = group .underlay_group_id @@ -712,7 +865,7 @@ impl MulticastGroupReconciler { } }; - if needs_update { + let res = if needs_update { debug!( opctx.log, "updating active multicast group in DPD"; @@ -747,6 +900,22 @@ impl MulticastGroupReconciler { "group_id" => %group.id(), "multicast_ip" => %group.multicast_ip ); + + // Propagate M2P/forwarding to member sleds after DPD + // sync to ensure OPTE state is also consistent. + if let Err(e) = sled_client + .propagate_m2p_and_forwarding(opctx, group) + .await + { + warn!( + opctx.log, + "failed to propagate M2P/forwarding after \ + drift correction (will retry)"; + "group_id" => %group.id(), + "error" => %e + ); + } + Ok(StateTransition::StateChanged) } Err(e) => { @@ -761,8 +930,37 @@ impl MulticastGroupReconciler { } } } else { + // Even when DPD is in sync, propagate M2P/forwarding to + // member sleds to correct any sled-level drift. + if let Err(e) = + sled_client.propagate_m2p_and_forwarding(opctx, group).await + { + warn!( + opctx.log, + "failed to propagate M2P/forwarding (will retry)"; + "group_id" => %group.id(), + "error" => %e + ); + } + Ok(StateTransition::NoChange) - } + }; + + // Reconcile MRIB routes based on whether the group has active + // ("Joined") members. If all members are "Left", withdraw the DDM + // advertisement so peer sleds stop sending traffic. + super::mrib::reconcile_group( + opctx, + &self.datastore, + switch_zone_client, + mrib_route_index, + group, + &source_filter, + underlay_group_id, + ) + .await; + + res } /// Process a single multicast group in "Creating" state. @@ -772,7 +970,7 @@ impl MulticastGroupReconciler { opctx: &OpContext, group: &MulticastGroup, ) -> Result { - debug!( + trace!( opctx.log, "processing creating multicast group"; "group" => ?group @@ -789,7 +987,7 @@ impl MulticastGroupReconciler { format!("failed to fetch linked underlay group {underlay_id}") })?; - debug!( + trace!( opctx.log, "found linked underlay group"; "group" => ?group, @@ -798,12 +996,12 @@ impl MulticastGroupReconciler { underlay } None => { - debug!( + trace!( opctx.log, "creating new underlay group"; "group" => ?group ); - match self.ensure_underlay_for_external(opctx, &group).await? { + match self.ensure_underlay_for_external(opctx, group).await? 
{
                     Some(underlay) => underlay,
                     None => return Ok(false), // Group deleted during processing
                 }
             }
         };
@@ -835,9 +1033,9 @@
         >(saga_params)
         .context("failed to create multicast group transaction saga")?;

-        let saga_id = self
+        let (saga_id, completion) = self
             .sagas
-            .saga_start(dag)
+            .saga_run(dag)
             .await
             .context("failed to start multicast group transaction saga")?;

@@ -851,6 +1049,11 @@
             "expected_outcome" => "Creating → Active"
         );

+        // Block this pass on saga completion so subsequent reconciler
+        // steps observe "Active" within the same pass. See module-level
+        // "RPW Saga Coordination" for rationale.
+        completion.await.context("multicast group transaction saga failed")?;
+
         Ok(true)
     }

@@ -860,6 +1063,7 @@
         opctx: &OpContext,
         group: &MulticastGroup,
         dataplane_client: &MulticastDataplaneClient,
+        sled_client: &MulticastSledClient,
     ) -> Result<(), anyhow::Error> {
         let tag = Self::get_multicast_tag(group)
             .context("multicast group missing tag")?;
@@ -875,6 +1079,15 @@
             "cleanup_includes" => "[external_group, underlay_group, forwarding_rules, member_ports]"
         );

+        // Clear M2P/forwarding from all sleds before DPD cleanup.
+        // This must succeed before deleting DB records, otherwise
+        // stale OPTE state would persist on sleds where the clear
+        // failed, with no DB record to drive a retry on a later pass.
+        sled_client
+            .clear_m2p_and_forwarding(opctx, group)
+            .await
+            .context("failed to clear M2P/forwarding from sleds")?;
+
         // Use dataplane client from reconciliation pass to cleanup switch(es)
         // state by tag
         dataplane_client
@@ -928,7 +1141,7 @@ mod tests {
     use omicron_common::api::external::IdentityMetadataCreateParams;

     fn create_dpd_group(
-        sources: Option<Vec<dpd_client::types::IpSrc>>,
+        sources: Option<Vec<IpSrc>>,
     ) -> dpd_client::types::MulticastGroupExternalResponse {
         dpd_client::types::MulticastGroupExternalResponse {
             group_ip: "232.1.1.1".parse().unwrap(),
@@ -981,15 +1194,15 @@
         // DPD has matching sources
         let dpd_group = create_dpd_group(Some(vec![
-            dpd_client::types::IpSrc::Exact("10.0.0.1".parse().unwrap()),
-            dpd_client::types::IpSrc::Exact("10.0.0.2".parse().unwrap()),
+            IpSrc::Exact("10.0.0.1".parse().unwrap()),
+            IpSrc::Exact("10.0.0.2".parse().unwrap()),
         ]));
         assert!(dpd_state_matches_sources(&dpd_group, &source_filter, &group));

         // DPD has sources in different order (should still match)
         let dpd_group = create_dpd_group(Some(vec![
-            dpd_client::types::IpSrc::Exact("10.0.0.2".parse().unwrap()),
-            dpd_client::types::IpSrc::Exact("10.0.0.1".parse().unwrap()),
+            IpSrc::Exact("10.0.0.2".parse().unwrap()),
+            IpSrc::Exact("10.0.0.1".parse().unwrap()),
         ]));
         assert!(dpd_state_matches_sources(&dpd_group, &source_filter, &group));

@@ -999,8 +1212,8 @@
         // DPD has wrong sources (mismatch)
         let dpd_group = create_dpd_group(Some(vec![
-            dpd_client::types::IpSrc::Exact("10.0.0.1".parse().unwrap()),
-            dpd_client::types::IpSrc::Exact("10.0.0.3".parse().unwrap()), // wrong
+            IpSrc::Exact("10.0.0.1".parse().unwrap()),
+            IpSrc::Exact("10.0.0.3".parse().unwrap()), // wrong
         ]));
         assert!(!dpd_state_matches_sources(&dpd_group, &source_filter, &group));
     }
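All of these assertions reduce to one canonicalization rule: extract DPD's `IpSrc::Exact` entries, sort them, and compare against the expected source set. A condensed illustration of that comparison (the function name and shape are for exposition, not code from this change):

    use std::collections::BTreeSet;
    use std::net::IpAddr;

    // `dpd_exact` stands in for the DPD response already filtered down to
    // IpSrc::Exact values. A BTreeSet iterates in sorted order, so only
    // the DPD side needs an explicit sort before comparing.
    fn sources_in_sync(
        mut dpd_exact: Vec<IpAddr>,
        expected: &BTreeSet<IpAddr>,
    ) -> bool {
        dpd_exact.sort();
        dpd_exact.iter().eq(expected.iter())
    }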
IpSrc::Exact("10.0.0.2".parse().unwrap()), ])); assert!(dpd_state_matches_sources(&dpd_group, &source_filter, &group)); @@ -1034,9 +1247,8 @@ mod tests { } #[test] - fn test_dpd_state_matches_sources_asm_address() { - // ASM address with all members specifying sources: expect those - // sources in DPD. + fn test_dpd_state_matches_sources_asm_with_specific_sources() { + // ASM address with specific sources only (no any-source members) let source_filter = SourceFilterState { specific_sources: BTreeSet::from(["10.0.0.1" .parse::() @@ -1044,23 +1256,27 @@ mod tests { has_any_source_member: false, }; - let group = create_group("224.1.1.1"); // ASM address (not 232.x.x.x) + let group = create_group("224.1.1.1"); // ASM address - // DPD has matching sources (correct) - let dpd_group = - create_dpd_group(Some(vec![dpd_client::types::IpSrc::Exact( - "10.0.0.1".parse().unwrap(), - )])); + // DPD has matching specific sources + let dpd_group = create_dpd_group(Some(vec![IpSrc::Exact( + "10.0.0.1".parse().unwrap(), + )])); assert!(dpd_state_matches_sources(&dpd_group, &source_filter, &group)); - // DPD has None (mismatch: ASM with all-specific should have sources) + // DPD has None (mismatch: should have specific sources) let dpd_group = create_dpd_group(None); assert!(!dpd_state_matches_sources(&dpd_group, &source_filter, &group)); + + // DPD has IpSrc::Any (mismatch: should have specific sources) + let dpd_group = create_dpd_group(Some(vec![IpSrc::Any])); + assert!(!dpd_state_matches_sources(&dpd_group, &source_filter, &group)); } #[test] fn test_dpd_state_matches_sources_asm_with_any_source_member() { - // ASM address with has_any_source_member=true - expects None from DPD + // ASM address with has_any_source_member=true: we send None to DPD, + // and DPD canonicalizes any-source representations to None. 
let source_filter = SourceFilterState { specific_sources: BTreeSet::new(), has_any_source_member: true, @@ -1068,15 +1284,35 @@ mod tests { let group = create_group("224.1.1.1"); // ASM address - // DPD has None (correct for ASM with any-source members) + // DPD has None (correct: any-source canonicalizes to None) + let dpd_group = create_dpd_group(None); + assert!(dpd_state_matches_sources(&dpd_group, &source_filter, &group)); + + // DPD has specific sources (mismatch) + let dpd_group = create_dpd_group(Some(vec![IpSrc::Exact( + "10.0.0.1".parse().unwrap(), + )])); + assert!(!dpd_state_matches_sources(&dpd_group, &source_filter, &group)); + } + + #[test] + fn test_dpd_state_matches_sources_asm_no_sources() { + // ASM with no source filters at all expects None + let source_filter = SourceFilterState { + specific_sources: BTreeSet::new(), + has_any_source_member: false, + }; + + let group = create_group("224.1.1.1"); // ASM address + + // DPD has None (correct: no sources configured) let dpd_group = create_dpd_group(None); assert!(dpd_state_matches_sources(&dpd_group, &source_filter, &group)); - // DPD has sources (mismatch: should be none) - let dpd_group = - create_dpd_group(Some(vec![dpd_client::types::IpSrc::Exact( - "10.0.0.1".parse().unwrap(), - )])); + // DPD has sources (mismatch) + let dpd_group = create_dpd_group(Some(vec![IpSrc::Exact( + "10.0.0.1".parse().unwrap(), + )])); assert!(!dpd_state_matches_sources(&dpd_group, &source_filter, &group)); } } diff --git a/nexus/src/app/background/tasks/multicast/members.rs b/nexus/src/app/background/tasks/multicast/members.rs index 1b7f81c6ab3..0ec903caea5 100644 --- a/nexus/src/app/background/tasks/multicast/members.rs +++ b/nexus/src/app/background/tasks/multicast/members.rs @@ -42,6 +42,12 @@ //! - **State transitions**: "Joining" → "Joined" → "Left" with reactivation //! - **Dataplane updates**: Applying and removing configuration via DPD //! client(s) on switches +//! - **M2P/forwarding propagation**: After join, leave, or migration, M2P +//! mappings and forwarding entries are propagated to all sleds via +//! sled-agent inline (not deferred to the next reconciliation pass) +//! - **OPTE subscriptions**: Per-instance multicast group filters managed +//! via sled-agent on the hosting sled (keyed by the active VMM's +//! propolis ID) //! - **Sled migration**: Detecting moves and updating dataplane configuration //! (no transition to "Left") //! - **Cleanup**: Removing orphaned switch state for deleted members @@ -97,7 +103,8 @@ //! | 3 | None | Valid | "Creating" | Wait for activation | "Left" | //! | 4 | None | Valid | "Active" | Reactivate member | "Joining" | -use std::collections::{BTreeMap, BTreeSet, HashMap}; +use std::collections::{BTreeMap, HashMap, HashSet}; +use std::net::Ipv6Addr; use std::sync::Arc; use std::time::Instant; @@ -106,6 +113,7 @@ use futures::stream::{self, StreamExt}; use slog::{debug, info, trace, warn}; use uuid::Uuid; +use dpd_client::types::{BackplaneLink, Direction, LinkId, PortId, Rear}; use nexus_db_model::{ DbTypedUuid, MulticastGroup, MulticastGroupMember, MulticastGroupMemberState, MulticastGroupState, Sled, @@ -124,15 +132,70 @@ use omicron_uuid_kinds::{ use super::{MulticastGroupReconciler, StateTransition, SwitchBackplanePort}; use crate::app::multicast::dataplane::MulticastDataplaneClient; +use crate::app::multicast::sled::MulticastSledClient; +use crate::app::multicast::switch_zone::MulticastSwitchZoneClient; + +/// Pre-fetched instance state for multicast reconciliation. 
+#[derive(Clone, Copy, Debug, Default)]
+struct InstanceMulticastState {
+    /// Whether the instance is in a state that can receive multicast traffic.
+    valid: bool,
+    /// Current sled hosting the VMM, if any.
+    sled_id: Option<SledUuid>,
+}
+
+/// Context shared across member reconciliation operations.
+struct MemberReconcileCtx<'a> {
+    opctx: &'a OpContext,
+    group: &'a MulticastGroup,
+    member: &'a MulticastGroupMember,
+    instance_states: &'a InstanceStateMap,
+    dataplane_client: &'a MulticastDataplaneClient,
+    sled_client: &'a MulticastSledClient,
+    /// Sled-to-port mapping built once per reconciliation pass and shared
+    /// across all members in that pass (sled lookups in this map are O(1)
+    /// and never trigger I/O).
+    sled_to_ports: &'a HashMap<SledUuid, Vec<SwitchBackplanePort>>,
+}
+
+/// Maps instance_id to pre-fetched multicast-relevant state.
+type InstanceStateMap = HashMap<Uuid, InstanceMulticastState>;
+type MemberPortKey = (PortId, LinkId);
+
+/// Sled-to-port mapping for a single reconciliation pass.
+///
+/// `sled_to_ports` is the functional data we need. `ddm_inventory_drift` counts
+/// sleds whose DDM port mapping diverged from inventory during a pass and is
+/// reported for observability and possible future signaling.
+///
+/// TODO: A future change could use sustained drift to signal an inventory
+/// refresh.
+struct SledPortMap {
+    sled_to_ports: HashMap<SledUuid, Vec<SwitchBackplanePort>>,
+    ddm_inventory_drift: usize,
+}
 
-/// Pre-fetched instance state data for batch processing.
-/// Maps instance_id -> (is_valid_for_multicast, current_sled_id).
-type InstanceStateMap = HashMap<Uuid, (bool, Option<SledUuid>)>;
+impl SledPortMap {
+    fn empty() -> Self {
+        Self { sled_to_ports: HashMap::new(), ddm_inventory_drift: 0 }
+    }
+}
+
+/// Outcome of a single [`MulticastGroupReconciler::reconcile_member_states`]
+/// pass.
+#[derive(Clone, Copy, Debug, Default)]
+pub(super) struct MemberReconcileCounts {
+    /// Members whose state advanced this pass (e.g., "Joining" → "Joined",
+    /// "Joining" → "Left").
+    pub(super) processed: usize,
+    /// Number of sleds whose DDM port mapping diverged from inventory.
+    /// DDM wins (live state); a non-zero count surfaces inventory lag.
+    pub(super) ddm_inventory_drift: usize,
+}
 
 /// Backplane port mapping from DPD-client.
 /// Maps switch port ID to backplane link configuration.
-type BackplaneMap =
-    BTreeMap<dpd_client::types::PortId, dpd_client::types::BackplaneLink>;
+type BackplaneMap = BTreeMap<PortId, BackplaneLink>;
 
 /// Result of computing the union of member ports across a group.
 ///
@@ -141,18 +204,18 @@ type BackplaneMap =
 /// the union is `Complete` to avoid disrupting members that failed resolution.
 enum MemberPortUnion {
     /// Union is complete: all "Joined" members were successfully resolved.
-    Complete(BTreeSet<dpd_client::types::PortId>),
+    Complete(HashSet<MemberPortKey>),
     /// Union is partial: some "Joined" members failed to resolve.
     /// The port set may be incomplete.
-    Partial(BTreeSet<dpd_client::types::PortId>),
+    Partial(HashSet<MemberPortKey>),
 }
 
 /// Check if a DPD member is a rear/underlay port (instance member).
 fn is_rear_underlay_member(
     member: &dpd_client::types::MulticastGroupMember,
 ) -> bool {
-    matches!(member.port_id, dpd_client::types::PortId::Rear(_))
-        && member.direction == dpd_client::types::Direction::Underlay
+    matches!(member.port_id, PortId::Rear(_))
+        && member.direction == Direction::Underlay
 }
 
 /// Represents a sled_id update for a multicast group member.
@@ -168,33 +231,21 @@ trait MemberStateProcessor { async fn process_joining( &self, reconciler: &MulticastGroupReconciler, - opctx: &OpContext, - group: &MulticastGroup, - member: &MulticastGroupMember, - instance_states: &InstanceStateMap, - dataplane_client: &MulticastDataplaneClient, + ctx: &MemberReconcileCtx<'_>, ) -> Result; /// Process a member in "Joined" state. async fn process_joined( &self, reconciler: &MulticastGroupReconciler, - opctx: &OpContext, - group: &MulticastGroup, - member: &MulticastGroupMember, - instance_states: &InstanceStateMap, - dataplane_client: &MulticastDataplaneClient, + ctx: &MemberReconcileCtx<'_>, ) -> Result; /// Process a member in "Left" state. async fn process_left( &self, reconciler: &MulticastGroupReconciler, - opctx: &OpContext, - group: &MulticastGroup, - member: &MulticastGroupMember, - instance_states: &InstanceStateMap, - dataplane_client: &MulticastDataplaneClient, + ctx: &MemberReconcileCtx<'_>, ) -> Result; } @@ -205,61 +256,25 @@ impl MemberStateProcessor for InstanceMemberProcessor { async fn process_joining( &self, reconciler: &MulticastGroupReconciler, - opctx: &OpContext, - group: &MulticastGroup, - member: &MulticastGroupMember, - instance_states: &InstanceStateMap, - dataplane_client: &MulticastDataplaneClient, + ctx: &MemberReconcileCtx<'_>, ) -> Result { - reconciler - .handle_instance_joining( - opctx, - group, - member, - instance_states, - dataplane_client, - ) - .await + reconciler.handle_instance_joining(ctx).await } async fn process_joined( &self, reconciler: &MulticastGroupReconciler, - opctx: &OpContext, - group: &MulticastGroup, - member: &MulticastGroupMember, - instance_states: &InstanceStateMap, - dataplane_client: &MulticastDataplaneClient, + ctx: &MemberReconcileCtx<'_>, ) -> Result { - reconciler - .handle_instance_joined( - opctx, - group, - member, - instance_states, - dataplane_client, - ) - .await + reconciler.handle_instance_joined(ctx).await } async fn process_left( &self, reconciler: &MulticastGroupReconciler, - opctx: &OpContext, - group: &MulticastGroup, - member: &MulticastGroupMember, - instance_states: &InstanceStateMap, - dataplane_client: &MulticastDataplaneClient, + ctx: &MemberReconcileCtx<'_>, ) -> Result { - reconciler - .handle_instance_left( - opctx, - group, - member, - instance_states, - dataplane_client, - ) - .await + reconciler.handle_instance_left(ctx).await } } @@ -272,11 +287,13 @@ impl MulticastGroupReconciler { ]; /// Process member state changes ("Joining"→"Joined"→"Left"). - pub async fn reconcile_member_states( + pub(super) async fn reconcile_member_states( &self, opctx: &OpContext, dataplane_client: &MulticastDataplaneClient, - ) -> Result { + sled_client: &MulticastSledClient, + switch_zone_client: Option<&MulticastSwitchZoneClient>, + ) -> Result { trace!(opctx.log, "reconciling member state changes"); let mut processed = 0; @@ -284,9 +301,46 @@ impl MulticastGroupReconciler { // Get all groups that need member state processing ("Creating" and "Active") let groups = self.get_reconcilable_groups(opctx).await?; + // Build the reconciliation pass sled-to-port mapping once and share + // it across all members in this pass. Avoids per-member DDM RPCs + // and per-member inventory queries. + // + // A build failure (no DDM peers and no inventory yet) downgrades + // to an empty map: "Joining" → "Left" for stopped instances is a + // DB-only CAS that doesn't need a port lookup, so it still + // converges. Members that do need a port lookup (e.g. 
"Joining" + // → "Joined") fail their own processing this pass and retry on + // the next. + let SledPortMap { sled_to_ports, ddm_inventory_drift: drift_count } = + match self + .build_sled_port_map( + opctx, + dataplane_client, + switch_zone_client, + ) + .await + { + Ok(map) => map, + Err(e) => { + warn!( + opctx.log, + "failed to build reconciliation pass sled-to-port \ + mapping, continuing with empty map"; + "error" => %e, + ); + SledPortMap::empty() + } + }; + for group in groups { match self - .process_group_member_states(opctx, &group, dataplane_client) + .process_group_member_states( + opctx, + &group, + dataplane_client, + sled_client, + &sled_to_ports, + ) .await { Ok(count) => { @@ -314,10 +368,14 @@ impl MulticastGroupReconciler { debug!( opctx.log, "member state reconciliation completed"; - "members_processed" => processed + "members_processed" => processed, + "ddm_inventory_drift" => drift_count, ); - Ok(processed) + Ok(MemberReconcileCounts { + processed, + ddm_inventory_drift: drift_count, + }) } /// Process member state changes for a single group. @@ -326,6 +384,8 @@ impl MulticastGroupReconciler { opctx: &OpContext, group: &MulticastGroup, dataplane_client: &MulticastDataplaneClient, + sled_client: &MulticastSledClient, + sled_to_ports: &HashMap>, ) -> Result { let mut processed = 0; @@ -337,19 +397,21 @@ impl MulticastGroupReconciler { Arc::new(self.batch_fetch_instance_states(opctx, &members).await?); // Process members concurrently with configurable parallelism - let results = stream::iter(members) + let member_outcomes = stream::iter(members) .map(|member| { let instance_states = Arc::clone(&instance_states); async move { - let res = self - .process_member_state( - opctx, - group, - &member, - &instance_states, - dataplane_client, - ) - .await; + let ctx = MemberReconcileCtx { + opctx, + group, + member: &member, + instance_states: &instance_states, + dataplane_client, + sled_client, + sled_to_ports, + }; + + let res = self.process_member_state(&ctx).await; (member, res) } }) @@ -358,13 +420,13 @@ impl MulticastGroupReconciler { .await; // Process results and update counters - for (member, result) in results { + for (member, result) in member_outcomes { match result { Ok(transition) => match transition { StateTransition::StateChanged | StateTransition::NoChange => { processed += 1; - debug!( + trace!( opctx.log, "processed member state change"; "member" => ?member, @@ -374,7 +436,7 @@ impl MulticastGroupReconciler { } StateTransition::NeedsCleanup => { processed += 1; - debug!( + trace!( opctx.log, "member marked for cleanup"; "member" => ?member, @@ -382,7 +444,7 @@ impl MulticastGroupReconciler { ); } StateTransition::EntityGone => { - debug!( + trace!( opctx.log, "member deleted during processing"; "member" => ?member, @@ -407,15 +469,13 @@ impl MulticastGroupReconciler { /// Main dispatch function for processing member state changes. /// - /// Routes to appropriate node based on member type. + /// Routes to the appropriate handler based on member state. async fn process_member_state( &self, - opctx: &OpContext, - group: &MulticastGroup, - member: &MulticastGroupMember, - instance_states: &InstanceStateMap, - dataplane_client: &MulticastDataplaneClient, + ctx: &MemberReconcileCtx<'_>, ) -> Result { + let MemberReconcileCtx { opctx, group, member, .. } = *ctx; + // Check if the parent group has been deleted or is being deleted. // If so, delete the member so cleanup can proceed. 
// @@ -447,40 +507,13 @@ impl MulticastGroupReconciler { match member.state { MulticastGroupMemberState::Joining => { - processor - .process_joining( - self, - opctx, - group, - member, - instance_states, - dataplane_client, - ) - .await + processor.process_joining(self, ctx).await } MulticastGroupMemberState::Joined => { - processor - .process_joined( - self, - opctx, - group, - member, - instance_states, - dataplane_client, - ) - .await + processor.process_joined(self, ctx).await } MulticastGroupMemberState::Left => { - processor - .process_left( - self, - opctx, - group, - member, - instance_states, - dataplane_client, - ) - .await + processor.process_left(self, ctx).await } } } @@ -495,7 +528,7 @@ impl MulticastGroupReconciler { ) -> Result { // Skip if member is already deleted if member.time_deleted.is_some() { - debug!( + trace!( opctx.log, "member already deleted, no action needed"; "member_id" => %member.id, @@ -532,35 +565,23 @@ impl MulticastGroupReconciler { /// when ready. Uses CAS operations for concurrent-safe state updates. async fn handle_instance_joining( &self, - opctx: &OpContext, - group: &MulticastGroup, - member: &MulticastGroupMember, - instance_states: &InstanceStateMap, - dataplane_client: &MulticastDataplaneClient, + ctx: &MemberReconcileCtx<'_>, ) -> Result { - // Extract pre-fetched instance state - let (instance_valid, current_sled_id) = - self.get_instance_state_from_cache(instance_states, member); + let instance_state = + self.get_instance_state_from_cache(ctx.instance_states, ctx.member); - // Execute reconciliation CAS operation let reconcile_res = self .execute_joining_reconciliation( - opctx, - group, - member, - instance_valid, - current_sled_id, + ctx, + instance_state.valid, + instance_state.sled_id, ) .await?; - // Process reconciliation result self.process_joining_reconcile_result( - opctx, - group, - member, - instance_valid, + ctx, + instance_state, reconcile_res, - dataplane_client, ) .await } @@ -570,16 +591,14 @@ impl MulticastGroupReconciler { &self, instance_states: &InstanceStateMap, member: &MulticastGroupMember, - ) -> (bool, Option) { - instance_states.get(&member.parent_id).copied().unwrap_or((false, None)) + ) -> InstanceMulticastState { + instance_states.get(&member.parent_id).copied().unwrap_or_default() } /// Execute the reconciliation CAS operation for a member in "Joining" state. async fn execute_joining_reconciliation( &self, - opctx: &OpContext, - group: &MulticastGroup, - member: &MulticastGroupMember, + ctx: &MemberReconcileCtx<'_>, instance_valid: bool, current_sled_id: Option, ) -> Result { @@ -587,9 +606,9 @@ impl MulticastGroupReconciler { self.datastore .multicast_group_member_reconcile_joining( - opctx, - MulticastGroupUuid::from_untyped_uuid(group.id()), - InstanceUuid::from_untyped_uuid(member.parent_id), + ctx.opctx, + MulticastGroupUuid::from_untyped_uuid(ctx.group.id()), + InstanceUuid::from_untyped_uuid(ctx.member.parent_id), instance_valid, current_sled_id_db, ) @@ -600,39 +619,26 @@ impl MulticastGroupReconciler { /// Process the result of a "Joining" state reconciliation operation. 
async fn process_joining_reconcile_result( &self, - opctx: &OpContext, - group: &MulticastGroup, - member: &MulticastGroupMember, - instance_valid: bool, + ctx: &MemberReconcileCtx<'_>, + instance_state: InstanceMulticastState, reconcile_result: ReconcileJoiningResult, - dataplane_client: &MulticastDataplaneClient, ) -> Result { match reconcile_result.action { ReconcileAction::TransitionedToLeft => { - self.handle_transitioned_to_left(opctx, group, member).await + self.handle_transitioned_to_left(ctx).await } ReconcileAction::UpdatedSledId { old, new } => { self.handle_sled_id_updated( - opctx, - group, - member, - instance_valid, + ctx, + instance_state, SledIdUpdate { old, new }, - dataplane_client, ) .await } ReconcileAction::NotFound | ReconcileAction::NoChange => { - self.handle_no_change_or_not_found( - opctx, - group, - member, - instance_valid, - dataplane_client, - ) - .await + self.handle_no_change_or_not_found(ctx, instance_state).await } } } @@ -640,18 +646,16 @@ impl MulticastGroupReconciler { /// Handle the case where a member was transitioned to "Left" state. async fn handle_transitioned_to_left( &self, - opctx: &OpContext, - group: &MulticastGroup, - member: &MulticastGroupMember, + ctx: &MemberReconcileCtx<'_>, ) -> Result { info!( - opctx.log, + ctx.opctx.log, "multicast member lifecycle transition: 'Joining' → 'Left'"; - "member_id" => %member.id, - "instance_id" => %member.parent_id, - "group_id" => %group.id(), - "group_name" => group.name().as_str(), - "group_multicast_ip" => %group.multicast_ip, + "member_id" => %ctx.member.id, + "instance_id" => %ctx.member.parent_id, + "group_id" => %ctx.group.id(), + "group_name" => ctx.group.name().as_str(), + "group_multicast_ip" => %ctx.group.multicast_ip, "reason" => "instance_not_valid_for_multicast_traffic" ); Ok(StateTransition::StateChanged) @@ -660,63 +664,43 @@ impl MulticastGroupReconciler { /// Handle the case where a member's sled_id was updated. async fn handle_sled_id_updated( &self, - opctx: &OpContext, - group: &MulticastGroup, - member: &MulticastGroupMember, - instance_valid: bool, + ctx: &MemberReconcileCtx<'_>, + instance_state: InstanceMulticastState, sled_id_update: SledIdUpdate, - dataplane_client: &MulticastDataplaneClient, ) -> Result { - debug!( - opctx.log, + trace!( + ctx.opctx.log, "updated member sled_id, checking if ready to join"; - "member_id" => %member.id, + "member_id" => %ctx.member.id, "old_sled_id" => ?sled_id_update.old, "new_sled_id" => ?sled_id_update.new, - "group_state" => ?group.state, - "instance_valid" => instance_valid + "group_state" => ?ctx.group.state, + "instance_valid" => instance_state.valid ); - self.try_complete_join_if_ready( - opctx, - group, - member, - instance_valid, - dataplane_client, - ) - .await + self.try_complete_join_if_ready(ctx, instance_state).await } /// Handle the case where no changes were made or member was not found. 
async fn handle_no_change_or_not_found( &self, - opctx: &OpContext, - group: &MulticastGroup, - member: &MulticastGroupMember, - instance_valid: bool, - dataplane_client: &MulticastDataplaneClient, + ctx: &MemberReconcileCtx<'_>, + instance_state: InstanceMulticastState, ) -> Result { // Check if member is already in Joined state - if member.state == MulticastGroupMemberState::Joined { - debug!( - opctx.log, + if ctx.member.state == MulticastGroupMemberState::Joined { + trace!( + ctx.opctx.log, "member already in 'Joined' state, no action needed"; - "member_id" => %member.id, - "group_id" => %group.id(), - "group_name" => group.name().as_str() + "member_id" => %ctx.member.id, + "group_id" => %ctx.group.id(), + "group_name" => ctx.group.name().as_str() ); return Ok(StateTransition::NoChange); } // Try to complete the join if conditions are met - self.try_complete_join_if_ready( - opctx, - group, - member, - instance_valid, - dataplane_client, - ) - .await + self.try_complete_join_if_ready(ctx, instance_state).await } fn is_ready_to_join( @@ -729,30 +713,25 @@ impl MulticastGroupReconciler { async fn try_complete_join_if_ready( &self, - opctx: &OpContext, - group: &MulticastGroup, - member: &MulticastGroupMember, - instance_valid: bool, - dataplane_client: &MulticastDataplaneClient, + ctx: &MemberReconcileCtx<'_>, + instance_state: InstanceMulticastState, ) -> Result { - if self.is_ready_to_join(group, instance_valid) { - self.complete_instance_member_join( - opctx, - group, - member, - dataplane_client, - ) - .await?; - Ok(StateTransition::StateChanged) + if self.is_ready_to_join(ctx.group, instance_state.valid) { + let joined = self.complete_instance_member_join(ctx, None).await?; + if joined { + Ok(StateTransition::StateChanged) + } else { + Ok(StateTransition::NoChange) + } } else { - debug!( - opctx.log, + trace!( + ctx.opctx.log, "member not ready to join: waiting for next run"; - "member_id" => %member.id, - "group_id" => %group.id(), - "group_name" => group.name().as_str(), - "instance_valid" => instance_valid, - "group_state" => ?group.state + "member_id" => %ctx.member.id, + "group_id" => %ctx.group.id(), + "group_name" => ctx.group.name().as_str(), + "instance_valid" => instance_state.valid, + "group_state" => ?ctx.group.state ); Ok(StateTransition::NoChange) } @@ -761,82 +740,47 @@ impl MulticastGroupReconciler { /// Instance-specific handler for members in "Joined" state. 
async fn handle_instance_joined( &self, - opctx: &OpContext, - group: &MulticastGroup, - member: &MulticastGroupMember, - instance_states: &InstanceStateMap, - dataplane_client: &MulticastDataplaneClient, + ctx: &MemberReconcileCtx<'_>, ) -> Result { - // Get pre-fetched instance state and sled_id - let (instance_valid, current_sled_id) = instance_states - .get(&member.parent_id) + let instance_state = ctx + .instance_states + .get(&ctx.member.parent_id) .copied() - .unwrap_or((false, None)); + .unwrap_or_default(); - match (instance_valid, current_sled_id) { - // Invalid instance -> remove from dataplane and transition to "Left" - (false, _) => { - self.handle_invalid_instance( - opctx, - group, - member, - dataplane_client, - ) - .await - } + match (instance_state.valid, instance_state.sled_id) { + (false, _) => self.handle_invalid_instance(ctx).await, - // Valid instance with sled, but sled changed (migration) - (true, Some(sled_id)) if member.sled_id != Some(sled_id.into()) => { - self.handle_sled_migration( - opctx, - group, - member, - sled_id, - dataplane_client, - ) - .await + (true, Some(sled_id)) + if ctx.member.sled_id != Some(sled_id.into()) => + { + self.handle_sled_migration(ctx, sled_id).await } - // Valid instance with sled, sled unchanged -> verify configuration (true, Some(_)) => { - self.verify_members(opctx, group, member, dataplane_client) - .await?; + self.verify_members(ctx).await?; trace!( - opctx.log, + ctx.opctx.log, "member configuration verified, no changes needed"; - "member_id" => %member.id, - "group_id" => %group.id() + "member_id" => %ctx.member.id, + "group_id" => %ctx.group.id() ); Ok(StateTransition::NoChange) } - // Valid instance but no sled_id (shouldn't typically happen in "Joined" state) - (true, None) => { - self.handle_joined_without_sled( - opctx, - group, - member, - dataplane_client, - ) - .await - } + (true, None) => self.handle_joined_without_sled(ctx).await, } } /// Handle a joined member whose instance became invalid. async fn handle_invalid_instance( &self, - opctx: &OpContext, - group: &MulticastGroup, - member: &MulticastGroupMember, - dataplane_client: &MulticastDataplaneClient, + ctx: &MemberReconcileCtx<'_>, ) -> Result { + let MemberReconcileCtx { opctx, group, member, sled_client, .. } = ctx; // Remove from dataplane first - if let Err(e) = self - .remove_member_from_dataplane(opctx, member, dataplane_client) - .await - { - debug!( + if let Err(e) = self.remove_member_from_dataplane(ctx).await { + warn!( opctx.log, "failed to remove member from dataplane, will retry"; "member_id" => %member.id, @@ -845,6 +789,24 @@ impl MulticastGroupReconciler { return Err(e); } + // Unsubscribe the instance from the multicast group before the CAS + // clears the sled ID. Best-effort since the VMM may already be torn + // down. + if let Some(sled_id) = member.sled_id { + if let Err(e) = sled_client + .unsubscribe_instance(opctx, group, member, sled_id.into()) + .await + { + warn!( + opctx.log, + "failed to unsubscribe instance during instance invalidation"; + "member_id" => %member.id, + "sled_id" => %sled_id, + "error" => %e + ); + } + } + // Update database state (atomically set "Left" and clear `sled_id`) let updated = self .datastore @@ -870,6 +832,21 @@ impl MulticastGroupReconciler { return Ok(StateTransition::NoChange); } + // Propagate updated M2P/forwarding to all sleds so the + // dataplane reflects the member's departure. Best-effort since + // group reconciliation will converge if this fails. 
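+        //
+        // Illustrative sketch (not part of this change): the
+        // warn-and-continue shape used for every best-effort propagation
+        // call in this file; `propagate_best_effort` is a hypothetical
+        // helper name.
+        //
+        //     async fn propagate_best_effort(
+        //         sled_client: &MulticastSledClient,
+        //         opctx: &OpContext,
+        //         group: &MulticastGroup,
+        //     ) {
+        //         if let Err(e) = sled_client
+        //             .propagate_m2p_and_forwarding(opctx, group)
+        //             .await
+        //         {
+        //             // Next reconciler pass re-propagates, so log and
+        //             // continue instead of failing the current pass.
+        //             warn!(
+        //                 opctx.log,
+        //                 "failed to propagate M2P/forwarding (will retry)";
+        //                 "group_id" => %group.id(),
+        //                 "error" => %e
+        //             );
+        //         }
+        //     }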
+ if let Err(e) = + sled_client.propagate_m2p_and_forwarding(opctx, group).await + { + warn!( + opctx.log, + "failed to propagate M2P/forwarding after member leave"; + "member_id" => %member.id, + "group_id" => %group.id(), + "error" => %e + ); + } + info!( opctx.log, "multicast member lifecycle transition: 'Joined' → 'Left' (instance invalid)"; @@ -877,7 +854,6 @@ impl MulticastGroupReconciler { "instance_id" => %member.parent_id, "group_id" => %group.id(), "group_multicast_ip" => %group.multicast_ip, - "dpd_operation" => "remove_member_from_underlay_group", "reason" => "instance_no_longer_valid_for_multicast_traffic" ); Ok(StateTransition::StateChanged) @@ -886,46 +862,50 @@ impl MulticastGroupReconciler { /// Handle sled migration for a "Joined" member. async fn handle_sled_migration( &self, - opctx: &OpContext, - group: &MulticastGroup, - member: &MulticastGroupMember, + ctx: &MemberReconcileCtx<'_>, new_sled_id: SledUuid, - dataplane_client: &MulticastDataplaneClient, ) -> Result { info!( - opctx.log, + ctx.opctx.log, "detected sled migration for 'Joined' member: re-applying configuration"; - "member_id" => %member.id, - "instance_id" => %member.parent_id, - "group_id" => %group.id(), - "group_name" => group.name().as_str(), - "group_multicast_ip" => %group.multicast_ip, - "old_sled_id" => ?member.sled_id, + "member_id" => %ctx.member.id, + "instance_id" => %ctx.member.parent_id, + "group_id" => %ctx.group.id(), + "group_name" => ctx.group.name().as_str(), + "group_multicast_ip" => %ctx.group.multicast_ip, + "old_sled_id" => ?ctx.member.sled_id, "new_sled_id" => %new_sled_id ); // Remove from old sled's dataplane first - if let Err(e) = self - .remove_member_from_dataplane(opctx, member, dataplane_client) - .await - { - debug!( - opctx.log, + if let Err(e) = self.remove_member_from_dataplane(ctx).await { + warn!( + ctx.opctx.log, "failed to remove member from old sled, will retry"; - "member_id" => %member.id, - "old_sled_id" => ?member.sled_id, + "member_id" => %ctx.member.id, + "old_sled_id" => ?ctx.member.sled_id, "error" => ?e ); return Err(e); } - // Update sled_id in database using CAS + // Source-sled OPTE cleanup (M2P, forwarding, port subscription) + // is handled by VMM teardown: remove_propolis_zone -> + // release_opte_ports -> PortTicket::release_inner, which + // clears multicast subscriptions along with V2P and firewall + // rules. + // + // This is consistent with all other OPTE state. Nexus + // never explicitly calls sled-agent for source-sled cleanup + // after migration. 
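+        //
+        // Illustrative sketch (not part of this change): the guard the
+        // CAS below relies on, shown as Diesel-style pseudocode. The real
+        // query lives in the datastore layer; column and dsl names here
+        // are assumptions.
+        //
+        //     diesel::update(
+        //         member_dsl::multicast_group_member
+        //             .filter(member_dsl::instance_id.eq(instance_id))
+        //             // CAS guard: only update if sled_id is unchanged
+        //             .filter(member_dsl::sled_id.eq(observed_sled_id))
+        //             .filter(member_dsl::time_deleted.is_null()),
+        //     )
+        //     .set(member_dsl::sled_id.eq(new_sled_id))
+        //     // `updated == true` iff exactly one row matched the guard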
+ + // Update `sled_id` in database using CAS let updated = self .datastore .multicast_group_member_update_sled_id_if_current( - opctx, - InstanceUuid::from_untyped_uuid(member.parent_id), - member.sled_id, + ctx.opctx, + InstanceUuid::from_untyped_uuid(ctx.member.parent_id), + ctx.member.sled_id, Some(new_sled_id.into()), ) .await @@ -935,49 +915,46 @@ impl MulticastGroupReconciler { if !updated { debug!( - opctx.log, + ctx.opctx.log, "skipping sled_id update after migration due to concurrent change"; - "member_id" => %member.id, - "group_id" => %group.id(), - "old_sled_id" => ?member.sled_id, + "member_id" => %ctx.member.id, + "group_id" => %ctx.group.id(), + "old_sled_id" => ?ctx.member.sled_id, "new_sled_id" => %new_sled_id ); return Ok(StateTransition::NoChange); } - // Re-apply configuration on new sled - // If this fails (e.g., sled not yet in inventory), transition to "Joining" for retry - match self - .complete_instance_member_join( - opctx, - group, - member, - dataplane_client, - ) - .await - { - Ok(()) => { + // Re-apply configuration on new sled. Pass `new_sled_id` explicitly + // because the in-memory member struct still has the old sled_id. + match self.complete_instance_member_join(ctx, Some(new_sled_id)).await { + Ok(joined) => { info!( - opctx.log, + ctx.opctx.log, "member configuration re-applied after sled migration"; - "member_id" => %member.id, - "instance_id" => %member.parent_id, - "group_id" => %group.id(), - "group_name" => group.name().as_str(), - "group_multicast_ip" => %group.multicast_ip, + "member_id" => %ctx.member.id, + "instance_id" => %ctx.member.parent_id, + "group_id" => %ctx.group.id(), + "group_name" => ctx.group.name().as_str(), + "group_multicast_ip" => %ctx.group.multicast_ip, "new_sled_id" => %new_sled_id, - "dpd_operation" => "re_add_member_to_underlay_multicast_group" + "action" => "re_add_member_to_underlay_multicast_group", + "joined" => joined ); - Ok(StateTransition::StateChanged) + if joined { + Ok(StateTransition::StateChanged) + } else { + Ok(StateTransition::NoChange) + } } Err(e) => { // Failed to join on new sled. We transition to "Joining" and // retry next cycle/run. warn!( - opctx.log, + ctx.opctx.log, "failed to complete join on new sled after migration: transitioning to 'Joining' for retry"; - "member_id" => %member.id, - "group_id" => %group.id(), + "member_id" => %ctx.member.id, + "group_id" => %ctx.group.id(), "new_sled_id" => %new_sled_id, "error" => %e ); @@ -1005,9 +982,9 @@ impl MulticastGroupReconciler { let updated = self .datastore .multicast_group_member_set_state_if_current( - opctx, - MulticastGroupUuid::from_untyped_uuid(group.id()), - InstanceUuid::from_untyped_uuid(member.parent_id), + ctx.opctx, + MulticastGroupUuid::from_untyped_uuid(ctx.group.id()), + InstanceUuid::from_untyped_uuid(ctx.member.parent_id), MulticastGroupMemberState::Joined, MulticastGroupMemberState::Joining, ) @@ -1018,10 +995,10 @@ impl MulticastGroupReconciler { if updated { info!( - opctx.log, + ctx.opctx.log, "member transitioned to 'Joining': will retry on next reconciliation run"; - "member_id" => %member.id, - "group_id" => %group.id(), + "member_id" => %ctx.member.id, + "group_id" => %ctx.group.id(), "new_sled_id" => %new_sled_id ); Ok(StateTransition::StateChanged) @@ -1036,11 +1013,9 @@ impl MulticastGroupReconciler { /// Handle edge case where a "Joined" member has no sled_id. 
async fn handle_joined_without_sled( &self, - opctx: &OpContext, - group: &MulticastGroup, - member: &MulticastGroupMember, - dataplane_client: &MulticastDataplaneClient, + ctx: &MemberReconcileCtx<'_>, ) -> Result { + let MemberReconcileCtx { opctx, group, member, .. } = ctx; warn!( opctx.log, "'Joined' member has no sled_id: transitioning to 'Left'"; @@ -1049,10 +1024,7 @@ impl MulticastGroupReconciler { ); // Remove from dataplane and transition to "Left" - if let Err(e) = self - .remove_member_from_dataplane(opctx, member, dataplane_client) - .await - { + if let Err(e) = self.remove_member_from_dataplane(ctx).await { warn!( opctx.log, "failed to remove member with no sled_id from dataplane"; @@ -1094,7 +1066,7 @@ impl MulticastGroupReconciler { "instance_id" => %member.parent_id, "group_id" => %group.id(), "group_multicast_ip" => %group.multicast_ip, - "dpd_operation" => "remove_member_from_underlay_group", + "action" => "transition_to_left", "reason" => "inconsistent_state_sled_id_missing_in_joined_state" ); Ok(StateTransition::StateChanged) @@ -1103,22 +1075,20 @@ impl MulticastGroupReconciler { /// Instance-specific handler for members in "Left" state. async fn handle_instance_left( &self, - opctx: &OpContext, - group: &MulticastGroup, - member: &MulticastGroupMember, - instance_states: &InstanceStateMap, - dataplane_client: &MulticastDataplaneClient, + ctx: &MemberReconcileCtx<'_>, ) -> Result { - // Get pre-fetched instance state and sled_id - let (instance_valid, current_sled_id) = instance_states - .get(&member.parent_id) + let InstanceMulticastState { + valid: instance_valid, + sled_id: current_sled_id, + .. + } = ctx + .instance_states + .get(&ctx.member.parent_id) .copied() - .unwrap_or((false, None)); + .unwrap_or_default(); - // Handle permanent deletion first - if member.time_deleted.is_some() { - self.cleanup_deleted_member(opctx, group, member, dataplane_client) - .await?; + if ctx.member.time_deleted.is_some() { + self.cleanup_deleted_member(ctx).await?; return Ok(StateTransition::NeedsCleanup); } @@ -1128,28 +1098,43 @@ impl MulticastGroupReconciler { // The cleanup is idempotent and handles cases where: // - sled_id is None (uses fallback path) // - member was already removed from DPD - if let Err(e) = self - .remove_member_from_dataplane(opctx, member, dataplane_client) - .await - { - debug!( - opctx.log, + if let Err(e) = self.remove_member_from_dataplane(ctx).await { + warn!( + ctx.opctx.log, "failed to clean up DPD state for 'Left' member (will retry)"; - "member_id" => %member.id, + "member_id" => %ctx.member.id, "error" => ?e ); - // Continue to reactivation even on cleanup failure because - // the add operation may succeed if the port was already removed } - // Handle reactivation: instance valid and group active -> transition to "Joining" - if instance_valid && group.state == MulticastGroupState::Active { - return self - .reactivate_left_member(opctx, group, member, current_sled_id) - .await; + // Unsubscribe the instance's active VMM OPTE port from this multicast + // group. Best-effort since if the VMM is already gone, there's + // nothing to unsubscribe (the OPTE port was destroyed with the VMM). 
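+        //
+        // Illustrative summary (not part of this change) of the "Left"
+        // handler's decision order, condensed from the surrounding code:
+        //
+        //     if ctx.member.time_deleted.is_some() {
+        //         self.cleanup_deleted_member(ctx).await?; // permanent
+        //         return Ok(StateTransition::NeedsCleanup);
+        //     }
+        //     // best-effort DPD teardown + OPTE unsubscribe, then:
+        //     if instance_valid
+        //         && ctx.group.state == MulticastGroupState::Active
+        //     {
+        //         return self
+        //             .reactivate_left_member(ctx, current_sled_id)
+        //             .await;
+        //     }
+        //     Ok(StateTransition::NoChange) // stay "Left"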
+ if let Some(sled_id) = ctx.member.sled_id { + if let Err(e) = ctx + .sled_client + .unsubscribe_instance( + ctx.opctx, + ctx.group, + ctx.member, + sled_id.into(), + ) + .await + { + warn!( + ctx.opctx.log, + "failed to unsubscribe instance from multicast group"; + "member_id" => %ctx.member.id, + "sled_id" => %sled_id, + "error" => %e + ); + } + } + + if instance_valid && ctx.group.state == MulticastGroupState::Active { + return self.reactivate_left_member(ctx, current_sled_id).await; } - // Stay in "Left" state Ok(StateTransition::NoChange) } @@ -1157,11 +1142,10 @@ impl MulticastGroupReconciler { /// Transitions the member back to "Joining" state so it can rejoin the group. async fn reactivate_left_member( &self, - opctx: &OpContext, - group: &MulticastGroup, - member: &MulticastGroupMember, + ctx: &MemberReconcileCtx<'_>, current_sled_id: Option, ) -> Result { + let MemberReconcileCtx { opctx, group, member, .. } = ctx; debug!( opctx.log, "transitioning member from 'Left' to 'Joining': instance became valid and group active"; @@ -1250,10 +1234,10 @@ impl MulticastGroupReconciler { // Build the state map from the fetched data state_map.extend(members.iter().map(|member| { - let (is_valid, sled_id) = if let Some((instance, vmm_opt)) = + let state = if let Some((instance, vmm_opt)) = instance_vmm_data.get(&member.parent_id) { - let is_valid = matches!( + let valid = matches!( instance.nexus_state.state(), InstanceState::Creating | InstanceState::Starting @@ -1267,13 +1251,12 @@ impl MulticastGroupReconciler { SledUuid::from_untyped_uuid(vmm.sled_id.into_untyped_uuid()) }); - (is_valid, sled_id) + InstanceMulticastState { valid, sled_id } } else { - // Instance not found (mark as invalid) - (false, None) + InstanceMulticastState::default() }; - (member.parent_id, (is_valid, sled_id)) + (member.parent_id, state) })); debug!( @@ -1292,9 +1275,9 @@ impl MulticastGroupReconciler { /// Returns `None` if the instance has no sled assignment or cannot be found. async fn lookup_and_update_member_sled_id( &self, - opctx: &OpContext, - member: &MulticastGroupMember, + ctx: &MemberReconcileCtx<'_>, ) -> Result>, anyhow::Error> { + let MemberReconcileCtx { opctx, member, .. } = ctx; debug!( opctx.log, "member has no sled_id, attempting to look up instance sled"; @@ -1319,13 +1302,13 @@ impl MulticastGroupReconciler { return Ok(None); } Err(e) => { - debug!( + warn!( opctx.log, "failed to look up instance state"; "member" => ?member, "error" => ?e ); - return Ok(None); + return Err(e.into()); } }; @@ -1381,87 +1364,147 @@ impl MulticastGroupReconciler { } } - /// Complete a member join operation ("Joining" -> "Joined") for an instance. + /// Complete a member join by configuring the dataplane and subscribing + /// the VMM. + /// + /// When `sled_id_override` is provided (e.g., during migration), it + /// is used instead of the potentially stale `member.sled_id`. + /// + /// # Returns + /// + /// `Ok(true)` when the join completed successfully. `Ok(false)` when no + /// sled was available and the operation was a noop. 
     async fn complete_instance_member_join(
         &self,
-        opctx: &OpContext,
-        group: &MulticastGroup,
-        member: &MulticastGroupMember,
-        dataplane_client: &MulticastDataplaneClient,
-    ) -> Result<(), anyhow::Error> {
+        ctx: &MemberReconcileCtx<'_>,
+        sled_id_override: Option<SledUuid>,
+    ) -> Result<bool, anyhow::Error> {
         debug!(
-            opctx.log,
+            ctx.opctx.log,
             "completing member join";
-            "member" => ?member,
-            "group" => ?group
+            "member" => ?ctx.member,
+            "group" => ?ctx.group
         );
 
-        // Get sled_id from member record, or look it up and update if missing
-        let sled_id = match member.sled_id {
-            Some(id) => id,
-            None => {
-                match self
-                    .lookup_and_update_member_sled_id(opctx, member)
-                    .await?
-                {
-                    Some(id) => id,
-                    None => return Ok(()), // No sled available, cannot join
-                }
-            }
+        // Use the override if provided, then the member's cached sled_id,
+        // then look it up from the instance as a last resort.
+        let sled_id: SledUuid = if let Some(id) =
+            sled_id_override.or(ctx.member.sled_id.map(Into::into))
+        {
+            id
+        } else if let Some(id) =
+            self.lookup_and_update_member_sled_id(ctx).await?
+        {
+            id.into()
+        } else {
+            return Ok(false);
         };
 
-        self.add_member_to_dataplane(
-            opctx,
-            group,
-            member,
-            sled_id.into(),
-            dataplane_client,
-        )
-        .await?;
+        self.add_member_to_dataplane(ctx, sled_id).await?;
 
-        // Transition to "Joined" state (only if still in "Joining")
-        let updated = self
-            .datastore
-            .multicast_group_member_set_state_if_current(
-                opctx,
-                MulticastGroupUuid::from_untyped_uuid(group.id()),
-                InstanceUuid::from_untyped_uuid(member.parent_id),
-                MulticastGroupMemberState::Joining,
-                MulticastGroupMemberState::Joined,
-            )
+        // If the member is already in a "Joined" state (migration path), skip
+        // the state transition but still propagate and subscribe. During
+        // migration the caller updates the sled ID without changing state,
+        // so we must not gate propagation on this CAS.
+        if ctx.member.state != MulticastGroupMemberState::Joined {
+            let updated = self
+                .datastore
+                .multicast_group_member_set_state_if_current(
+                    ctx.opctx,
+                    MulticastGroupUuid::from_untyped_uuid(ctx.group.id()),
+                    InstanceUuid::from_untyped_uuid(ctx.member.parent_id),
+                    MulticastGroupMemberState::Joining,
+                    MulticastGroupMemberState::Joined,
+                )
+                .await
+                .context(
+                    "failed to conditionally transition member to 'Joined' state",
+                )?;
+
+            if !updated {
+                debug!(
+                    ctx.opctx.log,
+                    "skipping Joining→Joined transition due to concurrent update";
+                    "member_id" => %ctx.member.id,
+                    "group_id" => %ctx.group.id()
+                );
+                // Concurrent update moved the member away from the "Joining"
+                // state, so skip propagation and subscription.
+                return Ok(false);
+            }
+        }
+
+        // Propagate M2P mappings and forwarding entries to all sleds.
+        //
+        // At this point, the member is "Joined" in the database, so
+        // propagation includes this sled in forwarding next-hops. If
+        // propagation or subscription fails below, the member remains
+        // "Joined" with incomplete sled state. The reconciler's next pass
+        // converges via `handle_instance_joined` -> `verify_members`.
+        //
+        // Propagation failures are best-effort since the reconciler will
+        // re-converge all sleds on the next cycle. Subscribe failures
+        // below are treated as hard errors because the VMM cannot
+        // receive traffic without an OPTE port subscription.
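+        //
+        // Illustrative sketch (not part of this change): how callers
+        // consume the bool contract of this function (this mirrors
+        // `try_complete_join_if_ready` earlier in the file):
+        //
+        //     let joined =
+        //         self.complete_instance_member_join(ctx, None).await?;
+        //     Ok(if joined {
+        //         StateTransition::StateChanged
+        //     } else {
+        //         StateTransition::NoChange
+        //     })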
+ if let Err(e) = ctx + .sled_client + .propagate_m2p_and_forwarding(ctx.opctx, ctx.group) .await - .context( - "failed to conditionally transition member to 'Joined' state", - )?; - if !updated { - debug!( - opctx.log, - "skipping Joining→Joined transition due to concurrent update"; - "member_id" => %member.id, - "group_id" => %group.id() + { + warn!( + ctx.opctx.log, + "failed to propagate M2P/forwarding after member join"; + "member_id" => %ctx.member.id, + "group_id" => %ctx.group.id(), + "error" => %e ); } + // Subscribe the instance's active VMM OPTE port last. Propagation + // above is best-effort, and any sleds that failed will be converged + // by the reconciler on the next cycle. + if let Err(e) = ctx + .sled_client + .subscribe_instance(ctx.opctx, ctx.group, ctx.member, sled_id) + .await + { + warn!( + ctx.opctx.log, + "failed to subscribe instance to multicast group via sled-agent \ + (will retry next cycle)"; + "member_id" => %ctx.member.id, + "group_id" => %ctx.group.id(), + "sled_id" => %sled_id, + "error" => %e + ); + return Err(e); + } + info!( - opctx.log, + ctx.opctx.log, "member join completed"; - "member_id" => %member.id, - "group_id" => %group.id(), + "member_id" => %ctx.member.id, + "group_id" => %ctx.group.id(), "sled_id" => %sled_id ); - Ok(()) + Ok(true) } /// Apply member dataplane configuration (via DPD-client). async fn add_member_to_dataplane( &self, - opctx: &OpContext, - group: &MulticastGroup, - member: &MulticastGroupMember, + ctx: &MemberReconcileCtx<'_>, sled_id: SledUuid, - dataplane_client: &MulticastDataplaneClient, ) -> Result<(), anyhow::Error> { + let MemberReconcileCtx { + opctx, + group, + member, + dataplane_client, + sled_to_ports, + .. + } = ctx; let underlay_group_id = group.underlay_group_id.with_context(|| { format!("no underlay group for external group {}", group.id()) })?; @@ -1475,10 +1518,9 @@ impl MulticastGroupReconciler { )?; // Resolve sled to switch port configurations - let port_configs = self - .resolve_sled_to_switch_ports(opctx, sled_id, dataplane_client) - .await - .context("failed to resolve sled to switch ports")?; + let port_configs = + Self::resolve_sled_to_switch_ports(sled_to_ports, sled_id) + .context("failed to resolve sled to switch ports")?; for port_config in &port_configs { let dataplane_member = dpd_client::types::MulticastGroupMember { @@ -1528,18 +1570,84 @@ impl MulticastGroupReconciler { Ok(()) } - /// Remove member from known port configurations. - async fn remove_from_known_ports( - &self, - opctx: &OpContext, - member: &MulticastGroupMember, - sled_id: DbTypedUuid, - port_configs: &[SwitchBackplanePort], - underlay_group: &nexus_db_model::UnderlayMulticastGroup, - dataplane_client: &MulticastDataplaneClient, - ) -> Result<(), anyhow::Error> { - // Remove member from DPD for each port on the sled - for port_config in port_configs { + /// Remove member from known port configurations. + /// + /// Multicast underlay membership is keyed by (port, link), not by + /// member: the DPD member table tracks one entry per + /// (group, port_id, link_id), so multiple members sharing a rear + /// port collapse to one entry per group. + /// + /// Compute the union of active rear ports across other "Joined" members + /// in the group and skip any port still in use, so that removing one + /// member does not tear down forwarding for siblings on the same sled. 
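+    ///
+    /// Illustrative sketch (not part of this change): the retain/remove
+    /// predicate applied per port below, keyed by
+    /// `MemberPortKey = (PortId, LinkId)`:
+    ///
+    /// ```text
+    /// let in_use = |pc: &SwitchBackplanePort| {
+    ///     // Siblings on one sled share (PortId, LinkId), so the DPD
+    ///     // entry must survive while any sibling is still "Joined".
+    ///     active_member_ports.contains(&(pc.port_id.clone(), pc.link_id))
+    /// };
+    /// ```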
+ async fn remove_from_known_ports( + &self, + ctx: &MemberReconcileCtx<'_>, + sled_id: DbTypedUuid, + port_configs: &[SwitchBackplanePort], + underlay_group: &nexus_db_model::UnderlayMulticastGroup, + ) -> Result<(), anyhow::Error> { + let MemberReconcileCtx { + opctx, + member, + dataplane_client, + sled_to_ports, + .. + } = *ctx; + + let active_member_ports = match self + .compute_active_member_ports( + opctx, + member.external_group_id, + sled_to_ports, + Some(member.id.into_untyped_uuid()), + ) + .await + { + Ok(MemberPortUnion::Complete(ports)) => Some(ports), + Ok(MemberPortUnion::Partial(_)) => { + // Some other "Joined" members failed to resolve. Skip + // pruning to avoid withdrawing ports that may still be in + // use (reconciliation will retry). + info!( + opctx.log, + "union incomplete: skipping known-port removal to avoid disrupting unresolved members"; + "member_id" => %member.id, + "sled_id" => %sled_id, + "reason" => "some_joined_members_failed_port_resolution" + ); + return Ok(()); + } + Err(e) => { + info!( + opctx.log, + "failed to compute active member ports: skipping known-port removal"; + "member_id" => %member.id, + "sled_id" => %sled_id, + "error" => %e + ); + return Ok(()); + } + }; + + let (to_retain, to_remove): (Vec<_>, Vec<_>) = + port_configs.iter().partition(|pc| { + active_member_ports.as_ref().is_some_and(|active| { + active.contains(&(pc.port_id.clone(), pc.link_id)) + }) + }); + + for port_config in &to_retain { + debug!( + opctx.log, + "retaining shared rear port still in use by other group members"; + "member_id" => %member.id, + "port_id" => %port_config.port_id, + "sled_id" => %sled_id, + ); + } + + for port_config in &to_remove { let dataplane_member = dpd_client::types::MulticastGroupMember { port_id: port_config.port_id.clone(), link_id: port_config.link_id, @@ -1555,10 +1663,13 @@ impl MulticastGroupReconciler { opctx.log, "member removed from DPD"; "port_id" => %port_config.port_id, - "sled_id" => %sled_id + "sled_id" => %sled_id, ); } + let removed = to_remove.len(); + let retained = to_retain.len(); + info!( opctx.log, "multicast member configuration removed from switch forwarding tables"; @@ -1566,6 +1677,8 @@ impl MulticastGroupReconciler { "instance_id" => %member.parent_id, "sled_id" => %sled_id, "port_count" => port_configs.len(), + "ports_removed" => removed, + "ports_retained_shared" => retained, "dpd_operation" => "remove_member_from_underlay_multicast_group", "reason" => "instance_state_change_or_migration" ); @@ -1583,7 +1696,7 @@ impl MulticastGroupReconciler { &self, opctx: &OpContext, group_id: Uuid, - dataplane_client: &MulticastDataplaneClient, + sled_to_ports: &HashMap>, exclude_member_id: Option, ) -> Result { let group_members = self @@ -1616,14 +1729,10 @@ impl MulticastGroupReconciler { }; // Attempt to resolve sled to switch ports - match self - .resolve_sled_to_switch_ports( - opctx, - mem_sled_id.into(), - dataplane_client, - ) - .await - { + match Self::resolve_sled_to_switch_ports( + sled_to_ports, + mem_sled_id.into(), + ) { Ok(ports) => Some((mem, ports)), Err(e) => { warn!( @@ -1656,9 +1765,10 @@ impl MulticastGroupReconciler { link_id: cfg.link_id, direction: cfg.direction, }; - is_rear_underlay_member(&member).then(|| cfg.port_id) + is_rear_underlay_member(&member) + .then(|| (cfg.port_id, cfg.link_id)) }) - .collect::>(); + .collect::>(); // Return `Complete` or `Partial` based on whether all members resolved if failure_cnt == 0 { @@ -1676,6 +1786,7 @@ impl MulticastGroupReconciler { member: 
&MulticastGroupMember, underlay_group: &nexus_db_model::UnderlayMulticastGroup, dataplane_client: &MulticastDataplaneClient, + sled_to_ports: &HashMap>, ) -> Result<(), anyhow::Error> { // Sled resolution failed or no sled_id available (e.g., removed // from inventory, or member.sled_id=NULL). @@ -1704,7 +1815,7 @@ impl MulticastGroupReconciler { .compute_active_member_ports( opctx, member.external_group_id, - dataplane_client, + sled_to_ports, Some(member.id.into_untyped_uuid()), ) .await @@ -1741,7 +1852,9 @@ impl MulticastGroupReconciler { } // Remove only if not in union of active member ports - if !active_member_ports.contains(¤t_member.port_id) { + let member_key: MemberPortKey = + (current_member.port_id.clone(), current_member.link_id); + if !active_member_ports.contains(&member_key) { dataplane_client .remove_member(underlay_group, current_member.clone()) .await @@ -1764,18 +1877,16 @@ impl MulticastGroupReconciler { /// Remove member dataplane configuration (via DPD-client). async fn remove_member_from_dataplane( &self, - opctx: &OpContext, - member: &MulticastGroupMember, - dataplane_client: &MulticastDataplaneClient, + ctx: &MemberReconcileCtx<'_>, ) -> Result<(), anyhow::Error> { - let group = self - .datastore - .multicast_group_fetch( - opctx, - MulticastGroupUuid::from_untyped_uuid(member.external_group_id), - ) - .await - .context("failed to fetch group for member removal")?; + let MemberReconcileCtx { + opctx, + group, + member, + dataplane_client, + sled_to_ports, + .. + } = ctx; let underlay_group_id = group.underlay_group_id.with_context(|| { format!( @@ -1792,21 +1903,15 @@ impl MulticastGroupReconciler { // Try to remove via known ports if we have a `sled_id` and can resolve it if let Some(sled_id) = member.sled_id { - if let Ok(port_configs) = self - .resolve_sled_to_switch_ports( - opctx, - sled_id.into(), - dataplane_client, - ) - .await - { + if let Ok(port_configs) = Self::resolve_sled_to_switch_ports( + sled_to_ports, + sled_id.into(), + ) { self.remove_from_known_ports( - opctx, - member, + ctx, sled_id, &port_configs, &underlay_group, - dataplane_client, ) .await?; return Ok(()); @@ -1820,6 +1925,7 @@ impl MulticastGroupReconciler { member, &underlay_group, dataplane_client, + sled_to_ports, ) .await?; @@ -1830,11 +1936,9 @@ impl MulticastGroupReconciler { /// Ensures dataplane consistency by failing if removal operations fail. async fn cleanup_member_from_dataplane( &self, - opctx: &OpContext, - group: &MulticastGroup, - member: &MulticastGroupMember, - dataplane_client: &MulticastDataplaneClient, + ctx: &MemberReconcileCtx<'_>, ) -> Result<(), anyhow::Error> { + let MemberReconcileCtx { opctx, group, member, .. } = ctx; debug!( opctx.log, "cleaning up member from dataplane"; @@ -1846,11 +1950,9 @@ impl MulticastGroupReconciler { ); // Strict removal from dataplane (fail on errors) - self.remove_member_from_dataplane(opctx, member, dataplane_client) - .await - .context( - "failed to remove member configuration via DPD during cleanup", - )?; + self.remove_member_from_dataplane(ctx).await.context( + "failed to remove member configuration via DPD during cleanup", + )?; info!( opctx.log, @@ -1870,15 +1972,25 @@ impl MulticastGroupReconciler { /// - Removing the member from any unexpected/stale rear ports /// - Adding the member to expected ports /// + /// If the sled cannot be resolved (e.g., decommissioned), the member + /// is transitioned to "Left" and M2P/forwarding is propagated inline + /// to remove stale entries. 
+ /// /// This handles cases like `sp_slot` changes where the sled's physical /// location changed but the `sled_id` stayed the same. async fn verify_members( &self, - opctx: &OpContext, - group: &MulticastGroup, - member: &MulticastGroupMember, - dataplane_client: &MulticastDataplaneClient, + ctx: &MemberReconcileCtx<'_>, ) -> Result<(), anyhow::Error> { + let MemberReconcileCtx { + opctx, + group, + member, + dataplane_client, + sled_client, + sled_to_ports, + .. + } = ctx; debug!( opctx.log, "verifying joined member consistency"; @@ -1910,15 +2022,12 @@ impl MulticastGroupReconciler { .await .context("failed to fetch underlay group")?; - // Resolve expected member configurations (may refresh cache if TTL expired) - let expected_port_configs = match self - .resolve_sled_to_switch_ports( - opctx, - sled_id.into(), - dataplane_client, - ) - .await - { + // Resolve expected member configurations from the reconciliation + // pass map. + let expected_port_configs = match Self::resolve_sled_to_switch_ports( + sled_to_ports, + sled_id.into(), + ) { Ok(configs) => configs, Err(e) => { // If we can't resolve the sled anymore (e.g., removed from inventory), @@ -1932,13 +2041,24 @@ impl MulticastGroupReconciler { ); // Best effort removal on verification - let _ = self - .remove_member_from_dataplane( - opctx, - member, - dataplane_client, - ) - .await; + let _ = self.remove_member_from_dataplane(ctx).await; + + // Unsubscribe the instance before the CAS clears sled_id; + // otherwise, the OPTE subscription is stranded with no + // way to identify the sled on later passes. Best-effort + // since the VMM may already be torn down. + if let Err(e) = sled_client + .unsubscribe_instance(opctx, group, member, sled_id.into()) + .await + { + warn!( + opctx.log, + "failed to unsubscribe instance during port resolution failure"; + "member_id" => %member.id, + "sled_id" => %sled_id, + "error" => %e + ); + } let updated = self .datastore @@ -1952,6 +2072,21 @@ impl MulticastGroupReconciler { .context("failed to transition member to 'Left' after port resolution failure")?; if updated { + // Propagate updated M2P/forwarding to remove + // stale entries for this now-Left member. + if let Err(e) = sled_client + .propagate_m2p_and_forwarding(opctx, group) + .await + { + warn!( + opctx.log, + "failed to propagate M2P/forwarding after \ + member left due to unresolvable sled"; + "member_id" => %member.id, + "group_id" => %group.id(), + "error" => %e + ); + } info!( opctx.log, "member transitioned to 'Left': sled no longer resolvable"; @@ -1979,7 +2114,7 @@ impl MulticastGroupReconciler { .compute_active_member_ports( opctx, group.id(), - dataplane_client, + sled_to_ports, None, // Don't exclude any member ) .await @@ -2023,7 +2158,11 @@ impl MulticastGroupReconciler { } // If this port is not in our active member set, it's stale - if !active_ports.contains(¤t_member.port_id) { + let member_key: MemberPortKey = ( + current_member.port_id.clone(), + current_member.link_id, + ); + if !active_ports.contains(&member_key) { stale_ports.push(current_member.clone()); } } @@ -2105,6 +2244,24 @@ impl MulticastGroupReconciler { } } + // Ensure the instance subscription is in place. Sled-agent resolves + // the active VMM under its per-instance state lock, which keeps this + // call correct across live-migration propolis_id changes when the + // sled_id stays the same. The call is idempotent. 
+ if let Err(e) = sled_client + .subscribe_instance(opctx, group, member, sled_id.into()) + .await + { + warn!( + opctx.log, + "failed to verify instance subscription during member verification"; + "member_id" => %member.id, + "sled_id" => %sled_id, + "error" => %e + ); + return Err(e); + } + info!( opctx.log, "member verification completed"; @@ -2228,52 +2385,6 @@ impl MulticastGroupReconciler { .context("failed to list group members") } - /// Check cache for a sled mapping. - async fn check_sled_cache( - &self, - cache_key: SledUuid, - ) -> Option> { - let cache = self.sled_mapping_cache.read().await; - let (cached_at, mappings) = &*cache; - let elapsed = cached_at.elapsed(); - - if elapsed < self.sled_cache_ttl { - mappings.get(&cache_key).cloned() - } else { - None - } - } - - /// Detect backplane topology change and invalidate sled cache if needed. - /// - /// Compares the full (PortId, BackplaneLink) pairs to detect changes in: - /// - Port count (sleds added/removed) - /// - Port IDs (different physical slots) - /// - Link attributes (speed, lanes, connector type changes) - async fn handle_backplane_topology_change( - &self, - opctx: &OpContext, - previous_map: &Option, - new_map: &BackplaneMap, - ) { - if let Some(prev_map) = previous_map { - // Compare full maps (keys + values) to detect any topology changes - if prev_map != new_map { - info!( - opctx.log, - "backplane map topology change detected"; - "previous_port_count" => prev_map.len(), - "new_port_count" => new_map.len() - ); - info!( - opctx.log, - "invalidating sled mapping cache due to backplane topology change" - ); - self.invalidate_sled_mapping_cache().await; - } - } - } - /// Fetch the backplane map from DPD-client with caching. /// /// The client responds with the entire mapping of all cubbies in a rack. @@ -2285,13 +2396,10 @@ impl MulticastGroupReconciler { opctx: &OpContext, dataplane_client: &MulticastDataplaneClient, ) -> Result { - // Check cache first - let previous_map = { + { let cache = self.backplane_map_cache.read().await; if let Some((cached_at, ref map)) = *cache { - let elapsed = cached_at.elapsed(); - - if elapsed < self.backplane_cache_ttl { + if cached_at.elapsed() < self.backplane_cache_ttl { trace!( opctx.log, "backplane map cache hit"; @@ -2299,14 +2407,9 @@ impl MulticastGroupReconciler { ); return Ok(map.clone()); } - // Cache expired but keep reference to previous map for comparison - Some(map.clone()) - } else { - None } - }; + } - // Fetch from DPD via dataplane client on cache miss debug!( opctx.log, "fetching backplane map from DPD (cache miss or stale)" @@ -2317,69 +2420,161 @@ impl MulticastGroupReconciler { "failed to query backplane_map from DPD via dataplane client", )?; - // Detect topology change and invalidate sled cache if needed - self.handle_backplane_topology_change( - opctx, - &previous_map, - &backplane_map, - ) - .await; - info!( opctx.log, "fetched backplane map from DPD"; "port_count" => backplane_map.len() ); - // Update cache let mut cache = self.backplane_map_cache.write().await; *cache = Some((Instant::now(), backplane_map.clone())); Ok(backplane_map) } - /// Resolve a sled ID to switch ports for multicast traffic. - pub async fn resolve_sled_to_switch_ports( + /// Build the reconciliation pass sled-to-port mapping. + /// + /// Tries DDM peer topology first (live, authoritative for reachable + /// sleds) when a switch-zone client is available. 
Falls back to + /// inventory + DPD backplane validation when DDM is unavailable, + /// returns an empty result, or no switch-zone client could be built + /// this pass. The returned map is consumed by a single reconciler + /// pass and dropped afterward, so peer-state churn between passes + /// resolves on the next tick. + async fn build_sled_port_map( &self, opctx: &OpContext, - sled_id: SledUuid, dataplane_client: &MulticastDataplaneClient, - ) -> Result, anyhow::Error> { - // Check cache first - if let Some(port_configs) = self.check_sled_cache(sled_id).await { - return Ok(port_configs); - } + switch_zone_client: Option<&MulticastSwitchZoneClient>, + ) -> Result { + // Fetch DPD's backplane map once per reconciliation pass. It accounts + // for the enumeration of valid PortId values (regardless of how + // a peer's `if_name` ~ interface name ~ is shaped), so we use it to + // cross-validate parsed DDM peers and to ground the inventory + // fallback's slot lookups. + let backplane_map = + self.fetch_backplane_map(opctx, dataplane_client).await?; - // Refresh cache if stale or missing entry - if let Err(e) = - self.refresh_sled_mapping_cache(opctx, dataplane_client).await - { - warn!( + // List in-service sleds once per reconciliation pass and share with + // both resolution paths, avoiding duplicate DB queries. + let sleds = self + .datastore + .sled_list_all_batched(opctx, SledFilter::InService) + .await + .context("failed to list in-service sleds")?; + + // Prefer DDM: it reflects live peer status (link state, cable + // up/down). Inventory is a periodic collection snapshot and can + // lag actual topology. DDM may also be partial (a flapping link + // can drop a sled out of peers temporarily, or test/sim + // populates DDM from an earlier inventory snapshot); when it + // is, fill gaps from inventory rather than treat the partial + // result as authoritative. + let mut mappings = match switch_zone_client { + Some(switch_zone_client) => self + .fetch_sled_mapping_from_ddm( + opctx, + switch_zone_client, + &backplane_map, + &sleds, + ) + .await + .unwrap_or_else(|e| { + debug!( + opctx.log, + "DDM peer resolution unavailable, relying on inventory"; + "error" => %e, + ); + HashMap::new() + }), + None => HashMap::new(), + }; + let mut drift_count = 0usize; + + if mappings.len() < sleds.len() { + debug!( opctx.log, - "failed to refresh sled mapping cache, using stale data"; - "sled_id" => %sled_id, - "error" => %e + "supplementing DDM-derived mapping with inventory fallback"; + "in_service_sleds" => sleds.len(), + "ddm_mapped_sleds" => mappings.len(), ); - // Try cache again even with stale data - if let Some(port_configs) = self.check_sled_cache(sled_id).await { - return Ok(port_configs); + // If inventory itself fails, keep whatever DDM gave us. + // Discarding the partial DDM map on inventory failure would + // strand all members for this pass when DDM had useful data + // we could have used. Next pass retries. + match self + .fetch_sled_mapping_from_inventory( + opctx, + dataplane_client, + backplane_map, + &sleds, + ) + .await + { + Ok(inventory_map) => { + // Surface inventory-vs-DDM drift signals before + // merging. (a) DDM-only: DDM lists a sled missing + // from the latest inventory collection, typical + // when inventory hasn't caught up to a + // freshly-attached sled. (b) Disagreement: both + // have the sled but with different port info; DDM + // wins (live state), but the inventory lag is + // worth flagging. 
+ // + // TODO: surface this drift as an observability + // signal rather than reconciliation pass logs. + for (sled_id, ddm_ports) in &mappings { + match inventory_map.get(sled_id) { + None => info!( + opctx.log, + "DDM is ahead of inventory, as sled in DDM peers but not in latest inventory"; + "sled_id" => %sled_id, + ), + Some(inv_ports) if inv_ports != ddm_ports => { + warn!( + opctx.log, + "DDM and inventory disagree on sled port mapping, preferring DDM"; + "sled_id" => %sled_id, + ); + drift_count += 1; + } + Some(_) => {} + } + } + + for (sled_id, ports) in inventory_map { + mappings.entry(sled_id).or_insert(ports); + } + } + Err(e) => { + warn!( + opctx.log, + "inventory fallback failed, proceeding with partial DDM map"; + "ddm_mapped_sleds" => mappings.len(), + "in_service_sleds" => sleds.len(), + "error" => %e, + ); + } } - // If cache refresh failed and no stale data, propagate error - return Err(e.context("failed to refresh sled mapping cache and no cached data available")); } - // Try cache again after successful refresh - if let Some(port_configs) = self.check_sled_cache(sled_id).await { - return Ok(port_configs); - } + Ok(SledPortMap { + sled_to_ports: mappings, + ddm_inventory_drift: drift_count, + }) + } - // Sled not found after successful cache refresh. We treat this as an error - // so callers can surface this condition rather than silently applying - // no changes. - Err(anyhow::Error::msg(format!( - "failed to resolve sled to switch ports: \ - sled {sled_id} not found in mapping cache (not a scrimlet or removed)" - ))) + /// Look up switch ports for a sled in the reconciliation pass mapping. + fn resolve_sled_to_switch_ports( + sled_to_ports: &HashMap>, + sled_id: SledUuid, + ) -> Result, anyhow::Error> { + sled_to_ports.get(&sled_id).cloned().ok_or_else(|| { + anyhow::Error::msg(format!( + "sled {sled_id} not found in reconciliation pass sled \ + mapping (not in DDM peers or inventory)" + )) + }) } /// Find SP in inventory for a given sled's baseboard. @@ -2414,8 +2609,8 @@ impl MulticastGroupReconciler { sp_slot: u32, backplane_map: &BackplaneMap, ) -> Result>, anyhow::Error> { - let port_id = dpd_client::types::PortId::Rear( - dpd_client::types::Rear::try_from(format!("rear{sp_slot}")) + let port_id = PortId::Rear( + Rear::try_from(format!("rear{sp_slot}")) .context("invalid rear port number")?, ); @@ -2443,8 +2638,8 @@ impl MulticastGroupReconciler { Ok(Some(vec![SwitchBackplanePort { port_id, - link_id: dpd_client::types::LinkId(0), - direction: dpd_client::types::Direction::Underlay, + link_id: LinkId(0), + direction: Direction::Underlay, }])) } @@ -2514,12 +2709,95 @@ impl MulticastGroupReconciler { /// /// Where `entry.cubby` is the physical cubby/slot number (same as our `sp_slot`), /// and this maps it to a `PortId::Rear` that DPD can program on the Tofino ASIC. - async fn refresh_sled_mapping_cache( + /// Fetch the sled-to-port mapping from DDM peer topology. + /// + /// DDM peers provide live sled-to-port mapping via the `if_name` + /// field (e.g., `"tfportrear0_0"`, `"tfportqsfp0_0"`). More current + /// than inventory. + /// + /// Joins active DDM peers (by IPv6 address) against the in-service + /// sled list and parses each peer's `tfport_` + /// interface name into a [`SwitchBackplanePort`]. Any DPD port + /// variant (rear, qsfp, ...) is supported; direction is derived + /// from the port kind. 
Parsed `PortId`s are cross-validated against
+    /// the DPD backplane map: peers whose port is unknown to DPD are
+    /// dropped, so the prefix shape (`tfport`) is just a tokenizer and
+    /// correctness rides on DPD's authoritative port enumeration.
+    async fn fetch_sled_mapping_from_ddm(
+        &self,
+        opctx: &OpContext,
+        switch_zone_client: &MulticastSwitchZoneClient,
+        backplane_map: &BackplaneMap,
+        sleds: &[Sled],
+    ) -> Result<HashMap<SledUuid, Vec<SwitchBackplanePort>>, anyhow::Error>
+    {
+        let peers = switch_zone_client
+            .get_ddm_peers()
+            .await
+            .context("failed to get DDM peers")?;
+
+        let addr_to_sled: HashMap<Ipv6Addr, SledUuid> = sleds
+            .iter()
+            .map(|sled| (sled.ip(), SledUuid::from(sled.id())))
+            .collect();
+
+        let mappings: HashMap<SledUuid, Vec<SwitchBackplanePort>> = peers
+            .iter()
+            .filter(|p| {
+                matches!(
+                    p.status,
+                    omicron_ddm_admin_client::types::PeerStatus::Active
+                )
+            })
+            .filter_map(|p| {
+                let if_name = p.if_name.as_ref()?;
+                let sled_id = *addr_to_sled.get(&p.addr)?;
+                let port = parse_ddm_if_name_to_port(if_name)?;
+                if !backplane_map.contains_key(&port.port_id) {
+                    debug!(
+                        opctx.log,
+                        "dropping DDM peer: port_id not in DPD backplane map";
+                        "if_name" => %if_name,
+                        "port_id" => %port.port_id,
+                    );
+                    return None;
+                }
+                Some((sled_id, port))
+            })
+            .fold(HashMap::new(), |mut acc, (sled_id, port)| {
+                acc.entry(sled_id).or_default().push(port);
+                acc
+            });
+
+        if mappings.is_empty() {
+            return Err(anyhow::Error::msg(
+                "no sled-to-port mappings resolved from DDM peers",
+            ));
+        }
+
+        debug!(
+            opctx.log,
+            "fetched sled mapping from DDM peers";
+            "mapped_sleds" => mappings.len(),
+        );
+
+        Ok(mappings)
+    }
+
+    /// Fetch the sled-to-port mapping from inventory (fallback).
+    ///
+    /// Used when DDM peer topology is unavailable. Joins the latest
+    /// inventory collection's SP records against the in-service sled
+    /// list, validating each `sp_slot` against the DPD backplane map
+    /// passed in by [`Self::build_sled_port_map`].
+    async fn fetch_sled_mapping_from_inventory(
         &self,
         opctx: &OpContext,
         dataplane_client: &MulticastDataplaneClient,
-    ) -> Result<(), anyhow::Error> {
-        // Fetch required data
+        mut backplane_map: BackplaneMap,
+        sleds: &[Sled],
+    ) -> Result<HashMap<SledUuid, Vec<SwitchBackplanePort>>, anyhow::Error>
+    {
         let inventory = self
             .datastore
             .inventory_get_latest_collection(opctx)
@@ -2529,21 +2807,11 @@ impl MulticastGroupReconciler {
                 anyhow::Error::msg("no inventory collection available")
             })?;
 
-        // First attempt with current backplane map
-        let mut backplane_map =
-            self.fetch_backplane_map(opctx, dataplane_client).await?;
-
-        let sleds = self
-            .datastore
-            .sled_list_all_batched(opctx, SledFilter::InService)
-            .await
-            .context("failed to list in-service sleds for inventory mapping")?;
-
-        // Build sled → port mappings
-        let (mut mappings, mut validation_failures) = self
-            .build_sled_mappings(opctx, &sleds, &inventory, &backplane_map)?;
+        let (mut mappings, mut validation_failures) =
+            self.build_sled_mappings(opctx, sleds, &inventory, &backplane_map)?;
 
-        // If we had validation failures, invalidate backplane cache and retry once
+        // Validation failures may indicate stale backplane data, so we refresh
+        // and retry once before reporting.
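The fallback merge described above uses `entry().or_insert`, so a DDM-resolved sled is never overwritten by inventory; inventory only fills gaps. A small self-contained illustration of that precedence with plain string keys standing in for the real sled/port types:

```rust
use std::collections::HashMap;

fn main() {
    // DDM-derived mapping (live view): sled A on rear0.
    let mut merged: HashMap<&str, Vec<&str>> =
        HashMap::from([("sled-a", vec!["rear0"])]);
    // Inventory mapping (may lag): sled A on rear1, plus sled B.
    let inventory: HashMap<&str, Vec<&str>> =
        HashMap::from([("sled-a", vec!["rear1"]), ("sled-b", vec!["rear2"])]);

    // or_insert only fills sleds DDM did not resolve, so on
    // disagreement the live DDM view wins.
    for (sled, ports) in inventory {
        merged.entry(sled).or_insert(ports);
    }

    assert_eq!(merged["sled-a"], vec!["rear0"]); // DDM wins
    assert_eq!(merged["sled-b"], vec!["rear2"]); // gap filled
}
```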
if validation_failures > 0 { info!( opctx.log, @@ -2551,10 +2819,8 @@ impl MulticastGroupReconciler { "validation_failures" => validation_failures ); - // Invalidate the backplane cache self.invalidate_backplane_cache().await; - // Fetch fresh backplane map backplane_map = self .fetch_backplane_map(opctx, dataplane_client) .await @@ -2562,7 +2828,6 @@ impl MulticastGroupReconciler { "failed to fetch fresh backplane map after invalidation", )?; - // Retry mapping with fresh backplane data (mappings, validation_failures) = self.build_sled_mappings( opctx, &sleds, @@ -2570,7 +2835,6 @@ impl MulticastGroupReconciler { &backplane_map, )?; - // Log sleds that still fail with fresh backplane data if validation_failures > 0 { warn!( opctx.log, @@ -2580,16 +2844,11 @@ impl MulticastGroupReconciler { } } - // Update cache let sled_count = mappings.len(); - let mut cache = self.sled_mapping_cache.write().await; - *cache = (Instant::now(), mappings); - - // Log results if validation_failures > 0 { warn!( opctx.log, - "sled mapping cache refreshed with validation failures"; + "fetched sled mapping from inventory with validation failures"; "total_sleds" => sleds.len(), "mapped_sleds" => sled_count, "validation_failures" => validation_failures @@ -2597,31 +2856,42 @@ impl MulticastGroupReconciler { } else { info!( opctx.log, - "sled mapping cache refreshed successfully"; + "fetched sled mapping from inventory"; "total_sleds" => sleds.len(), "mapped_sleds" => sled_count ); } - Ok(()) + Ok(mappings) } /// Cleanup a member that is marked for deletion (time_deleted set). + /// + /// This includes unsubscribing a member from its VMM, removing + /// it from the dataplane, and hard-deleting the DB row. async fn cleanup_deleted_member( &self, - opctx: &OpContext, - group: &MulticastGroup, - member: &MulticastGroupMember, - dataplane_client: &MulticastDataplaneClient, + ctx: &MemberReconcileCtx<'_>, ) -> Result<(), anyhow::Error> { + let MemberReconcileCtx { opctx, group, member, sled_client, .. } = ctx; + // Unsubscribe from sled-agent (best-effort, VMM may be gone). + if let Some(sled_id) = member.sled_id { + if let Err(e) = sled_client + .unsubscribe_instance(opctx, group, member, sled_id.into()) + .await + { + debug!( + opctx.log, + "failed to unsubscribe instance during member cleanup"; + "member_id" => %member.id, + "sled_id" => %sled_id, + "error" => %e + ); + } + } + // Use the consolidated cleanup helper with strict error handling - self.cleanup_member_from_dataplane( - opctx, - group, - member, - dataplane_client, - ) - .await + self.cleanup_member_from_dataplane(ctx).await } /// Get all multicast groups that need member reconciliation. @@ -2642,3 +2912,88 @@ impl MulticastGroupReconciler { ) } } + +/// Parse a DDM peer interface name (e.g., `"tfportrear0_0"`) into a +/// `SwitchBackplanePort` for sled-bound multicast member programming. +/// +/// The DDM peer `if_name` follows `tfport_`, where +/// `` is a DPD-recognized port name. This parser deliberately +/// rejects any non-rear `PortId`. In production, a sled's only +/// physical path to a switch is the rack backplane. +/// +/// TODO: Egress (uplink) members are not yet implemented. When they +/// land, they will come from group-level configuration applied +/// directly via DPD rather than from DDM peer discovery. See the +/// `TODO` in [`MulticastGroupReconciler::add_member_to_dataplane`]. 
+fn parse_ddm_if_name_to_port(if_name: &str) -> Option<SwitchBackplanePort> {
+    use std::str::FromStr;
+
+    let stripped = if_name.strip_prefix("tfport")?;
+    let (port_str, link_str) = stripped.rsplit_once('_')?;
+
+    let port_id = PortId::from_str(port_str).ok()?;
+    let PortId::Rear(_) = port_id else {
+        return None;
+    };
+    let link_id = LinkId(link_str.parse::<u8>().ok()?);
+
+    Some(SwitchBackplanePort {
+        port_id,
+        link_id,
+        direction: Direction::Underlay,
+    })
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn parse_valid_rear_port() {
+        let port = parse_ddm_if_name_to_port("tfportrear0_0").unwrap();
+        assert_eq!(
+            port.port_id,
+            PortId::Rear(Rear::try_from("rear0".to_string()).unwrap())
+        );
+        assert_eq!(port.link_id, LinkId(0));
+        assert_eq!(port.direction, Direction::Underlay);
+    }
+
+    #[test]
+    fn parse_higher_port_number() {
+        let port = parse_ddm_if_name_to_port("tfportrear31_0").unwrap();
+        assert_eq!(
+            port.port_id,
+            PortId::Rear(Rear::try_from("rear31".to_string()).unwrap())
+        );
+    }
+
+    #[test]
+    fn parse_nonzero_link() {
+        let port = parse_ddm_if_name_to_port("tfportrear5_2").unwrap();
+        assert_eq!(port.link_id, LinkId(2));
+    }
+
+    #[test]
+    fn parse_non_rear_port_returns_none() {
+        // Sleds only attach via rear ports; reject other variants.
+        assert!(parse_ddm_if_name_to_port("tfportqsfp0_0").is_none());
+    }
+
+    #[test]
+    fn parse_invalid_prefix_returns_none() {
+        assert!(parse_ddm_if_name_to_port("eth0").is_none());
+        assert!(parse_ddm_if_name_to_port("").is_none());
+    }
+
+    #[test]
+    fn parse_missing_underscore_returns_none() {
+        assert!(parse_ddm_if_name_to_port("tfportrear0").is_none());
+    }
+
+    #[test]
+    fn parse_non_numeric_returns_none() {
+        assert!(parse_ddm_if_name_to_port("tfportrearX_0").is_none());
+        assert!(parse_ddm_if_name_to_port("tfportrear0_Y").is_none());
+    }
+}
diff --git a/nexus/src/app/background/tasks/multicast/mod.rs b/nexus/src/app/background/tasks/multicast/mod.rs
index 8f592a41087..2f6ff6c0a0d 100644
--- a/nexus/src/app/background/tasks/multicast/mod.rs
+++ b/nexus/src/app/background/tasks/multicast/mod.rs
@@ -32,6 +32,8 @@
 //! - Dataplane state convergence
 //! - Group and Member state checks and transitions ("Joining" → "Joined" → "Left")
 //! - Drift detection and correction
+//! - Switch zone coordination: MRIB route programming through MGD,
+//!   peer topology lookups from DDM
 //! - Cleanup of orphaned resources
 //!
 //! ## Multicast Group Architecture
@@ -84,7 +86,7 @@
 //! - Unlike linear probing (`h + i`), scattered outputs avoid clustering
 //! - **8-bit salt**: 256 unique underlay addresses per external IP
 //! - **Resolution**: Exhaustion requires 256 other groups to occupy exactly
-//!   those 256 scattered addresses—effectively impossible in 2^64 space
+//!   those 256 scattered addresses, effectively impossible in 2^64 space
 //!
 //! ### Forwarding Architecture (Incoming multicast traffic to guests)
 //!
@@ -105,7 +107,33 @@
 //! - **Group lifecycle**: "Creating" → "Active" → "Deleting" → hard-deleted
 //! - **Member lifecycle**: "Joining" → "Joined" → "Left" → soft-deleted → hard-deleted
 //! - **Dataplane updates**: DPD API calls for P4 table updates
-//! - **Topology mapping**: Sled-to-switch-port resolution (with caching)
+//! - **MRIB programming**: multicast routing entries written through
+//!   MGD, diffed against a per-pass snapshot and withdrawn when no
+//!   "Joined" members remain so DDM peers stop sending traffic
+//! - **Sled propagation**: M2P mappings and forwarding entries pushed to sled-agents
+//! 
- **OPTE subscriptions**: Per-instance multicast group subscriptions +//! on target sleds (keyed at the sled by the active VMM's propolis-id) +//! - **Topology mapping**: Per-pass sled-to-switch-port resolution from +//! DDM peers (primary) or inventory + DPD backplane (fallback) +//! +//! ## RPW Saga Coordination +//! +//! The reconciler launches sagas for transactional operations +//! (e.g. external+underlay group ensure). By default sagas retry +//! independently and the next reconciler tick observes the resulting +//! state. +//! +//! For group creation, the reconciler instead drains saga completion +//! within the same pass so [`reconcile_member_states`] and +//! [`reconcile_active_groups`] can converge in one tick. The motivation +//! is operator-visible latency: members see multicast settle within a +//! single reconciler interval of joining, rather than waiting an +//! additional tick for the saga's effects to be observed. The drain is +//! bounded by the enclosing `buffer_unordered` concurrency, so multiple +//! groups still progress in parallel. +//! +//! This mirrors the `saga_run` + drain pattern used by +//! [`instance_reincarnation`] and [`instance_updater`]. //! //! ## Deletion Semantics: Groups vs Members //! @@ -126,9 +154,13 @@ //! - Cleanup task eventually hard-deletes the row //! //! [RFC 7346]: https://www.rfc-editor.org/rfc/rfc7346 +//! [`UNDERLAY_MULTICAST_SUBNET`]: omicron_common::address::UNDERLAY_MULTICAST_SUBNET +//! [`reconcile_member_states`]: MulticastGroupReconciler::reconcile_member_states +//! [`reconcile_active_groups`]: MulticastGroupReconciler::reconcile_active_groups +//! [`instance_reincarnation`]: crate::app::background::tasks::instance_reincarnation +//! [`instance_updater`]: crate::app::background::tasks::instance_updater -use std::collections::{BTreeMap, HashMap}; -use std::net::{IpAddr, Ipv6Addr}; +use std::collections::BTreeMap; use std::sync::Arc; use std::time::{Duration, Instant}; @@ -138,27 +170,21 @@ use internal_dns_resolver::Resolver; use serde_json::json; use slog::{error, info}; use tokio::sync::RwLock; -use tokio::sync::watch::Receiver; use nexus_db_model::MulticastGroup; use nexus_db_queries::context::OpContext; use nexus_db_queries::db::DataStore; use nexus_types::internal_api::background::MulticastGroupReconcilerStatus; -use nexus_types::inventory::{Collection, SpType}; -use omicron_common::address::UNDERLAY_MULTICAST_SUBNET; -use omicron_uuid_kinds::SledUuid; -use sled_hardware_types::BaseboardId; use crate::app::background::BackgroundTask; use crate::app::multicast::dataplane::MulticastDataplaneClient; +use crate::app::multicast::sled::MulticastSledClient; +use crate::app::multicast::switch_zone::MulticastSwitchZoneClient; use crate::app::saga::StartSaga; pub(crate) mod groups; pub(crate) mod members; - -/// Type alias for the sled mapping cache. -type SledMappingCache = - Arc>)>>; +mod mrib; /// Type alias for the backplane map cache. type BackplaneMapCache = Arc< @@ -187,7 +213,7 @@ pub(crate) enum StateTransition { } /// Switch port configuration for multicast group members. -#[derive(Clone, Debug)] +#[derive(Clone, Debug, PartialEq, Eq)] pub(crate) struct SwitchBackplanePort { /// Switch port ID pub port_id: dpd_client::types::PortId, @@ -203,12 +229,6 @@ pub(crate) struct MulticastGroupReconciler { datastore: Arc, resolver: Resolver, sagas: Arc, - /// Receiver for inventory updates from the inventory loader background task. - rx_inventory: Receiver>>, - /// Cache for sled-to-backplane-port mappings. 
- /// Maps sled_id → rear backplane ports for multicast traffic routing. - sled_mapping_cache: SledMappingCache, - sled_cache_ttl: Duration, /// Cache for backplane hardware topology from DPD. /// Maps PortId → BackplaneLink for platform-specific port validation. backplane_map_cache: BackplaneMapCache, @@ -219,12 +239,6 @@ pub(crate) struct MulticastGroupReconciler { group_concurrency_limit: usize, /// Whether multicast functionality is enabled (or not). enabled: bool, - /// Last seen sled baseboard→sp_slot mappings for cache invalidation. - /// - /// We track sled locations (keyed by baseboard identity), as sled - /// physical locations rarely change. Caches are only invalidated - /// when `sp_slot` values differ. - last_seen_sled_slots: HashMap, u16>, } impl MulticastGroupReconciler { @@ -232,27 +246,18 @@ impl MulticastGroupReconciler { datastore: Arc, resolver: Resolver, sagas: Arc, - rx_inventory: Receiver>>, enabled: bool, - sled_cache_ttl: Duration, backplane_cache_ttl: Duration, ) -> Self { Self { datastore, resolver, sagas, - rx_inventory, - sled_mapping_cache: Arc::new(RwLock::new(( - Instant::now(), - HashMap::new(), - ))), - sled_cache_ttl, backplane_map_cache: Arc::new(RwLock::new(None)), backplane_cache_ttl, member_concurrency_limit: 100, group_concurrency_limit: 100, enabled, - last_seen_sled_slots: HashMap::new(), } } @@ -273,174 +278,6 @@ impl MulticastGroupReconciler { let mut cache = self.backplane_map_cache.write().await; *cache = None; // Clear the cache entirely } - - /// Invalidate the sled mapping cache, forcing refresh on next access. - /// - /// Called when: - /// - Backplane topology changes detected (different port count/layout) - /// - Need to re-validate sled mappings against new topology - pub(crate) async fn invalidate_sled_mapping_cache(&self) { - let mut cache = self.sled_mapping_cache.write().await; - // Set timestamp to past to force refresh on next check - *cache = (Instant::now() - self.sled_cache_ttl, cache.1.clone()); - } - - /// Check if sled locations changed and invalidate caches if so. - /// - /// Compares actual serial→sp_slot mappings since sled locations rarely - /// change. Uses the inventory watch channel for cheap access to latest - /// inventory. - async fn check_sled_locations_for_cache_invalidation( - &mut self, - opctx: &OpContext, - ) { - // Get inventory from watch channel (cheap Arc::clone, no DB query) - let Some(inventory) = - self.rx_inventory.borrow_and_update().as_ref().map(Arc::clone) - else { - debug!( - opctx.log, - "skipping cache invalidation check: no inventory available" - ); - return; - }; - - // Build current baseboard→sp_slot mapping for sleds only - let current_sled_slots: HashMap, u16> = inventory - .sps - .iter() - .filter(|(_, sp)| sp.sp_type == SpType::Sled) - .map(|(baseboard, sp)| (Arc::clone(baseboard), sp.sp_slot)) - .collect(); - - if current_sled_slots != self.last_seen_sled_slots { - // Skip invalidation on first run (just initializing) - if !self.last_seen_sled_slots.is_empty() { - info!( - opctx.log, - "invalidating multicast caches due to sled location change"; - "previous_sled_count" => self.last_seen_sled_slots.len(), - "current_sled_count" => current_sled_slots.len() - ); - self.invalidate_backplane_cache().await; - self.invalidate_sled_mapping_cache().await; - } - self.last_seen_sled_slots = current_sled_slots; - } - } -} - -/// Maps an external multicast address to an underlay address in ff04::/64. 
-/// -/// Maps external addresses into [`UNDERLAY_MULTICAST_SUBNET`] (ff04::/64, -/// a subset of the admin-local scope ff04::/16 per RFC 7346) using XOR-fold. This prefix is static -/// for consistency across racks. -/// -/// See [RFC 7346] for IPv6 multicast admin-local scope. -/// -/// # Salt Parameter (Collision Avoidance) -/// -/// The `salt` enables collision avoidance via XOR perturbation. XOR is bijective: -/// distinct salts produce distinct outputs (since `a ⊕ b = a ⊕ c` implies `b = c`), -/// guaranteeing 256 unique addresses per external IP. -/// -/// This is mathematically equivalent to [binary probing] in hash table literature -/// (`h_i(x) := h(x) ⊕ i`), though the domain context differs in that we're mapping -/// into a sparse 2^64 IPv6 address space rather than probing array slots. -/// -/// ```text -/// Salt perturbation example (h = 0xa): -/// ┌──────┬─────────┬────────┐ -/// │ salt │ h ⊕ salt│ output │ -/// ├──────┼─────────┼────────┤ -/// │ 0 │ 0xa ⊕ 0 │ 0xa │ -/// │ 1 │ 0xa ⊕ 1 │ 0xb │ -/// │ 2 │ 0xa ⊕ 2 │ 0x8 │ -/// │ 3 │ 0xa ⊕ 3 │ 0x9 │ -/// │ 4 │ 0xa ⊕ 4 │ 0xe │ -/// │ 5 │ 0xa ⊕ 5 │ 0xf │ -/// │ 6 │ 0xa ⊕ 6 │ 0xc │ -/// │ 7 │ 0xa ⊕ 7 │ 0xd │ -/// └──────┴─────────┴────────┘ -/// Outputs: [a, b, 8, 9, e, f, c, d] — scattered, not sequential -/// ``` -/// -/// On collision (i.e., underlay IP already in use), we increment salt and retry. -/// This stores the successful salt with the group for deterministic -/// reconstruction. -/// -/// # Implementation -/// -/// ```text -/// underlay_ip = ff04:: | ((xor_fold(external_ip) ⊕ salt) & HOST_MASK) -/// ``` -/// -/// - IPv4: embedded directly (32 bits fits in 64-bit host space) -/// - IPv6: XOR upper and lower 64-bit halves to fold 128→64 bits -/// - Salt ∈ [0, 255]: XORed into host bits for collision retry -/// -/// The `& HOST_MASK` guarantees the result stays within ff04::/64, our static -/// underlay subnet. -/// -/// [RFC 7346]: https://www.rfc-editor.org/rfc/rfc7346 -/// [binary probing]: https://courses.grainger.illinois.edu/CS473/fa2025/notes/05-hashing.pdf -fn map_external_to_underlay_ip(external_ip: IpAddr, salt: u8) -> IpAddr { - // Derive constants from the default underlay multicast subnet - const HOST_BITS: u32 = 128 - UNDERLAY_MULTICAST_SUBNET.width() as u32; - let prefix_base = - u128::from_be_bytes(UNDERLAY_MULTICAST_SUBNET.addr().octets()); - - map_external_to_underlay_ip_impl(prefix_base, HOST_BITS, external_ip, salt) -} - -/// Core implementation: maps external multicast IP to underlay IPv6 address. -/// -/// Separated for testing purposes. -/// -/// Parameters: -/// - `prefix_base`: Network prefix as u128 (e.g., ff04:: → 0xff04_0000_...) -/// - `host_bits`: Number of host bits (e.g., 64 for a /64 prefix) -/// - `external_ip`: The external multicast address to map -/// - `salt`: XOR perturbation for collision avoidance (0-255) -/// -/// Returns: The mapped underlay IPv6 address -fn map_external_to_underlay_ip_impl( - prefix_base: u128, - host_bits: u32, - external_ip: IpAddr, - salt: u8, -) -> IpAddr { - let host_mask: u128 = - if host_bits >= 128 { u128::MAX } else { (1u128 << host_bits) - 1 }; - - // Derive host value from external IP - let host_value: u128 = match external_ip { - IpAddr::V4(ipv4) => { - // IPv4 (32 bits) fits directly in host space - u128::from(u32::from_be_bytes(ipv4.octets())) - } - IpAddr::V6(ipv6) => { - // XOR-fold 128 bits → host_bits (upper ^ lower). 
- // This ensures different external addresses (even with identical - // lower bits but different scopes) map to different underlay IPs. - let full = u128::from_be_bytes(ipv6.octets()); - if host_bits >= 128 { - full - } else { - (full >> host_bits) ^ (full & host_mask) - } - } - }; - - // XOR salt for collision avoidance retry, masked to stay in host bits. - // The salt is applied after folding, ensuring different salts produce - // different underlay IPs while staying within the prefix. - let salted = (host_value ^ u128::from(salt)) & host_mask; - - // Combine prefix + host (masking guarantees result stays in prefix) - let underlay = prefix_base | salted; - - IpAddr::V6(Ipv6Addr::from(underlay.to_be_bytes())) } impl BackgroundTask for MulticastGroupReconciler { @@ -513,7 +350,23 @@ impl MulticastGroupReconciler { trace!(opctx.log, "starting multicast reconciliation pass"); - self.check_sled_locations_for_cache_invalidation(opctx).await; + // Per-pass client construction policy: + // + // - DPD (dataplane): fail-closed. Required by every step. A + // pass without DPD has nothing useful to do. + // - sled-agent: never fails. The wrapper builds per-sled + // clients on demand, so construction is infallible. + // - MGD MRIB: fail-open. Only three steps are MRIB-coupled + // (member states, active reconciliation, deleting + // reconciliation). Creating-group reconciliation and the two + // cleanup steps run regardless. Subsequent passes retry the + // gated steps when MRIB returns. + // + // The non-gated cleanup steps never touch the dataplane. + // `cleanup_empty_groups` only marks "Deleting", and the terminal + // "Deleting" → "Deleted" transition lives in the gated + // `reconcile_deleting_groups`. A group therefore cannot vanish + // from the reconciler's view while its MRIB route still exists. // Create dataplane client (across switches) once for the entire // reconciliation pass (in case anything has changed) @@ -533,6 +386,35 @@ impl MulticastGroupReconciler { } }; + // Create sled-agent client for OPTE subscriptions and + // M2P/forwarding propagation. + let sled_client = MulticastSledClient::new( + self.datastore.clone(), + self.resolver.clone(), + ); + + // Create MGD MRIB client for multicast route distribution + // via DDM. `mg-lower` syncs MRIB changes to DDM automatically. + // + // Construction failure (e.g., transient DNS resolution returning + // no switch zones) skips MRIB-coupled work this pass but lets + // creating-group and cleanup paths progress. Subsequent passes + // will retry. + let switch_zone_client = match MulticastSwitchZoneClient::new( + self.resolver.clone(), + opctx.log.clone(), + ) + .await + { + Ok(client) => Some(client), + Err(e) => { + let msg = + format!("failed to create multicast MRIB client: {e:#}"); + status.errors.push(msg); + None + } + }; + // Process creating groups match self.reconcile_creating_groups(opctx).await { Ok(count) => status.groups_created += count, @@ -542,9 +424,25 @@ impl MulticastGroupReconciler { } } - // Process member state changes - match self.reconcile_member_states(opctx, &dataplane_client).await { - Ok(count) => status.members_processed += count, + // Process member state changes. The switch-zone client is optional. + // When absent, the per-pass sled-to-port map skips the DDM primary + // path and uses the inventory fallback. DB-only transitions + // ("Joining" → "Left") converge regardless. "Joining" → "Joined" + // transitions for stopped/migrating instances retries on the next pass + // once MGD/DDM are reachable. 
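A condensed sketch of the fail-open gating policy described in the comments above, with hypothetical stand-ins for the per-pass MRIB client and status record:

```rust
// Hypothetical stand-ins for the per-pass MRIB client and status.
struct MribClient;

#[derive(Default)]
struct Status {
    errors: Vec<String>,
    skipped: Vec<String>,
}

fn build_mrib_client() -> Result<MribClient, String> {
    // e.g., transient DNS resolution returning no switch zones
    Err("no switch zones resolved".to_string())
}

fn run_pass(status: &mut Status) {
    // Fail-open: a construction error gates only the MRIB-coupled
    // steps for this pass; the next tick retries.
    let mrib = match build_mrib_client() {
        Ok(client) => Some(client),
        Err(e) => {
            status.errors.push(format!("failed to create MRIB client: {e}"));
            None
        }
    };

    if let Some(_client) = &mrib {
        // ... reconcile_active_groups / reconcile_deleting_groups ...
    } else {
        status.skipped.push("reconcile_active_groups".to_string());
        status.skipped.push("reconcile_deleting_groups".to_string());
    }
}

fn main() {
    let mut status = Status::default();
    run_pass(&mut status);
    assert_eq!(status.skipped.len(), 2);
}
```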
+ match self + .reconcile_member_states( + opctx, + &dataplane_client, + &sled_client, + switch_zone_client.as_ref(), + ) + .await + { + Ok(counts) => { + status.members_processed += counts.processed; + status.ddm_inventory_drift += counts.ddm_inventory_drift; + } Err(e) => { let msg = format!("failed to reconcile member states: {e:#}"); status.errors.push(msg); @@ -573,22 +471,48 @@ impl MulticastGroupReconciler { } } - // Reconcile active groups (verify state, update dataplane as needed) - match self.reconcile_active_groups(opctx, &dataplane_client).await { - Ok(count) => status.groups_verified += count, - Err(e) => { - let msg = format!("failed to reconcile active groups: {e:#}"); - status.errors.push(msg); + // Reconcile active groups + if let Some(switch_zone_client) = &switch_zone_client { + match self + .reconcile_active_groups( + opctx, + &dataplane_client, + &sled_client, + switch_zone_client, + ) + .await + { + Ok(count) => status.groups_verified += count, + Err(e) => { + let msg = + format!("failed to reconcile active groups: {e:#}"); + status.errors.push(msg); + } } + } else { + status.skipped.push("reconcile_active_groups".to_string()); } - // Process deleting groups (DPD cleanup + hard-delete from DB) - match self.reconcile_deleting_groups(opctx, &dataplane_client).await { - Ok(count) => status.groups_deleted += count, - Err(e) => { - let msg = format!("failed to reconcile deleting groups: {e:#}"); - status.errors.push(msg); + // Process deleting groups + if let Some(switch_zone_client) = &switch_zone_client { + match self + .reconcile_deleting_groups( + opctx, + &dataplane_client, + &sled_client, + switch_zone_client, + ) + .await + { + Ok(count) => status.groups_deleted += count, + Err(e) => { + let msg = + format!("failed to reconcile deleting groups: {e:#}"); + status.errors.push(msg); + } } + } else { + status.skipped.push("reconcile_deleting_groups".to_string()); } trace!( @@ -609,11 +533,12 @@ impl MulticastGroupReconciler { #[cfg(test)] mod tests { - use super::*; - use std::collections::HashSet; - use std::net::{Ipv4Addr, Ipv6Addr}; + use std::net::{IpAddr, Ipv4Addr, Ipv6Addr}; + use crate::app::multicast::{ + map_external_to_underlay_ip, map_external_to_underlay_ip_impl, + }; use ipnet::Ipv6Net; use omicron_common::address::IPV6_ADMIN_SCOPED_MULTICAST_PREFIX; diff --git a/nexus/src/app/background/tasks/multicast/mrib.rs b/nexus/src/app/background/tasks/multicast/mrib.rs new file mode 100644 index 00000000000..e2e621ad0e7 --- /dev/null +++ b/nexus/src/app/background/tasks/multicast/mrib.rs @@ -0,0 +1,186 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! MRIB route reconciliation for active multicast groups. +//! +//! This diffs the desired switch MRIB state, derived from group, member, and +//! source filter records, against a per-pass snapshot fetched by the +//! caller, then issues add/remove RPCs to converge. Best-effort: +//! failures are logged and retried on the next reconciler pass. 
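Before the implementation, a worked example of the desired-route set described above: for a group with joined members, each pinned source becomes a `Some(addr)` route and an any-source member contributes a `None` wildcard entry (a sketch using plain locals in place of the `SourceFilterState` fields):

```rust
use std::collections::HashSet;
use std::net::{IpAddr, Ipv4Addr};

fn main() {
    // Mirrors SourceFilterState: two pinned sources plus one member
    // that joined without a source list (any-source).
    let specific_sources = vec![
        IpAddr::V4(Ipv4Addr::new(10, 0, 0, 1)),
        IpAddr::V4(Ipv4Addr::new(10, 0, 0, 2)),
    ];
    let has_any_source_member = true;
    let has_joined = true;

    let desired: HashSet<Option<IpAddr>> = if has_joined {
        specific_sources
            .iter()
            .map(|s| Some(*s))
            .chain(has_any_source_member.then_some(None))
            .collect()
    } else {
        // No joined members: withdraw everything.
        HashSet::new()
    };

    // Three routes: (G, S1), (G, S2), and the any-source (G, *) entry.
    assert_eq!(desired.len(), 3);
    assert!(desired.contains(&None));
}
```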
+
+use std::collections::HashSet;
+use std::net::{IpAddr, Ipv6Addr};
+
+use slog::{debug, warn};
+use slog_error_chain::InlineErrorChain;
+use uuid::Uuid;
+
+use nexus_db_model::{MulticastGroup, MulticastGroupMemberState};
+use nexus_db_queries::context::OpContext;
+use nexus_db_queries::db::DataStore;
+use nexus_db_queries::db::datastore::multicast::members::SourceFilterState;
+use nexus_types::identity::Resource;
+use omicron_common::api::external::DataPageParams;
+use omicron_uuid_kinds::{GenericUuid, MulticastGroupUuid};
+
+use crate::app::multicast::switch_zone::{
+    MribRouteIndex, MulticastSwitchZoneClient,
+};
+
+/// Reconcile MRIB routes for a single active group against the per-pass
+/// switch snapshot. Withdraws routes when no "Joined" members remain so
+/// peer sleds stop sending traffic.
+pub(super) async fn reconcile_group(
+    opctx: &OpContext,
+    datastore: &DataStore,
+    switch_zone_client: &MulticastSwitchZoneClient,
+    mrib_route_index: Option<&MribRouteIndex>,
+    group: &MulticastGroup,
+    source_filter: &SourceFilterState,
+    underlay_group_id: Uuid,
+) {
+    let group_id = MulticastGroupUuid::from_untyped_uuid(group.id());
+
+    let members = match datastore
+        .multicast_group_members_list(
+            opctx,
+            group_id,
+            &DataPageParams::max_page(),
+        )
+        .await
+    {
+        Ok(m) => m,
+        Err(e) => {
+            warn!(
+                opctx.log,
+                "failed to list members for MRIB reconcile, skipping";
+                "group_id" => %group.id(),
+                "error" => InlineErrorChain::new(&e),
+            );
+            return;
+        }
+    };
+    let has_joined =
+        members.iter().any(|m| m.state == MulticastGroupMemberState::Joined);
+
+    let underlay_group = match datastore
+        .underlay_multicast_group_fetch(opctx, underlay_group_id)
+        .await
+    {
+        Ok(g) => g,
+        Err(e) => {
+            warn!(
+                opctx.log,
+                "failed to fetch underlay group for MRIB reconcile, skipping";
+                "group_id" => %group.id(),
+                "underlay_group_id" => %underlay_group_id,
+                "error" => InlineErrorChain::new(&e),
+            );
+            return;
+        }
+    };
+
+    let IpAddr::V6(underlay_ip) = underlay_group.multicast_ip.ip() else {
+        warn!(
+            opctx.log,
+            "underlay multicast group has non-IPv6 address";
+            "group_id" => %group.id(),
+            "underlay_ip" => %underlay_group.multicast_ip.ip(),
+        );
+        return;
+    };
+
+    converge_routes(
+        opctx,
+        switch_zone_client,
+        mrib_route_index,
+        group,
+        source_filter,
+        underlay_ip,
+        has_joined,
+    )
+    .await;
+}
+
+/// Diff the per-pass MRIB snapshot against the desired route set and
+/// issue add/remove RPCs to converge.
+async fn converge_routes(
+    opctx: &OpContext,
+    switch_zone_client: &MulticastSwitchZoneClient,
+    mrib_route_index: Option<&MribRouteIndex>,
+    group: &MulticastGroup,
+    source_filter: &SourceFilterState,
+    underlay_ip: Ipv6Addr,
+    has_joined: bool,
+) {
+    let group_ip = group.multicast_ip.ip();
+    let current = mrib_route_index
+        .and_then(|index| index.get(&group_ip))
+        .cloned()
+        .unwrap_or_default();
+    let current_sources = current.keys().copied().collect::<HashSet<_>>();
+    let desired: HashSet<Option<IpAddr>> = if has_joined {
+        source_filter
+            .specific_sources
+            .iter()
+            .map(|s| Some(*s))
+            .chain(source_filter.has_any_source_member.then_some(None))
+            .collect()
+    } else {
+        HashSet::new()
+    };
+
+    // Ensure desired routes exist.
+ for source in &desired { + let current_switches = current.get(source).cloned().unwrap_or_default(); + if current_switches.len() == switch_zone_client.switch_count() + && current_switches.values().all(|c| *c == underlay_ip) + { + continue; + } + if let Err(e) = + switch_zone_client.add_route(group_ip, underlay_ip, *source).await + { + warn!( + opctx.log, + "failed to ensure MRIB route"; + "group_id" => %group.id(), + "source" => ?source, + "error" => %e, + ); + } + } + + // Remove routes no longer desired. The per-pass snapshot lets us + // reconcile against current switch state without per-group RPCs. + for source in current_sources.difference(&desired) { + if let Err(e) = switch_zone_client.remove_route(group_ip, *source).await + { + warn!( + opctx.log, + "failed to remove stale MRIB route"; + "group_id" => %group.id(), + "source" => ?source, + "error" => %e, + ); + } + } + + // Surface RPF flux for diagnostics. The route lands in `mrib_in` + // after `add_route` but only flows once promoted to `mrib_loc`. + for source in &desired { + if !switch_zone_client + .route_active_on_all_switches(group_ip, *source) + .await + { + debug!( + opctx.log, + "MRIB route not yet RPF-verified on all switches"; + "group_id" => %group.id(), + "group_ip" => %group_ip, + "source" => ?source, + ); + } + } +} diff --git a/nexus/src/app/instance.rs b/nexus/src/app/instance.rs index d24a401b317..6792a4ff843 100644 --- a/nexus/src/app/instance.rs +++ b/nexus/src/app/instance.rs @@ -1007,12 +1007,11 @@ impl super::Nexus { { if let (InstanceStateChangeError::SledAgent(inner), Some(vmm)) = (&e, state.vmm()) + && inner.vmm_gone() { - if inner.vmm_gone() { - let _ = self - .mark_vmm_failed(opctx, authz_instance, vmm, inner) - .await; - } + let _ = self + .mark_vmm_failed(opctx, authz_instance, vmm, inner) + .await; } return Err(e); @@ -1097,20 +1096,6 @@ impl super::Nexus { ) .await?; - // Update multicast member state for this instance to "Left" and clear - // `sled_id` - only if multicast is enabled - if self.multicast_enabled() { - self.db_datastore - .multicast_group_members_detach_by_instance( - opctx, - InstanceUuid::from_untyped_uuid(authz_instance.id()), - ) - .await?; - } - - // Activate multicast reconciler to handle switch-level changes - self.background_tasks.task_multicast_reconciler.activate(); - if let Err(e) = self .instance_request_state( opctx, @@ -1122,17 +1107,30 @@ impl super::Nexus { { if let (InstanceStateChangeError::SledAgent(inner), Some(vmm)) = (&e, state.vmm()) + && inner.vmm_gone() { - if inner.vmm_gone() { - let _ = self - .mark_vmm_failed(opctx, authz_instance, vmm, inner) - .await; - } + let _ = self + .mark_vmm_failed(opctx, authz_instance, vmm, inner) + .await; } return Err(e); } + // Detach multicast members (state -> "Left", clear `sled_id`) only + // after sled-agent has acknowledged the Stop request. Doing it + // before the request would tear down M2P/forwarding for a guest + // that is still running if the request fails. 
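A minimal sketch of the stop-path ordering this hunk establishes, with hypothetical helpers standing in for `instance_request_state` and the datastore detach call:

```rust
// Hypothetical stand-ins for the stop-path steps.
#[derive(Debug)]
struct StopError;

fn request_stop() -> Result<(), StopError> {
    // sled-agent acknowledges the Stop request
    Ok(())
}

fn detach_multicast_members() {
    // members -> "Left", sled_id cleared, reconciler activated
}

fn stop_instance() -> Result<(), StopError> {
    // Ask sled-agent first: if this fails, the guest may still be
    // running and its M2P/forwarding state must stay intact.
    request_stop()?;

    // Only after the ack is multicast membership torn down.
    detach_multicast_members();
    Ok(())
}

fn main() {
    stop_instance().unwrap();
}
```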
+ if self.multicast_enabled() { + self.db_datastore + .multicast_group_members_detach_by_instance( + opctx, + InstanceUuid::from_untyped_uuid(authz_instance.id()), + ) + .await?; + self.background_tasks.task_multicast_reconciler.activate(); + } + self.db_datastore .instance_fetch_with_vmm(opctx, &authz_instance) .await diff --git a/nexus/src/app/mod.rs b/nexus/src/app/mod.rs index 40c762b1c85..679a7b9f342 100644 --- a/nexus/src/app/mod.rs +++ b/nexus/src/app/mod.rs @@ -33,7 +33,6 @@ use nexus_types::deployment::PendingMgsUpdates; use nexus_types::deployment::ReconfiguratorConfigParam; use omicron_common::address::MGD_PORT; -use omicron_common::address::MGS_PORT; use omicron_common::api::external::ByteCount; use omicron_common::api::external::Error; use omicron_uuid_kinds::OmicronZoneUuid; @@ -1246,6 +1245,15 @@ pub(crate) async fn dpd_clients( } }; + // Per-request bounds so a stalled DPD connection can't hang an RPW + // iteration or saga action indefinitely. Matches the timeout pair on + // the shared Nexus `reqwest_client`. + let reqwest_client = reqwest::ClientBuilder::new() + .connect_timeout(std::time::Duration::from_secs(15)) + .timeout(std::time::Duration::from_secs(15)) + .build() + .map_err(|e| format!("failed to build DPD reqwest client: {e}"))?; + let clients: Vec<(SocketAddrV6, dpd_client::Client)> = dpd_socketaddrs .iter() .map(|socket_addr| { @@ -1256,8 +1264,9 @@ pub(crate) async fn dpd_clients( )), }; - let client = dpd_client::Client::new( + let client = dpd_client::Client::new_with_client( &format!("http://{socket_addr}"), + reqwest_client.clone(), client_state, ); @@ -1325,29 +1334,28 @@ pub(crate) async fn lldpd_clients( Ok(clients) } -/// Look up Dendrite addresses in DNS then determine the switch location of -/// any addresses we're able to resolve the SwitchSlot for. If a switch -/// zone is down, the resolution process will fail and the entry will be -/// missing from the result. +#[derive(Clone, Debug)] +pub(crate) struct SwitchZoneTarget { + pub(crate) target: String, + pub(crate) addr: Ipv6Addr, +} + +/// Look up switch zones in DNS, then determine the switch location of any +/// zones we're able to resolve the `SwitchSlot` for. If a switch zone is down, +/// the resolution process will fail and the entry will be missing from the +/// result. /// /// # Errors -/// If we fail to resolve the ipv6 addresses of the Dendrite service we -/// return an error +/// If we fail to resolve the MGS SRV records for switch zones, return an error. async fn switch_zone_address_mappings( resolver: &internal_dns_resolver::Resolver, log: &slog::Logger, ) -> Result, String> { - let switch_zone_addresses = match resolver - .lookup_all_ipv6(ServiceName::Dendrite) - .await - { - Ok(addrs) => addrs, - Err(e) => { - error!(log, "failed to resolve addresses for Dendrite services"; "error" => %e); - return Err(e.to_string()); - } - }; - Ok(map_switch_zone_addrs(&log, switch_zone_addresses, resolver).await) + Ok(switch_zone_targets(resolver, log) + .await? + .into_iter() + .map(|(slot, endpoint)| (slot, endpoint.addr)) + .collect()) } // TODO: #3596 Allow updating of Nexus from `handoff_to_nexus()` @@ -1359,40 +1367,52 @@ async fn switch_zone_address_mappings( // up switch addresses as a whole, since how DNS is currently setup for // Dendrite is insufficient for what we need. // -/// Query MGS in each switch zone to learn which switch slot is being managed by -/// the services located on a given ipv6 address. 
This information can be used -/// along with the well known port numbers to target a specific switch + service -/// combination. +/// Query MGS in each switch zone to learn which switch slot is managed by each +/// service target. /// /// We return whatever we're able to successfully resolve. In the event of -/// a communication timeout or other failure with MGS, the SwitchSlot -> Ipv6Addr -/// mapping will be missing from the returned HashMap. Callers will need to inspect +/// a communication timeout or other failure with MGS, the corresponding entry +/// will be missing from the returned `HashMap`. Callers will need to inspect /// the contents to ensure what they expect to be there is actually there. -async fn map_switch_zone_addrs( - log: &Logger, - switch_zone_addresses: Vec, +pub(crate) async fn switch_zone_targets( resolver: &internal_dns_resolver::Resolver, -) -> HashMap { + log: &Logger, +) -> Result, String> { use gateway_client::Client as MgsClient; + info!(log, "Determining switch slots managed by switch zones"); - let mut switch_zone_addrs = HashMap::new(); - - for addr in switch_zone_addresses { - let port = match resolver - .lookup_all_socket_v6(ServiceName::ManagementGatewayService) - .await - { - Ok(addrs) => { - let port_map: HashMap = addrs - .into_iter() - .map(|sockaddr| (*sockaddr.ip(), sockaddr.port())) - .collect(); - - *port_map.get(&addr).unwrap_or(&MGS_PORT) + let mgs_targets = match resolver + .lookup_srv(ServiceName::ManagementGatewayService) + .await + { + Ok(targets) => targets, + Err(e) => { + error!(log, "failed to resolve MGS service targets"; "error" => %e); + return Err(e.to_string()); + } + }; + + let mut switch_zone_targets = HashMap::new(); + + for (target, port) in mgs_targets { + let addr = match resolver.ipv6_lookup(&target).await { + Ok(Some(addr)) => addr, + Ok(None) => { + warn!( + log, + "MGS SRV target resolved without an IPv6 address"; + "target" => &target, + ); + continue; } Err(e) => { - error!(log, "failed to resolve MGS addresses"; "error" => %e); - MGS_PORT + warn!( + log, + "failed to resolve IPv6 address for MGS target"; + "target" => &target, + "error" => %e, + ); + continue; } }; @@ -1401,14 +1421,22 @@ async fn map_switch_zone_addrs( log.new(o!("component" => "MgsClient")), ); - info!(log, "determining switch slot managed by switch zone"; "zone_address" => #?addr); + info!( + log, + "determining switch slot managed by switch zone"; + "target" => &target, + "zone_address" => #?addr, + "mgs_port" => port, + ); let switch_slot = match mgs_client.sp_local_switch_id().await { Ok(switch) => { info!( log, "identified switch slot for switch zone"; "slot" => #?switch, - "zone_address" => #?addr + "target" => &target, + "zone_address" => #?addr, + "mgs_port" => port, ); switch.slot } @@ -1416,19 +1444,22 @@ async fn map_switch_zone_addrs( error!( log, "failed to identify switch slot for switch zone"; + "target" => &target, "zone_address" => #?addr, + "mgs_port" => port, "reason" => #?e ); continue; } }; + let endpoint = SwitchZoneTarget { target, addr }; match switch_slot { 0 => { - switch_zone_addrs.insert(SwitchSlot::Switch0, addr); + switch_zone_targets.insert(SwitchSlot::Switch0, endpoint); } 1 => { - switch_zone_addrs.insert(SwitchSlot::Switch1, addr); + switch_zone_targets.insert(SwitchSlot::Switch1, endpoint); } _ => { warn!( @@ -1442,10 +1473,10 @@ async fn map_switch_zone_addrs( info!( log, "completed mapping switch zones to switch slots"; - "mappings" => #?switch_zone_addrs + "mappings" => #?switch_zone_targets ); - 
switch_zone_addrs + Ok(switch_zone_targets) } /// Begin configuring an external HTTP client, returning a diff --git a/nexus/src/app/multicast/dataplane.rs b/nexus/src/app/multicast/dataplane.rs index 5d79df7d078..777f8f9f02e 100644 --- a/nexus/src/app/multicast/dataplane.rs +++ b/nexus/src/app/multicast/dataplane.rs @@ -40,6 +40,7 @@ use std::collections::HashMap; use std::net::IpAddr; +use std::time::Duration; use futures::future::try_join_all; use oxnet::MulticastMac; @@ -113,7 +114,8 @@ trait IntoUnderlayMulticast { impl IntoUnderlayMulticast for IpAddr { fn into_underlay_multicast(self) -> Result { match self { - IpAddr::V6(ipv6) => Ok(UnderlayMulticastIpv6(ipv6)), + IpAddr::V6(ipv6) => UnderlayMulticastIpv6::try_from(ipv6) + .map_err(|e| Error::invalid_request(e.to_string())), IpAddr::V4(_) => Err(Error::invalid_request( "underlay multicast groups must use IPv6 addresses", )), @@ -138,7 +140,7 @@ pub(crate) type MulticastDataplaneResult = Result; /// - Group-level uplink configuration (which front ports to use) /// - Uplink members with [`dpd_client::types::Direction::External`] added to /// underlay groups -/// - Integration with existing `switch_ports_with_uplinks()` for port discovery +/// - Integration with existing `switch_ports_with_uplinks` for port discovery pub(crate) struct MulticastDataplaneClient { dpd_clients: HashMap, log: Logger, @@ -153,6 +155,15 @@ pub(crate) struct GroupUpdateParams<'a> { pub source_filter: &'a SourceFilterState, } +/// Bound DPD client construction. On timeout (or DNS failure) we yield +/// an empty client map rather than failing the pass: group operations +/// skip with no switches, but DB-only member-state transitions +/// ("Joining" → "Left" when the instance is stopped) still proceed. +const DPD_CLIENT_BUILD_TIMEOUT: Duration = + // Caps the internal-DNS retry budget for `_dendrite._tcp` so a DPD + // outage doesn't starve the bg task's idle window. + Duration::from_secs(5); + impl MulticastDataplaneClient { /// Create a new client - builds fresh DPD clients for current switch /// topology. @@ -160,31 +171,72 @@ impl MulticastDataplaneClient { resolver: Resolver, log: Logger, ) -> MulticastDataplaneResult { - let dpd_clients = dpd_clients(&resolver, &log).await.map_err(|e| { - error!( - log, - "failed to build DPD clients"; - "error" => %e - ); - Error::internal_error("failed to build DPD clients") - })?; + let dpd_clients = match tokio::time::timeout( + DPD_CLIENT_BUILD_TIMEOUT, + dpd_clients(&resolver, &log), + ) + .await + { + Ok(Ok(clients)) => clients, + Ok(Err(e)) => { + error!( + log, + "failed to build DPD clients, continuing with empty \ + client map"; + "error" => %e, + ); + HashMap::new() + } + Err(_) => { + error!( + log, + "timed out building DPD clients, continuing with empty \ + client map"; + "timeout" => ?DPD_CLIENT_BUILD_TIMEOUT, + ); + HashMap::new() + } + }; Ok(Self { dpd_clients, log }) } - /// Select a single switch deterministically for read operations. + /// Iterate switches in deterministic (sorted by `SwitchSlot`) order. /// - /// Used when all switches should have identical state and we only need - /// to query one. Selects the first switch in sorted order by location - /// for consistency across invocations. - fn select_one_switch( + /// Used by read paths that need data from any one switch (since all + /// switches hold identical state for that read). 
Callers walk this
+    /// iterator and short-circuit on the first success, falling through
+    /// to subsequent switches on per-switch failure so a single
+    /// unhealthy switch doesn't fail the whole operation.
+    fn switches_in_order(
         &self,
-    ) -> MulticastDataplaneResult<(&SwitchSlot, &dpd_client::Client)> {
-        let mut switches: Vec<_> = self.dpd_clients.iter().collect();
-        switches.sort_by_key(|(loc, _)| *loc);
-        switches
-            .into_iter()
-            .next()
-            .ok_or_else(|| Error::internal_error("no DPD clients available"))
+    ) -> impl Iterator<Item = (&SwitchSlot, &dpd_client::Client)> {
+        let mut entries: Vec<_> = self.dpd_clients.iter().collect();
+        entries.sort_by_key(|(slot, _)| *slot);
+        entries.into_iter()
+    }
+
+    /// Compute DPD source filter from aggregated member source state.
+    ///
+    /// For SSM addresses, always returns specific sources. For ASM addresses,
+    /// returns `None` (any source) if any member omitted sources, otherwise
+    /// returns the union of all member sources.
+    fn compute_sources_for_dpd(
+        external_group_ip: IpAddr,
+        source_filter: &SourceFilterState,
+    ) -> Option<Vec<dpd_client::types::IpSrc>> {
+        if is_ssm_address(external_group_ip)
+            || !source_filter.has_any_source_member
+        {
+            Some(
+                source_filter
+                    .specific_sources
+                    .iter()
+                    .map(|ip| dpd_client::types::IpSrc::Exact(*ip))
+                    .collect(),
+            )
+        } else {
+            None
+        }
+    }
 
     async fn dpd_ensure_underlay_created(
@@ -413,33 +465,9 @@ impl MulticastDataplaneClient {
             inner_mac: MacAddr { a: underlay_ipv6.derive_multicast_mac() },
             vni: Vni::from(u32::from(external_group.vni.0)),
         };
-
         let external_group_ip = external_group.multicast_ip.ip();
-
-        // Source filtering per RFC 4607:
-        // - SSM (232/8, ff3x::/32): always use specific sources. API
-        //   validation prevents SSM joins without sources.
-        // - ASM: use specific sources when all members specify sources,
-        //   otherwise None to allow any source at the switch level.
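The helper above replaces the duplicated SSM/ASM branches removed in this hunk. A worked example of its decision table, using plain types as stand-ins for `SourceFilterState` and `IpSrc::Exact`:

```rust
use std::net::{IpAddr, Ipv4Addr};

// Plain stand-ins for SourceFilterState / IpSrc::Exact.
struct Filter {
    specific_sources: Vec<IpAddr>,
    has_any_source_member: bool,
}

/// Mirrors compute_sources_for_dpd: SSM always pins sources; ASM
/// returns None (any source) when any member omitted sources.
fn sources_for_dpd(is_ssm: bool, f: &Filter) -> Option<Vec<IpAddr>> {
    if is_ssm || !f.has_any_source_member {
        Some(f.specific_sources.clone())
    } else {
        None
    }
}

fn main() {
    let s = IpAddr::V4(Ipv4Addr::new(10, 0, 0, 1));
    let pinned =
        Filter { specific_sources: vec![s], has_any_source_member: false };
    let mixed =
        Filter { specific_sources: vec![s], has_any_source_member: true };

    // SSM: always specific, even with an any-source member recorded.
    assert!(sources_for_dpd(true, &mixed).is_some());
    // ASM, all members pinned sources: use the union.
    assert!(sources_for_dpd(false, &pinned).is_some());
    // ASM with an any-source member: open filter at the switch.
    assert!(sources_for_dpd(false, &mixed).is_none());
}
```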
- let sources_dpd = if is_ssm_address(external_group_ip) { - Some( - params - .source_filter - .specific_sources - .iter() - .map(|ip| IpSrc::Exact(*ip)) - .collect::>(), - ) - } else if params.source_filter.has_any_source_member { - None - } else { - Some( - params - .source_filter - .specific_sources - .iter() - .map(|ip| IpSrc::Exact(*ip)) - .collect::>(), - ) - }; + let sources_dpd = Self::compute_sources_for_dpd( + external_group_ip, + params.source_filter, + ); let update_operations = dpd_clients.into_iter().map(|(switch_slot, client)| { @@ -1028,9 +1032,13 @@ impl MulticastDataplaneClient { /// Detect and log cross-switch drift for multicast groups. /// - /// We logs errors if: + /// Detection-only. Logs errors when: /// - Group is present on some switches but missing on others (presence drift) /// - Group has different configurations across switches (config drift) + /// + /// Drift correction is handled separately by the active-group reconciler + /// (`groups.rs::reconcile_active_groups`), which re-pushes the + /// authoritative DB state to all switches on the next pass. fn log_drift_issues<'a>( &self, group_ip: IpAddr, @@ -1077,9 +1085,11 @@ impl MulticastDataplaneClient { /// Fetch external multicast group DPD state for RPW drift detection. /// /// Queries all switches to detect configuration drift. If any switch has - /// different state (missing group, different config), it will return the - /// found state, so the reconciler can initiate an UPDATE - /// saga that will fix all switches atomically. + /// different state (missing group, different config), returns the found + /// state so the reconciler can re-issue the dataplane operations on the + /// next pass and converge to the intended configuration. Drift repair + /// follows the RPW convergence model rather than an atomic cross-switch + /// saga, so callers should expect N-pass convergence on partial failure. pub(crate) async fn fetch_external_group_for_drift_check( &self, group_ip: IpAddr, @@ -1190,63 +1200,65 @@ impl MulticastDataplaneClient { dpd_client::types::BackplaneLink, >, > { - let (switch_slot, client) = self.select_one_switch()?; - - debug!( - self.log, - "fetching backplane map from DPD for topology validation"; - "switch" => ?switch_slot, - "query_scope" => "single_switch", - "dpd_operation" => "fetch_backplane_map" - ); - - match client.backplane_map().await { - Ok(response) => { - let backplane_map_raw = response.into_inner(); + let mut errors: Vec<(SwitchSlot, String)> = Vec::new(); + for (switch_slot, client) in self.switches_in_order() { + debug!( + self.log, + "fetching backplane map from DPD for topology validation"; + "switch" => ?switch_slot, + "dpd_operation" => "fetch_backplane_map" + ); - // Convert HashMap to BTreeMap - // DPD returns string keys like "rear0", "rear1" - parse them to PortId - let backplane_map: std::collections::BTreeMap<_, _> = backplane_map_raw - .into_iter() - .filter_map(|(port_str, link)| { - match dpd_client::types::PortId::try_from(port_str.as_str()) { - Ok(port_id) => Some((port_id, link)), - Err(e) => { - error!( - self.log, - "failed to parse port ID from backplane map"; - "port_str" => %port_str, - "error" => %e, - "dpd_operation" => "fetch_backplane_map" - ); - None + match client.backplane_map().await { + Ok(response) => { + let backplane_map_raw = response.into_inner(); + + // Convert HashMap to BTreeMap. + // DPD returns string keys like "rear0", "rear1"; parse them to PortId. 
+ let backplane_map: std::collections::BTreeMap<_, _> = backplane_map_raw + .into_iter() + .filter_map(|(port_str, link)| { + match dpd_client::types::PortId::try_from(port_str.as_str()) { + Ok(port_id) => Some((port_id, link)), + Err(e) => { + error!( + self.log, + "failed to parse port ID from backplane map"; + "port_str" => %port_str, + "error" => %e, + "dpd_operation" => "fetch_backplane_map" + ); + None + } } - } - }) - .collect(); + }) + .collect(); - debug!( - self.log, - "backplane map fetched from DPD"; - "switch" => ?switch_slot, - "port_count" => backplane_map.len(), - "dpd_operation" => "fetch_backplane_map" - ); - Ok(backplane_map) - } - Err(e) => { - error!( - self.log, - "backplane map fetch failed"; - "switch" => ?switch_slot, - "error" => %e, - "dpd_operation" => "fetch_backplane_map" - ); - Err(Error::internal_error(&format!( - "failed to fetch backplane map from DPD: {e}" - ))) + debug!( + self.log, + "backplane map fetched from DPD"; + "switch" => ?switch_slot, + "port_count" => backplane_map.len(), + "dpd_operation" => "fetch_backplane_map" + ); + return Ok(backplane_map); + } + Err(e) => { + warn!( + self.log, + "backplane map fetch failed on switch, trying next"; + "switch" => ?switch_slot, + "error" => %e, + "dpd_operation" => "fetch_backplane_map" + ); + errors.push((*switch_slot, format!("{e}"))); + } } } + + Err(Error::internal_error(&format!( + "failed to fetch backplane map from any switch: {errors:?}", + ))) } /// Fetch current underlay group members from a single switch. @@ -1261,60 +1273,63 @@ impl MulticastDataplaneClient { &self, underlay_ip: IpAddr, ) -> MulticastDataplaneResult>> { - let (switch_slot, client) = self.select_one_switch()?; - - debug!( - self.log, - "fetching underlay group members from DPD for drift detection"; - "underlay_ip" => %underlay_ip, - "switch" => ?switch_slot, - "dpd_operation" => "fetch_underlay_members" - ); + let mut errors: Vec<(SwitchSlot, String)> = Vec::new(); + for (switch_slot, client) in self.switches_in_order() { + debug!( + self.log, + "fetching underlay group members from DPD for drift detection"; + "underlay_ip" => %underlay_ip, + "switch" => ?switch_slot, + "dpd_operation" => "fetch_underlay_members" + ); - match client - .multicast_group_get_underlay( - &underlay_ip.into_underlay_multicast()?, - ) - .await - { - Ok(response) => { - let members = response.into_inner().members; - debug!( - self.log, - "underlay group members fetched from DPD"; - "underlay_ip" => %underlay_ip, - "switch" => ?switch_slot, - "member_count" => members.len(), - "dpd_operation" => "fetch_underlay_members" - ); - Ok(Some(members)) - } - Err(DpdError::ErrorResponse(resp)) - if resp.status() == reqwest::StatusCode::NOT_FOUND => + match client + .multicast_group_get_underlay( + &underlay_ip.into_underlay_multicast()?, + ) + .await { - debug!( - self.log, - "underlay group not found on switch"; - "underlay_ip" => %underlay_ip, - "switch" => ?switch_slot, - "dpd_operation" => "fetch_underlay_members" - ); - Ok(None) - } - Err(e) => { - error!( - self.log, - "underlay group fetch failed"; - "underlay_ip" => %underlay_ip, - "switch" => ?switch_slot, - "error" => %e, - "dpd_operation" => "fetch_underlay_members" - ); - Err(Error::internal_error(&format!( - "failed to fetch underlay group from DPD: {e}" - ))) + Ok(response) => { + let members = response.into_inner().members; + debug!( + self.log, + "underlay group members fetched from DPD"; + "underlay_ip" => %underlay_ip, + "switch" => ?switch_slot, + "member_count" => members.len(), + 
"dpd_operation" => "fetch_underlay_members" + ); + return Ok(Some(members)); + } + Err(DpdError::ErrorResponse(resp)) + if resp.status() == reqwest::StatusCode::NOT_FOUND => + { + debug!( + self.log, + "underlay group not found on switch"; + "underlay_ip" => %underlay_ip, + "switch" => ?switch_slot, + "dpd_operation" => "fetch_underlay_members" + ); + return Ok(None); + } + Err(e) => { + warn!( + self.log, + "underlay group fetch failed on switch, trying next"; + "underlay_ip" => %underlay_ip, + "switch" => ?switch_slot, + "error" => %e, + "dpd_operation" => "fetch_underlay_members" + ); + errors.push((*switch_slot, format!("{e}"))); + } } } + + Err(Error::internal_error(&format!( + "failed to fetch underlay group from any switch: {errors:?}", + ))) } pub(crate) async fn remove_groups( diff --git a/nexus/src/app/multicast/mod.rs b/nexus/src/app/multicast/mod.rs index 629d1253c89..4a49ec095b2 100644 --- a/nexus/src/app/multicast/mod.rs +++ b/nexus/src/app/multicast/mod.rs @@ -47,7 +47,7 @@ //! //! [`UNDERLAY_MULTICAST_SUBNET`]: omicron_common::address::UNDERLAY_MULTICAST_SUBNET -use std::net::IpAddr; +use std::net::{IpAddr, Ipv6Addr}; use std::sync::Arc; use ref_cast::RefCast; @@ -61,7 +61,7 @@ use nexus_db_queries::db::datastore::multicast::ExternalMulticastGroupWithSource use nexus_db_queries::{authz, db}; use nexus_types::external_api::multicast; use nexus_types::multicast::MulticastGroupCreate; -use omicron_common::address::is_ssm_address; +use omicron_common::address::{UNDERLAY_MULTICAST_SUBNET, is_ssm_address}; use omicron_common::api::external::{ self, CreateResult, DataPageParams, DeleteResult, IdentityMetadataCreateParams, ListResultVec, LookupResult, @@ -70,6 +70,8 @@ use omicron_common::api::external::{ use omicron_uuid_kinds::{GenericUuid, InstanceUuid, MulticastGroupUuid}; pub(crate) mod dataplane; +pub(crate) mod sled; +pub(crate) mod switch_zone; /// Validate that SSM addresses have source IPs. /// @@ -858,6 +860,76 @@ fn generate_group_name_from_ip( }) } +/// Maps an external multicast address to an underlay address in ff04::/64. +/// +/// Maps external addresses into [`UNDERLAY_MULTICAST_SUBNET`] (ff04::/64, +/// a subset of the admin-local scope ff04::/16 per RFC 7346) using XOR-fold. +/// This prefix is static for consistency across racks. +/// +/// See [RFC 7346] for IPv6 multicast admin-local scope. +/// +/// # Salt Parameter (Collision Avoidance) +/// +/// The `salt` enables collision avoidance via XOR perturbation. XOR is +/// bijective: distinct salts produce distinct outputs (since +/// `a ^ b = a ^ c` implies `b = c`), guaranteeing 256 unique addresses +/// per external IP. +/// +/// On collision (underlay IP already in use), the caller increments +/// salt and retries. The successful salt is stored with the group for +/// deterministic reconstruction. +/// +/// # Implementation +/// +/// ```text +/// underlay_ip = ff04:: | ((xor_fold(external_ip) ^ salt) & HOST_MASK) +/// ``` +/// +/// - IPv4: embedded directly (32 bits fits in 64-bit host space) +/// - IPv6: XOR upper and lower 64-bit halves to fold 128 to 64 bits +/// - Salt in [0, 255]: XORed into host bits for collision retry +/// +/// The `& HOST_MASK` guarantees the result stays within ff04::/64. 
+/// +/// [RFC 7346]: https://www.rfc-editor.org/rfc/rfc7346 +pub(crate) fn map_external_to_underlay_ip( + external_ip: IpAddr, + salt: u8, +) -> IpAddr { + const HOST_BITS: u32 = 128 - UNDERLAY_MULTICAST_SUBNET.width() as u32; + let prefix_base = + u128::from_be_bytes(UNDERLAY_MULTICAST_SUBNET.addr().octets()); + + map_external_to_underlay_ip_impl(prefix_base, HOST_BITS, external_ip, salt) +} + +/// Core implementation separated for testing with custom prefix/host_bits. +pub(crate) fn map_external_to_underlay_ip_impl( + prefix_base: u128, + host_bits: u32, + external_ip: IpAddr, + salt: u8, +) -> IpAddr { + let host_mask: u128 = + if host_bits >= 128 { u128::MAX } else { (1u128 << host_bits) - 1 }; + + let host_value: u128 = match external_ip { + IpAddr::V4(ipv4) => u128::from(u32::from_be_bytes(ipv4.octets())), + IpAddr::V6(ipv6) => { + let full = u128::from_be_bytes(ipv6.octets()); + if host_bits >= 128 { + full + } else { + (full >> host_bits) ^ (full & host_mask) + } + } + }; + + let salted = (host_value ^ u128::from(salt)) & host_mask; + let underlay = prefix_base | salted; + IpAddr::V6(Ipv6Addr::from(underlay.to_be_bytes())) +} + #[cfg(test)] mod tests { use super::*; @@ -887,4 +959,32 @@ mod tests { 0xff1e, 0, 0, 0, 0, 0, 0, 1 )))); } + + #[test] + fn test_generate_group_name_from_ip() { + let v4 = IpAddr::V4(Ipv4Addr::new(224, 1, 2, 3)); + assert_eq!( + generate_group_name_from_ip(v4).unwrap().as_str(), + "mcast-224-1-2-3" + ); + + let v4_zeros = IpAddr::V4(Ipv4Addr::new(224, 0, 0, 1)); + assert_eq!( + generate_group_name_from_ip(v4_zeros).unwrap().as_str(), + "mcast-224-0-0-1" + ); + + let v6: IpAddr = IpAddr::V6(Ipv6Addr::new(0xff0e, 0, 0, 0, 0, 0, 0, 1)); + assert_eq!( + generate_group_name_from_ip(v6).unwrap().as_str(), + "mcast-ff0e-0-0-0-0-0-0-1" + ); + + let v6_ssm: IpAddr = + IpAddr::V6(Ipv6Addr::new(0xff3e, 0, 0, 0, 0, 0, 0, 0xabcd)); + assert_eq!( + generate_group_name_from_ip(v6_ssm).unwrap().as_str(), + "mcast-ff3e-0-0-0-0-0-0-abcd" + ); + } } diff --git a/nexus/src/app/multicast/sled.rs b/nexus/src/app/multicast/sled.rs new file mode 100644 index 00000000000..df66c2d2bd3 --- /dev/null +++ b/nexus/src/app/multicast/sled.rs @@ -0,0 +1,595 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Sled-agent multicast operations for OPTE subscriptions, M2P mappings, +//! and forwarding entries. +//! +//! Parallel to [`dataplane`] which handles DPD switch operations, this +//! module manages sled-local multicast state via sled-agent: +//! +//! - **OPTE subscriptions**: Per-VMM multicast group filters on the +//! hosting sled +//! - **M2P mappings**: Overlay multicast IP to underlay IPv6 address +//! translation, installed on all sleds +//! - **Forwarding entries**: Underlay multicast address to switch next-hop, +//! installed on all sleds so OPTE forwards to the switch for replication +//! +//! 
[`dataplane`]: super::dataplane
+
+use std::collections::hash_map::DefaultHasher;
+use std::collections::{BTreeSet, HashMap};
+use std::hash::{Hash, Hasher};
+use std::net::{IpAddr, Ipv6Addr};
+use std::sync::Arc;
+
+use anyhow::Context;
+use futures::future::join_all;
+use omicron_common::api::external;
+use sled_agent_types::early_networking::SwitchSlot;
+use slog::{debug, info, warn};
+
+use nexus_db_model::{
+    MulticastGroup, MulticastGroupMember, MulticastGroupMemberState,
+};
+use nexus_db_queries::context::OpContext;
+use nexus_db_queries::db::DataStore;
+use nexus_types::deployment::SledFilter;
+use nexus_types::identity::{Asset, Resource};
+use omicron_common::api::external::DataPageParams;
+use omicron_uuid_kinds::{
+    GenericUuid, InstanceUuid, MulticastGroupUuid, SledUuid,
+};
+use sled_agent_client::types::{
+    ClearMcast2Phys, ClearMcastForwarding, Mcast2PhysMapping, McastFilterMode,
+    McastForwardingEntry, McastForwardingNextHop, McastReplication,
+    McastSourceFilter,
+};
+
+/// Utility methods for sled-agent multicast operations used by the
+/// background task reconciler.
+///
+/// Groups sled-agent HTTP calls (OPTE subscriptions, M2P mappings,
+/// forwarding entries) behind a single type to keep the reconciler
+/// logic focused on state transitions rather than client construction.
+///
+/// Unlike [`MulticastDataplaneClient`] which pre-builds per-switch
+/// clients, sled clients are constructed on demand since the target
+/// sled set varies per group.
+///
+/// [`MulticastDataplaneClient`]: super::dataplane::MulticastDataplaneClient
+pub(crate) struct MulticastSledClient {
+    datastore: Arc<DataStore>,
+    resolver: internal_dns_resolver::Resolver,
+}
+
+impl MulticastSledClient {
+    pub(crate) fn new(
+        datastore: Arc<DataStore>,
+        resolver: internal_dns_resolver::Resolver,
+    ) -> Self {
+        Self { datastore, resolver }
+    }
+
+    /// Create a sled-agent client for the given sled.
+    ///
+    /// Looks up the sled's address in the database and constructs an HTTP
+    /// client. Follows the same pattern as V2P mapping propagation.
+    async fn sled_client(
+        &self,
+        opctx: &OpContext,
+        sled_id: SledUuid,
+    ) -> Result<Arc<sled_agent_client::Client>, external::Error>
+    {
+        nexus_networking::sled_client(
+            &self.datastore,
+            opctx,
+            sled_id,
+            &opctx.log,
+        )
+        .await
+    }
+
+    /// Build the membership descriptor sent to sled-agent for
+    /// subscribe/unsubscribe calls.
+    fn membership_for(
+        group: &MulticastGroup,
+        member: &MulticastGroupMember,
+    ) -> sled_agent_client::types::InstanceMulticastMembership {
+        sled_agent_client::types::InstanceMulticastMembership {
+            group_ip: group.multicast_ip.ip(),
+            sources: member.source_ips.iter().map(|s| s.ip()).collect(),
+        }
+    }
+
+    /// Subscribe an instance's active VMM OPTE port to a multicast group.
+    ///
+    /// Sled-agent resolves the active Propolis under its per-instance state
+    /// lock and configures OPTE port-level multicast filters. The member's
+    /// per-instance source IPs are passed for SSM filtering. If no active
+    /// VMM is registered the call is a no-op since the OPTE port is gone.
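+    ///
+    /// A minimal call sketch (illustrative only; `group`, `member`, and
+    /// `sled_id` stand for values the reconciler already holds):
+    ///
+    /// ```text
+    /// let sleds = MulticastSledClient::new(datastore, resolver);
+    /// sleds.subscribe_instance(&opctx, &group, &member, sled_id).await?;
+    /// ```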
+    pub(crate) async fn subscribe_instance(
+        &self,
+        opctx: &OpContext,
+        group: &MulticastGroup,
+        member: &MulticastGroupMember,
+        sled_id: SledUuid,
+    ) -> Result<(), anyhow::Error> {
+        let instance_id = InstanceUuid::from_untyped_uuid(member.parent_id);
+
+        let client = self
+            .sled_client(opctx, sled_id)
+            .await
+            .context("failed to create sled-agent client")?;
+
+        let membership = Self::membership_for(group, member);
+
+        client
+            .instance_join_multicast_group(&instance_id, &membership)
+            .await
+            .context("sled-agent instance_join_multicast_group call failed")?;
+
+        debug!(
+            opctx.log,
+            "subscribed instance to multicast group via sled-agent";
+            "member_id" => %member.id,
+            "instance_id" => %instance_id,
+            "sled_id" => %sled_id,
+            "group_ip" => %group.multicast_ip
+        );
+
+        Ok(())
+    }
+
+    /// Unsubscribe an instance's active VMM OPTE port from a multicast group.
+    ///
+    /// Best-effort: if the VMM or sled is already gone, the unsubscribe is
+    /// effectively a no-op because the OPTE port was destroyed.
+    pub(crate) async fn unsubscribe_instance(
+        &self,
+        opctx: &OpContext,
+        group: &MulticastGroup,
+        member: &MulticastGroupMember,
+        sled_id: SledUuid,
+    ) -> Result<(), anyhow::Error> {
+        let instance_id = InstanceUuid::from_untyped_uuid(member.parent_id);
+
+        let client = self
+            .sled_client(opctx, sled_id)
+            .await
+            .context("failed to create sled-agent client")?;
+
+        let membership = Self::membership_for(group, member);
+
+        client
+            .instance_leave_multicast_group(&instance_id, &membership)
+            .await
+            .context("sled-agent instance_leave_multicast_group call failed")?;
+
+        debug!(
+            opctx.log,
+            "unsubscribed instance from multicast group via sled-agent";
+            "member_id" => %member.id,
+            "instance_id" => %instance_id,
+            "sled_id" => %sled_id,
+            "group_ip" => %group.multicast_ip
+        );
+
+        Ok(())
+    }
+
+    /// Propagate M2P mappings and forwarding entries to all VPC-routing sleds.
+    ///
+    /// Performs convergent per-sled propagation: each sled's current state
+    /// is queried and diffed against desired state. New entries are added
+    /// and stale state is removed (member leaves, instance stops). When no
+    /// joined members remain, every sled has stale state and it is cleared.
+    ///
+    /// # Scope
+    ///
+    /// M2P mappings and forwarding entries are pushed to all VPC-routing
+    /// sleds, not just member sleds. Any instance on any sled may send to
+    /// a multicast group address; without the M2P mapping, OPTE's
+    /// overlay layer silently drops the packet. Forwarding entries point
+    /// each sled at a switch, which replicates to member ports via DPD
+    /// multicast group config. Subscriptions (per-port group membership) remain
+    /// member-sled-only.
+    pub(crate) async fn propagate_m2p_and_forwarding(
+        &self,
+        opctx: &OpContext,
+        group: &MulticastGroup,
+    ) -> Result<(), anyhow::Error> {
+        let underlay_ip = self
+            .resolve_underlay_ip(opctx, group)
+            .await
+            .with_context(|| {
+                format!(
+                    "failed to resolve underlay multicast address for group {}",
+                    group.id()
+                )
+            })?;
+
+        let group_ip = group.multicast_ip.ip();
+
+        // Compute desired state from DB, determining which sleds should have
+        // M2P and forwarding entries for this group.
+        let group_id = MulticastGroupUuid::from_untyped_uuid(group.id());
+        let members = self
+            .datastore
+            .multicast_group_members_list(
+                opctx,
+                group_id,
+                &DataPageParams::max_page(),
+            )
+            .await
+            .context("failed to list group members")?;
+
+        let member_sled_ids: BTreeSet<SledUuid> = members
+            .iter()
+            .filter(|m| m.state == MulticastGroupMemberState::Joined)
+            .filter_map(|m| m.sled_id.map(SledUuid::from))
+            .collect();
+
+        // Build desired M2P entry.
+        let desired_m2p =
+            Mcast2PhysMapping { group: group_ip, underlay: underlay_ip };
+
+        // The group is active if any members are "Joined". M2P and
+        // forwarding are pushed to all sleds when active, cleared
+        // from all sleds when inactive.
+        let group_is_active = !member_sled_ids.is_empty();
+
+        // Query all VPC-routing sleds for current state and converge.
+        let all_sleds = self
+            .datastore
+            .sled_list_all_batched(opctx, SledFilter::VpcRouting)
+            .await
+            .context("failed to enumerate sleds")?;
+
+        // Select one of the available switches as the forwarding next hop.
+        //
+        // OPTE treats each next hop as a duplication it performs itself, so
+        // pointing at individual member sleds would cause O(n) copies over
+        // cxgbe per sender.
+        //
+        // A single switch next hop means one copy to the switch, which
+        // replicates to member sled ports via DPD multicast group membership.
+        // ECMP over both switches is the more correct longer-term answer,
+        // but OPTE and mgd lack the tooling to express that today.
+        let switch_zone_addrs = crate::app::switch_zone_address_mappings(
+            &self.resolver,
+            &opctx.log,
+        )
+        .await
+        .map_err(|e| anyhow::anyhow!(e))
+        .context("failed to resolve switch zone addresses")?;
+
+        let switch_ip =
+            select_forwarding_switch_ip(group_id, &switch_zone_addrs)
+                .context("no switch zone found for forwarding next hop")?;
+
+        let convergence_params = GroupConvergenceParams {
+            group_ip,
+            underlay_ip,
+            group_is_active,
+            desired_m2p: &desired_m2p,
+            switch_ip,
+        };
+
+        // Fan out per-sled convergence so a 32-sled rack doesn't pay
+        // N-sequential RPC round-trips. Each sled's RPC is independent,
+        // so we accumulate per-sled failures rather than fail fast.
+        let convergence_params = &convergence_params;
+        let results = join_all(all_sleds.iter().map(|sled| async move {
+            let sled_id: SledUuid = sled.id();
+            let client = match self.sled_client(opctx, sled_id).await {
+                Ok(c) => c,
+                Err(e) => {
+                    warn!(
+                        opctx.log,
+                        "failed to create sled-agent client for \
+                         M2P/forwarding convergence";
+                        "sled_id" => %sled_id,
+                        "error" => %e
+                    );
+                    return Err(());
+                }
+            };
+            if let Err(e) =
+                converge_sled_m2p_and_forwarding(&client, convergence_params)
+                    .await
+            {
+                warn!(
+                    opctx.log,
+                    "failed to converge M2P/forwarding on sled";
+                    "sled_id" => %sled_id,
+                    "group_ip" => %group_ip,
+                    "error" => %e
+                );
+                return Err(());
+            }
+            Ok(())
+        }))
+        .await;
+
+        let failed_sleds = results.iter().filter(|r| r.is_err()).count();
+
+        info!(
+            opctx.log,
+            "converged M2P and forwarding state";
+            "group_id" => %group.id(),
+            "group_ip" => %group_ip,
+            "underlay_ip" => %underlay_ip,
+            "member_sleds" => member_sled_ids.len(),
+            "total_sleds_checked" => all_sleds.len(),
+            "failed_sleds" => failed_sleds
+        );
+
+        if failed_sleds > 0 {
+            anyhow::bail!(
+                "failed to converge M2P/forwarding: \
+                 {failed_sleds} sled convergence failures \
+                 (out of {} sleds)",
+                all_sleds.len()
+            );
+        }
+
+        Ok(())
+    }
+
+    async fn resolve_underlay_ip(
+        &self,
+        opctx: &OpContext,
+        group: &MulticastGroup,
+    ) -> Result<Ipv6Addr, anyhow::Error> {
+        let underlay_group_id = group
+            .underlay_group_id
+            .context("group missing underlay_group_id")?;
+
+        match self
+            .datastore
+            .underlay_multicast_group_fetch(opctx, underlay_group_id)
+            .await
+        {
+            Ok(underlay_group) => match underlay_group.multicast_ip.ip() {
+                IpAddr::V6(v6) => Ok(v6),
+                other => anyhow::bail!(
+                    "underlay multicast address for group {} is {other}, \
+                     expected IPv6",
+                    group.id()
+                ),
+            },
+            Err(external::Error::ObjectNotFound { .. }) => {
+                let salt = group.underlay_salt.map_or(0, |s| *s);
+                match super::map_external_to_underlay_ip(
+                    group.multicast_ip.ip(),
+                    salt,
+                ) {
+                    IpAddr::V6(v6) => Ok(v6),
+                    IpAddr::V4(_) => anyhow::bail!(
+                        "computed IPv4 underlay address for group {}",
+                        group.id()
+                    ),
+                }
+            }
+            Err(e) => Err(e).context("failed to fetch underlay group"),
+        }
+    }
+
+    /// Clear M2P mappings and forwarding entries from all sleds for
+    /// this group.
+    ///
+    /// Delegates to the convergent [`propagate_m2p_and_forwarding`] which
+    /// will detect that no joined members remain and clear stale state
+    /// from all sleds.
+    ///
+    /// [`propagate_m2p_and_forwarding`]: Self::propagate_m2p_and_forwarding
+    pub(crate) async fn clear_m2p_and_forwarding(
+        &self,
+        opctx: &OpContext,
+        group: &MulticastGroup,
+    ) -> Result<(), anyhow::Error> {
+        self.propagate_m2p_and_forwarding(opctx, group).await
+    }
+}
+
+/// Resolved group state used to converge M2P and forwarding on each sled.
+struct GroupConvergenceParams<'a> {
+    group_ip: IpAddr,
+    underlay_ip: Ipv6Addr,
+    group_is_active: bool,
+    desired_m2p: &'a Mcast2PhysMapping,
+    /// Switch zone underlay IP chosen as the forwarding next hop.
+    /// The switch replicates to member sled ports via DPD config.
+    switch_ip: Ipv6Addr,
+}
+
+/// Per-sled convergence of M2P and forwarding state.
+///
+/// # Errors
+///
+/// Returns an error when any sled-agent RPC fails (list, set, or clear).
+/// The caller increments `failed_sleds` and continues to the next sled.
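+///
+/// The M2P half follows this decision table (restating the match in
+/// `converge_m2p` below):
+///
+/// ```text
+/// (group_is_active, has_m2p)        action
+/// (true,  false)                    set_mcast_m2p   (install)
+/// (false, true)                     clear_mcast_m2p (remove stale)
+/// (true,  true) / (false, false)    no-op, already converged
+/// ```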
+async fn converge_sled_m2p_and_forwarding( + client: &sled_agent_client::Client, + params: &GroupConvergenceParams<'_>, +) -> Result<(), anyhow::Error> { + converge_m2p(client, params).await?; + converge_forwarding(client, params).await?; + Ok(()) +} + +/// Converge a single sled's M2P mapping for one group. +/// +/// Sets the mapping when the group is active and missing, clears it +/// when the group is inactive and present. Already-correct state +/// is left alone. +async fn converge_m2p( + client: &sled_agent_client::Client, + params: &GroupConvergenceParams<'_>, +) -> Result<(), anyhow::Error> { + let found = client + .list_mcast_m2p() + .await + .context("failed to list M2P mappings on sled")? + .into_inner(); + + let has_m2p = found.iter().any(|m| { + m.group == params.group_ip && m.underlay == params.underlay_ip + }); + + match (params.group_is_active, has_m2p) { + // Active group missing M2P: install it. + (true, false) => { + client + .set_mcast_m2p(params.desired_m2p) + .await + .context("failed to add M2P mapping to sled")?; + } + // Inactive group has stale M2P: remove it. + (false, true) => { + let clear = ClearMcast2Phys { + group: params.group_ip, + underlay: params.underlay_ip, + }; + client + .clear_mcast_m2p(&clear) + .await + .context("failed to clear stale M2P from sled")?; + } + // Already converged. + _ => {} + } + + Ok(()) +} + +/// Converge a single sled's forwarding entries for one group. +/// +/// When the group is active, this sets a single next hop to the switch +/// zone. The switch replicates to member sled ports via its DPD +/// multicast group membership. When inactive, this clears any stale +/// entries. +async fn converge_forwarding( + client: &sled_agent_client::Client, + params: &GroupConvergenceParams<'_>, +) -> Result<(), anyhow::Error> { + let found = client + .list_mcast_fwd() + .await + .context("failed to list forwarding on sled")? + .into_inner(); + + let current_entry = found.iter().find(|f| f.underlay == params.underlay_ip); + + if !params.group_is_active { + if current_entry.is_some() { + let clear = ClearMcastForwarding { underlay: params.underlay_ip }; + client + .clear_mcast_fwd(&clear) + .await + .context("failed to clear stale forwarding from sled")?; + } + return Ok(()); + } + + let desired_next_hops = vec![McastForwardingNextHop { + next_hop: params.switch_ip, + replication: McastReplication::Underlay, + filter: McastSourceFilter { + mode: McastFilterMode::Exclude, + sources: Vec::new(), + }, + }]; + + let needs_update = match current_entry { + Some(f) => f.next_hops != desired_next_hops, + None => true, + }; + + if needs_update { + // OPTE's set_mcast_fwd handler is additive: it inserts next + // hops but never removes stale ones. Clear first so the + // subsequent set produces an exact replacement. 
+            if current_entry.is_some() {
+                let clear = ClearMcastForwarding { underlay: params.underlay_ip };
+                client
+                    .clear_mcast_fwd(&clear)
+                    .await
+                    .context("failed to clear forwarding before update")?;
+            }
+            let desired_fwd = McastForwardingEntry {
+                underlay: params.underlay_ip,
+                next_hops: desired_next_hops,
+            };
+            client
+                .set_mcast_fwd(&desired_fwd)
+                .await
+                .context("failed to set forwarding on sled")?;
+    }
+
+    Ok(())
+}
+
+fn select_forwarding_switch_ip(
+    group_id: MulticastGroupUuid,
+    switch_zone_addrs: &HashMap<SwitchSlot, Ipv6Addr>,
+) -> Option<Ipv6Addr> {
+    let mut ordered_switches: Vec<_> = switch_zone_addrs.iter().collect();
+    ordered_switches.sort_by_key(|(slot, _)| **slot);
+
+    if ordered_switches.is_empty() {
+        return None;
+    }
+
+    // Hash the group UUID to distribute switch selection across both
+    // switches. Ordering by slot keeps the selection stable across
+    // reconciliation passes and Nexus instances.
+    let mut hasher = DefaultHasher::new();
+    group_id.hash(&mut hasher);
+    let idx = (hasher.finish() as usize) % ordered_switches.len();
+    Some(*ordered_switches[idx].1)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::select_forwarding_switch_ip;
+
+    use std::collections::HashMap;
+    use std::net::Ipv6Addr;
+
+    use omicron_uuid_kinds::{GenericUuid, MulticastGroupUuid};
+    use sled_agent_types::early_networking::SwitchSlot;
+    use uuid::Uuid;
+
+    #[test]
+    fn select_forwarding_switch_ip_returns_none_when_empty() {
+        let group_id = MulticastGroupUuid::from_untyped_uuid(Uuid::new_v4());
+        let switch_zone_addrs = HashMap::new();
+
+        assert_eq!(
+            select_forwarding_switch_ip(group_id, &switch_zone_addrs),
+            None
+        );
+    }
+
+    #[test]
+    fn select_forwarding_switch_ip_is_stable_across_map_order() {
+        let group_id = MulticastGroupUuid::from_untyped_uuid(Uuid::new_v4());
+        let switch0 = Ipv6Addr::LOCALHOST;
+        let switch1 = Ipv6Addr::new(0xfd00, 0, 0, 0, 0, 0, 0, 2);
+
+        let mut first = HashMap::new();
+        first.insert(SwitchSlot::Switch0, switch0);
+        first.insert(SwitchSlot::Switch1, switch1);
+
+        let mut second = HashMap::new();
+        second.insert(SwitchSlot::Switch1, switch1);
+        second.insert(SwitchSlot::Switch0, switch0);
+
+        assert_eq!(
+            select_forwarding_switch_ip(group_id, &first),
+            select_forwarding_switch_ip(group_id, &second)
+        );
+    }
+}
diff --git a/nexus/src/app/multicast/switch_zone.rs b/nexus/src/app/multicast/switch_zone.rs
new file mode 100644
index 00000000000..15d65811212
--- /dev/null
+++ b/nexus/src/app/multicast/switch_zone.rs
@@ -0,0 +1,421 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+//! Switch zone service clients for multicast operations.
+//!
+//! Wraps MGD (for MRIB programming) and DDM (for peer topology)
+//! on the switch zone. Built per reconciliation pass.
+//!
+//! - **MRIB**: Nexus → MGD MRIB → mg-lower → DDM → peer sleds
+//!
+//! - **Peers**: DDM peer info provides live sled-to-port mapping
+
+use std::collections::HashMap;
+use std::net::{IpAddr, Ipv6Addr, SocketAddrV6};
+use std::time::Duration;
+
+use anyhow::anyhow;
+use futures::future::try_join_all;
+use internal_dns_resolver::Resolver;
+use sled_agent_types::early_networking::SwitchSlot;
+use slog::{Logger, debug, warn};
+
+use internal_dns_types::names::ServiceName;
+use mg_admin_client::types::{
+    MribAddStaticRequest, MribDeleteStaticRequest, MulticastRouteKey,
+    MulticastRouteKeyV4, MulticastRouteKeyV6, StaticMulticastRouteInput,
+};
+use omicron_common::address::{DDMD_PORT, MGD_PORT};
+use omicron_ddm_admin_client::types::PeerInfo;
+
+use crate::app::switch_zone_targets;
+
+/// Client for switch zone services used by the multicast reconciler.
+///
+/// Provides access to MGD (MRIB route programming) and DDM (peer
+/// topology for sled-to-port liveness).
+///
+/// Built per reconciliation pass, similar to [`MulticastDataplaneClient`].
+///
+/// Note: per [omicron#10167], system-level networking (uplinkd, system-zone
+/// NAT, BGP, BFD) is migrating from Nexus RPWs to sled-agent reconcilers
+/// that operate based on data in the bootstore. Multicast is
+/// **instance networking** (group state derives from per-instance memberships),
+/// so this client's direct-to-MGD path is intentional and should be preserved
+/// by the migration.
+///
+/// If a future iteration of that migration takes over MRIB writes, the
+/// reconciler logic stays in Nexus and only the wire surface changes
+/// (Nexus calls a sled-agent endpoint that fronts MGD).
+///
+/// [`MulticastDataplaneClient`]: super::dataplane::MulticastDataplaneClient
+/// [omicron#10167]: https://github.com/oxidecomputer/omicron/issues/10167
+pub(crate) struct MulticastSwitchZoneClient {
+    mgd_clients: HashMap<SwitchSlot, mg_admin_client::Client>,
+    ddm_clients: HashMap<SwitchSlot, omicron_ddm_admin_client::Client>,
+    log: Logger,
+}
+
+pub(crate) type MribRouteIndex =
+    HashMap<IpAddr, HashMap<Option<IpAddr>, HashMap<SwitchSlot, Ipv6Addr>>>;
+
+// Mirrors `MulticastDataplaneClient::new`'s timeout.
+const SWITCH_ZONE_BUILD_TIMEOUT: Duration = Duration::from_secs(5);
+
+impl MulticastSwitchZoneClient {
+    /// Build MGD and DDM clients for all switch zones.
+    ///
+    /// Resolves service ports from DNS rather than hardcoding them,
+    /// falling back to the well-known port constants when DNS lookup
+    /// fails. This allows the test harness to run MGD and DDM on
+    /// dynamic ports.
+    ///
+    /// Returns an error when no switch zones resolve, so the reconciler
+    /// retries rather than silently treating writes as no-ops.
+    pub(crate) async fn new(
+        resolver: Resolver,
+        log: Logger,
+    ) -> Result<Self, String> {
+        match tokio::time::timeout(
+            SWITCH_ZONE_BUILD_TIMEOUT,
+            Self::build(resolver, log.clone()),
+        )
+        .await
+        {
+            Ok(result) => result,
+            Err(_) => Err(format!(
+                "timed out building switch-zone clients after \
+                 {SWITCH_ZONE_BUILD_TIMEOUT:?}"
+            )),
+        }
+    }
+
+    async fn build(resolver: Resolver, log: Logger) -> Result<Self, String> {
+        let switch_zones = switch_zone_targets(&resolver, &log).await?;
+
+        if switch_zones.is_empty() {
+            return Err(
+                "no switch zones resolved for multicast operations".to_string()
+            );
+        }
+
+        // Resolve MGD and DDM sockets from DNS, keyed by SRV target. This
+        // preserves distinct switch zones that share an IPv6 address in tests
+        // and differ only by port.
+        let mgd_socket_map =
+            resolve_service_sockets(&resolver, &log, ServiceName::Mgd).await;
+        let ddm_socket_map =
+            resolve_service_sockets(&resolver, &log, ServiceName::Ddm).await;
+
+        let mgd_clients = switch_zones
+            .iter()
+            .map(|(slot, endpoint)| {
+                let socketaddr = mgd_socket_map
+                    .get(&endpoint.target)
+                    .copied()
+                    .unwrap_or_else(|| {
+                        SocketAddrV6::new(endpoint.addr, MGD_PORT, 0, 0)
+                    });
+                (
+                    *slot,
+                    mg_admin_client::Client::new(
+                        &format!("http://{socketaddr}"),
+                        log.clone(),
+                    ),
+                )
+            })
+            .collect();
+
+        let ddm_clients = switch_zones
+            .iter()
+            .filter_map(|(slot, endpoint)| {
+                let socketaddr = ddm_socket_map
+                    .get(&endpoint.target)
+                    .copied()
+                    .unwrap_or_else(|| {
+                        SocketAddrV6::new(endpoint.addr, DDMD_PORT, 0, 0)
+                    });
+                match omicron_ddm_admin_client::Client::new(&log, socketaddr) {
+                    Ok(c) => Some((*slot, c)),
+                    Err(e) => {
+                        warn!(
+                            log,
+                            "failed to build DDM client for switch zone";
+                            "switch" => ?slot,
+                            "error" => %e,
+                        );
+                        None
+                    }
+                }
+            })
+            .collect();
+
+        Ok(Self { mgd_clients, ddm_clients, log })
+    }
+
+    /// Add a multicast route to the MRIB on all switches in parallel.
+    ///
+    /// `mg-lower` watches the MRIB and automatically advertises the
+    /// route via DDM to peer sleds. Short-circuits on the first switch
+    /// failure as the reconciler retries the full set on the next pass.
+    pub(crate) async fn add_route(
+        &self,
+        group_ip: IpAddr,
+        underlay_ip: Ipv6Addr,
+        source: Option<IpAddr>,
+    ) -> Result<(), anyhow::Error> {
+        let route_key = make_route_key(group_ip, source);
+
+        let request = MribAddStaticRequest {
+            routes: vec![StaticMulticastRouteInput {
+                key: route_key,
+                underlay_group: underlay_ip,
+            }],
+        };
+
+        try_join_all(self.mgd_clients.iter().map(|(slot, client)| {
+            let request = &request;
+            async move {
+                client.static_add_mcast_route(request).await.map_err(|e| {
+                    warn!(
+                        self.log,
+                        "mgd static_add_mcast_route failed";
+                        "switch" => ?slot,
+                        "group_ip" => %group_ip,
+                        "error" => %e,
+                    );
+                    anyhow!(
+                        "mgd static_add_mcast_route failed on switch {slot:?}: {e}"
+                    )
+                })?;
+                debug!(
+                    self.log,
+                    "added multicast route to MRIB";
+                    "switch" => ?slot,
+                    "group_ip" => %group_ip,
+                    "underlay_ip" => %underlay_ip,
+                );
+                Ok::<(), anyhow::Error>(())
+            }
+        }))
+        .await?;
+        Ok(())
+    }
+
+    /// Remove a multicast route from the MRIB on all switches in parallel.
+    ///
+    /// `mg-lower` detects the removal and withdraws the DDM
+    /// advertisement from peer sleds. Short-circuits on the first
+    /// switch failure as the reconciler retries on the next pass.
+    pub(crate) async fn remove_route(
+        &self,
+        group_ip: IpAddr,
+        source: Option<IpAddr>,
+    ) -> Result<(), anyhow::Error> {
+        let route_key = make_route_key(group_ip, source);
+
+        let request = MribDeleteStaticRequest { keys: vec![route_key] };
+
+        try_join_all(self.mgd_clients.iter().map(|(slot, client)| {
+            let request = &request;
+            async move {
+                client.static_remove_mcast_route(request).await.map_err(
+                    |e| {
+                        warn!(
+                            self.log,
+                            "mgd static_remove_mcast_route failed";
+                            "switch" => ?slot,
+                            "group_ip" => %group_ip,
+                            "error" => %e,
+                        );
+                        anyhow!(
+                            "mgd static_remove_mcast_route failed on switch {slot:?}: {e}"
+                        )
+                    },
+                )?;
+                debug!(
+                    self.log,
+                    "removed multicast route from MRIB";
+                    "switch" => ?slot,
+                    "group_ip" => %group_ip,
+                );
+                Ok::<(), anyhow::Error>(())
+            }
+        }))
+        .await?;
+        Ok(())
+    }
+
+    /// List static multicast routes from all reachable switches and
+    /// index them by group/source/switch.
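+    ///
+    /// The returned index nests as (shape sketch of `MribRouteIndex`):
+    ///
+    /// ```text
+    /// group IP -> source (None = ASM) -> switch slot -> underlay group
+    /// ```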
+    pub(crate) async fn list_routes_indexed(
+        &self,
+    ) -> Result<MribRouteIndex, anyhow::Error> {
+        let mut index = MribRouteIndex::new();
+
+        for (slot, client) in &self.mgd_clients {
+            match client.static_list_mcast_routes().await {
+                Ok(routes) => {
+                    for route in routes.into_inner() {
+                        let (group_ip, source) = route_identifier(&route.key);
+                        index
+                            .entry(group_ip)
+                            .or_default()
+                            .entry(source)
+                            .or_default()
+                            .insert(*slot, route.underlay_group);
+                    }
+                }
+                Err(e) => {
+                    warn!(
+                        self.log,
+                        "failed to list multicast routes from switch zone";
+                        "switch" => ?slot,
+                        "error" => %e,
+                    );
+                }
+            }
+        }
+
+        Ok(index)
+    }
+
+    pub(crate) fn switch_count(&self) -> usize {
+        self.mgd_clients.len()
+    }
+
+    /// Whether a multicast route is present in `mrib_loc` (RPF-verified)
+    /// on every configured switch.
+    ///
+    /// Returns `false` when the route is missing on any switch, including
+    /// switches that fail the RPC. The reconciler interprets `false` as
+    /// not-yet-forwarding (still in `mrib_in`, de-promoted by the RPF
+    /// revalidator, or simply unreachable) and retries on the next pass.
+    pub(crate) async fn route_active_on_all_switches(
+        &self,
+        group_ip: IpAddr,
+        source: Option<IpAddr>,
+    ) -> bool {
+        let vni = mg_admin_client::types::Vni(u32::from(
+            omicron_common::api::external::Vni::DEFAULT_MULTICAST_VNI,
+        ));
+
+        for (slot, client) in &self.mgd_clients {
+            match client
+                .get_mrib_selected(
+                    None,
+                    Some(&group_ip),
+                    None,
+                    source.as_ref(),
+                    Some(&vni),
+                )
+                .await
+            {
+                Ok(resp) => {
+                    if resp.into_inner().is_empty() {
+                        return false;
+                    }
+                }
+                Err(e) => {
+                    warn!(
+                        self.log,
+                        "mgd get_mrib_selected failed";
+                        "switch" => ?slot,
+                        "group_ip" => %group_ip,
+                        "error" => %e,
+                    );
+                    return false;
+                }
+            }
+        }
+
+        true
+    }
+
+    /// Query DDM peers from all switch zones.
+    ///
+    /// Returns all peers from both switches. A sled connected to both
+    /// switches appears twice with different `if_name` (interface name) values,
+    /// one per switch port.
+    pub(crate) async fn get_ddm_peers(
+        &self,
+    ) -> Result<Vec<PeerInfo>, anyhow::Error> {
+        let mut all_peers = Vec::new();
+
+        for (slot, client) in &self.ddm_clients {
+            match client.get_peers().await {
+                Ok(peers) => {
+                    all_peers.extend(peers.into_values());
+                }
+                Err(e) => {
+                    warn!(
+                        self.log,
+                        "failed to get DDM peers from switch zone";
+                        "switch" => ?slot,
+                        "error" => %e,
+                    );
+                }
+            }
+        }
+
+        Ok(all_peers)
+    }
+}
+
+fn make_route_key(
+    group_ip: IpAddr,
+    source: Option<IpAddr>,
+) -> MulticastRouteKey {
+    let vni = mg_admin_client::types::Vni(u32::from(
+        omicron_common::api::external::Vni::DEFAULT_MULTICAST_VNI,
+    ));
+    match group_ip {
+        IpAddr::V4(v4) => MulticastRouteKey::V4(MulticastRouteKeyV4 {
+            group: v4,
+            source: source.and_then(|s| match s {
+                IpAddr::V4(s4) => Some(s4),
+                _ => None,
+            }),
+            vni,
+        }),
+        IpAddr::V6(v6) => MulticastRouteKey::V6(MulticastRouteKeyV6 {
+            group: v6,
+            source: source.and_then(|s| match s {
+                IpAddr::V6(s6) => Some(s6),
+                _ => None,
+            }),
+            vni,
+        }),
+    }
+}
+
+/// Resolve service sockets from DNS, returning a map of SRV target to socket.
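+///
+/// Fallback sketch: when a switch zone's SRV target is missing from the
+/// map, callers construct the socket from the zone address and the
+/// well-known port themselves, as in `build` above:
+///
+/// ```text
+/// mgd_socket_map.get(&endpoint.target).copied().unwrap_or_else(|| {
+///     SocketAddrV6::new(endpoint.addr, MGD_PORT, 0, 0)
+/// })
+/// ```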
+async fn resolve_service_sockets(
+    resolver: &Resolver,
+    log: &Logger,
+    service: ServiceName,
+) -> HashMap<String, SocketAddrV6> {
+    match resolver.lookup_all_socket_v6_by_target(service).await {
+        Ok(pairs) => pairs.into_iter().collect(),
+        Err(e) => {
+            warn!(
+                log,
+                "failed to resolve service sockets from DNS, using defaults";
+                "service" => ?service,
+                "error" => %e,
+            );
+            HashMap::new()
+        }
+    }
+}
+
+fn route_identifier(key: &MulticastRouteKey) -> (IpAddr, Option<IpAddr>) {
+    match key {
+        MulticastRouteKey::V4(k) => {
+            (IpAddr::V4(k.group), k.source.map(IpAddr::V4))
+        }
+        MulticastRouteKey::V6(k) => {
+            (IpAddr::V6(k.group), k.source.map(IpAddr::V6))
+        }
+    }
+}
diff --git a/nexus/src/app/sagas/multicast_group_dpd_ensure.rs b/nexus/src/app/sagas/multicast_group_dpd_ensure.rs
index 17c1fc2b3a1..48c34e1b95d 100644
--- a/nexus/src/app/sagas/multicast_group_dpd_ensure.rs
+++ b/nexus/src/app/sagas/multicast_group_dpd_ensure.rs
@@ -150,19 +150,21 @@ async fn mgde_fetch_group_data(
         .await
         .map_err(saga_action_failed)?;

-    // Validate groups are in correct state
+    // "Active" is allowed for crash recovery. Rejecting would tear
+    // down correctly-applied DPD state.
     match external_group.state {
-        nexus_db_model::MulticastGroupState::Creating => {}
+        nexus_db_model::MulticastGroupState::Creating
+        | nexus_db_model::MulticastGroupState::Active => {}
         other_state => {
             warn!(
                 osagactx.log(),
-                "external group not in 'Creating' state for DPD";
+                "external group not in 'Creating' or 'Active' state for DPD";
                 "external_group_id" => %params.external_group_id,
                 "external_group_name" => external_group.name().as_str(),
                 "current_state" => ?other_state
             );
             return Err(saga_action_failed(Error::internal_error(&format!(
-                "External group {} is in state {other_state:?}, expected 'Creating'",
+                "External group {} is in state {other_state:?}, expected 'Creating' or 'Active'",
                 params.external_group_id
             ))));
         }
@@ -454,12 +456,16 @@ mod test {
         );
     }

-    /// Test that the saga rejects external groups that are not in "Creating" state.
+    /// Test that the saga accepts "Active" groups (idempotent crash recovery)
+    /// but still rejects groups that are no longer in flight.
     ///
-    /// The saga validates that external groups are in "Creating" state before applying
-    /// DPD configuration. This test verifies that validation works correctly.
+    /// `mgde_fetch_group_data` allows "Creating" and "Active". Re-running the
+    /// saga over a group whose `mgde_update_group_state` already committed
+    /// must succeed through the original DAG so recovery does not roll back
+    /// correctly-applied DPD state. Other states (e.g., "Deleted") are still
+    /// out of scope and must be rejected.
     #[nexus_test(server = crate::Server)]
-    async fn test_saga_rejects_non_creating_state(
+    async fn test_saga_accepts_active_rejects_terminal_state(
         cptestctx: &ControlPlaneTestContext,
     ) {
         let client = &cptestctx.external_client;
@@ -539,19 +545,44 @@ mod test {
             .await
             .expect("Group should transition to Active state");

-        // Try to run saga on Active group - should fail
+        // Re-running the saga on an "Active" group simulates crash-recovery
+        // re-execution: every action is idempotent, so the saga must succeed
+        // through the original DAG rather than triggering rollback that would
+        // tear down correctly-applied DPD state.
        let params = Params {
            serialized_authn: Serialized::for_opctx(&opctx),
            external_group_id: external_group.id(),
            underlay_group_id: underlay_group.id,
        };
+        nexus
+            .sagas
+            .saga_execute::<SagaMulticastGroupDpdEnsure>(params)
+            .await
+            .expect("Saga should re-run idempotently against an Active group");
+
+        // Transition the group to "Deleting" and re-run the saga. The saga
+        // must refuse to run against a group that is no longer in "Creating"
+        // or "Active".
+        let marked = datastore
+            .mark_multicast_group_for_removal_if_no_members(&opctx, group_id)
+            .await
+            .expect("group should mark for removal");
+        assert!(marked, "group should transition to Deleting");
+
+        let params = Params {
+            serialized_authn: Serialized::for_opctx(&opctx),
+            external_group_id: external_group.id(),
+            underlay_group_id: underlay_group.id,
+        };
         let result = nexus
             .sagas
             .saga_execute::<SagaMulticastGroupDpdEnsure>(params)
             .await;
-
-        // Saga should reject Active group
-        assert!(result.is_err(), "Saga should reject group in Active state");
+        assert!(
+            result.is_err(),
+            "Saga should reject group that is no longer in 'Creating' or \
+             'Active'",
+        );
     }
 }
diff --git a/nexus/test-utils/src/lib.rs b/nexus/test-utils/src/lib.rs
index 1d0925527a2..596797e9534 100644
--- a/nexus/test-utils/src/lib.rs
+++ b/nexus/test-utils/src/lib.rs
@@ -9,6 +9,7 @@ use omicron_common::api::external::IdentityMetadata;
 use omicron_sled_agent::sim;
 use omicron_test_utils::dev::poll::{CondCheckError, wait_for_condition};
 use omicron_uuid_kinds::GenericUuid;
+use std::collections::BTreeMap;
 use std::fmt::Debug;
 use std::net::Ipv6Addr;
 use std::time::Duration;
@@ -20,6 +21,7 @@ pub use sim::TEST_RESERVOIR_RAM;
 pub mod background;
 pub mod db;
 pub mod http_testing;
+pub mod multicast;
 mod nexus_test;
 pub mod resource_helpers;
 pub mod sql;
@@ -117,21 +119,36 @@ async fn wait_for_producer_impl(
         .expect("Failed to find producer within time limit");
 }

-/// Build a DPD client for test validation using the first running dendrite instance
+/// Build a DPD client for `Switch0` in the test fixture.
+///
+/// Deterministic by default. Tests that need to validate state on every
+/// switch in a multi-switch fixture should use [`dpd_clients_by_switch`]
+/// instead and iterate, since each switch independently programs its own
+/// underlay group / NAT / forwarding state.
 pub fn dpd_client<N>(
     cptestctx: &ControlPlaneTestContext<N>,
 ) -> dpd_client::Client {
-    // Get the first available dendrite instance and extract the values we need
-    let dendrite_guard = cptestctx.dendrite.read().unwrap();
-    let (switch_slot, dendrite_instance) = dendrite_guard
-        .iter()
-        .next()
-        .expect("No dendrite instances running for test");
+    use sled_agent_types::early_networking::SwitchSlot;
+    dpd_client_for(cptestctx, SwitchSlot::Switch0)
+}

-    // Copy the values we need while the guard is still alive
-    let switch_slot = *switch_slot;
-    let port = dendrite_instance.port;
-    drop(dendrite_guard);
+/// Build a DPD client targeting a specific switch slot.
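+///
+/// A minimal usage sketch (assuming the standard two-switch fixture):
+///
+/// ```text
+/// let dpd0 = dpd_client_for(cptestctx, SwitchSlot::Switch0);
+/// let dpd1 = dpd_client_for(cptestctx, SwitchSlot::Switch1);
+/// ```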
+pub fn dpd_client_for<N>(
+    cptestctx: &ControlPlaneTestContext<N>,
+    switch_slot: sled_agent_types::early_networking::SwitchSlot,
+) -> dpd_client::Client {
+    let port = {
+        let dendrite = cptestctx.dendrite.read().unwrap();
+        dendrite
+            .get(&switch_slot)
+            .unwrap_or_else(|| {
+                panic!(
+                    "no dendrite instance running for {switch_slot:?} in \
+                     test fixture",
+                )
+            })
+            .port
+    };

     let client_state = dpd_client::ClientState {
         tag: String::from("nexus-test"),
@@ -145,6 +162,40 @@ pub fn dpd_client(
     dpd_client::Client::new(&format!("http://[{addr}]:{port}"), client_state)
 }

+/// Build DPD clients for every switch slot in the test fixture, ordered by
+/// `SwitchSlot`.
+///
+/// Use this when validating a per-switch invariant (e.g., "every switch has
+/// the full underlay-member set"). Iterates the dendrite map deterministically
+/// so log output and assertions are stable across test passes.
+pub fn dpd_clients_by_switch<N>(
+    cptestctx: &ControlPlaneTestContext<N>,
+) -> BTreeMap<sled_agent_types::early_networking::SwitchSlot, dpd_client::Client>
+{
+    let dendrite = cptestctx.dendrite.read().unwrap();
+    dendrite
+        .iter()
+        .map(|(slot, instance)| (*slot, instance.port))
+        .collect::<BTreeMap<_, _>>()
+        .into_iter()
+        .map(|(slot, port)| {
+            let client_state = dpd_client::ClientState {
+                tag: String::from("nexus-test"),
+                log: cptestctx.logctx.log.new(slog::o!(
+                    "component" => "DpdClient",
+                    "switch_slot" => format!("{slot:?}"),
+                )),
+            };
+            let addr = Ipv6Addr::LOCALHOST;
+            let client = dpd_client::Client::new(
+                &format!("http://[{addr}]:{port}"),
+                client_state,
+            );
+            (slot, client)
+        })
+        .collect()
+}
+
 #[cfg(test)]
 mod test {
     use crate::TEST_SUITE_PASSWORD;
diff --git a/nexus/test-utils/src/multicast.rs b/nexus/test-utils/src/multicast.rs
new file mode 100644
index 00000000000..2d9179c95e2
--- /dev/null
+++ b/nexus/test-utils/src/multicast.rs
@@ -0,0 +1,224 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+//! Multicast-specific Nexus integration test helpers.
+//!
+//! Wraps the long-running sim instances exposed by the test starter
+//! (`DdmInstance`) with function-style helpers that synchronize them
+//! against state in the datastore.
+
+use std::collections::{BTreeMap, BTreeSet, HashSet};
+use std::sync::Arc;
+use std::time::Duration;
+
+use nexus_db_queries::context::OpContext;
+use nexus_test_interface::NexusServer;
+use nexus_types::deployment::SledFilter;
+use nexus_types::identity::Asset;
+use nexus_types::inventory::Collection;
+use omicron_test_utils::dev::maghemite::{
+    PeerMap, SimPeerStatus, sim_peer_info,
+};
+use omicron_test_utils::dev::poll::{CondCheckError, wait_for_condition};
+use omicron_uuid_kinds::GenericUuid;
+use slog::warn;
+
+use crate::ControlPlaneTestContext;
+
+const READY_POLL_INTERVAL: Duration = Duration::from_millis(100);
+const READY_TIMEOUT: Duration = Duration::from_secs(120);
+
+/// Populate every switch zone's `DdmInstance` peer table from the in-service
+/// sleds recorded in the datastore.
+///
+/// The multicast reconciler prefers DDM peer topology and falls back to
+/// inventory only when DDM is empty or unreachable. Production runs the
+/// real `ddmd`, which populates peers; tests run the in-process
+/// `DdmInstance` simulator, which starts with an empty peer table.
+///
+/// This util synthesizes the production primary path: it waits for
+/// inventory to report SP entries for every in-service sled, looks up
+/// each sled's `sp_slot` from inventory the same way the reconciler's
+/// fallback does (`find_sp_for_sled` matches by serial number), and
+/// injects a peer per switch with the synthetic interface name
+/// `tfportrear<sp_slot>_0` matching `parse_ddm_if_name_to_port`'s
+/// expected format and the rear port the inventory fallback would
+/// resolve to.
+///
+/// Both paths agree on port info by construction. Deriving `sp_slot`
+/// from the same inventory the fallback uses guarantees that toggling
+/// between the primary path and the fallback yields an identical
+/// sled-to-port mapping.
+///
+/// `DdmInstance::set_peers` has replace semantics, so calling this
+/// multiple times always yields a fresh map. Removed sleds drop and
+/// new ones appear.
+///
+/// Tests that explicitly want to exercise the inventory fallback should
+/// follow this call with [`clear_ddm_peers`] (or skip the helper entirely).
+pub async fn populate_ddm_peers<N: NexusServer>(
+    cptestctx: &ControlPlaneTestContext<N>,
+) {
+    let log = &cptestctx.logctx.log;
+    let datastore = cptestctx.server.datastore();
+    let opctx = OpContext::for_tests(log.clone(), datastore.clone());
+
+    let sleds = datastore
+        .sled_list_all_batched(&opctx, SledFilter::InService)
+        .await
+        .expect("failed to list in-service sleds for DDM peer population");
+    let current_ids: BTreeSet<uuid::Uuid> =
+        sleds.iter().map(|sled| sled.id().into_untyped_uuid()).collect();
+
+    // Snapshot the cache without holding the lock across the inventory
+    // wait. On a hit, we're done; on a miss we drop the lock, do the
+    // wait + build, then take the lock again to publish. Concurrent
+    // misses may each build their own (idempotent) `PeerMap`; last
+    // writer wins, which is harmless because builds converge on the
+    // same answer for a given sled-set.
+    let cached = cptestctx.multicast_ddm_peers.lock().unwrap().clone();
+    let peers = match cached {
+        Some((ids, peers)) if ids == current_ids => peers,
+        _ => {
+            // Wait until inventory has both a sled-agent record and an
+            // SP entry for every in-service sled, then capture that
+            // collection so we can resolve `sp_slot` per sled below.
+            let expected_serials: HashSet<String> = sleds
+                .iter()
+                .map(|sled| sled.serial_number().to_string())
+                .collect();
+            let expected_sled_ids: HashSet<uuid::Uuid> =
+                current_ids.iter().copied().collect();
+            let collection = wait_for_inventory_with_sleds(
+                cptestctx,
+                &expected_sled_ids,
+                &expected_serials,
+            )
+            .await;
+
+            // Build the peer map. Match SPs to sleds by serial number
+            // in the same way the reconciler's inventory fallback does.
+            // The synthetic interface name `tfportrear<sp_slot>_0`
+            // round-trips through our parser to the same rear port the
+            // fallback would resolve to.
+            let new_peers: PeerMap = sleds
+                .iter()
+                .map(|sled| {
+                    let sp = collection
+                        .sps
+                        .iter()
+                        .find(|(bb, _)| {
+                            bb.serial_number == sled.serial_number()
+                                && bb.part_number == sled.part_number()
+                        })
+                        .or_else(|| {
+                            collection.sps.iter().find(|(bb, _)| {
+                                bb.serial_number == sled.serial_number()
+                            })
+                        })
+                        .map(|(_, sp)| sp)
+                        .unwrap_or_else(|| {
+                            panic!(
+                                "no inventory SP entry for sled {} (serial \
+                                 {}); inventory subset check should have \
+                                 caught this",
+                                sled.id(),
+                                sled.serial_number(),
+                            )
+                        });
+                    let host = sled.serial_number().to_string();
+                    let if_name = format!("tfportrear{}_0", sp.sp_slot);
+                    (
+                        host.clone(),
+                        sim_peer_info(
+                            sled.ip(),
+                            &host,
+                            &if_name,
+                            0, // kind: 0 = server router
+                            SimPeerStatus::Active,
+                        ),
+                    )
+                })
+                .collect();
+
+            *cptestctx.multicast_ddm_peers.lock().unwrap() =
+                Some((current_ids, new_peers.clone()));
+            new_peers
+        }
+    };
+
+    // Iterate switches in `SwitchSlot` order so log output across test
+    // passes is deterministic. `set_peers` has replace semantics, so
+    // cloning per switch is safe and supports tests that interleave
+    // `clear_ddm_peers`.
+    let switches: BTreeMap<_, _> = cptestctx.ddm.iter().collect();
+    for ddm in switches.values() {
+        ddm.set_peers(peers.clone());
+    }
+}
+
+/// Clear every switch zone's `DdmInstance` peer table.
+///
+/// Typically, use this in tests that exercise `fetch_sled_mapping_from_inventory`.
+///
+/// The reconciler treats an empty DDM peer response as having "no live topology",
+/// which forces the inventory lookup production uses when DDM is genuinely
+/// down.
+pub fn clear_ddm_peers<N>(cptestctx: &ControlPlaneTestContext<N>) {
+    for ddm in cptestctx.ddm.values() {
+        ddm.set_peers(PeerMap::new());
+    }
+}
+
+/// Wait until inventory contains both a sled-agent record *and* an SP entry
+/// for every sled in `expected_sled_ids`, then return the collection.
+///
+/// Both checks are required: `populate_ddm_peers` synthesizes peers from
+/// `collection.sps`, so a sled-agent-only check could let the helper exit
+/// early and panic in the SP lookup if MGS hasn't published the SP yet.
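+///
+/// The wait condition, in sketch form:
+///
+/// ```text
+/// expected_sled_ids ⊆ collection.sled_agents (by sled id)
+/// expected_serials  ⊆ collection.sps         (by baseboard serial)
+/// ```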
+async fn wait_for_inventory_with_sleds<N: NexusServer>(
+    cptestctx: &ControlPlaneTestContext<N>,
+    expected_sled_ids: &HashSet<uuid::Uuid>,
+    expected_serials: &HashSet<String>,
+) -> Arc<Collection> {
+    let log = cptestctx.logctx.log.clone();
+    let server = &cptestctx.server;
+    wait_for_condition::<_, (), _, _>(
+        || async {
+            match server.inventory_collect_and_get_latest_collection().await {
+                Ok(Some(collection)) => {
+                    let inv_sled_ids: HashSet<_> = collection
+                        .sled_agents
+                        .iter()
+                        .map(|sled_agent| {
+                            sled_agent.sled_id.into_untyped_uuid()
+                        })
+                        .collect();
+                    let inv_sp_serials: HashSet<_> = collection
+                        .sps
+                        .keys()
+                        .map(|bb| bb.serial_number.to_string())
+                        .collect();
+
+                    if expected_sled_ids.is_subset(&inv_sled_ids)
+                        && expected_serials.is_subset(&inv_sp_serials)
+                    {
+                        Ok(Arc::new(collection))
+                    } else {
+                        Err(CondCheckError::NotYet)
+                    }
+                }
+                Ok(None) => Err(CondCheckError::NotYet),
+                Err(e) => {
+                    warn!(log, "inventory fetch failed: {e}");
+                    Err(CondCheckError::NotYet)
+                }
+            }
+        },
+        &READY_POLL_INTERVAL,
+        &READY_TIMEOUT,
+    )
+    .await
+    .expect("inventory did not catch up to in-service sleds and SPs")
+}
diff --git a/nexus/test-utils/src/nexus_test.rs b/nexus/test-utils/src/nexus_test.rs
index 693aea88732..c5139b7dc42 100644
--- a/nexus/test-utils/src/nexus_test.rs
+++ b/nexus/test-utils/src/nexus_test.rs
@@ -35,10 +35,12 @@ use oximeter_collector::Oximeter;
 use oximeter_producer::Server as ProducerServer;
 use sled_agent_types::early_networking::SwitchSlot;
 use std::collections::BTreeMap;
+use std::collections::BTreeSet;
 use std::collections::HashMap;
-use std::sync::{Arc, RwLock};
+use std::sync::{Arc, Mutex, RwLock};
 use std::time::Duration;
 use transient_dns_server::TransientDnsServer;
+use uuid::Uuid;

 pub struct ControlPlaneBuilder<'a> {
     // required
@@ -117,6 +119,15 @@ pub struct ControlPlaneTestContext<N> {
     /// Ports of stopped dendrite instances (for use by start_dendrite)
     pub stopped_dendrite_ports: RwLock<HashMap<SwitchSlot, u16>>,
     pub mgd: HashMap<SwitchSlot, dev::maghemite::MgdInstance>,
+    pub ddm: HashMap<SwitchSlot, dev::maghemite::DdmInstance>,
+    /// Cache used by [`crate::multicast::populate_ddm_peers`] so the
+    /// inventory collection used to derive `sp_slot` for every sled runs
+    /// once per fixture per stable sled-set instead of on every call.
+    ///
+    /// This is keyed by the in-service sled-id set so the cache rebuilds
+    /// whenever a sled transitions into or out of service.
+    pub multicast_ddm_peers:
+        Mutex<Option<(BTreeSet<Uuid>, dev::maghemite::PeerMap)>>,
     pub external_dns_zone_name: String,
     pub external_dns: TransientDnsServer,
     pub internal_dns: TransientDnsServer,
@@ -320,6 +331,9 @@ impl<N: NexusServer> ControlPlaneTestContext<N> {
         for (_, mut mgd) in self.mgd {
             mgd.cleanup().await.unwrap();
         }
+        for (_, mut ddm) in self.ddm {
+            ddm.cleanup().await;
+        }
         self.logctx.cleanup_successful();
     }
 }
diff --git a/nexus/test-utils/src/starter.rs b/nexus/test-utils/src/starter.rs
index d05e24f10a2..81be5dbb10b 100644
--- a/nexus/test-utils/src/starter.rs
+++ b/nexus/test-utils/src/starter.rs
@@ -108,7 +108,7 @@ use std::collections::HashMap;
 use std::fmt::Debug;
 use std::iter::{once, repeat, zip};
 use std::net::{IpAddr, Ipv4Addr, Ipv6Addr, SocketAddr, SocketAddrV6};
-use std::sync::{Arc, RwLock};
+use std::sync::{Arc, Mutex, RwLock};
 use std::time::Duration;
 use transient_dns_server::TransientDnsServer;
 use uuid::Uuid;
@@ -146,6 +146,7 @@ pub struct ControlPlaneStarter<'a, N: NexusServer> {
     pub gateway: BTreeMap<SwitchSlot, GatewayTestContext>,
     pub dendrite: RwLock<HashMap<SwitchSlot, dev::dendrite::DendriteInstance>>,
     pub mgd: HashMap<SwitchSlot, dev::maghemite::MgdInstance>,
+    pub ddm: HashMap<SwitchSlot, dev::maghemite::DdmInstance>,

     // NOTE: Only exists after starting Nexus, until external Nexus is
     // initialized.
@@ -203,6 +204,7 @@ impl<'a, N: NexusServer> ControlPlaneStarter<'a, N> { gateway: BTreeMap::new(), dendrite: RwLock::new(HashMap::new()), mgd: HashMap::new(), + ddm: HashMap::new(), nexus_internal: None, nexus_internal_addr: None, external_dns_zone_name: None, @@ -461,6 +463,17 @@ impl<'a, N: NexusServer> ControlPlaneStarter<'a, N> { self.config.pkg.mgd.insert(switch_slot, config); } + pub async fn start_ddm(&mut self, switch_slot: SwitchSlot) { + let log = &self.logctx.log; + debug!(log, "Starting DDM sim"; "switch_slot" => ?switch_slot); + + let ddm = dev::maghemite::DdmInstance::start().await.unwrap(); + let port = ddm.port; + self.ddm.insert(switch_slot, ddm); + + debug!(log, "DDM sim port is {port}"); + } + pub async fn record_switch_dns( &mut self, sled_id: SledUuid, @@ -482,6 +495,7 @@ impl<'a, N: NexusServer> ControlPlaneStarter<'a, N> { self.dendrite.read().unwrap().get(&switch_slot).unwrap().port, self.gateway.get(&switch_slot).unwrap().port, self.mgd.get(&switch_slot).unwrap().port, + self.ddm.get(&switch_slot).unwrap().port, ) .unwrap() } @@ -1249,6 +1263,8 @@ impl<'a, N: NexusServer> ControlPlaneStarter<'a, N> { dendrite: RwLock::new(self.dendrite.into_inner().unwrap()), stopped_dendrite_ports: RwLock::new(HashMap::new()), mgd: self.mgd, + ddm: self.ddm, + multicast_ddm_peers: Mutex::new(None), external_dns_zone_name: self.external_dns_zone_name.unwrap(), external_dns: self.external_dns.unwrap(), internal_dns: self.internal_dns.unwrap(), @@ -1290,6 +1306,9 @@ impl<'a, N: NexusServer> ControlPlaneStarter<'a, N> { for (_, mut mgd) in self.mgd { mgd.cleanup().await.unwrap(); } + for (_, mut ddm) in self.ddm { + ddm.cleanup().await; + } self.logctx.cleanup_successful(); } @@ -1630,6 +1649,12 @@ pub(crate) async fn setup_with_config_impl( builder.start_mgd(SwitchSlot::Switch0).boxed() }), ), + ( + "start_ddm_switch0", + Box::new(|builder| { + builder.start_ddm(SwitchSlot::Switch0).boxed() + }), + ), ( "record_switch_dns", Box::new(|builder| { @@ -1674,6 +1699,12 @@ pub(crate) async fn setup_with_config_impl( builder.start_mgd(SwitchSlot::Switch1).boxed() }), ), + ( + "start_ddm_switch1", + Box::new(|builder| { + builder.start_ddm(SwitchSlot::Switch1).boxed() + }), + ), ( "record_switch_dns", Box::new(|builder| { diff --git a/nexus/tests/config.test.toml b/nexus/tests/config.test.toml index a32a9b86081..88e775aa266 100644 --- a/nexus/tests/config.test.toml +++ b/nexus/tests/config.test.toml @@ -212,7 +212,6 @@ fm.rendezvous_period_secs = 300 probe_distributor.period_secs = 60 multicast_reconciler.period_secs = 60 # Use shorter TTLs for tests to ensure cache invalidation logic is exercised -multicast_reconciler.sled_cache_ttl_secs = 60 multicast_reconciler.backplane_cache_ttl_secs = 120 trust_quorum.period_secs = 60 attached_subnet_manager.period_secs = 60 diff --git a/nexus/tests/integration_tests/initialization.rs b/nexus/tests/integration_tests/initialization.rs index 350757cf1de..714880feb37 100644 --- a/nexus/tests/integration_tests/initialization.rs +++ b/nexus/tests/integration_tests/initialization.rs @@ -158,6 +158,11 @@ async fn test_nexus_boots_before_dendrite() { starter.start_mgd(SwitchSlot::Switch1).await; info!(log, "Started mgd"); + info!(log, "Starting ddm"); + starter.start_ddm(SwitchSlot::Switch0).await; + starter.start_ddm(SwitchSlot::Switch1).await; + info!(log, "Started ddm"); + info!(log, "Populating internal DNS records"); starter .record_switch_dns( @@ -197,6 +202,8 @@ async fn nexus_schema_test_setup( starter.start_dendrite(SwitchSlot::Switch1).await; 
starter.start_mgd(SwitchSlot::Switch0).await; starter.start_mgd(SwitchSlot::Switch1).await; + starter.start_ddm(SwitchSlot::Switch0).await; + starter.start_ddm(SwitchSlot::Switch1).await; starter.populate_internal_dns().await; } diff --git a/nexus/tests/integration_tests/multicast/cache_invalidation.rs b/nexus/tests/integration_tests/multicast/cache_invalidation.rs deleted file mode 100644 index de744c19d9d..00000000000 --- a/nexus/tests/integration_tests/multicast/cache_invalidation.rs +++ /dev/null @@ -1,645 +0,0 @@ -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at https://mozilla.org/MPL/2.0/. - -//! Integration tests for multicast reconciler cache invalidation. -//! -//! Tests inventory and backplane caches used by the multicast reconciler: -//! -//! - Sled move detection: When a sled moves to a different switch port, the -//! reconciler detects this via inventory and updates DPD port mappings -//! - Cache TTL refresh: Verifies caches are refreshed when TTL expires -//! - Backplane cache expiry: Tests that stale backplane mappings are cleaned up - -use http::{Method, StatusCode}; - -use gateway_client::types::{PowerState, RotState, SpState}; -use nexus_db_lookup::LookupPath; -use nexus_db_queries::context::OpContext; -use nexus_test_utils::resource_helpers::{ - create_default_ip_pools, create_project, -}; -use nexus_test_utils_macros::nexus_test; -use nexus_types::deployment::SledFilter; -use nexus_types::external_api::sled; -use nexus_types::inventory::SpType; -use omicron_nexus::Server; -use omicron_nexus::TestInterfaces; -use omicron_uuid_kinds::{GenericUuid, InstanceUuid, MulticastGroupUuid}; - -use super::*; -use crate::integration_tests::instances::instance_wait_for_state; - -/// Test that multicast operations can handle physical sled movement. 
-/// -/// This test simulates a sled being physically moved to a different rack slot: -/// - Create a multicast group and instance, wait for member to join -/// - Verify the member is programmed on the correct rear port (based on original `sp_slot`) -/// - Run reconciler multiple times without inventory change to verify no spurious invalidation -/// - Insert a new inventory collection with a different `sp_slot` for the same sled -/// - Reconciler detects sled location change and invalidates caches automatically -/// - Verify DPD now uses the new rear port matching the new `sp_slot` -#[nexus_test(server = Server)] -async fn test_sled_move_updates_multicast_port_mapping( - cptestctx: &ControlPlaneTestContext, -) { - const PROJECT_NAME: &str = "test-project"; - const GROUP_NAME: &str = "sled-move-test-group"; - const INSTANCE_NAME: &str = "sled-move-test-instance"; - - ensure_multicast_test_ready(cptestctx).await; - - let client = &cptestctx.external_client; - let nexus = &cptestctx.server.server_context().nexus; - let datastore = nexus.datastore(); - let log = &cptestctx.logctx.log; - let opctx = OpContext::for_tests(log.clone(), datastore.clone()); - - // Create project and pools in parallel - ops::join3( - create_default_ip_pools(client), - create_project(client, PROJECT_NAME), - create_multicast_ip_pool(client, "sled-move-pool"), - ) - .await; - - // Create instance (no multicast groups at creation - implicit model) - let instance = instance_for_multicast_groups( - cptestctx, - PROJECT_NAME, - INSTANCE_NAME, - true, - &[], - ) - .await; - - // Add instance to multicast group via instance-centric API - multicast_group_attach(&cptestctx, PROJECT_NAME, INSTANCE_NAME, GROUP_NAME) - .await; - wait_for_group_active(client, GROUP_NAME).await; - - let instance_uuid = InstanceUuid::from_untyped_uuid(instance.identity.id); - - // Wait for member to join - wait_for_member_state( - cptestctx, - GROUP_NAME, - instance.identity.id, - nexus_db_model::MulticastGroupMemberState::Joined, - ) - .await; - - // Verify initial port mapping (based on current inventory `sp_slot`) - verify_inventory_based_port_mapping(cptestctx, &instance_uuid) - .await - .expect("Should verify initial port mapping"); - - // Run reconciler again without new inventory to establish the - // baseline collection ID in the reconciler. Running it twice ensures the - // first run sets `last_seen_collection_id`, and the second run confirms - // no unnecessary cache invalidation occurs when collection is unchanged. 
-    wait_for_multicast_reconciler(&cptestctx.lockstep_client).await;
-    wait_for_multicast_reconciler(&cptestctx.lockstep_client).await;
-
-    // Verify port mapping is unchanged (no spurious cache invalidation)
-    verify_inventory_based_port_mapping(cptestctx, &instance_uuid)
-        .await
-        .expect("Port mapping should be unchanged when inventory unchanged");
-
-    // Assert that the member is in "Joined" state
-    let members_before = list_multicast_group_members(client, GROUP_NAME).await;
-    assert_eq!(members_before.len(), 1, "should have exactly one member");
-    assert_eq!(
-        members_before[0].state, "Joined",
-        "member should be in Joined state before sled move"
-    );
-
-    // Get the sled this instance is running on
-    let sled_id = nexus
-        .active_instance_info(&instance_uuid, None)
-        .await
-        .expect("Active instance info should be available")
-        .expect("Instance should be on a sled")
-        .sled_id;
-
-    // Get sled baseboard information
-    let sleds = datastore
-        .sled_list_all_batched(&opctx, SledFilter::InService)
-        .await
-        .expect("Should list in-service sleds");
-    let sled = sleds
-        .into_iter()
-        .find(|s| s.id() == sled_id)
-        .expect("Should find sled in database");
-
-    // Get current inventory to see the original sp_slot
-    let original_inventory = datastore
-        .inventory_get_latest_collection(&opctx)
-        .await
-        .expect("Should fetch latest inventory collection")
-        .expect("Inventory collection should exist");
-
-    let original_sp = original_inventory
-        .sps
-        .iter()
-        .find(|(bb, _)| bb.serial_number == sled.serial_number())
-        .map(|(_, sp)| sp)
-        .expect("Should find SP for sled in original inventory");
-
-    let original_slot = original_sp.sp_slot;
-    let sled_serial = sled.serial_number().to_string();
-    let sled_part_number = sled.part_number().to_string();
-
-    // Verify DPD has the original port before the move
-    let dpd = nexus_test_utils::dpd_client(cptestctx);
-    let original_port_id = dpd_client::types::PortId::Rear(
-        dpd_client::types::Rear::try_from(format!("rear{original_slot}"))
-            .expect("Should be valid rear port string"),
-    );
-
-    // Determine a valid target slot by querying DPD's backplane map.
-    // Prefer a different slot if available; otherwise fall back to the same.
-    let backplane = dpd
-        .backplane_map()
-        .await
-        .expect("Should fetch backplane map")
-        .into_inner();
-    let mut valid_slots: Vec<u16> = backplane
-        .keys()
-        .filter_map(|k| {
-            k.strip_prefix("rear").and_then(|s| s.parse::<u16>().ok())
-        })
-        .collect();
-    valid_slots.sort_unstable();
-    valid_slots.dedup();
-    let new_slot = valid_slots
-        .iter()
-        .copied()
-        .find(|s| *s != original_slot)
-        .unwrap_or(original_slot);
-
-    // Build a new inventory collection with the sled in a different slot
-    let mut builder = nexus_inventory::CollectionBuilder::new("sled-move-test");
-    builder.found_sp_state(
-        "test-sp",
-        SpType::Sled,
-        new_slot,
-        SpState {
-            serial_number: sled_serial,
-            model: sled_part_number,
-            power_state: PowerState::A0,
-            revision: 0,
-            base_mac_address: [0; 6],
-            hubris_archive_id: "test-hubris".to_string(),
-            rot: RotState::CommunicationFailed {
-                message: "test-rot-state".to_string(),
-            },
-        },
-    );
-
-    let new_collection = builder.build();
-
-    // Insert the new inventory collection
-    datastore
-        .inventory_insert_collection(&opctx, &new_collection)
-        .await
-        .expect("Should insert new inventory collection");
-
-    // Activate the inventory loader to update the watch channel with the new
-    // collection, then activate the reconciler which will detect the sled
-    // location change and invalidate caches.
- activate_inventory_loader(&cptestctx.lockstep_client).await; - nexus.invalidate_multicast_caches(); - wait_for_multicast_reconciler(&cptestctx.lockstep_client).await; - - // Verify that DPD now uses the new rear port (matching new `sp_slot`) - // This helper reads the latest inventory and asserts DPD has a member - // on rear{`sp_slot`}, so it will verify the new mapping is right - verify_inventory_based_port_mapping(cptestctx, &instance_uuid) - .await - .expect("Port mapping should be updated after cache invalidation"); - - // Assert that the member is still in "Joined" state after the move - let members_after = list_multicast_group_members(client, GROUP_NAME).await; - assert_eq!(members_after.len(), 1, "should still have exactly one member"); - assert_eq!( - members_after[0].state, "Joined", - "member should still be in Joined state after sled move" - ); - assert_eq!( - members_after[0].instance_id, instance.identity.id, - "member should still reference the same instance" - ); - - // Verify stale port cleanup: fetch DPD state and ensure old port was removed - let members = datastore - .multicast_group_members_list_by_instance( - &opctx, - instance_uuid, - &DataPageParams::max_page(), - ) - .await - .expect("Should list multicast members for instance"); - let member = members - .first() - .expect("Instance should have at least one multicast membership"); - - let external_group = datastore - .multicast_group_fetch( - &opctx, - MulticastGroupUuid::from_untyped_uuid(member.external_group_id), - ) - .await - .expect("Should fetch external multicast group"); - let underlay_group_id = external_group - .underlay_group_id - .expect("External group should have underlay_group_id"); - - let underlay_group = datastore - .underlay_multicast_group_fetch(&opctx, underlay_group_id) - .await - .expect("Should fetch underlay multicast group"); - - let dpd_client = nexus_test_utils::dpd_client(cptestctx); - let underlay_group_response = dpd_client - .multicast_group_get(&underlay_group.multicast_ip.ip()) - .await - .expect("DPD multicast_group_get should succeed") - .into_inner(); - - let dpd_members = match underlay_group_response { - dpd_client::types::MulticastGroupResponse::Underlay { - members, .. - } => members, - dpd_client::types::MulticastGroupResponse::External { .. } => { - panic!("Expected Underlay group, got External"); - } - }; - - // Verify that the old port membership has been removed (stale port cleanup) - let has_old_port_member = dpd_members.iter().any(|m| { - matches!(m.direction, dpd_client::types::Direction::Underlay) - && m.port_id == original_port_id - }); - - assert!( - !has_old_port_member, - "Old underlay member with rear{original_slot} should have been removed after sled move" - ); -} - -/// Test for cache TTL behavior. 
-///
-/// This test verifies that both sled and backplane cache TTL expiry work correctly:
-///
-/// Sled cache TTL with inventory change:
-/// - Start test server with short TTLs (sled=2s, backplane=1s)
-/// - Create multicast group and instance, wait for member to join
-/// - Insert new inventory with different `sp_slot` (simulating sled move)
-/// - Wait for sled cache TTL to expire
-/// - Verify DPD uses the new rear port after reconciler refreshes cache
-///
-/// Backplane cache TTL without change:
-/// - Wait for backplane cache TTL to expire (tests independent expiry)
-/// - Activate reconciler (refreshes expired backplane cache from DPD)
-/// - Verify port mapping still works after cache refresh
-#[tokio::test]
-async fn test_cache_ttl_behavior() {
-    const PROJECT_NAME: &str = "ttl-test-project";
-    const GROUP_NAME: &str = "ttl-test-group";
-    const INSTANCE_NAME: &str = "ttl-test-instance";
-
-    // Start test server with custom config
-    let cptestctx =
-        nexus_test_utils::ControlPlaneBuilder::new("test_cache_ttl_behavior")
-            .customize_nexus_config(&|config| {
-                // Set short cache TTLs for testing
-                config
-                    .pkg
-                    .background_tasks
-                    .multicast_reconciler
-                    .sled_cache_ttl_secs =
-                    chrono::TimeDelta::seconds(2).to_std().unwrap();
-                config
-                    .pkg
-                    .background_tasks
-                    .multicast_reconciler
-                    .backplane_cache_ttl_secs =
-                    chrono::TimeDelta::seconds(1).to_std().unwrap();
-
-                // Ensure multicast is enabled
-                config.pkg.multicast.enabled = true;
-            })
-            .start::<Server>()
-            .await;
-
-    ensure_multicast_test_ready(&cptestctx).await;
-
-    // Local handles for DB and opctx
-    let nexus = &cptestctx.server.server_context().nexus;
-    let datastore = nexus.datastore();
-    let opctx =
-        OpContext::for_tests(cptestctx.logctx.log.clone(), datastore.clone());
-
-    let client = &cptestctx.external_client;
-
-    // Create project and pools in parallel
-    ops::join3(
-        create_default_ip_pools(client),
-        create_project(client, PROJECT_NAME),
-        create_multicast_ip_pool(client, "ttl-test-pool"),
-    )
-    .await;
-
-    // Create instance (no multicast groups at creation - implicit model)
-    let instance = instance_for_multicast_groups(
-        &cptestctx,
-        PROJECT_NAME,
-        INSTANCE_NAME,
-        true,
-        &[],
-    )
-    .await;
-
-    // Add instance to multicast group via instance-centric API
-    multicast_group_attach(&cptestctx, PROJECT_NAME, INSTANCE_NAME, GROUP_NAME)
-        .await;
-    wait_for_group_active(client, GROUP_NAME).await;
-
-    let instance_uuid = InstanceUuid::from_untyped_uuid(instance.identity.id);
-
-    // Wait for member to join
-    wait_for_member_state(
-        &cptestctx,
-        GROUP_NAME,
-        instance.identity.id,
-        nexus_db_model::MulticastGroupMemberState::Joined,
-    )
-    .await;
-
-    // Verify initial port mapping (this populates both caches)
-    verify_inventory_based_port_mapping(&cptestctx, &instance_uuid)
-        .await
-        .expect("Should verify initial port mapping");
-
-    // Test sled cache TTL with inventory change
-
-    // Get the sled this instance is running on
-    let sled_id = nexus
-        .active_instance_info(&instance_uuid, None)
-        .await
-        .expect("Active instance info should be available")
-        .expect("Instance should be on a sled")
-        .sled_id;
-
-    // Get sled baseboard information
-    let sleds = datastore
-        .sled_list_all_batched(&opctx, SledFilter::InService)
-        .await
-        .expect("Should list in-service sleds");
-    let sled = sleds
-        .into_iter()
-        .find(|s| s.id() == sled_id)
-        .expect("Should find sled in database");
-
-    // Get current inventory to see the original sp_slot
-    let original_inventory = datastore
-        .inventory_get_latest_collection(&opctx)
-        .await
-        .expect("Should fetch latest inventory collection")
-        .expect("Inventory collection should exist");
-
-    let original_sp = original_inventory
-        .sps
-        .iter()
-        .find(|(bb, _)| bb.serial_number == sled.serial_number())
-        .map(|(_, sp)| sp)
-        .expect("Should find SP for sled in original inventory");
-
-    let original_slot = original_sp.sp_slot;
-    let sled_serial = sled.serial_number().to_string();
-    let sled_part_number = sled.part_number().to_string();
-
-    // Determine a valid target slot by querying DPD's backplane map.
-    let dpd = nexus_test_utils::dpd_client(&cptestctx);
-    let backplane = dpd
-        .backplane_map()
-        .await
-        .expect("Should fetch backplane map")
-        .into_inner();
-    let mut valid_slots: Vec<u16> = backplane
-        .keys()
-        .filter_map(|k| {
-            k.strip_prefix("rear").and_then(|s| s.parse::<u16>().ok())
-        })
-        .collect();
-    valid_slots.sort_unstable();
-    valid_slots.dedup();
-    let new_slot = valid_slots
-        .iter()
-        .copied()
-        .find(|s| *s != original_slot)
-        .unwrap_or(original_slot);
-
-    // Build a new inventory collection with the sled in a different slot
-    let mut builder =
-        nexus_inventory::CollectionBuilder::new("ttl-refresh-test");
-    builder.found_sp_state(
-        "test-sp",
-        SpType::Sled,
-        new_slot,
-        SpState {
-            serial_number: sled_serial,
-            model: sled_part_number,
-            power_state: PowerState::A0,
-            revision: 0,
-            base_mac_address: [0; 6],
-            hubris_archive_id: "test-hubris".to_string(),
-            rot: RotState::CommunicationFailed {
-                message: "test-rot-state".to_string(),
-            },
-        },
-    );
-
-    let new_collection = builder.build();
-
-    // Insert the new inventory collection
-    datastore
-        .inventory_insert_collection(&opctx, &new_collection)
-        .await
-        .expect("Should insert new inventory collection");
-
-    // Wait for sled cache TTL to expire (2 seconds)
-    tokio::time::sleep(std::time::Duration::from_millis(2500)).await;
-
-    wait_for_condition_with_reconciler(
-        &cptestctx.lockstep_client,
-        || async {
-            // Try to verify the inventory-based port mapping.
-            // This will succeed once DPD has been updated with the new rear port.
-            match verify_inventory_based_port_mapping(
-                &cptestctx,
-                &instance_uuid,
-            )
-            .await
-            {
-                Ok(()) => Ok(()),
-                Err(_) => {
-                    // Not yet updated; the reconciler needs another cycle.
-                    Err(CondCheckError::<()>::NotYet)
-                }
-            }
-        },
-        &POLL_INTERVAL,
-        &MULTICAST_OPERATION_TIMEOUT,
-    )
-    .await
-    .expect("DPD should update with new rear port after sled cache TTL expiry");
-
-    // Test backplane cache TTL without change
-
-    // Wait for backplane cache TTL to expire (1 second)
-    tokio::time::sleep(std::time::Duration::from_secs(1)).await;
-
-    // Force cache access by activating the reconciler. This will cause the
-    // reconciler to check the backplane cache, find it expired, and refresh
-    // from DPD.
- wait_for_multicast_reconciler(&cptestctx.lockstep_client).await; - - // Verify member is still on the right port after backplane cache refresh - verify_inventory_based_port_mapping(&cptestctx, &instance_uuid) - .await - .expect("Port mapping should work after backplane cache TTL expiry"); - - // Verify member is still in "Joined" state after all cache operations - let members = list_multicast_group_members(client, GROUP_NAME).await; - assert_eq!(members.len(), 1, "should still have exactly one member"); - assert_eq!( - members[0].state, "Joined", - "member should remain in Joined state after cache operations" - ); - assert_eq!( - members[0].instance_id, instance.identity.id, - "member should still reference the same instance" - ); - - cptestctx.teardown().await; -} - -/// Verify expunged sleds are excluded from multicast cache after refresh. -#[nexus_test(extra_sled_agents = 1)] -async fn test_sled_expunge_removes_from_multicast_cache( - cptestctx: &ControlPlaneTestContext, -) { - const PROJECT_NAME: &str = "expunge-test-project"; - const GROUP_NAME: &str = "expunge-test-group"; - const INSTANCE_NAME: &str = "expunge-test-instance"; - - ensure_multicast_test_ready(cptestctx).await; - - let client = &cptestctx.external_client; - let nexus = &cptestctx.server.server_context().nexus; - let datastore = nexus.datastore(); - let opctx = - OpContext::for_tests(cptestctx.logctx.log.clone(), datastore.clone()); - - // Make the second sled non-provisionable so instances go to the first sled - let (authz_sled, ..) = LookupPath::new(&opctx, datastore) - .sled_id(cptestctx.second_sled_id()) - .lookup_for(nexus_db_queries::authz::Action::Modify) - .await - .expect("lookup authz_sled"); - datastore - .sled_set_provision_policy( - &opctx, - &authz_sled, - nexus_types::external_api::sled::SledProvisionPolicy::NonProvisionable, - ) - .await - .expect("set sled provision policy"); - - ops::join3( - create_default_ip_pools(client), - create_project(client, PROJECT_NAME), - create_multicast_ip_pool(client, "expunge-test-pool"), - ) - .await; - - let instance = instance_for_multicast_groups( - cptestctx, - PROJECT_NAME, - INSTANCE_NAME, - true, - &[], - ) - .await; - - multicast_group_attach(&cptestctx, PROJECT_NAME, INSTANCE_NAME, GROUP_NAME) - .await; - wait_for_group_active(client, GROUP_NAME).await; - - let instance_uuid = InstanceUuid::from_untyped_uuid(instance.identity.id); - - wait_for_member_state( - cptestctx, - GROUP_NAME, - instance.identity.id, - nexus_db_model::MulticastGroupMemberState::Joined, - ) - .await; - - verify_inventory_based_port_mapping(&cptestctx, &instance_uuid) - .await - .expect("Should verify initial port mapping"); - - let first_sled_id = cptestctx.first_sled_id(); - cptestctx - .lockstep_client - .make_request( - Method::POST, - "/sleds/expunge", - Some(sled::SledSelector { sled: first_sled_id }), - StatusCode::OK, - ) - .await - .expect("Failed to expunge sled"); - - // Wait for instance to fail (instance-watcher marks instances on expunged sleds as "Failed") - instance_wait_for_state(client, instance_uuid, InstanceState::Failed).await; - - // Manually invalidate caches. - // - // Inventory-based invalidation is tested in - // `test_sled_move_updates_multicast_port_mapping`. This test verifies cache - // refresh uses SledFilter::InService, which excludes expunged sleds. 
- nexus.invalidate_multicast_caches(); - wait_for_multicast_reconciler(&cptestctx.lockstep_client).await; - - wait_for_member_state( - cptestctx, - GROUP_NAME, - instance.identity.id, - nexus_db_model::MulticastGroupMemberState::Left, - ) - .await; - - let in_service_sleds = datastore - .sled_list_all_batched(&opctx, SledFilter::InService) - .await - .expect("Failed to list in-service sleds"); - - assert!( - !in_service_sleds.iter().any(|s| s.id() == first_sled_id), - "Expunged sled should not appear in InService sled list" - ); - - let all_sleds = datastore - .sled_list_all_batched(&opctx, SledFilter::All) - .await - .expect("Failed to list all sleds"); - - assert!( - all_sleds.iter().any(|s| s.id() == first_sled_id), - "Expunged sled should still appear in All filter" - ); -} diff --git a/nexus/tests/integration_tests/multicast/failures.rs b/nexus/tests/integration_tests/multicast/failures.rs index a0f70b79320..da0177d57b8 100644 --- a/nexus/tests/integration_tests/multicast/failures.rs +++ b/nexus/tests/integration_tests/multicast/failures.rs @@ -2,7 +2,7 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. // -// Copyright 2025 Oxide Computer Company +// Copyright 2026 Oxide Computer Company //! Integration tests for multicast group failure and recovery scenarios. //! @@ -214,6 +214,15 @@ async fn test_dpd_failure_during_active_state( /// When DPD is unavailable during implicit group deletion: /// - Group stays in Deleting state (cannot complete cleanup) /// - After DPD recovery, deletion completes +/// +/// This also exercises a partial-cleanup retry invariant. The deletion path +/// in `process_deleting_external_group` is sequential (MRIB withdrawal, sled +/// M2P/forwarding clear, DPD cleanup, DB delete) and bails on first failure. +/// With DPD stopped, MGD-side MRIB removal succeeds and DPD removal fails, +/// so the group stays in "Deleting". After DPD recovery the next reconciler +/// pass must re-issue MRIB removals on already-empty routes without erroring, +/// which depends on `mg_admin_client::static_remove_mcast_route` being +/// idempotent (verified at the RDB layer in maghemite). #[nexus_test] async fn test_dpd_failure_during_deleting_state( cptestctx: &ControlPlaneTestContext, @@ -231,13 +240,19 @@ async fn test_dpd_failure_during_deleting_state( ) .await; + // The single converging pass needs inventory (sled→switch port mapping) + // and DPD ready before the dpd-ensure saga runs. + ensure_inventory_ready(cptestctx).await; + ensure_dpd_ready(cptestctx).await; + // Create instance and add to group create_instance(client, project_name, instance_name).await; multicast_group_attach(cptestctx, project_name, instance_name, group_name) .await; - // Wait for group to reach Active state - wait_for_group_active(client, group_name).await; + let active_group = wait_for_group_active(client, group_name).await; + let multicast_ip = active_group.multicast_ip; + assert_mrib_route_exists(cptestctx, multicast_ip).await; // Stop DPD before triggering deletion cptestctx.stop_dendrite(SwitchSlot::Switch0).await; @@ -294,11 +309,22 @@ async fn test_dpd_failure_during_deleting_state( assert_eq!(group.identity.name.as_str(), group_name); } + // Even though the deletion did not complete, MRIB removal + // ran before the DPD step failed. The route must already be gone. 
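// Aside, ahead of the assertion just below: a sketch of the sequential,
// bail-on-first-failure deletion pass described above. All names here
// (including `GroupHandle` and the helpers) are hypothetical; the real
// logic lives in `process_deleting_external_group`.
async fn deleting_pass_sketch(group: &GroupHandle) -> anyhow::Result<()> {
    // 1. MRIB withdrawal via MGD. Must tolerate an already-removed route,
    //    or a retry after a later step fails would short-circuit here.
    mgd_withdraw_mrib_route(group).await?;
    // 2. Clear sled-agent M2P mappings and forwarding state.
    clear_sled_m2p_and_forwarding(group).await?;
    // 3. DPD cleanup. With dendrite stopped, this is the step that fails,
    //    leaving the group in "Deleting" for the next reconciler pass.
    dpd_remove_group(group).await?;
    // 4. Only after every dataplane step succeeds, delete the DB record.
    db_delete_group_record(group).await
}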
+ assert_mrib_route_absent(cptestctx, multicast_ip).await; + // Restart DPD and activate reconciler to complete deletion cptestctx.restart_dendrite(SwitchSlot::Switch0).await; activate_multicast_reconciler(&cptestctx.lockstep_client).await; cleanup_instances(cptestctx, client, project_name, &[instance_name]).await; wait_for_group_deleted(cptestctx, group_name).await; + + // The second reconciler pass re-issues MRIB removal on already-empty routes. + // If MGD treated the missing route as an error, the pass would short-circuit + // at MRIB and never retry the DPD/DB cleanup, leaving the group stuck in + // a "Deleting" state and `wait_for_group_deleted` above would time out. The + // route must remain absent and not accidentally re-install. + assert_mrib_route_absent(cptestctx, multicast_ip).await; } #[nexus_test] @@ -990,8 +1016,27 @@ async fn test_left_member_waits_for_group_active( put_upsert::<_, MulticastGroupMember>(client, &join_url, &join_params) .await; - // Verify group is stuck in "Creating" (DPD is down) - wait_for_multicast_reconciler(&cptestctx.lockstep_client).await; + // Join records the member as "Joining". The reconciler demotes to + // "Left" once it observes the stopped instance. This transition is + // independent of DPD, so the assertion can advance even with + // dendrite stopped. + wait_for_condition( + || async { + let members = + list_multicast_group_members(client, group_name).await; + match members.first() { + Some(m) if m.state == "Left" => Ok(()), + _ => Err(CondCheckError::<()>::NotYet), + } + }, + &POLL_INTERVAL, + &MULTICAST_OPERATION_TIMEOUT, + ) + .await + .expect( + "member should reach Left after reconciler observes stopped instance", + ); + let group: MulticastGroup = object_get(client, &format!("/v1/multicast-groups/{group_name}")).await; assert_eq!( @@ -999,7 +1044,6 @@ async fn test_left_member_waits_for_group_active( "Group should be stuck in Creating without DPD" ); - // Verify member is in "Left" state (stopped instance) let members = list_multicast_group_members(client, group_name).await; assert_eq!(members.len(), 1); assert_eq!( @@ -1021,19 +1065,15 @@ async fn test_left_member_waits_for_group_active( .unwrap(); instance_wait_for_running_with_simulation(cptestctx, instance_id).await; - // Run reconciler - member should stay in Left because group is not Active - wait_for_multicast_reconciler(&cptestctx.lockstep_client).await; - - // Verify member stays in "Left" (waiting for group to become Active) - let members_after = list_multicast_group_members(client, group_name).await; - assert_eq!(members_after.len(), 1); - assert_eq!( - members_after[0].state, "Left", - "Member should stay in Left while group is Creating, got: {}", - members_after[0].state - ); + activate_multicast_reconciler(&cptestctx.lockstep_client).await; + wait_for_member_state( + cptestctx, + group_name, + instance.identity.id, + nexus_db_model::MulticastGroupMemberState::Left, + ) + .await; - // Verify group is still Creating let group_after: MulticastGroup = object_get(client, &format!("/v1/multicast-groups/{group_name}")).await; assert_eq!( diff --git a/nexus/tests/integration_tests/multicast/groups.rs b/nexus/tests/integration_tests/multicast/groups.rs index 9ae620b7dc2..bf502355ffc 100644 --- a/nexus/tests/integration_tests/multicast/groups.rs +++ b/nexus/tests/integration_tests/multicast/groups.rs @@ -358,11 +358,12 @@ async fn test_instance_multicast_endpoints( let instance = create_instance(client, project_name, instance_name).await; let instance_id = 
        InstanceUuid::from_untyped_uuid(instance.identity.id);
 
-    // Simulate and wait for instance to be fully running with sled_id assigned
+    // Simulate and wait for the instance to be fully running. The first
+    // `wait_for_member_state` call for "Joined" below also waits for sled
+    // assignment before asserting.
     let nexus = &cptestctx.server.server_context().nexus;
     instance_simulate(nexus, &instance_id).await;
     instance_wait_for_state(client, instance_id, InstanceState::Running).await;
-    wait_for_instance_sled_assignment(cptestctx, &instance_id).await;
 
     // Case: List instance multicast groups (should be empty initially)
     let instance_groups_url = format!(
@@ -652,6 +653,9 @@ async fn test_instance_deletion_removes_multicast_memberships(
     assert_eq!(members.len(), 1, "Instance should be a member of the group");
     assert_eq!(members[0].instance_id, instance.identity.id);
 
+    // Verify MRIB route exists while group is active with a joined member.
+    assert_mrib_route_exists(cptestctx, multicast_ip).await;
+
     // Case: Instance deletion should clean up multicast memberships
     cleanup_instances(cptestctx, client, project_name, &[instance_name]).await;
 
@@ -666,6 +670,9 @@ async fn test_instance_deletion_removes_multicast_memberships(
     // Wait for reconciler to clean up DPD state (activates reconciler repeatedly until DPD confirms deletion)
     wait_for_group_deleted_from_dpd(cptestctx, multicast_ip).await;
+
+    // Verify MRIB route was withdrawn after group deletion.
+    assert_mrib_route_absent(cptestctx, multicast_ip).await;
 }
 
 /// Test that the multicast_ip field is correctly populated in MulticastGroupMember API responses.
@@ -1067,6 +1074,42 @@ async fn test_ssm_source_ip_behavior(cptestctx: &ControlPlaneTestContext) {
         "DPD external group sources should be union of all member sources"
     );
 
+    // Case: (S,G) source-set narrowing on member detach.
+    // As specific-source members leave, the DPD union must shrink to the
+    // remaining members' sources. SSM groups never wildcard, so the DPD
+    // source list stays `Some(...)` throughout.
+    multicast_group_detach(
+        client,
+        project_name,
+        instance_names[2],
+        ssm_union_ip,
+    )
+    .await;
+    let mut expected_after_inst3 = vec![source1, source2];
+    expected_after_inst3.sort();
+    wait_for_dpd_source_filter(
+        cptestctx,
+        multicast_ip,
+        Some(expected_after_inst3),
+        "DPD union should shrink to {source1, source2} after inst-3 detaches",
+    )
+    .await;
+
+    multicast_group_detach(
+        client,
+        project_name,
+        instance_names[1],
+        ssm_union_ip,
+    )
+    .await;
+    wait_for_dpd_source_filter(
+        cptestctx,
+        multicast_ip,
+        Some(vec![source1]),
+        "DPD union should shrink to {source1} after inst-2 detaches",
+    )
+    .await;
+
     // Case: IPv6 source with IPv4 group should fail
     let ipv4_ssm_ip = "232.1.0.20";
     let ipv6_source: IpAddr = "2001:db8::1".parse().unwrap();
@@ -1226,6 +1269,244 @@ async fn test_ssm_source_ip_behavior(cptestctx: &ControlPlaneTestContext) {
     }
 }
 
+/// Read the DPD external group source filter as a sorted
+/// [`Option<Vec<IpAddr>>`].
+///
+/// `None` indicates DPD-level source filtering is disabled (the (*,G) case
+/// produced by [`compute_sources_for_dpd`] when any ASM member has empty
+/// `source_ips`). `Some(sorted)` is the (S,G) union written by Nexus.
+///
+/// `IpSrc::Any` entries are filtered out automatically. Nexus only emits
+/// `IpSrc::Exact` today.
+async fn dpd_external_source_filter(
+    cptestctx: &ControlPlaneTestContext,
+    multicast_ip: IpAddr,
+) -> Option<Vec<IpAddr>> {
+    let dpd_response = dpd_client(cptestctx)
+        .multicast_group_get(&multicast_ip)
+        .await
+        .expect("DPD should have external group")
+        .into_inner();
+    match dpd_response {
+        dpd_types::MulticastGroupResponse::External { sources, .. } => sources
+            .map(|srcs| {
+                let mut ips: Vec<IpAddr> = srcs
+                    .iter()
+                    .filter_map(|src| match src {
+                        dpd_types::IpSrc::Exact(ip) => Some(*ip),
+                        dpd_types::IpSrc::Any => None,
+                    })
+                    .collect();
+                ips.sort();
+                ips
+            }),
+        dpd_types::MulticastGroupResponse::Underlay { .. } => {
+            panic!("Expected External group from DPD, got Underlay")
+        }
+    }
+}
+
+/// Activate the multicast reconciler and poll DPD until the external group's
+/// source filter matches `expected`. Use this after an attach/detach where
+/// the test asserts on DPD's converged (S,G) / (*,G) state.
+async fn wait_for_dpd_source_filter(
+    cptestctx: &ControlPlaneTestContext,
+    multicast_ip: IpAddr,
+    expected: Option<Vec<IpAddr>>,
+    msg: &str,
+) {
+    activate_then_wait_for_condition(
+        &cptestctx.lockstep_client,
+        || async {
+            let actual =
+                dpd_external_source_filter(cptestctx, multicast_ip).await;
+            if actual == expected {
+                Ok(())
+            } else {
+                Err(CondCheckError::<()>::NotYet)
+            }
+        },
+        &Duration::from_millis(100),
+        &Duration::from_secs(30),
+    )
+    .await
+    .unwrap_or_else(|err| {
+        panic!(
+            "{msg}: expected {expected:?}, last observed mismatch ({err:?})",
+        )
+    });
+}
+
+/// Test ASM source-filter transitions across (*,G) and (S,G).
+///
+/// Source filtering for an ASM group is the union of all members' source IPs,
+/// unless any member has empty `source_ips`, in which case the switch-level
+/// filter is disabled (DPD `sources = None`, i.e. (*,G)). This test exercises
+/// the transitions on a single group:
+///
+/// 1. Specific-only union grows. Members join with disjoint sources.
+/// 2. Widen (S,G) -> (*,G). An any-source member joins, and DPD `sources`
+///    becomes `None`.
+/// 3. Two-any-source aggregation. A second any-source member joins, then the
+///    first leaves. DPD must remain `None` to prove `has_any_source_member`
+///    is OR-aggregated across live members rather than a stuck flag.
+/// 4. Narrow (*,G) -> (S,G). The last any-source member detaches, and DPD
+///    `sources` returns to the remaining specific-source union.
+/// 5. Specific union shrinks. A specific-source member detaches, and the DPD
+///    union contracts.
+#[nexus_test]
+async fn test_asm_source_filter_transitions(
+    cptestctx: &ControlPlaneTestContext,
+) {
+    let client = &cptestctx.external_client;
+    let project_name = "asm-src-transitions";
+
+    ops::join3(
+        create_project(&client, project_name),
+        create_default_ip_pools(&client),
+        create_multicast_ip_pool_with_range(
+            &client,
+            "asm-transitions-pool",
+            (224, 2, 0, 0),
+            (224, 2, 0, 100),
+        ),
+    )
+    .await;
+
+    // Instance names encode each member's role in the test sequence. The
+    // `specific-` prefix denotes a member that subscribes to a single source
+    // (an (S,G) contributor). The `any-source-` prefix denotes a member that
+    // subscribes to all sources, which forces the group's switch-level filter
+    // off (turning the group into (*,G)). The trailing letter or digit
+    // distinguishes peers that play the same role: two specific-source
+    // members exercise union growth and shrink, and two any-source members
+    // exercise OR-aggregation of `has_any_source_member` across live members.
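// Aside: a sketch of the union rule the five steps below exercise. The
// member representation is illustrative; the real computation is
// `compute_sources_for_dpd`, referenced in the helper docs above.
fn sources_for_dpd_sketch(
    member_sources: &[Vec<IpAddr>], // one source list per live member
) -> Option<Vec<IpAddr>> {
    // Any member with an empty source list is any-source: switch-level
    // filtering is disabled and the group is programmed as (*,G).
    if member_sources.iter().any(|sources| sources.is_empty()) {
        return None;
    }
    // Otherwise DPD gets the sorted, deduplicated (S,G) union.
    let mut union: Vec<IpAddr> =
        member_sources.iter().flatten().copied().collect();
    union.sort();
    union.dedup();
    Some(union)
}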
+ let specific_a = "asm-specific-source-a"; + let specific_b = "asm-specific-source-b"; + let any_source_1 = "asm-any-source-1"; + let any_source_2 = "asm-any-source-2"; + let instance_names = [specific_a, specific_b, any_source_1, any_source_2]; + for name in &instance_names { + create_instance(client, project_name, name).await; + } + + let group_ip_str = "224.2.0.10"; + let group_ip: IpAddr = group_ip_str.parse().unwrap(); + let source_a = IpAddr::V4(Ipv4Addr::new(10, 0, 0, 1)); + let source_b = IpAddr::V4(Ipv4Addr::new(10, 0, 0, 2)); + + // Step 1: specific-source-a joins. DPD union is {source_a}. + multicast_group_attach_with_sources( + cptestctx, + project_name, + specific_a, + group_ip_str, + Some(vec![source_a]), + ) + .await; + wait_for_group_active(client, group_ip_str).await; + wait_for_dpd_source_filter( + cptestctx, + group_ip, + Some(vec![source_a]), + "DPD should program (S,G) with the only specific source", + ) + .await; + + // specific-source-b joins. DPD union grows to {source_a, source_b}. + multicast_group_attach_with_sources( + cptestctx, + project_name, + specific_b, + group_ip_str, + Some(vec![source_b]), + ) + .await; + let mut expected_specific = vec![source_a, source_b]; + expected_specific.sort(); + wait_for_dpd_source_filter( + cptestctx, + group_ip, + Some(expected_specific.clone()), + "DPD union should grow to include both specific sources", + ) + .await; + + // Step 2: any-source-1 joins. DPD widens (S,G) to (*,G). + multicast_group_attach_with_sources( + cptestctx, + project_name, + any_source_1, + group_ip_str, + None, + ) + .await; + wait_for_dpd_source_filter( + cptestctx, + group_ip, + None, + "DPD source filter should be disabled once any member is any-source", + ) + .await; + + // Step 3: any-source-2 joins, then any-source-1 detaches. + // `has_any_source_member` must remain true through this swap. If it were + // a stuck "ever-set" flag, removing any-source-1 would still keep DPD as + // `None` for the wrong reason. Conversely, if it were last-writer-wins, + // removing the original any-source member would incorrectly narrow back + // to (S,G). + multicast_group_attach_with_sources( + cptestctx, + project_name, + any_source_2, + group_ip_str, + None, + ) + .await; + wait_for_dpd_source_filter( + cptestctx, + group_ip, + None, + "DPD should stay disabled with two any-source members", + ) + .await; + + multicast_group_detach(client, project_name, any_source_1, group_ip_str) + .await; + wait_for_dpd_source_filter( + cptestctx, + group_ip, + None, + "DPD should stay disabled while any-source-2 is still any-source", + ) + .await; + + // Step 4: any-source-2 detaches. DPD narrows (*,G) to (S,G) back to the + // remaining specific-source union. + multicast_group_detach(client, project_name, any_source_2, group_ip_str) + .await; + wait_for_dpd_source_filter( + cptestctx, + group_ip, + Some(expected_specific), + "DPD source filter should re-enable with the remaining specific union", + ) + .await; + + // Step 5: specific-source-b detaches. DPD union contracts to {source_a}. + multicast_group_detach(client, project_name, specific_b, group_ip_str) + .await; + wait_for_dpd_source_filter( + cptestctx, + group_ip, + Some(vec![source_a]), + "DPD union should contract after the second specific member leaves", + ) + .await; + + cleanup_instances(cptestctx, client, project_name, &instance_names).await; + wait_for_group_deleted(cptestctx, group_ip_str).await; +} + /// Test default pool behavior when no pool is specified on member join. 
/// /// When a member joins a group without specifying a pool: diff --git a/nexus/tests/integration_tests/multicast/instances.rs b/nexus/tests/integration_tests/multicast/instances.rs index 245e284248e..139cc1487fb 100644 --- a/nexus/tests/integration_tests/multicast/instances.rs +++ b/nexus/tests/integration_tests/multicast/instances.rs @@ -21,10 +21,12 @@ //! - Instance reconfigure adding SSM: Must specify sources for new SSM groups //! - SSM sources are per-member (S,G subscription model) +use std::collections::{BTreeMap, BTreeSet}; use std::net::IpAddr; use http::{Method, StatusCode}; +use nexus_db_model::MulticastGroupMemberState; use nexus_db_queries::context::OpContext; use nexus_test_utils::http_testing::{AuthnMode, NexusRequest, RequestBuilder}; use nexus_test_utils::resource_helpers::{ @@ -136,7 +138,7 @@ async fn test_multicast_lifecycle(cptestctx: &ControlPlaneTestContext) { "group-lifecycle-1", instances[0].identity.id, // Instance is stopped, so should be "Left" - nexus_db_model::MulticastGroupMemberState::Left, + MulticastGroupMemberState::Left, ) .await; @@ -162,16 +164,15 @@ async fn test_multicast_lifecycle(cptestctx: &ControlPlaneTestContext) { ) .await; - // Verify both instances are attached to group-lifecycle-2 - for i in 0..2 { - wait_for_member_state( - cptestctx, - "group-lifecycle-2", - instances[i + 1].identity.id, - nexus_db_model::MulticastGroupMemberState::Left, // Stopped instances - ) + // Verify both instances are attached to group-lifecycle-2 (Left state + // because the instances are stopped). + let expected_left: Vec<_> = (0..2) + .map(|i| { + (instances[i + 1].identity.id, MulticastGroupMemberState::Left) + }) + .collect(); + wait_for_members_state(cptestctx, "group-lifecycle-2", &expected_left) .await; - } // Multi-group attachment (instance to multiple groups) // Attach instance-multi-groups to group-lifecycle-3 (implicitly creates the group) @@ -204,7 +205,7 @@ async fn test_multicast_lifecycle(cptestctx: &ControlPlaneTestContext) { cptestctx, group_name, instances[3].identity.id, - nexus_db_model::MulticastGroupMemberState::Left, // Stopped instance + MulticastGroupMemberState::Left, // Stopped instance ) .await; } @@ -377,7 +378,7 @@ async fn test_multicast_group_attach_conflicts( } #[nexus_test] -async fn test_multicast_group_attach_limits( +async fn test_multicast_group_attach_multiple( cptestctx: &ControlPlaneTestContext, ) { let client = &cptestctx.external_client; @@ -390,14 +391,8 @@ async fn test_multicast_group_attach_limits( ) .await; - // Group names for implicit groups (implicitly created when first member joins) - let group_names = [ - "limit-test-group-0", - "limit-test-group-1", - "limit-test-group-2", - "limit-test-group-3", - "limit-test-group-4", - ]; + let group_names = + ["limit-test-group-0", "limit-test-group-1", "limit-test-group-2"]; // Create instance first (groups will be implicitly created when attached) let instance = instance_for_multicast_groups( @@ -409,8 +404,8 @@ async fn test_multicast_group_attach_limits( ) .await; - // Attach instance to 3 groups (implicitly creates each group) - let multicast_group_names = &group_names[0..3]; + // Attach instance to multiple groups (implicitly creates each group) + let multicast_group_names = &group_names; for group_name in multicast_group_names { multicast_group_attach( cptestctx, @@ -431,7 +426,7 @@ async fn test_multicast_group_attach_limits( cptestctx, group_name, instance.identity.id, - nexus_db_model::MulticastGroupMemberState::Left, + MulticastGroupMemberState::Left, ) 
.await; } @@ -530,17 +525,13 @@ async fn test_multicast_concurrent_operations( ) .await; - // Verify all members reached correct state despite concurrent operations - for instance in instances.iter() { - wait_for_member_state( - cptestctx, - "concurrent-test-group", - instance.identity.id, - // create_instance() starts instances, so they should be Joined - nexus_db_model::MulticastGroupMemberState::Joined, - ) - .await; - } + // Verify all members reached correct state despite concurrent operations. + // create_instance() starts instances, so they should all be Joined. + let expected: Vec<_> = instances + .iter() + .map(|i| (i.identity.id, MulticastGroupMemberState::Joined)) + .collect(); + wait_for_members_state(cptestctx, "concurrent-test-group", &expected).await; // Verify final member count matches expected (all 4 instances) let members = @@ -585,29 +576,23 @@ async fn test_multicast_concurrent_operations( // Wait for final state to be consistent (should still have 2 members) wait_for_member_count(client, "concurrent-test-group", 2).await; - // Concurrent operations during reconciler processing - - // Start a member addition and immediately follow with another operation - // This tests handling of operations that arrive while reconciler is processing - let rapid_ops_future = async { - multicast_group_attach( - cptestctx, - PROJECT_NAME, - "concurrent-instance-3", - "concurrent-test-group", - ) - .await; - // Don't wait for reconciler; immediately do another operation - multicast_group_detach( - client, - PROJECT_NAME, - "concurrent-instance-4", - "concurrent-test-group", - ) - .await; - }; - - rapid_ops_future.await; + // Back-to-back operations without waiting for reconciler between them. + // Tests that the reconciler handles state changes that arrive while it + // is still processing a previous batch. + multicast_group_attach( + cptestctx, + PROJECT_NAME, + "concurrent-instance-3", + "concurrent-test-group", + ) + .await; + multicast_group_detach( + client, + PROJECT_NAME, + "concurrent-instance-4", + "concurrent-test-group", + ) + .await; // Wait for system to reach consistent final state (should have 2 members) wait_for_member_count(client, "concurrent-test-group", 2).await; @@ -616,16 +601,12 @@ async fn test_multicast_concurrent_operations( let post_rapid_members = list_multicast_group_members(client, "concurrent-test-group").await; - // Wait for all remaining members to reach "Joined" state - for member in &post_rapid_members { - wait_for_member_state( - cptestctx, - "concurrent-test-group", - member.instance_id, - nexus_db_model::MulticastGroupMemberState::Joined, - ) - .await; - } + // Wait for all remaining members to reach "Joined" state. 
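// Aside: the `wait_for_members_state` helper used just below is not shown
// in this diff; a plausible shape, assuming the API member view renders
// state as a string and `MulticastGroupMemberState` implements `Display`:
async fn wait_for_members_state_sketch(
    cptestctx: &ControlPlaneTestContext,
    group: &str,
    expected: &[(Uuid, MulticastGroupMemberState)],
) {
    let client = &cptestctx.external_client;
    wait_for_condition(
        || async {
            let members = list_multicast_group_members(client, group).await;
            // Every expected (instance, state) pair must be observed.
            let all_match = expected.iter().all(|(id, state)| {
                members.iter().any(|m| {
                    m.instance_id == *id && m.state == state.to_string()
                })
            });
            if all_match {
                Ok(())
            } else {
                Err(CondCheckError::<()>::NotYet)
            }
        },
        &POLL_INTERVAL,
        &MULTICAST_OPERATION_TIMEOUT,
    )
    .await
    .expect("members did not converge to the expected states");
}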
+ let expected: Vec<_> = post_rapid_members + .iter() + .map(|m| (m.instance_id, MulticastGroupMemberState::Joined)) + .collect(); + wait_for_members_state(cptestctx, "concurrent-test-group", &expected).await; // Cleanup and delete instances (group is implicitly deleted when last member removed) cleanup_instances(cptestctx, client, PROJECT_NAME, &instance_names).await; @@ -698,7 +679,7 @@ async fn test_multicast_member_cleanup_instance_never_started( cptestctx, group_name, instance.identity.id, - nexus_db_model::MulticastGroupMemberState::Left, + MulticastGroupMemberState::Left, ) .await; @@ -820,16 +801,15 @@ async fn test_multicast_migration_scenarios( cptestctx, group1_name, instance1.identity.id, - nexus_db_model::MulticastGroupMemberState::Joined, + MulticastGroupMemberState::Joined, ) .await; - // Verify DPD before migration - let dpd_client = nexus_test_utils::dpd_client(cptestctx); - dpd_client - .multicast_group_get(&multicast_ip) - .await - .expect("Group should exist in DPD before migration"); + for (slot, dpd) in nexus_test_utils::dpd_clients_by_switch(cptestctx) { + dpd.multicast_group_get(&multicast_ip).await.unwrap_or_else(|e| { + panic!("{slot:?}: group should exist in DPD before migration: {e}") + }); + } // Migrate instance let source_sled = nexus @@ -879,22 +859,106 @@ async fn test_multicast_migration_scenarios( .sled_id; assert_eq!(post_sled, target_sled, "Instance should be on target sled"); - wait_for_multicast_reconciler(lockstep_client).await; wait_for_member_state( cptestctx, group1_name, instance1.identity.id, - nexus_db_model::MulticastGroupMemberState::Joined, + MulticastGroupMemberState::Joined, ) .await; verify_inventory_based_port_mapping(cptestctx, &instance1_id) .await .expect("Port mapping should be updated"); - dpd_client - .multicast_group_get(&multicast_ip) + for (slot, dpd) in nexus_test_utils::dpd_clients_by_switch(cptestctx) { + dpd.multicast_group_get(&multicast_ip).await.unwrap_or_else(|e| { + panic!("{slot:?}: group should exist in DPD after migration: {e}") + }); + } + + // Verify sled-agent state after migration: the target sled should + // have the VMM subscription and M2P mapping. The source sled should + // not have any subscription for the old propolis. + { + let datastore = nexus.datastore(); + let opctx = OpContext::for_tests( + cptestctx.logctx.log.clone(), + datastore.clone(), + ); + + let external_group = datastore + .multicast_group_lookup_by_ip(&opctx, multicast_ip) + .await + .expect("Should look up multicast group by IP"); + + let underlay_group_id = external_group + .underlay_group_id + .expect("Active group should have underlay_group_id"); + + let underlay_group = datastore + .underlay_multicast_group_fetch(&opctx, underlay_group_id) + .await + .expect("Should fetch underlay group"); + + let underlay_ipv6 = match underlay_group.multicast_ip.ip() { + IpAddr::V6(v6) => v6, + other => { + panic!("Expected IPv6 underlay address, got {other}") + } + }; + + // Target sled should have the VMM subscription after the + // reconciler pushes it via verify_members. Poll because the + // reconciler may still be propagating state to the sled-agent. 
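// Aside: `activate_then_wait_for_condition`, used just below, is assumed
// here to mean "kick the reconciler before each poll", roughly:
//
//     loop {
//         activate_multicast_reconciler(&cptestctx.lockstep_client).await;
//         if check().await.is_ok() { break; }  // or give up at the timeout
//         tokio::time::sleep(POLL_INTERVAL).await;
//     }
//
// so every check observes a fresh reconciler pass rather than waiting on
// the background task's own activation period.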
+ let target_agent = cptestctx + .sled_agents + .iter() + .find(|sa| sa.sled_agent_id() == target_sled) + .unwrap() + .sled_agent(); + + activate_then_wait_for_condition( + &cptestctx.lockstep_client, + || async { + let groups = + target_agent.instance_multicast_groups.lock().unwrap(); + let has_sub = groups.get(&instance1_id).map_or(false, |g| { + g.iter().any(|m| m.group_ip == multicast_ip) + }); + if has_sub { Ok(()) } else { Err(CondCheckError::NotYet::<()>) } + }, + &POLL_INTERVAL, + &POLL_TIMEOUT, + ) .await - .expect("Group should exist in DPD after migration"); + .expect( + "Target sled should have instance subscription after migration", + ); + + // Target sled should have M2P mapping. + activate_then_wait_for_condition( + &cptestctx.lockstep_client, + || async { + let m2p = target_agent.m2p_mappings.lock().unwrap(); + if m2p.contains(&(multicast_ip, underlay_ipv6)) { + Ok(()) + } else { + Err(CondCheckError::NotYet::<()>) + } + }, + &POLL_INTERVAL, + &POLL_TIMEOUT, + ) + .await + .expect("Target sled should have M2P mapping after migration"); + + // TODO: assert the source sled no longer holds a multicast + // subscription for the old propolis_id. On real hardware, + // VMM teardown (release_opte_ports -> PortTicket::release_inner) + // clears it. The sim does not model per-propolis cleanup on + // unregister for any of the networking maps (external_ips, + // attached_subnets, multicast_groups). + } // Case: Concurrent migrations @@ -911,7 +975,9 @@ async fn test_multicast_migration_scenarios( group2_name, ) .await; + wait_for_group_active(client, group2_name).await; + multicast_group_attach( cptestctx, project_name, @@ -925,21 +991,18 @@ async fn test_multicast_migration_scenarios( .map(|i| InstanceUuid::from_untyped_uuid(i.identity.id)) .collect(); - // Start all instances via simulation - for &instance_id in &instance_ids { + // Start all instances via simulation in parallel. + ops::join_all(instance_ids.iter().map(|&instance_id| async move { instance_simulate(nexus, &instance_id).await; instance_wait_for_state(client, instance_id, InstanceState::Running) .await; - } - for inst in &instances { - wait_for_member_state( - cptestctx, - group2_name, - inst.identity.id, - nexus_db_model::MulticastGroupMemberState::Joined, - ) - .await; - } + })) + .await; + let expected_joined: Vec<_> = instances + .iter() + .map(|inst| (inst.identity.id, MulticastGroupMemberState::Joined)) + .collect(); + wait_for_members_state(cptestctx, group2_name, &expected_joined).await; // Get source/target sleds for each instance let mut source_sleds = Vec::new(); @@ -974,30 +1037,39 @@ async fn test_multicast_migration_scenarios( r.expect("Migration should initiate"); } - // Complete all migrations - for (i, &instance_id) in instance_ids.iter().enumerate() { - let info = nexus - .active_instance_info(&instance_id, None) - .await - .unwrap() - .unwrap(); - vmm_simulate_on_sled( - cptestctx, - nexus, - source_sleds[i], - info.propolis_id, - ) - .await; - vmm_simulate_on_sled( - cptestctx, - nexus, - target_sleds[i], - info.dst_propolis_id.unwrap(), - ) - .await; - instance_wait_for_state(client, instance_id, InstanceState::Running) + // Complete all migrations in parallel. 
+ ops::join_all(instance_ids.iter().enumerate().map(|(i, &instance_id)| { + let source_sled = source_sleds[i]; + let target_sled = target_sleds[i]; + async move { + let info = nexus + .active_instance_info(&instance_id, None) + .await + .unwrap() + .unwrap(); + vmm_simulate_on_sled( + cptestctx, + nexus, + source_sled, + info.propolis_id, + ) .await; - } + vmm_simulate_on_sled( + cptestctx, + nexus, + target_sled, + info.dst_propolis_id.unwrap(), + ) + .await; + instance_wait_for_state( + client, + instance_id, + InstanceState::Running, + ) + .await; + } + })) + .await; // Verify all on target sleds for (i, &instance_id) in instance_ids.iter().enumerate() { @@ -1015,8 +1087,6 @@ async fn test_multicast_migration_scenarios( ); } - wait_for_multicast_reconciler(lockstep_client).await; - let post_members = list_multicast_group_members(client, group2_name).await; assert_eq!( post_members.len(), @@ -1024,15 +1094,12 @@ async fn test_multicast_migration_scenarios( "Both members should persist after concurrent migration" ); - for inst in &instances { - wait_for_member_state( - cptestctx, - group2_name, - inst.identity.id, - nexus_db_model::MulticastGroupMemberState::Joined, - ) + let post_migration_joined: Vec<_> = instances + .iter() + .map(|inst| (inst.identity.id, MulticastGroupMemberState::Joined)) + .collect(); + wait_for_members_state(cptestctx, group2_name, &post_migration_joined) .await; - } // Cleanup cleanup_instances( @@ -1630,18 +1697,31 @@ async fn test_ssm_without_sources_fails_create_and_reconfigure( /// /// This tests the invariant that `multicast_group_member_delete_by_group_and_instance` /// filters by both `group_id` and `instance_id`, not just `group_id`. This is -/// important for saga undo correctness: if Instance B's create saga fails after -/// joining a group, the undo must not affect Instance A's existing membership +/// important for saga undo correctness: if instance B's create saga fails after +/// joining a group, the undo must not affect instance A's existing membership /// in the same group. -#[nexus_test] +/// +/// This also verifies the shared-sled underlay invariant. Underlay membership +/// is port-scoped, not member-scoped, as members on the same sled share a +/// single rear-port entry in the underlay group. To exercise this, the test +/// forces a multi-sled layout via migration: +/// +/// - instances A and B sit on the same sled (sharing one rear port). +/// - instance C sits on the other sled (its own rear port). +/// +/// Deleting instance B must leave the rear-port set in the underlay group +/// unchanged, since A still needs the shared port and C still needs its own. 
+#[nexus_test(extra_sled_agents = 1)] async fn test_instance_delete_preserves_other_memberships( cptestctx: &ControlPlaneTestContext, ) { + ensure_multicast_test_ready(cptestctx).await; + let client = &cptestctx.external_client; + let nexus = &cptestctx.server.server_context().nexus; let project_name = "delete-preserve-project"; let group_name = "delete-preserve-group"; - // Setup: create project and multicast pool ops::join3( create_default_ip_pools(client), create_project(client, project_name), @@ -1654,53 +1734,194 @@ async fn test_instance_delete_preserves_other_memberships( ) .await; - // Create Instance A and join it to the multicast group - create_instance(client, project_name, "instance-a").await; - multicast_group_attach(cptestctx, project_name, "instance-a", group_name) - .await; + let available_sleds = + [cptestctx.first_sled_id(), cptestctx.second_sled_id()]; + + // Bring up A, B, C as "Running" with the group attached. + let instances = ["instance-a", "instance-b", "instance-c"].iter().map( + |name| async move { + let inst = instance_for_multicast_groups( + cptestctx, + project_name, + name, + true, + &[group_name], + ) + .await; + let id = InstanceUuid::from_untyped_uuid(inst.identity.id); + instance_simulate(nexus, &id).await; + instance_wait_for_state(client, id, InstanceState::Running).await; + (inst, id) + }, + ); + let started: Vec<(Instance, InstanceUuid)> = ops::join_all(instances).await; + let (instance_a, instance_a_uuid) = &started[0]; + let (_instance_b, instance_b_uuid) = &started[1]; + let (instance_c, instance_c_uuid) = &started[2]; + wait_for_group_active(client, group_name).await; + let initial_joined: Vec<_> = started + .iter() + .map(|(inst, _)| (inst.identity.id, MulticastGroupMemberState::Joined)) + .collect(); + wait_for_members_state(cptestctx, group_name, &initial_joined).await; + + // Pick a "shared" sled (where A and B will live) and a "solo" sled + // (where C will live), based on A's current placement. + let shared_sled = nexus + .active_instance_info(instance_a_uuid, None) + .await + .unwrap() + .expect("instance A should be on a sled") + .sled_id; + let solo_sled = *available_sleds + .iter() + .find(|&&s| s != shared_sled) + .expect("two distinct sleds expected"); + + migrate_instance_to(cptestctx, *instance_b_uuid, shared_sled).await; + migrate_instance_to(cptestctx, *instance_c_uuid, solo_sled).await; + + // After migration, the reconciler must observe each member's new + // sled_id before the rear-port snapshot. We explicitly + // poll until the DB row matches the post-migration placement for + // every member. 
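// Aside: `wait_for_member_sled_ids`, called just below, is likewise not
// shown in this diff. It is assumed to mirror the `wait_for_members_state`
// sketch earlier, with the predicate comparing each member's recorded sled
// placement, e.g.
//     m.instance_id == *id && m.sled_id == Some(sled.into_untyped_uuid())
// instead of the member-state string.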
+ let expected_placement = [ + (instance_a.identity.id, shared_sled), + (instance_b_uuid.into_untyped_uuid(), shared_sled), + (instance_c.identity.id, solo_sled), + ]; + wait_for_member_sled_ids(cptestctx, group_name, &expected_placement).await; - // Verify Instance A is a member let members_before = list_multicast_group_members(client, group_name).await; - assert_eq!(members_before.len(), 1, "Instance A should be a member"); - let instance_a_id = members_before[0].instance_id; + assert_eq!( + members_before.len(), + 3, + "all three instances should be members" + ); - // Create Instance B and join it to the same group - create_instance(client, project_name, "instance-b").await; - multicast_group_attach(cptestctx, project_name, "instance-b", group_name) - .await; + let group_view = get_multicast_group(client, group_name).await; + let multicast_ip = group_view.multicast_ip; + let underlay_admin_ip = + fetch_underlay_admin_ip(cptestctx, multicast_ip).await; + + // Each switch independently programs the full set of rear ports for the + // group's underlay members. Read every switch's underlay group so a + // missing-on-one-switch fanout regression is caught. + let collect_rear_ports_by_switch = || { + let admin_ip = underlay_admin_ip.clone(); + let dpd_clients = nexus_test_utils::dpd_clients_by_switch(cptestctx); + async move { + let mut by_switch: BTreeMap<_, BTreeSet<_>> = BTreeMap::new(); + for (slot, dpd) in dpd_clients { + let resp = dpd + .multicast_group_get_underlay(&admin_ip) + .await + .expect("underlay group should exist in DPD") + .into_inner(); + // Key on (port_id, link_id) so breakout-link members are + // distinguished and any link/direction drift would be + // visible as a set difference. + let ports: BTreeSet<_> = resp + .members + .into_iter() + .filter(|m| { + matches!(m.port_id, dpd_client::types::PortId::Rear(_)) + && m.direction + == dpd_client::types::Direction::Underlay + }) + .map(|m| (m.port_id, m.link_id)) + .collect(); + by_switch.insert(slot, ports); + } + by_switch + } + }; - // Verify both instances are now members - let members_with_b = list_multicast_group_members(client, group_name).await; - assert_eq!(members_with_b.len(), 2, "Both instances should be members"); + // The DB sled_id update precedes DPD programming, and switches are + // updated independently per pass. Poll until every switch has both + // rear-port entries (shared sled + solo sled) before snapshotting. + let rear_ports_before = wait_for_condition( + || async { + let by_switch = collect_rear_ports_by_switch().await; + if by_switch.values().all(|ports| ports.len() == 2) { + Ok(by_switch) + } else { + Err(CondCheckError::<()>::NotYet) + } + }, + &POLL_INTERVAL, + &MULTICAST_OPERATION_TIMEOUT, + ) + .await + .unwrap_or_else(|e| { + panic!( + "underlay group did not converge to one rear-port entry per \ + occupied sled (shared by A+B, plus C's own) on every switch: \ + {e:?}" + ) + }); - // Delete Instance B, only removing B's membership, not A's + // Delete instance B. A still occupies the shared sled, so the + // shared rear port must remain; C's separate rear port must also + // remain. 
cleanup_instances(cptestctx, client, project_name, &["instance-b"]).await; - // Verify that Instance A's membership must still exist - let members_after_b_delete = + let members_after_b = list_multicast_group_members(client, group_name).await; - - assert_eq!( - members_after_b_delete.len(), - 1, - "Instance A's membership should survive Instance B's deletion" + assert_eq!(members_after_b.len(), 2, "A and C must survive B's deletion"); + let remaining_instance_ids: BTreeSet<_> = + members_after_b.iter().map(|m| m.instance_id).collect(); + assert!( + remaining_instance_ids.contains(&instance_a.identity.id), + "A's membership must remain" ); - assert_eq!( - members_after_b_delete[0].instance_id, instance_a_id, - "The remaining member should be Instance A" + assert!( + remaining_instance_ids.contains(&instance_c.identity.id), + "C's membership must remain" ); - // Verify the group is still active (not deleted due to last member leaving) let group = get_multicast_group(client, group_name).await; - assert_eq!( - group.state, "Active", - "Group should still be active since Instance A is still a member" - ); + assert_eq!(group.state, "Active"); + + // Per-switch DPD updates lag the member-list change. Poll until + // every switch returns to its pre-delete state: A still on the + // shared sled, C on its own. + let rear_ports_after = wait_for_condition( + || async { + let by_switch = collect_rear_ports_by_switch().await; + if by_switch == rear_ports_before { + Ok(by_switch) + } else { + Err(CondCheckError::<()>::NotYet) + } + }, + &POLL_INTERVAL, + &MULTICAST_OPERATION_TIMEOUT, + ) + .await + .unwrap_or_else(|e| { + panic!( + "per-switch rear-port set diverged from pre-deletion snapshot \ + (expected {rear_ports_before:?}): {e:?}" + ) + }); + assert_eq!(rear_ports_after, rear_ports_before); - // Cleanup: delete Instance A, which should trigger group deletion - cleanup_instances(cptestctx, client, project_name, &["instance-a"]).await; + assert_mrib_route_exists(cptestctx, multicast_ip).await; + + // Cleanup: deleting A and C drops both rear ports and tears down + // the group. + cleanup_instances( + cptestctx, + client, + project_name, + &["instance-a", "instance-c"], + ) + .await; wait_for_group_deleted(cptestctx, group_name).await; + wait_for_group_deleted_from_dpd(cptestctx, multicast_ip).await; + assert_mrib_route_absent(cptestctx, multicast_ip).await; } /// Test IPv6 multicast group lifecycle: create, start, stop, delete. 
@@ -1785,16 +2006,14 @@ async fn test_multicast_ipv6_lifecycle(cptestctx: &ControlPlaneTestContext) { .expect("Start should succeed"); instance_simulate(nexus, &instance_id).await; instance_wait_for_state(client, instance_id, InstanceState::Running).await; - wait_for_multicast_reconciler(&cptestctx.lockstep_client).await; - let member_joined = wait_for_member_state( + wait_for_member_state( cptestctx, group_name, instance.identity.id, - nexus_db_model::MulticastGroupMemberState::Joined, + MulticastGroupMemberState::Joined, ) .await; - assert_eq!(member_joined.state, "Joined"); // Stop the instance - member should transition to "Left" let stop_url = @@ -1811,16 +2030,14 @@ async fn test_multicast_ipv6_lifecycle(cptestctx: &ControlPlaneTestContext) { instance_simulate(nexus, &instance_id).await; instance_wait_for_state(client, instance_id, InstanceState::Stopped).await; - wait_for_multicast_reconciler(&cptestctx.lockstep_client).await; - let member_left = wait_for_member_state( + wait_for_member_state( cptestctx, group_name, instance.identity.id, - nexus_db_model::MulticastGroupMemberState::Left, + MulticastGroupMemberState::Left, ) .await; - assert_eq!(member_left.state, "Left"); // Delete the instance - this should delete the group since it's the only member cleanup_instances(cptestctx, client, project_name, &["ipv6-instance"]) @@ -1884,7 +2101,7 @@ async fn test_group_with_all_members_left(cptestctx: &ControlPlaneTestContext) { cptestctx, group_name, instance1.identity.id, - nexus_db_model::MulticastGroupMemberState::Joined, + MulticastGroupMemberState::Joined, ) .await; @@ -1915,7 +2132,7 @@ async fn test_group_with_all_members_left(cptestctx: &ControlPlaneTestContext) { cptestctx, group_name, instance2.identity.id, - nexus_db_model::MulticastGroupMemberState::Joined, + MulticastGroupMemberState::Joined, ) .await; @@ -1937,21 +2154,14 @@ async fn test_group_with_all_members_left(cptestctx: &ControlPlaneTestContext) { instance_wait_for_state(client, id, InstanceState::Stopped).await; } - wait_for_multicast_reconciler(&cptestctx.lockstep_client).await; - // Verify both members are "Left" - wait_for_member_state( + wait_for_members_state( cptestctx, group_name, - instance1.identity.id, - nexus_db_model::MulticastGroupMemberState::Left, - ) - .await; - wait_for_member_state( - cptestctx, - group_name, - instance2.identity.id, - nexus_db_model::MulticastGroupMemberState::Left, + &[ + (instance1.identity.id, MulticastGroupMemberState::Left), + (instance2.identity.id, MulticastGroupMemberState::Left), + ], ) .await; @@ -1977,13 +2187,12 @@ async fn test_group_with_all_members_left(cptestctx: &ControlPlaneTestContext) { instance_simulate(nexus, &id1).await; instance_wait_for_state(client, id1, InstanceState::Running).await; - wait_for_multicast_reconciler(&cptestctx.lockstep_client).await; wait_for_member_state( cptestctx, group_name, instance1.identity.id, - nexus_db_model::MulticastGroupMemberState::Joined, + MulticastGroupMemberState::Joined, ) .await; diff --git a/nexus/tests/integration_tests/multicast/mod.rs b/nexus/tests/integration_tests/multicast/mod.rs index cc3c947008c..8daf3bb12de 100644 --- a/nexus/tests/integration_tests/multicast/mod.rs +++ b/nexus/tests/integration_tests/multicast/mod.rs @@ -16,8 +16,7 @@ use std::future::Future; use std::net::IpAddr; -use std::sync::{Arc, Mutex}; -use std::time::{Duration, Instant}; +use std::time::Duration; use dropshot::test_util::ClientTestContext; use http::{Method, StatusCode}; @@ -42,13 +41,16 @@ use nexus_types::external_api::multicast::{ 
MulticastGroupJoinSpec, MulticastGroupMember, }; use nexus_types::identity::{Asset, Resource}; +use nexus_types::internal_api::params::InstanceMigrateRequest; use omicron_common::api::external::{ ByteCount, DataPageParams, Hostname, IdentityMetadataCreateParams, Instance, InstanceCpuCount, InstanceState, }; use omicron_nexus::TestInterfaces; use omicron_test_utils::dev::poll::{self, CondCheckError, wait_for_condition}; -use omicron_uuid_kinds::{GenericUuid, InstanceUuid, MulticastGroupUuid}; +use omicron_uuid_kinds::{ + GenericUuid, InstanceUuid, MulticastGroupUuid, SledUuid, +}; use crate::integration_tests::instances as instance_helpers; use sled_agent_client::TestInterfaces as SledAgentTestInterfaces; @@ -59,7 +61,6 @@ pub(crate) type ControlPlaneTestContext = mod api; mod authorization; -mod cache_invalidation; mod enablement; mod failures; mod groups; @@ -69,6 +70,7 @@ mod pool_selection; // Timeout constants for test operations const POLL_INTERVAL: Duration = Duration::from_millis(50); +const POLL_TIMEOUT: Duration = Duration::from_secs(30); const MULTICAST_OPERATION_TIMEOUT: Duration = Duration::from_secs(120); /// Generic helper for PUT upsert requests that return 201 Created. @@ -211,6 +213,11 @@ pub(crate) async fn create_multicast_ip_pool_v6( pool } +/// The reconciler can take longer than the default 10s timeout under +/// parallel test load, especially after the CRDB graceful-shutdown +/// change (eb8ae2f8f). 30s matches other heavy background task timeouts. +const RECONCILER_ACTIVATION_TIMEOUT: Duration = Duration::from_secs(30); + /// Waits for the multicast group reconciler to complete. /// /// This wraps wait_background_task with the correct task name. @@ -231,35 +238,24 @@ pub(crate) async fn wait_for_multicast_reconciler( pub(crate) async fn activate_multicast_reconciler( lockstep_client: &ClientTestContext, ) -> nexus_lockstep_client::types::BackgroundTask { - nexus_test_utils::background::activate_background_task( + nexus_test_utils::background::activate_background_task_with_timeout( lockstep_client, "multicast_reconciler", + RECONCILER_ACTIVATION_TIMEOUT, ) .await } -/// Activates the inventory loader and waits for it to complete. +/// Activate the multicast reconciler once, then poll `condition` until it +/// holds (or `timeout` elapses). /// -/// This ensures the watch channel has the latest inventory collection from the database. -pub(crate) async fn activate_inventory_loader( - lockstep_client: &ClientTestContext, -) -> nexus_lockstep_client::types::BackgroundTask { - nexus_test_utils::background::activate_background_task( - lockstep_client, - "inventory_loader", - ) - .await -} - -/// Wait for a condition to be true, activating the reconciler periodically. -/// -/// This is like `wait_for_condition` but activates the multicast reconciler -/// periodically (not on every poll) to drive state changes. We activate the -/// reconciler every 500ms. -/// -/// Useful for tests that need to wait for reconciler-driven state changes -/// (e.g., member state transitions). -pub(crate) async fn wait_for_condition_with_reconciler( +/// For tests that expect convergence in a single reconciler pass. We +/// poll after the activation to absorb read-after-write visibility lag +/// (DB commits, sled-agent state propagation), not to wait for further +/// reconciler iterations. If `condition` only holds after multiple +/// passes, the test author must orchestrate explicitly: activate per +/// step and assert intermediate state between steps. 
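+///
+/// Illustrative call shape (the `converged()` check is a stand-in for a
+/// real condition; see the sled-agent tests for concrete callers):
+///
+/// ```ignore
+/// activate_then_wait_for_condition(
+///     &cptestctx.lockstep_client,
+///     || async {
+///         if converged().await {
+///             Ok(())
+///         } else {
+///             Err(CondCheckError::<()>::NotYet)
+///         }
+///     },
+///     &POLL_INTERVAL,
+///     &MULTICAST_OPERATION_TIMEOUT,
+/// )
+/// .await
+/// .expect("should converge within one reconciler pass");
+/// ```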
+pub(crate) async fn activate_then_wait_for_condition( lockstep_client: &ClientTestContext, condition: F, poll_interval: &Duration, @@ -269,37 +265,8 @@ where F: Fn() -> Fut, Fut: Future>>, { - // Activate reconciler less frequently than we check the condition - // This reduces overhead while still driving state changes forward - const RECONCILER_ACTIVATION_INTERVAL: Duration = Duration::from_millis(500); - - let last_reconciler_activation = Arc::new(Mutex::new(Instant::now())); - - // First, wait for any already-activated reconciler run to complete. - // This tests explicit activation paths (saga completions, etc.). - wait_for_multicast_reconciler(lockstep_client).await; - - wait_for_condition( - || async { - // Only activate reconciler if enough time has passed - let now = Instant::now(); - let should_activate = { - let last = last_reconciler_activation.lock().unwrap(); - now.duration_since(*last) >= RECONCILER_ACTIVATION_INTERVAL - }; - - if should_activate { - // Use activate to drive progress - activate_multicast_reconciler(lockstep_client).await; - *last_reconciler_activation.lock().unwrap() = now; - } - - condition().await - }, - poll_interval, - timeout, - ) - .await + activate_multicast_reconciler(lockstep_client).await; + wait_for_condition(condition, poll_interval, timeout).await } /// Ensure inventory collection has completed with SP data for all sleds. @@ -307,8 +274,8 @@ where /// This function verifies that inventory has SP data for EVERY in-service sled, /// not just that inventory completed. /// -/// This is required for multicast member operations which map `sled_id` → `sp_slot` -/// → switch ports via inventory. +/// This is required for multicast member operations which map `sled_id` to +/// `sp_slot` to switch ports via inventory. pub(crate) async fn ensure_inventory_ready( cptestctx: &ControlPlaneTestContext, ) { @@ -358,9 +325,8 @@ pub(crate) async fn ensure_inventory_ready( let mut missing_sleds = Vec::new(); for sled in &sleds { let has_sp = inventory.sps.iter().any(|(bb, _)| { - (bb.serial_number == sled.serial_number() - && bb.part_number == sled.part_number()) - || bb.serial_number == sled.serial_number() + bb.serial_number == sled.serial_number() + && bb.part_number == sled.part_number() }); if !has_sp { @@ -385,8 +351,8 @@ pub(crate) async fn ensure_inventory_ready( Err(CondCheckError::::NotYet) } }, - &Duration::from_millis(500), // Check every 500ms - &Duration::from_secs(120), // Wait up to 120s + &POLL_INTERVAL, + &MULTICAST_OPERATION_TIMEOUT, ) .await { @@ -448,8 +414,8 @@ pub(crate) async fn ensure_dpd_ready(cptestctx: &ControlPlaneTestContext) { } } }, - &Duration::from_millis(200), // Check every 200ms - &Duration::from_secs(30), // Wait up to 30 seconds for switches + &POLL_INTERVAL, + &POLL_TIMEOUT, ) .await { @@ -552,11 +518,20 @@ pub(crate) async fn wait_for_group_active( .await } -/// Wait for a specific member to reach the expected state -/// (e.g., Joined, Joining, Left). +/// Wait for a multicast group member to reach the expected state. +/// +/// Ensures inventory and DPD are ready, drives one reconciler activation, +/// then asserts the member is observable in `expected_state`. If the state +/// does not match after the pass, fails loudly rather than retrying via +/// reactivation. +/// +/// We poll briefly after the pass to absorb DB read-after-write lag, +/// not to wait for further reconciler iterations. 
/// 
-/// For "Joined" state, this function uses `wait_for_condition_with_reconciler`
-/// to ensure the reconciler processes member state transitions.
+/// Tests that genuinely need multi-step convergence (e.g., recovery from
+/// an injected external failure) must orchestrate explicitly: drive each
+/// step with `activate_multicast_reconciler` and assert the intermediate
+/// state between steps.
 pub(crate) async fn wait_for_member_state(
     cptestctx: &ControlPlaneTestContext,
     group_name: &str,
@@ -567,92 +542,138 @@
     let lockstep_client = &cptestctx.lockstep_client;
     let expected_state_as_str = expected_state.to_string();
 
-    // For "Joined" state, ensure instance has a sled_id assigned
-    // (no need to check inventory again since ensure_inventory_ready() already
-    // verified all sleds have SP data at test setup)
+    // "Joined" requires the dataplane: the reconciler resolves
+    // sled -> port and programs DPD before that transition. Pre-populate
+    // DDM peers and wait for DPD readiness before polling for it.
+    //
+    // "Joining" and "Left" converge from DB-only transitions, so don't
+    // gate those: failure-mode tests rely on being able to wait on them
+    // while DPD is stopped.
     if expected_state == nexus_db_model::MulticastGroupMemberState::Joined {
+        nexus_test_utils::multicast::populate_ddm_peers(cptestctx).await;
+        ensure_dpd_ready(cptestctx).await;
         let instance_uuid = InstanceUuid::from_untyped_uuid(instance_id);
         wait_for_instance_sled_assignment(cptestctx, &instance_uuid).await;
     }
 
+    // Drive one converging pass. This explicit activation guarantees a
+    // fresh pass runs after this point regardless of whether the API call
+    // that triggered the test already activated the reconciler.
+    activate_multicast_reconciler(lockstep_client).await;
+
+    // Verify the post-pass state. Treat read-after-write visibility lag as
+    // `NotYet`, but treat any *other* observed state as a permanent failure.
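+    // (`check_member` below returns the matching member row on success,
+    // so callers can make further assertions on it.)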
let check_member = || async { let members = list_multicast_group_members(client, group_name).await; - - // If we're looking for "Joined" state, we need to ensure the member exists first - // and then wait for the reconciler to process it - if expected_state == nexus_db_model::MulticastGroupMemberState::Joined { - if let Some(member) = - members.iter().find(|m| m.instance_id == instance_id) - { - match member.state.as_str() { - "Joined" => Ok(member.clone()), - "Joining" => { - // Member exists and is in transition - wait a bit more - Err(CondCheckError::NotYet) - } - "Left" => { - // Member in Left state, reconciler needs to process instance start - wait more - Err(CondCheckError::NotYet) - } - other_state => Err(CondCheckError::Failed(format!( - "Member {instance_id} in group {group_name} has unexpected state '{other_state}', expected 'Left', 'Joining' or 'Joined'" - ))), - } - } else { - // Member doesn't exist yet - wait for it to be created - Err(CondCheckError::NotYet) - } - } else { - // For other states, just look for exact match - if let Some(member) = - members.iter().find(|m| m.instance_id == instance_id) - { - if member.state == expected_state_as_str { - Ok(member.clone()) - } else { - Err(CondCheckError::NotYet) - } - } else { - Err(CondCheckError::NotYet) + match members.iter().find(|m| m.instance_id == instance_id) { + Some(member) if member.state == expected_state_as_str => { + Ok(member.clone()) } + Some(member) => Err(CondCheckError::Failed(format!( + "member {instance_id} in group {group_name} reached state \ + '{}' after one reconciler pass, expected '{expected_state_as_str}'", + member.state + ))), + None => Err(CondCheckError::NotYet), } }; - // Use reconciler-activating wait for "Joined" state - let res = if expected_state - == nexus_db_model::MulticastGroupMemberState::Joined + match wait_for_condition( + check_member, + &POLL_INTERVAL, + &MULTICAST_OPERATION_TIMEOUT, + ) + .await { - wait_for_condition_with_reconciler( - lockstep_client, - check_member, - &POLL_INTERVAL, - &MULTICAST_OPERATION_TIMEOUT, - ) - .await - } else { - wait_for_condition( - check_member, - &POLL_INTERVAL, - &MULTICAST_OPERATION_TIMEOUT, - ) - .await - }; - - match res { Ok(member) => member, Err(poll::Error::TimedOut(elapsed)) => { panic!( - "member {instance_id} in group {group_name} did not reach state '{expected_state_as_str}' within {elapsed:?}", + "member {instance_id} in group {group_name} did not appear within {elapsed:?}", ); } Err(poll::Error::PermanentError(err)) => { panic!( - "failed waiting for member {instance_id} in group {group_name} to reach state '{expected_state_as_str}': {err:?}", + "reconciler did not converge member {instance_id} in group \ + {group_name} to '{expected_state_as_str}': {err}", ); } } } +/// Wait for a batch of multicast group members to reach their respective +/// expected states after a single reconciler pass. +/// +/// Like [`wait_for_member_state`] but checks multiple members after +/// one shared reconciler pass. Panics if any member ends up in an +/// unexpected state. 
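+///
+/// For example, `test_group_with_all_members_left` asserts both of its
+/// members converged to "Left" in one pass:
+///
+/// ```ignore
+/// wait_for_members_state(
+///     cptestctx,
+///     group_name,
+///     &[
+///         (instance1.identity.id, MulticastGroupMemberState::Left),
+///         (instance2.identity.id, MulticastGroupMemberState::Left),
+///     ],
+/// )
+/// .await;
+/// ```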
+pub(crate) async fn wait_for_members_state( + cptestctx: &ControlPlaneTestContext, + group_name: &str, + expected: &[(Uuid, nexus_db_model::MulticastGroupMemberState)], +) -> Vec { + let client = &cptestctx.external_client; + let lockstep_client = &cptestctx.lockstep_client; + + let joined_instances: Vec = expected + .iter() + .filter(|(_, state)| { + *state == nexus_db_model::MulticastGroupMemberState::Joined + }) + .map(|(id, _)| InstanceUuid::from_untyped_uuid(*id)) + .collect(); + + if !joined_instances.is_empty() { + nexus_test_utils::multicast::populate_ddm_peers(cptestctx).await; + ensure_dpd_ready(cptestctx).await; + for instance_uuid in &joined_instances { + wait_for_instance_sled_assignment(cptestctx, instance_uuid).await; + } + } + + activate_multicast_reconciler(lockstep_client).await; + + let check = || async { + let members = list_multicast_group_members(client, group_name).await; + let mut resolved = Vec::with_capacity(expected.len()); + for (instance_id, expected_state) in expected { + let expected_str = expected_state.to_string(); + match members.iter().find(|m| m.instance_id == *instance_id) { + Some(member) if member.state == expected_str => { + resolved.push(member.clone()); + } + Some(member) => { + return Err(CondCheckError::Failed(format!( + "member {instance_id} in group {group_name} reached \ + state '{}' after one reconciler pass, expected \ + '{expected_str}'", + member.state + ))); + } + None => return Err(CondCheckError::NotYet), + } + } + Ok(resolved) + }; + + match wait_for_condition( + check, + &POLL_INTERVAL, + &MULTICAST_OPERATION_TIMEOUT, + ) + .await + { + Ok(members) => members, + Err(poll::Error::TimedOut(elapsed)) => panic!( + "members in group {group_name} did not all appear within \ + {elapsed:?} (expected {expected:?})", + ), + Err(poll::Error::PermanentError(err)) => panic!( + "reconciler did not converge members in group {group_name} \ + (expected {expected:?}): {err}", + ), + } +} + /// Wait for an instance to have a sled_id assigned. /// /// This is a stricter check than `instance_wait_for_vmm_registration` - it ensures @@ -1055,7 +1076,12 @@ pub(crate) async fn wait_for_member_count( } } -/// Wait for a multicast group to be deleted (returns 404). +/// Wait for a multicast group to be fully deleted (returns 404). +/// +/// Drives one reconciler activation, which runs `process_deleting_group_inner` +/// end-to-end (M2P/forwarding clear, DPD removal, underlay delete, member +/// delete, group row delete) for groups in "Deleting". Polling around the +/// API check is only for read-after-write visibility. 
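+///
+/// Panics if the group is still visible once the pass and the poll
+/// timeout have elapsed.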
pub(crate) async fn wait_for_group_deleted( cptestctx: &ControlPlaneTestContext, group_name: &str, @@ -1063,25 +1089,25 @@ pub(crate) async fn wait_for_group_deleted( let client = &cptestctx.external_client; let lockstep_client = &cptestctx.lockstep_client; - match wait_for_condition_with_reconciler( - lockstep_client, - || async { - let group_url = mcast_group_url(group_name); - match NexusRequest::object_get(client, &group_url) - .authn_as(AuthnMode::PrivilegedUser) - .execute() - .await - { - Ok(response) => { - if response.status == StatusCode::NOT_FOUND { - Ok(()) - } else { - Err(CondCheckError::<()>::NotYet) - } - } - Err(_) => Ok(()), // Assume 404 or similar error means deleted - } - }, + activate_multicast_reconciler(lockstep_client).await; + + let check = || async { + let group_url = mcast_group_url(group_name); + let response = NexusRequest::new( + RequestBuilder::new(client, Method::GET, &group_url) + .expect_status(Some(StatusCode::NOT_FOUND)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await; + match response { + Ok(_) => Ok(()), + Err(_) => Err(CondCheckError::<()>::NotYet), + } + }; + + match wait_for_condition( + check, &POLL_INTERVAL, &MULTICAST_OPERATION_TIMEOUT, ) @@ -1089,7 +1115,10 @@ pub(crate) async fn wait_for_group_deleted( { Ok(_) => {} Err(poll::Error::TimedOut(elapsed)) => { - panic!("group {group_name} was not deleted within {elapsed:?}",); + panic!( + "group {group_name} was not deleted within {elapsed:?} after \ + one reconciler pass", + ); } Err(poll::Error::PermanentError(err)) => { panic!( @@ -1099,10 +1128,11 @@ pub(crate) async fn wait_for_group_deleted( } } -/// Wait for a multicast group to be deleted from DPD (dataplane) with reconciler activation. +/// Wait for a multicast group to be removed from DPD (dataplane). /// -/// This function waits for the DPD to report that the multicast group no longer exists -/// (returns 404), while periodically activating the reconciler to drive the cleanup process. +/// Drives one reconciler activation, which runs `process_deleting_group_inner` +/// (DPD `remove_groups` by tag) for groups in "Deleting". Polling around the +/// DPD GET is only for read-after-write visibility. 
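+///
+/// Panics if DPD still reports the group once the pass and the poll
+/// timeout have elapsed.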
pub(crate) async fn wait_for_group_deleted_from_dpd( cptestctx: &ControlPlaneTestContext, multicast_ip: std::net::IpAddr, @@ -1110,17 +1140,17 @@ pub(crate) async fn wait_for_group_deleted_from_dpd( let lockstep_client = &cptestctx.lockstep_client; let dpd_client = nexus_test_utils::dpd_client(cptestctx); - match wait_for_condition_with_reconciler( - lockstep_client, - || async { - match dpd_client.multicast_group_get(&multicast_ip).await { - Ok(_) => { - // Group still exists in DPD - not yet deleted - Err(CondCheckError::<()>::NotYet) - } - Err(_) => Ok(()), // Group doesn't exist - deleted - } - }, + activate_multicast_reconciler(lockstep_client).await; + + let check = || async { + match dpd_client.multicast_group_get(&multicast_ip).await { + Ok(_) => Err(CondCheckError::<()>::NotYet), + Err(_) => Ok(()), + } + }; + + match wait_for_condition( + check, &POLL_INTERVAL, &MULTICAST_OPERATION_TIMEOUT, ) @@ -1129,7 +1159,8 @@ pub(crate) async fn wait_for_group_deleted_from_dpd( Ok(_) => {} Err(poll::Error::TimedOut(elapsed)) => { panic!( - "group with IP {multicast_ip} was not deleted from DPD within {elapsed:?}", + "group with IP {multicast_ip} was not deleted from DPD within \ + {elapsed:?} after one reconciler pass", ); } Err(poll::Error::PermanentError(err)) => { @@ -1347,6 +1378,183 @@ pub(crate) async fn cleanup_instances( ops::join_all(delete_futures).await; } +/// Wait until each listed member's stored `sled_id` matches the expected +/// post-migration sled. +/// +/// [`wait_for_member_state`] for "Joined" is satisfied as soon as the +/// member is in "Joined", which can happen with the *pre-migration* +/// `sled_id` still recorded if the reconciler has not yet re-observed +/// the new active VMM. +/// Tests that snapshot dataplane state immediately after migration must +/// wait until the DB row reflects the new placement. +/// +/// Drives one reconciler activation. The members reconciler detects the +/// `member.sled_id != live_vmm.sled_id` skew and runs `handle_sled_migration` +/// inline (`members.rs:704-713`), so the row is settled by the time this +/// returns. Polling around the read is only for read-after-write visibility. 
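+///
+/// Illustrative call shape (ids and sleds here are placeholders; the
+/// delete-preserves test passes its `expected_placement` array):
+///
+/// ```ignore
+/// wait_for_member_sled_ids(
+///     cptestctx,
+///     group_name,
+///     &[(instance_a_id, shared_sled), (instance_c_id, solo_sled)],
+/// )
+/// .await;
+/// ```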
+pub(crate) async fn wait_for_member_sled_ids( + cptestctx: &ControlPlaneTestContext, + group_name: &str, + expected: &[(Uuid, SledUuid)], +) { + let lockstep_client = &cptestctx.lockstep_client; + let nexus = &cptestctx.server.server_context().nexus; + let datastore = nexus.datastore(); + let opctx = + OpContext::for_tests(cptestctx.logctx.log.clone(), datastore.clone()); + + let group_id = { + let group = + get_multicast_group(&cptestctx.external_client, group_name).await; + group.identity.id + }; + + activate_multicast_reconciler(lockstep_client).await; + + let check = || async { + let members = datastore + .multicast_group_members_list( + &opctx, + MulticastGroupUuid::from_untyped_uuid(group_id), + &DataPageParams::max_page(), + ) + .await + .map_err(|e| { + CondCheckError::Failed(format!("list members failed: {e}")) + })?; + + for (instance_id, expected_sled) in expected { + let member = members + .iter() + .find(|m| m.parent_id == *instance_id) + .ok_or(CondCheckError::NotYet)?; + let sled_id = member.sled_id.ok_or(CondCheckError::NotYet)?; + if sled_id.into_untyped_uuid() != expected_sled.into_untyped_uuid() + { + return Err(CondCheckError::Failed(format!( + "member for instance {instance_id} reached sled_id \ + {sled_id:?} after one reconciler pass, expected \ + {expected_sled:?}" + ))); + } + } + Ok::<_, CondCheckError>(()) + }; + + wait_for_condition(check, &POLL_INTERVAL, &MULTICAST_OPERATION_TIMEOUT) + .await + .unwrap_or_else(|e| { + panic!( + "members in group {group_name} did not reach expected sled \ + assignments {expected:?}: {e:?}" + ) + }); +} + +/// Migrate an instance to a specific target sled. +/// +/// No-op if the instance is already on `target_sled`. Otherwise drives +/// the standard request-then-simulate-source-then-simulate-target sequence +/// used by other integration tests, returning when the instance has +/// reached `Running` on the target. +pub(crate) async fn migrate_instance_to( + cptestctx: &ControlPlaneTestContext, + instance_id: InstanceUuid, + target_sled: SledUuid, +) { + let client = &cptestctx.external_client; + let lockstep_client = &cptestctx.lockstep_client; + let nexus = &cptestctx.server.server_context().nexus; + + let info = nexus + .active_instance_info(&instance_id, None) + .await + .unwrap() + .expect("instance should be on a sled"); + if info.sled_id == target_sled { + return; + } + let source_sled = info.sled_id; + + let migrate_url = format!("/instances/{instance_id}/migrate"); + NexusRequest::new( + RequestBuilder::new(lockstep_client, Method::POST, &migrate_url) + .body(Some(&InstanceMigrateRequest { dst_sled_id: target_sled })) + .expect_status(Some(StatusCode::OK)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("should initiate migration"); + + let info = + nexus.active_instance_info(&instance_id, None).await.unwrap().unwrap(); + let src_propolis = info.propolis_id; + let dst_propolis = info.dst_propolis_id.unwrap(); + + instance_helpers::vmm_simulate_on_sled( + cptestctx, + nexus, + source_sled, + src_propolis, + ) + .await; + instance_helpers::instance_wait_for_state( + client, + instance_id, + InstanceState::Migrating, + ) + .await; + + instance_helpers::vmm_simulate_on_sled( + cptestctx, + nexus, + target_sled, + dst_propolis, + ) + .await; + instance_helpers::instance_wait_for_state( + client, + instance_id, + InstanceState::Running, + ) + .await; +} + +/// Resolve the underlay admin-local IPv6 address for a multicast group +/// given its external multicast IP. 
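+///
+/// Panics if the group has no underlay group recorded or if the underlay
+/// address is not an admin-local IPv6 address.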
+pub(crate) async fn fetch_underlay_admin_ip( + cptestctx: &ControlPlaneTestContext, + external_multicast_ip: IpAddr, +) -> dpd_client::types::UnderlayMulticastIpv6 { + let nexus = &cptestctx.server.server_context().nexus; + let datastore = nexus.datastore(); + let opctx = + OpContext::for_tests(cptestctx.logctx.log.clone(), datastore.clone()); + + let external_group = datastore + .multicast_group_lookup_by_ip(&opctx, external_multicast_ip) + .await + .expect("should look up external multicast group by IP"); + let underlay_group_id = external_group + .underlay_group_id + .expect("external group should have underlay_group_id"); + let underlay_group = datastore + .underlay_multicast_group_fetch(&opctx, underlay_group_id) + .await + .expect("should fetch underlay multicast group"); + + match underlay_group.multicast_ip.ip() { + IpAddr::V6(v6) => { + dpd_client::types::UnderlayMulticastIpv6::try_from(v6) + .expect("underlay IP should be admin-local IPv6") + } + IpAddr::V4(other) => { + panic!("expected IPv6 underlay address, got {other}") + } + } +} + /// Stop multiple instances, poking the simulated sled-agent while waiting. pub(crate) async fn stop_instances( cptestctx: &ControlPlaneTestContext, @@ -1533,3 +1741,121 @@ pub(crate) mod ops { tokio::join!(op1, op2, op3, op4) } } + +/// Assert that *every* mgd in the fixture has an MRIB route for `group_ip`. +/// +/// Iterates every switch zone present in `cptestctx.mgd`, so multi-switch +/// fixtures (`extra_sled_agents > 0`) catch a route that is programmed only +/// on a subset of switches. +pub(crate) async fn assert_mrib_route_exists( + cptestctx: &nexus_test_utils::ControlPlaneTestContext< + omicron_nexus::Server, + >, + group_ip: IpAddr, +) { + for_each_mgd(cptestctx, |slot, mgd_client| async move { + wait_for_condition::<_, (), _, _>( + || async { + let routes = mgd_client + .static_list_mcast_routes() + .await + .unwrap() + .into_inner(); + if routes + .iter() + .any(|r| mrib_route_matches_group(&r.key, group_ip)) + { + Ok(()) + } else { + Err(CondCheckError::NotYet) + } + }, + &POLL_INTERVAL, + &MULTICAST_OPERATION_TIMEOUT, + ) + .await + .unwrap_or_else(|e| { + panic!("mgd on {slot:?} never had a route for {group_ip}: {e:?}") + }); + }) + .await; +} + +/// Assert that *no* mgd in the fixture has an MRIB route for `group_ip`. +pub(crate) async fn assert_mrib_route_absent( + cptestctx: &nexus_test_utils::ControlPlaneTestContext< + omicron_nexus::Server, + >, + group_ip: IpAddr, +) { + for_each_mgd(cptestctx, |slot, mgd_client| async move { + wait_for_condition::<_, (), _, _>( + || async { + let routes = mgd_client + .static_list_mcast_routes() + .await + .unwrap() + .into_inner(); + if routes + .iter() + .any(|r| mrib_route_matches_group(&r.key, group_ip)) + { + Err(CondCheckError::NotYet) + } else { + Ok(()) + } + }, + &POLL_INTERVAL, + &MULTICAST_OPERATION_TIMEOUT, + ) + .await + .unwrap_or_else(|e| { + panic!("mgd on {slot:?} still had a route for {group_ip}: {e:?}") + }); + }) + .await; +} + +/// Run `f` against every mgd client in the fixture, in `SwitchSlot` order. 
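+///
+/// Panics if the fixture has no mgd instances, so a misconfigured fixture
+/// fails loudly instead of passing vacuously.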
+async fn for_each_mgd( + cptestctx: &nexus_test_utils::ControlPlaneTestContext< + omicron_nexus::Server, + >, + f: F, +) where + F: Fn( + sled_agent_types::early_networking::SwitchSlot, + mg_admin_client::Client, + ) -> Fut, + Fut: Future, +{ + assert!( + !cptestctx.mgd.is_empty(), + "multicast MRIB assertions require at least one mgd in the test \ + fixture", + ); + let switches: std::collections::BTreeMap<_, _> = + cptestctx.mgd.iter().collect(); + for (slot, mgd) in switches { + let mgd_client = mg_admin_client::Client::new( + &format!("http://[::1]:{}", mgd.port), + cptestctx.logctx.log.clone(), + ); + f(*slot, mgd_client).await; + } +} + +fn mrib_route_matches_group( + key: &mg_admin_client::types::MulticastRouteKey, + group_ip: IpAddr, +) -> bool { + match (key, group_ip) { + (mg_admin_client::types::MulticastRouteKey::V4(k), IpAddr::V4(ip)) => { + k.group == ip + } + (mg_admin_client::types::MulticastRouteKey::V6(k), IpAddr::V6(ip)) => { + k.group == ip + } + _ => false, + } +} diff --git a/nexus/tests/integration_tests/multicast/networking_integration.rs b/nexus/tests/integration_tests/multicast/networking_integration.rs index 3b28892ef82..218897f56b6 100644 --- a/nexus/tests/integration_tests/multicast/networking_integration.rs +++ b/nexus/tests/integration_tests/multicast/networking_integration.rs @@ -8,10 +8,14 @@ //! //! - External IPs: Instances with ephemeral/floating IPs can join multicast groups //! - Floating IP attach/detach: Multicast membership unaffected by IP changes +//! - Sled-agent M2P/forwarding propagation on member join and group deletion +//! - Per-VMM multicast subscriptions via sled-agent -use std::time::Duration; +use std::net::IpAddr; use http::{Method, StatusCode}; +use nexus_db_lookup::LookupPath; +use nexus_db_queries::context::OpContext; use nexus_test_utils::http_testing::{AuthnMode, NexusRequest, RequestBuilder}; use nexus_test_utils::resource_helpers::create_floating_ip; use nexus_test_utils::resource_helpers::{ @@ -30,6 +34,7 @@ use omicron_common::api::external::{ ByteCount, IdentityMetadataCreateParams, Instance, InstanceCpuCount, NameOrId, }; +use omicron_nexus::TestInterfaces; use omicron_test_utils::dev::poll::{CondCheckError, wait_for_condition}; use omicron_uuid_kinds::{GenericUuid, InstanceUuid}; @@ -109,8 +114,6 @@ async fn test_multicast_external_ip_scenarios( instance_wait_for_running_with_simulation(cptestctx, instance_uuid) .await; - wait_for_multicast_reconciler(&cptestctx.lockstep_client).await; - // Add instance to multicast group via instance-centric API multicast_group_attach( cptestctx, @@ -181,9 +184,6 @@ async fn test_multicast_external_ip_scenarios( ); object_delete(client, &external_ip_detach_url).await; - // Wait for operations to settle - wait_for_multicast_reconciler(&cptestctx.lockstep_client).await; - // Verify multicast membership is still intact after external IP removal let members_after_detach = list_multicast_group_members(client, group_name).await; @@ -255,8 +255,6 @@ async fn test_multicast_external_ip_scenarios( instance_wait_for_running_with_simulation(cptestctx, instance_uuid) .await; - wait_for_multicast_reconciler(&cptestctx.lockstep_client).await; - // Add instance to multicast group via instance-centric API multicast_group_attach( cptestctx, @@ -302,9 +300,6 @@ async fn test_multicast_external_ip_scenarios( .await .unwrap(); - // Wait for dataplane configuration to settle - wait_for_multicast_reconciler(&cptestctx.lockstep_client).await; - // Verify multicast state is preserved let members_with_ip = 
list_multicast_group_members(client, group_name).await; @@ -336,9 +331,6 @@ async fn test_multicast_external_ip_scenarios( ); object_delete(client, &external_ip_detach_url).await; - // Wait for operations to settle - wait_for_multicast_reconciler(&cptestctx.lockstep_client).await; - // Verify multicast state is still preserved let members_without_ip = list_multicast_group_members(client, group_name).await; @@ -418,8 +410,6 @@ async fn test_multicast_external_ip_scenarios( instance_wait_for_running_with_simulation(cptestctx, instance_uuid) .await; - wait_for_multicast_reconciler(&cptestctx.lockstep_client).await; - // Verify external IP was allocated at creation let external_ips_after_start = fetch_instance_external_ips(client, instance_name, project_name) @@ -537,7 +527,6 @@ async fn test_multicast_with_floating_ip_basic( let instance_id = instance.identity.id; let instance_uuid = InstanceUuid::from_untyped_uuid(instance_id); - wait_for_instance_sled_assignment(cptestctx, &instance_uuid).await; instance_wait_for_running_with_simulation(cptestctx, instance_uuid).await; // Ensure multicast test prerequisites (inventory + DPD) are ready @@ -546,24 +535,7 @@ async fn test_multicast_with_floating_ip_basic( // Add instance to multicast group via instance-centric API multicast_group_attach(cptestctx, project_name, instance_name, group_name) .await; - // Group activation is reconciler-driven; explicitly drive it to avoid flakes. - wait_for_condition_with_reconciler( - &cptestctx.lockstep_client, - || async { - let group = get_multicast_group(client, group_name).await; - if group.state == "Active" { - Ok(()) - } else { - Err(CondCheckError::::NotYet) - } - }, - &POLL_INTERVAL, - &MULTICAST_OPERATION_TIMEOUT, - ) - .await - .unwrap_or_else(|e| { - panic!("group {group_name} did not reach Active state in time: {e:?}") - }); + wait_for_group_active(client, group_name).await; // Wait for multicast member to reach "Joined" state wait_for_member_state( @@ -637,13 +609,13 @@ async fn test_multicast_with_floating_ip_basic( Err(CondCheckError::::NotYet) } }, - &Duration::from_millis(200), - &Duration::from_secs(30), + &POLL_INTERVAL, + &POLL_TIMEOUT, ) .await .unwrap_or_else(|e| { panic!( - "instance did not show floating IP {} as attached within 30s: {e:?}", + "instance did not show floating IP {} as attached within {POLL_TIMEOUT:?}: {e:?}", floating_ip.ip ) }); @@ -694,13 +666,13 @@ async fn test_multicast_with_floating_ip_basic( Err(CondCheckError::::NotYet) } }, - &Duration::from_millis(200), - &Duration::from_secs(30), + &POLL_INTERVAL, + &POLL_TIMEOUT, ) .await .unwrap_or_else(|e| { panic!( - "instance still showed floating IP {} as attached after 30s: {e:?}", + "instance still showed floating IP {} as attached after {POLL_TIMEOUT:?}: {e:?}", floating_ip.ip ) }); @@ -713,3 +685,995 @@ async fn test_multicast_with_floating_ip_basic( cleanup_instances(cptestctx, client, project_name, &[instance_name]).await; wait_for_group_deleted(cptestctx, group_name).await; } + +/// Verify that when an instance joins a multicast group, the reconciler +/// pushes M2P mappings, forwarding entries, and per-VMM subscriptions +/// to the sim sled-agent. Also verify cleanup on instance deletion. 
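+///
+/// Steps: create and start an instance, attach it to a group, verify
+/// M2P, forwarding, and the per-VMM subscription on the sim sled-agent,
+/// stop the instance (member goes to "Left"; M2P and forwarding are
+/// cleared), then delete it (group and MRIB route are torn down).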
+#[nexus_test] +async fn test_multicast_sled_agent_m2p_and_subscriptions( + cptestctx: &nexus_test_utils::ControlPlaneTestContext< + omicron_nexus::Server, + >, +) { + let client = &cptestctx.external_client; + let project_name = "sled-agent-mcast-project"; + let group_name = "sled-agent-mcast-group"; + let instance_name = "sled-agent-mcast-instance"; + + ops::join3( + create_project(client, project_name), + create_default_ip_pools(client), + create_multicast_ip_pool_with_range( + client, + "sled-agent-mcast-pool", + (224, 150, 0, 1), + (224, 150, 0, 255), + ), + ) + .await; + + ensure_multicast_test_ready(cptestctx).await; + + // Create and start an instance. + let instance_params = InstanceCreate { + identity: IdentityMetadataCreateParams { + name: instance_name.parse().unwrap(), + description: "Instance for sled-agent multicast test".to_string(), + }, + ncpus: InstanceCpuCount::try_from(1).unwrap(), + memory: ByteCount::from_gibibytes_u32(1), + hostname: instance_name.parse().unwrap(), + user_data: vec![], + ssh_public_keys: None, + network_interfaces: InstanceNetworkInterfaceAttachment::DefaultIpv4, + external_ips: vec![], + multicast_groups: vec![], + disks: vec![], + boot_disk: None, + cpu_platform: None, + start: true, + auto_restart_policy: Default::default(), + anti_affinity_groups: Vec::new(), + }; + + let instance_url = format!("/v1/instances?project={project_name}"); + let instance: Instance = + object_create(client, &instance_url, &instance_params).await; + let instance_id = InstanceUuid::from_untyped_uuid(instance.identity.id); + + instance_wait_for_running_with_simulation(cptestctx, instance_id).await; + // Attach instance to a multicast group. + multicast_group_attach(cptestctx, project_name, instance_name, group_name) + .await; + wait_for_group_active(client, group_name).await; + + // "Joined" convergence drives the DDM primary path via the + // `populate_ddm_peers` precondition baked into `wait_for_member_state`. + wait_for_member_state( + cptestctx, + group_name, + instance.identity.id, + nexus_db_model::MulticastGroupMemberState::Joined, + ) + .await; + + // Look up the underlay multicast IPv6 address for verification. + let nexus = &cptestctx.server.server_context().nexus; + let datastore = nexus.datastore(); + let opctx = + OpContext::for_tests(cptestctx.logctx.log.clone(), datastore.clone()); + + let group_view = get_multicast_group(client, group_name).await; + let multicast_ip = group_view.multicast_ip; + + let external_group = datastore + .multicast_group_lookup_by_ip(&opctx, multicast_ip) + .await + .expect("Should look up multicast group by IP"); + + let underlay_group_id = external_group + .underlay_group_id + .expect("Active group should have underlay_group_id"); + + let underlay_group = datastore + .underlay_multicast_group_fetch(&opctx, underlay_group_id) + .await + .expect("Should fetch underlay multicast group"); + + let underlay_ipv6 = match underlay_group.multicast_ip.ip() { + IpAddr::V6(v6) => v6, + other => panic!("Expected IPv6 underlay address, got {other}"), + }; + + // Verify MRIB route was programmed on mgd. + assert_mrib_route_exists(cptestctx, multicast_ip).await; + + // Verify M2P mapping on the sim sled-agent. + let sled_agent = cptestctx.first_sled_agent(); + { + let m2p = sled_agent.m2p_mappings.lock().unwrap(); + assert!( + m2p.contains(&(multicast_ip, underlay_ipv6)), + "Sled-agent should have M2P mapping ({multicast_ip}, \ + {underlay_ipv6}), got: {m2p:?}" + ); + } + + // Verify forwarding entries on the sim sled-agent. 
+ // The forwarding entry points at a switch for replication. + { + let fwd = sled_agent.mcast_fwd.lock().unwrap(); + assert!( + fwd.contains_key(&underlay_ipv6), + "Sled-agent should have forwarding entry for {underlay_ipv6}, \ + got: {fwd:?}" + ); + let next_hops = &fwd[&underlay_ipv6]; + assert_eq!( + next_hops.len(), + 1, + "Should have 1 next_hop (a switch), got: {next_hops:?}" + ); + } + + // Verify per-VMM multicast subscription on the sim sled-agent. + { + let groups = sled_agent.instance_multicast_groups.lock().unwrap(); + let instance_groups = groups + .get(&instance_id) + .expect("Sled-agent should have multicast groups for instance"); + + assert!( + instance_groups.iter().any(|m| m.group_ip == multicast_ip), + "Instance should be subscribed to multicast group \ + {multicast_ip}, got: {instance_groups:?}" + ); + } + + // Stop the instance. The member transitions "Joined" -> "Left". + let stop_url = + format!("/v1/instances/{instance_name}/stop?project={project_name}"); + NexusRequest::new( + RequestBuilder::new(client, Method::POST, &stop_url) + .body(None as Option<&serde_json::Value>) + .expect_status(Some(StatusCode::ACCEPTED)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("Should stop instance"); + + wait_for_instance_stopped(cptestctx, client, instance_id, instance_name) + .await; + + wait_for_member_state( + cptestctx, + group_name, + instance.identity.id, + nexus_db_model::MulticastGroupMemberState::Left, + ) + .await; + + // Per-VMM subscription cleanup after stop is not asserted here. + // In production, destroying the VMM tears down the OPTE port, which + // implicitly removes multicast subscriptions. The reconciler's + // unsubscribe path correctly skips when the propolis_id is gone + // (matching production semantics where the port no longer exists). + // + // V2P follows the same pattern: sled-agent cleanup is keyed by + // network identity, not VMM identity. + + // M2P and forwarding should be cleared since there are no "Joined" + // members remaining. + activate_then_wait_for_condition( + &cptestctx.lockstep_client, + || async { + let m2p = sled_agent.m2p_mappings.lock().unwrap(); + if !m2p.contains(&(multicast_ip, underlay_ipv6)) { + Ok(()) + } else { + Err(CondCheckError::<()>::NotYet) + } + }, + &POLL_INTERVAL, + &MULTICAST_OPERATION_TIMEOUT, + ) + .await + .expect("M2P should be cleared when no Joined members remain"); + + // Forwarding should also be cleared when no "Joined" members remain. + activate_then_wait_for_condition( + &cptestctx.lockstep_client, + || async { + let fwd = sled_agent.mcast_fwd.lock().unwrap(); + if !fwd.contains_key(&underlay_ipv6) { + Ok(()) + } else { + Err(CondCheckError::<()>::NotYet) + } + }, + &POLL_INTERVAL, + &MULTICAST_OPERATION_TIMEOUT, + ) + .await + .expect("Forwarding should be cleared when no Joined members remain"); + + // Delete the instance, which should trigger group deletion. + cleanup_instances(cptestctx, client, project_name, &[instance_name]).await; + wait_for_group_deleted(cptestctx, group_name).await; + + // Verify M2P and forwarding are cleared. + { + let m2p = sled_agent.m2p_mappings.lock().unwrap(); + assert!( + !m2p.contains(&(multicast_ip, underlay_ipv6)), + "M2P mapping should be cleared after group deletion, got: {m2p:?}" + ); + } + { + let fwd = sled_agent.mcast_fwd.lock().unwrap(); + assert!( + !fwd.contains_key(&underlay_ipv6), + "Forwarding entry should be cleared after group deletion, \ + got: {fwd:?}" + ); + } + + // Verify MRIB route was withdrawn after group deletion. 
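+    // (checked on every switch's mgd, mirroring the existence check above)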
+ assert_mrib_route_absent(cptestctx, multicast_ip).await; +} + +/// Verify M2P and forwarding entries propagate to all sleds, not just the +/// hosting sled. Analogous to `test_instance_v2p_mappings` which verifies +/// V2P mappings on all sleds. +/// +/// Also verifies cleanup: after instance deletion, M2P and forwarding +/// entries are removed from every sled. +#[nexus_test(extra_sled_agents = 1)] +async fn test_multicast_multi_sled_m2p_propagation( + cptestctx: &ControlPlaneTestContext, +) { + let client = &cptestctx.external_client; + let nexus = &cptestctx.server.server_context().nexus; + let project_name = "multi-sled-mcast-project"; + let group_name = "multi-sled-mcast-group"; + let instance_name = "multi-sled-mcast-instance"; + + ops::join3( + create_project(client, project_name), + create_default_ip_pools(client), + create_multicast_ip_pool_with_range( + client, + "multi-sled-mcast-pool", + (224, 160, 0, 1), + (224, 160, 0, 255), + ), + ) + .await; + + ensure_multicast_test_ready(cptestctx).await; + + // Collect all sled agents (2 total: 1 default + 1 extra). + // We use extra_sled_agents = 1 (not 2) because the gateway sim only + // provides SP data for the two well-known sled UUIDs. A 3rd sled with + // a random UUID would have no SP entry, causing inventory readiness + // to time out. Two sleds is sufficient to verify cross-sled propagation. + let all_sled_agents: Vec<_> = + cptestctx.sled_agents.iter().map(|sa| sa.sled_agent()).collect(); + assert_eq!(all_sled_agents.len(), 2, "expected 2 sled agents"); + + // Create and start an instance. + let instance = instance_for_multicast_groups( + cptestctx, + project_name, + instance_name, + true, + &[], + ) + .await; + let instance_id = InstanceUuid::from_untyped_uuid(instance.identity.id); + + instance_wait_for_running_with_simulation(cptestctx, instance_id).await; + // Attach to a multicast group. + multicast_group_attach(cptestctx, project_name, instance_name, group_name) + .await; + wait_for_group_active(client, group_name).await; + + wait_for_member_state( + cptestctx, + group_name, + instance.identity.id, + nexus_db_model::MulticastGroupMemberState::Joined, + ) + .await; + + // Look up the underlay IPv6 address for verification. + let datastore = nexus.datastore(); + let opctx = + OpContext::for_tests(cptestctx.logctx.log.clone(), datastore.clone()); + + let group_view = get_multicast_group(client, group_name).await; + let multicast_ip = group_view.multicast_ip; + + let external_group = datastore + .multicast_group_lookup_by_ip(&opctx, multicast_ip) + .await + .expect("Should look up multicast group by IP"); + + let underlay_group_id = external_group + .underlay_group_id + .expect("Active group should have underlay_group_id"); + + let underlay_group = datastore + .underlay_multicast_group_fetch(&opctx, underlay_group_id) + .await + .expect("Should fetch underlay multicast group"); + + let underlay_ipv6 = match underlay_group.multicast_ip.ip() { + IpAddr::V6(v6) => v6, + other => panic!("Expected IPv6 underlay address, got {other}"), + }; + + // Look up the hosting sled for subscription verification. + let info = nexus + .active_instance_info(&instance_id, None) + .await + .unwrap() + .expect("Running instance should have active info"); + + let hosting_sled_id = info.sled_id; + + // Verify MRIB route was programmed. + assert_mrib_route_exists(cptestctx, multicast_ip).await; + + // M2P and forwarding are pushed to all sleds (like V2P). 
Any
+    // instance on any sled may send to a multicast group; without the
+    // M2P mapping OPTE's overlay layer silently drops the packet.
+    // Forwarding entries let sender sleds replicate to member sleds.
+    for (i, sled_agent) in cptestctx.sled_agents.iter().enumerate() {
+        let agent = sled_agent.sled_agent();
+
+        // Wait for M2P on every sled. The reconciler may need an
+        // additional pass after the member reaches "Joined": during
+        // reconcile_member_states, propagate_m2p_and_forwarding may
+        // see member_sleds=0 (member still "Joining" in DB), so the
+        // actual push happens in reconcile_active_groups or the next
+        // full pass.
+        activate_then_wait_for_condition(
+            &cptestctx.lockstep_client,
+            || async {
+                let m2p = agent.m2p_mappings.lock().unwrap();
+                if m2p.contains(&(multicast_ip, underlay_ipv6)) {
+                    Ok(())
+                } else {
+                    Err(CondCheckError::NotYet::<()>)
+                }
+            },
+            &POLL_INTERVAL,
+            &MULTICAST_OPERATION_TIMEOUT,
+        )
+        .await
+        .unwrap_or_else(|e| {
+            panic!("Sled {i} should have M2P mapping within timeout: {e:?}")
+        });
+
+        // Verify forwarding on every sled. A sender on any sled needs a
+        // replication next hop for the group, so the entry must exist
+        // everywhere, member-hosting or not; the assertion below checks
+        // its shape.
+        activate_then_wait_for_condition(
+            &cptestctx.lockstep_client,
+            || async {
+                let fwd = agent.mcast_fwd.lock().unwrap();
+                if fwd.contains_key(&underlay_ipv6) {
+                    Ok(())
+                } else {
+                    Err(CondCheckError::NotYet::<()>)
+                }
+            },
+            &POLL_INTERVAL,
+            &MULTICAST_OPERATION_TIMEOUT,
+        )
+        .await
+        .unwrap_or_else(|e| {
+            panic!(
+                "Sled {i} should have forwarding entry within timeout: {e:?}"
+            )
+        });
+
+        let fwd = agent.mcast_fwd.lock().unwrap();
+        let next_hops = &fwd[&underlay_ipv6];
+        // Every sled gets a single next hop pointing at a switch.
+        // The switch replicates to member sled ports via DPD config.
+        assert_eq!(
+            next_hops.len(),
+            1,
+            "Sled {i} should have 1 next_hop (a switch), \
+             got: {next_hops:?}"
+        );
+    }
+
+    // Verify per-VMM subscription on the hosting sled only.
+    // Subscriptions are member-sled-only (not all sleds).
+    let hosting_agent = cptestctx
+        .sled_agents
+        .iter()
+        .find(|sa| sa.sled_agent_id() == hosting_sled_id)
+        .unwrap()
+        .sled_agent();
+
+    activate_then_wait_for_condition(
+        &cptestctx.lockstep_client,
+        || async {
+            let groups =
+                hosting_agent.instance_multicast_groups.lock().unwrap();
+            match groups.get(&instance_id) {
+                Some(instance_groups)
+                    if instance_groups
+                        .iter()
+                        .any(|m| m.group_ip == multicast_ip) =>
+                {
+                    Ok(())
+                }
+                _ => Err(CondCheckError::NotYet::<()>),
+            }
+        },
+        &POLL_INTERVAL,
+        &MULTICAST_OPERATION_TIMEOUT,
+    )
+    .await
+    .unwrap_or_else(|e| {
+        panic!(
+            "VMM should be subscribed to {multicast_ip} within timeout: {e:?}"
+        )
+    });
+
+    // Delete the instance, which triggers group deletion.
+    cleanup_instances(cptestctx, client, project_name, &[instance_name]).await;
+    wait_for_group_deleted(cptestctx, group_name).await;
+
+    // Verify MRIB route removed after group deletion.
+    assert_mrib_route_absent(cptestctx, multicast_ip).await;
+
+    // Verify cleanup on every sled: M2P and forwarding removed.
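+    // Both maps are checked in one condition, so a sled that clears only
+    // one of them still fails the poll below.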
+ for (i, sled_agent) in all_sled_agents.iter().enumerate() { + activate_then_wait_for_condition( + &cptestctx.lockstep_client, + || async { + let m2p = sled_agent.m2p_mappings.lock().unwrap(); + let fwd = sled_agent.mcast_fwd.lock().unwrap(); + if !m2p.contains(&(multicast_ip, underlay_ipv6)) + && !fwd.contains_key(&underlay_ipv6) + { + Ok(()) + } else { + Err(CondCheckError::NotYet::<()>) + } + }, + &POLL_INTERVAL, + &MULTICAST_OPERATION_TIMEOUT, + ) + .await + .unwrap_or_else(|e| { + panic!( + "Sled {i} M2P/forwarding not cleaned up within timeout: {e:?}" + ) + }); + } +} + +/// Verify cross-sled forwarding when members exist on both sleds. +/// +/// With one member on sled A and another on sled B, each sled's forwarding +/// entry should list the other sled as its sole next hop (self-exclusion). +/// This exercises the `.filter(|(id, _)| *id != sled_id)` logic in +/// `converge_forwarding`. +#[nexus_test(extra_sled_agents = 1)] +async fn test_multicast_cross_sled_forwarding( + cptestctx: &ControlPlaneTestContext, +) { + let client = &cptestctx.external_client; + let nexus = &cptestctx.server.server_context().nexus; + let datastore = nexus.datastore(); + let opctx = + OpContext::for_tests(cptestctx.logctx.log.clone(), datastore.clone()); + let project_name = "bidir-fwd-project"; + let group_name = "bidir-fwd-group"; + let instance_a_name = "bidir-instance-a"; + let instance_b_name = "bidir-instance-b"; + + ops::join3( + create_project(client, project_name), + create_default_ip_pools(client), + create_multicast_ip_pool_with_range( + client, + "bidir-fwd-pool", + (224, 170, 0, 1), + (224, 170, 0, 255), + ), + ) + .await; + + ensure_multicast_test_ready(cptestctx).await; + + let sled_a_id = cptestctx.sled_agents[0].sled_agent_id(); + let sled_b_id = cptestctx.sled_agents[1].sled_agent_id(); + + // Pin instance A to sled A by making sled B non-provisionable. + { + let (authz_sled, ..) = LookupPath::new(&opctx, datastore) + .sled_id(sled_b_id) + .lookup_for(nexus_auth::authz::Action::Modify) + .await + .expect("lookup sled B"); + datastore + .sled_set_provision_policy( + &opctx, + &authz_sled, + nexus_types::external_api::sled::SledProvisionPolicy::NonProvisionable, + ) + .await + .expect("set sled B non-provisionable"); + } + + let instance_a = instance_for_multicast_groups( + cptestctx, + project_name, + instance_a_name, + true, + &[], + ) + .await; + let instance_a_id = InstanceUuid::from_untyped_uuid(instance_a.identity.id); + instance_wait_for_running_with_simulation(cptestctx, instance_a_id).await; + + // Verify instance A landed on sled A. + let info_a = nexus + .active_instance_info(&instance_a_id, None) + .await + .unwrap() + .expect("instance A should be running"); + assert_eq!(info_a.sled_id, sled_a_id, "instance A should be on sled A"); + + // Swap provisionability: sled A non-provisionable, sled B provisionable. + { + let (authz_sled_a, ..) = LookupPath::new(&opctx, datastore) + .sled_id(sled_a_id) + .lookup_for(nexus_auth::authz::Action::Modify) + .await + .expect("lookup sled A"); + let (authz_sled_b, ..) 
= LookupPath::new(&opctx, datastore) + .sled_id(sled_b_id) + .lookup_for(nexus_auth::authz::Action::Modify) + .await + .expect("lookup sled B"); + datastore + .sled_set_provision_policy( + &opctx, + &authz_sled_a, + nexus_types::external_api::sled::SledProvisionPolicy::NonProvisionable, + ) + .await + .expect("set sled A non-provisionable"); + datastore + .sled_set_provision_policy( + &opctx, + &authz_sled_b, + nexus_types::external_api::sled::SledProvisionPolicy::Provisionable, + ) + .await + .expect("set sled B provisionable"); + } + + let instance_b = instance_for_multicast_groups( + cptestctx, + project_name, + instance_b_name, + true, + &[], + ) + .await; + + let instance_b_id = InstanceUuid::from_untyped_uuid(instance_b.identity.id); + instance_wait_for_running_with_simulation(cptestctx, instance_b_id).await; + + // Verify instance B landed on sled B. + let info_b = nexus + .active_instance_info(&instance_b_id, None) + .await + .unwrap() + .expect("instance B should be running"); + + assert_eq!(info_b.sled_id, sled_b_id, "instance B should be on sled B"); + + // Both instances join the same multicast group. + multicast_group_attach( + cptestctx, + project_name, + instance_a_name, + group_name, + ) + .await; + + multicast_group_attach( + cptestctx, + project_name, + instance_b_name, + group_name, + ) + .await; + + wait_for_group_active(client, group_name).await; + + // Wait for both members to reach "Joined". + for instance in [&instance_a, &instance_b] { + wait_for_member_state( + cptestctx, + group_name, + instance.identity.id, + nexus_db_model::MulticastGroupMemberState::Joined, + ) + .await; + } + + // Resolve underlay IPv6 for forwarding assertions. + let group_view = get_multicast_group(client, group_name).await; + let external_group = datastore + .multicast_group_lookup_by_ip(&opctx, group_view.multicast_ip) + .await + .expect("lookup group by IP"); + + let underlay_group = datastore + .underlay_multicast_group_fetch( + &opctx, + external_group + .underlay_group_id + .expect("active group should have underlay_group_id"), + ) + .await + .expect("fetch underlay group"); + + let underlay_ipv6 = match underlay_group.multicast_ip.ip() { + IpAddr::V6(v6) => v6, + other => panic!("Expected IPv6 underlay address, got {other}"), + }; + + // Verify MRIB route was programmed for the group. + assert_mrib_route_exists(cptestctx, group_view.multicast_ip).await; + + // Wait for forwarding entries on both sleds, then verify each sled's + // forwarding lists exactly the other sled (not itself). + let agent_a = cptestctx.sled_agents[0].sled_agent(); + let agent_b = cptestctx.sled_agents[1].sled_agent(); + + for (label, agent) in [("sled A", &agent_a), ("sled B", &agent_b)] { + activate_then_wait_for_condition( + &cptestctx.lockstep_client, + || async { + let fwd = agent.mcast_fwd.lock().unwrap(); + match fwd.get(&underlay_ipv6) { + Some(hops) if hops.len() == 1 => Ok(()), + _ => Err(CondCheckError::NotYet::<()>), + } + }, + &POLL_INTERVAL, + &MULTICAST_OPERATION_TIMEOUT, + ) + .await + .unwrap_or_else(|e| { + panic!("{label} should have exactly 1 forwarding next_hop: {e:?}") + }); + } + + // Cleanup. + cleanup_instances( + cptestctx, + client, + project_name, + &[instance_a_name, instance_b_name], + ) + .await; + wait_for_group_deleted(cptestctx, group_name).await; + + // Verify MRIB route removed after group deletion. + assert_mrib_route_absent(cptestctx, group_view.multicast_ip).await; +} + +/// Verify multicast state is re-established after simulated cold start. 
+/// Analogous to `test_instance_start_creates_networking_state` which tests +/// V2P re-establishment after forcibly clearing sled-agent state. +/// +/// Steps: a) create instance, b) join multicast, c) stop instance, +/// d) forcibly clear all sim sled-agent multicast state, e) restart +/// instance, f) verify M2P, forwarding, and per-VMM subscriptions are +/// re-established. +#[nexus_test(extra_sled_agents = 1)] +async fn test_multicast_cold_start_reestablishment( + cptestctx: &ControlPlaneTestContext, +) { + let client = &cptestctx.external_client; + let nexus = &cptestctx.server.server_context().nexus; + let project_name = "cold-start-mcast-project"; + let group_name = "cold-start-mcast-group"; + let instance_name = "cold-start-mcast-instance"; + + ops::join3( + create_project(client, project_name), + create_default_ip_pools(client), + create_multicast_ip_pool_with_range( + client, + "cold-start-mcast-pool", + (224, 170, 0, 1), + (224, 170, 0, 255), + ), + ) + .await; + + ensure_multicast_test_ready(cptestctx).await; + + let all_sled_agents: Vec<_> = + cptestctx.sled_agents.iter().map(|sa| sa.sled_agent()).collect(); + + // Create and start an instance, join a multicast group. + let instance = instance_for_multicast_groups( + cptestctx, + project_name, + instance_name, + true, + &[], + ) + .await; + let instance_id = InstanceUuid::from_untyped_uuid(instance.identity.id); + + instance_wait_for_running_with_simulation(cptestctx, instance_id).await; + multicast_group_attach(cptestctx, project_name, instance_name, group_name) + .await; + wait_for_group_active(client, group_name).await; + + wait_for_member_state( + cptestctx, + group_name, + instance.identity.id, + nexus_db_model::MulticastGroupMemberState::Joined, + ) + .await; + + // Look up the underlay IPv6. + let datastore = nexus.datastore(); + let opctx = + OpContext::for_tests(cptestctx.logctx.log.clone(), datastore.clone()); + + let group_view = get_multicast_group(client, group_name).await; + let multicast_ip = group_view.multicast_ip; + + let external_group = datastore + .multicast_group_lookup_by_ip(&opctx, multicast_ip) + .await + .expect("Should look up multicast group by IP"); + + let underlay_group_id = external_group + .underlay_group_id + .expect("Active group should have underlay_group_id"); + + let underlay_group = datastore + .underlay_multicast_group_fetch(&opctx, underlay_group_id) + .await + .expect("Should fetch underlay multicast group"); + + let underlay_ipv6 = match underlay_group.multicast_ip.ip() { + IpAddr::V6(v6) => v6, + other => panic!("Expected IPv6 underlay address, got {other}"), + }; + + // Verify MRIB route was programmed. + assert_mrib_route_exists(cptestctx, multicast_ip).await; + + // M2P and forwarding are pushed to all sleds. Verify at least the + // hosting sled has M2P before we clear state. + let pre_info = nexus + .active_instance_info(&instance_id, None) + .await + .unwrap() + .expect("Running instance should have active info"); + + let pre_hosting_agent = cptestctx + .sled_agents + .iter() + .find(|sa| sa.sled_agent_id() == pre_info.sled_id) + .unwrap() + .sled_agent(); + + activate_then_wait_for_condition( + &cptestctx.lockstep_client, + || async { + let m2p = pre_hosting_agent.m2p_mappings.lock().unwrap(); + if m2p.contains(&(multicast_ip, underlay_ipv6)) { + Ok(()) + } else { + Err(CondCheckError::NotYet::<()>) + } + }, + &POLL_INTERVAL, + &MULTICAST_OPERATION_TIMEOUT, + ) + .await + .expect("Hosting sled M2P should exist before cold start simulation"); + + // Stop the instance. 
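A note on the polling idiom used just above (and throughout these tests): `activate_then_wait_for_condition` kicks the relevant background task through the lockstep client and then polls a predicate until it holds or times out. A minimal sketch of the pattern, assuming omicron's `wait_for_condition` test helper; the `activate_then_wait` wrapper and its activation step are illustrative, not the real helper:

    use omicron_test_utils::dev::poll::{wait_for_condition, CondCheckError};
    use std::future::Future;
    use std::time::Duration;

    // Sketch: poke the background task (elided), then poll until the
    // predicate returns Ok(()) or the overall timeout expires.
    async fn activate_then_wait<F, Fut>(check: F)
    where
        F: Fn() -> Fut,
        Fut: Future<Output = Result<(), CondCheckError<()>>>,
    {
        // ... activate the reconciler via the lockstep client here ...
        wait_for_condition(
            check,
            &Duration::from_millis(50), // poll interval (illustrative)
            &Duration::from_secs(60),   // overall timeout (illustrative)
        )
        .await
        .expect("condition should hold before the timeout");
    }

The stop request itself follows.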
+ let stop_url = + format!("/v1/instances/{instance_name}/stop?project={project_name}"); + NexusRequest::new( + RequestBuilder::new(client, Method::POST, &stop_url) + .body(None as Option<&serde_json::Value>) + .expect_status(Some(StatusCode::ACCEPTED)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("Should stop instance"); + + wait_for_instance_stopped(cptestctx, client, instance_id, instance_name) + .await; + + wait_for_member_state( + cptestctx, + group_name, + instance.identity.id, + nexus_db_model::MulticastGroupMemberState::Left, + ) + .await; + + // Forcibly clear all sim sled-agent multicast state, simulating a cold + // start where sled-agents lose in-memory state. + for sled_agent in &all_sled_agents { + sled_agent.m2p_mappings.lock().unwrap().clear(); + sled_agent.mcast_fwd.lock().unwrap().clear(); + sled_agent.instance_multicast_groups.lock().unwrap().clear(); + } + + // Restart the instance. + let start_url = + format!("/v1/instances/{instance_name}/start?project={project_name}"); + NexusRequest::new( + RequestBuilder::new(client, Method::POST, &start_url) + .body(None as Option<&serde_json::Value>) + .expect_status(Some(StatusCode::ACCEPTED)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("Should start instance"); + + // Use `try_instance_simulate` here instead of `instance_wait_for_running_with_simulation` + // because the old VMM may still be draining from the sim collection after + // the stop. `instance_simulate` would panic if it pokes a VMM that was just + // removed; `try_instance_simulate` handles that gracefully. + wait_for_condition( + || async { + let _ = + instance_helpers::try_instance_simulate(nexus, &instance_id) + .await; + + let url = format!("/v1/instances/{instance_id}"); + let instance: Instance = NexusRequest::object_get(client, &url) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .map_err(|_| CondCheckError::<()>::NotYet)? + .parsed_body() + .map_err(|_| CondCheckError::<()>::NotYet)?; + + if instance.runtime.run_state == InstanceState::Running { + Ok(()) + } else { + Err(CondCheckError::<()>::NotYet) + } + }, + &POLL_INTERVAL, + &MULTICAST_OPERATION_TIMEOUT, + ) + .await + .expect("Instance should reach Running after restart"); + + // Wait for the reconciler to re-establish multicast state. + wait_for_member_state( + cptestctx, + group_name, + instance.identity.id, + nexus_db_model::MulticastGroupMemberState::Joined, + ) + .await; + + // Verify MRIB route re-established after cold start. + assert_mrib_route_exists(cptestctx, multicast_ip).await; + + // Verify M2P and forwarding re-established on all sleds. 
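These assertions read the simulated sled-agent's multicast state directly. A sketch of the shapes the assertions imply; the field and type names here are assumptions inferred from `.contains(&(ip, v6))`, `.contains_key(..)`, and `hops.len()` above, with `NextHop` standing in for the real next-hop type:

    use std::collections::{HashMap, HashSet};
    use std::net::{IpAddr, Ipv6Addr};

    struct NextHop; // stand-in for the forwarding next-hop type
    struct SimMcastState {
        // (overlay group IP, underlay IPv6) pairs pushed to each sled.
        m2p: HashSet<(IpAddr, Ipv6Addr)>,
        // underlay IPv6 -> replication next hops for that group.
        fwd: HashMap<Ipv6Addr, Vec<NextHop>>,
    }

The loop below checks that every sled regained both pieces of state after the restart.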
+ for (i, sled_agent) in all_sled_agents.iter().enumerate() { + activate_then_wait_for_condition( + &cptestctx.lockstep_client, + || async { + let m2p = sled_agent.m2p_mappings.lock().unwrap(); + if m2p.contains(&(multicast_ip, underlay_ipv6)) { + Ok(()) + } else { + Err(CondCheckError::NotYet::<()>) + } + }, + &POLL_INTERVAL, + &MULTICAST_OPERATION_TIMEOUT, + ) + .await + .unwrap_or_else(|e| { + panic!("Sled {i} M2P not re-established within timeout: {e:?}") + }); + + activate_then_wait_for_condition( + &cptestctx.lockstep_client, + || async { + let fwd = sled_agent.mcast_fwd.lock().unwrap(); + if fwd.contains_key(&underlay_ipv6) { + Ok(()) + } else { + Err(CondCheckError::NotYet::<()>) + } + }, + &POLL_INTERVAL, + &MULTICAST_OPERATION_TIMEOUT, + ) + .await + .unwrap_or_else(|e| { + panic!( + "Sled {i} forwarding not re-established within timeout: {e:?}" + ) + }); + } + + // Verify per-VMM subscription on the hosting sled (new propolis_id + // since restart creates a new VMM). + let post_info = nexus + .active_instance_info(&instance_id, None) + .await + .unwrap() + .expect("Restarted instance should have active info"); + + let post_hosting_agent = cptestctx + .sled_agents + .iter() + .find(|sa| sa.sled_agent_id() == post_info.sled_id) + .unwrap() + .sled_agent(); + + activate_then_wait_for_condition( + &cptestctx.lockstep_client, + || async { + let groups = + post_hosting_agent.instance_multicast_groups.lock().unwrap(); + match groups.get(&instance_id) { + Some(instance_groups) + if instance_groups + .iter() + .any(|m| m.group_ip == multicast_ip) => + { + Ok(()) + } + _ => Err(CondCheckError::NotYet::<()>), + } + }, + &POLL_INTERVAL, + &MULTICAST_OPERATION_TIMEOUT, + ) + .await + .unwrap_or_else(|e| { + panic!( + "Instance should be subscribed to {multicast_ip} after restart: \ + {e:?}" + ) + }); + + // Cleanup. + cleanup_instances(cptestctx, client, project_name, &[instance_name]).await; + wait_for_group_deleted(cptestctx, group_name).await; + + // Verify MRIB route removed after group deletion. + assert_mrib_route_absent(cptestctx, multicast_ip).await; +} diff --git a/nexus/types/src/deployment/execution/dns.rs b/nexus/types/src/deployment/execution/dns.rs index 009377fd8d9..3730576eda2 100644 --- a/nexus/types/src/deployment/execution/dns.rs +++ b/nexus/types/src/deployment/execution/dns.rs @@ -158,6 +158,7 @@ pub fn blueprint_internal_dns_config( overrides.dendrite_port(scrimlet.id()), overrides.mgs_port(scrimlet.id()), overrides.mgd_port(scrimlet.id()), + overrides.ddm_port(scrimlet.id()), )?; } diff --git a/nexus/types/src/deployment/execution/overridables.rs b/nexus/types/src/deployment/execution/overridables.rs index 881a7c49bdd..7dc3ae0bf4d 100644 --- a/nexus/types/src/deployment/execution/overridables.rs +++ b/nexus/types/src/deployment/execution/overridables.rs @@ -2,6 +2,7 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. 
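The `Overridables` additions below extend the existing Dendrite/MGS/MGD pattern to DDM: an explicit per-sled override wins, otherwise the well-known `DDMD_PORT` constant applies. A hedged usage sketch, with `Overridables` as defined in the hunk below; it assumes the struct implements `Default` and that the `SledUuid`s are in scope, which is illustrative rather than taken from this change:

    use omicron_common::address::DDMD_PORT;
    use omicron_uuid_kinds::SledUuid;

    // `Overridables` as defined in the hunk below.
    fn demo(mut overrides: Overridables, sled_a: SledUuid, sled_b: SledUuid) {
        overrides.override_ddm_port(sled_a, 12345);
        assert_eq!(overrides.ddm_port(sled_a), 12345); // explicit override wins
        assert_eq!(overrides.ddm_port(sled_b), DDMD_PORT); // fallback constant
    }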
+use omicron_common::address::DDMD_PORT;
 use omicron_common::address::DENDRITE_PORT;
 use omicron_common::address::Ipv6Subnet;
 use omicron_common::address::MGD_PORT;
@@ -29,6 +30,8 @@ pub struct Overridables {
     pub mgs_ports: BTreeMap<SledUuid, u16>,
     /// map: sled id -> TCP port on which that sled's MGD is listening
     pub mgd_ports: BTreeMap<SledUuid, u16>,
+    /// map: sled id -> TCP port on which that sled's DDM is listening
+    pub ddm_ports: BTreeMap<SledUuid, u16>,
     /// map: sled id -> IP address of the sled's switch zone
     pub switch_zone_ips: BTreeMap<SledUuid, Ipv6Addr>,
 }
@@ -67,6 +70,16 @@ impl Overridables {
         self.mgd_ports.get(&sled_id).copied().unwrap_or(MGD_PORT)
     }
 
+    /// Specify the TCP port on which this sled's DDM is listening
+    pub fn override_ddm_port(&mut self, sled_id: SledUuid, port: u16) {
+        self.ddm_ports.insert(sled_id, port);
+    }
+
+    /// Returns the TCP port on which this sled's DDM is listening
+    pub fn ddm_port(&self, sled_id: SledUuid) -> u16 {
+        self.ddm_ports.get(&sled_id).copied().unwrap_or(DDMD_PORT)
+    }
+
     /// Specify the IP address of this switch zone
     pub fn override_switch_zone_ip(
         &mut self,
diff --git a/nexus/types/src/internal_api/background.rs b/nexus/types/src/internal_api/background.rs
index c1089e85e20..43653d0878f 100644
--- a/nexus/types/src/internal_api/background.rs
+++ b/nexus/types/src/internal_api/background.rs
@@ -167,6 +167,14 @@ pub struct MulticastGroupReconcilerStatus {
     pub members_deleted: usize,
     /// Number of empty groups marked for deletion (implicit deletion).
     pub empty_groups_marked: usize,
+    /// Reconciliation steps skipped this pass because their downstream
+    /// client was unavailable. Distinguishes "no work needed" (counters
+    /// at 0, `skipped` empty) from "work was deferred" (counters at 0,
+    /// step name in `skipped`).
+    pub skipped: Vec<String>,
+    /// Number of sleds whose DDM port mapping disagreed with inventory.
+    /// DDM wins (live state); a non-zero count surfaces inventory lag.
+    pub ddm_inventory_drift: usize,
     /// Errors that occurred during reconciliation operations.
pub errors: Vec, } diff --git a/openapi/sled-agent/sled-agent-35.0.0-93533c.json.gitstub b/openapi/sled-agent/sled-agent-35.0.0-93533c.json.gitstub new file mode 100644 index 00000000000..00b46848648 --- /dev/null +++ b/openapi/sled-agent/sled-agent-35.0.0-93533c.json.gitstub @@ -0,0 +1 @@ +5579a6d72e5f6be577d2b17ba940ccc0de10decd:openapi/sled-agent/sled-agent-35.0.0-93533c.json diff --git a/openapi/sled-agent/sled-agent-35.0.0-93533c.json b/openapi/sled-agent/sled-agent-36.0.0-7b7885.json similarity index 96% rename from openapi/sled-agent/sled-agent-35.0.0-93533c.json rename to openapi/sled-agent/sled-agent-36.0.0-7b7885.json index b03e32d920e..0a2d8b93339 100644 --- a/openapi/sled-agent/sled-agent-35.0.0-93533c.json +++ b/openapi/sled-agent/sled-agent-36.0.0-7b7885.json @@ -7,7 +7,7 @@ "url": "https://oxide.computer", "email": "api@oxide.computer" }, - "version": "35.0.0" + "version": "36.0.0" }, "paths": { "/artifacts": { @@ -287,6 +287,76 @@ } } }, + "/instances/{instance_id}/multicast-group": { + "put": { + "operationId": "instance_join_multicast_group", + "parameters": [ + { + "in": "path", + "name": "instance_id", + "required": true, + "schema": { + "$ref": "#/components/schemas/InstanceUuid" + } + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/InstanceMulticastMembership" + } + } + }, + "required": true + }, + "responses": { + "204": { + "description": "resource updated" + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + }, + "delete": { + "operationId": "instance_leave_multicast_group", + "parameters": [ + { + "in": "path", + "name": "instance_id", + "required": true, + "schema": { + "$ref": "#/components/schemas/InstanceUuid" + } + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/InstanceMulticastMembership" + } + } + }, + "required": true + }, + "responses": { + "204": { + "description": "resource updated" + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, "/inventory": { "get": { "summary": "Fetch basic information about this sled", @@ -389,6 +459,162 @@ } } }, + "/networking/mcast-fwd": { + "get": { + "summary": "List multicast forwarding entries present on this sled.", + "operationId": "list_mcast_fwd", + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "title": "Array_of_McastForwardingEntry", + "type": "array", + "items": { + "$ref": "#/components/schemas/McastForwardingEntry" + } + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + }, + "put": { + "summary": "Set multicast forwarding entries for an underlay address.", + "operationId": "set_mcast_fwd", + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/McastForwardingEntry" + } + } + }, + "required": true + }, + "responses": { + "204": { + "description": "resource updated" + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + }, + "delete": { + "summary": "Clear multicast forwarding entries for an underlay address.", + "operationId": "clear_mcast_fwd", + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ClearMcastForwarding" 
+ } + } + }, + "required": true + }, + "responses": { + "204": { + "description": "resource updated" + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, + "/networking/mcast-m2p": { + "get": { + "summary": "List M2P mappings present on this sled.", + "operationId": "list_mcast_m2p", + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "title": "Array_of_Mcast2PhysMapping", + "type": "array", + "items": { + "$ref": "#/components/schemas/Mcast2PhysMapping" + } + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + }, + "put": { + "summary": "Set a multicast-to-physical (M2P) mapping in OPTE.", + "operationId": "set_mcast_m2p", + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Mcast2PhysMapping" + } + } + }, + "required": true + }, + "responses": { + "204": { + "description": "resource updated" + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + }, + "delete": { + "summary": "Clear a multicast-to-physical (M2P) mapping in OPTE.", + "operationId": "clear_mcast_m2p", + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ClearMcast2Phys" + } + } + }, + "required": true + }, + "responses": { + "204": { + "description": "resource updated" + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, "/omicron-config": { "put": { "operationId": "omicron_config_put", @@ -2293,76 +2519,6 @@ } } }, - "/vmms/{propolis_id}/multicast-group": { - "put": { - "operationId": "vmm_join_multicast_group", - "parameters": [ - { - "in": "path", - "name": "propolis_id", - "required": true, - "schema": { - "$ref": "#/components/schemas/PropolisUuid" - } - } - ], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/InstanceMulticastBody" - } - } - }, - "required": true - }, - "responses": { - "204": { - "description": "resource updated" - }, - "4XX": { - "$ref": "#/components/responses/Error" - }, - "5XX": { - "$ref": "#/components/responses/Error" - } - } - }, - "delete": { - "operationId": "vmm_leave_multicast_group", - "parameters": [ - { - "in": "path", - "name": "propolis_id", - "required": true, - "schema": { - "$ref": "#/components/schemas/PropolisUuid" - } - } - ], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/InstanceMulticastBody" - } - } - }, - "required": true - }, - "responses": { - "204": { - "description": "resource updated" - }, - "4XX": { - "$ref": "#/components/responses/Error" - }, - "5XX": { - "$ref": "#/components/responses/Error" - } - } - } - }, "/vmms/{propolis_id}/state": { "get": { "operationId": "vmm_get_state", @@ -3992,6 +4148,40 @@ } ] }, + "ClearMcast2Phys": { + "description": "Clear a mapping from an overlay multicast group to an underlay multicast address.", + "type": "object", + "properties": { + "group": { + "description": "Overlay multicast group address.", + "type": "string", + "format": "ip" + }, + "underlay": { + "description": "Underlay IPv6 multicast address. 
See [`Mcast2PhysMapping::underlay`].", + "type": "string", + "format": "ipv6" + } + }, + "required": [ + "group", + "underlay" + ] + }, + "ClearMcastForwarding": { + "description": "Clear all forwarding entries for an underlay multicast address.", + "type": "object", + "properties": { + "underlay": { + "description": "Underlay IPv6 multicast address. See [`Mcast2PhysMapping::underlay`].", + "type": "string", + "format": "ipv6" + } + }, + "required": [ + "underlay" + ] + }, "CombineError": { "type": "string", "enum": [ @@ -5781,35 +5971,6 @@ "src_propolis_addr" ] }, - "InstanceMulticastBody": { - "description": "Request body for multicast group operations.", - "oneOf": [ - { - "type": "object", - "properties": { - "join": { - "$ref": "#/components/schemas/InstanceMulticastMembership" - } - }, - "required": [ - "join" - ], - "additionalProperties": false - }, - { - "type": "object", - "properties": { - "leave": { - "$ref": "#/components/schemas/InstanceMulticastMembership" - } - }, - "required": [ - "leave" - ], - "additionalProperties": false - } - ] - }, "InstanceMulticastMembership": { "description": "Represents a multicast group membership for an instance.\n\nIntroduced in v7.", "type": "object", @@ -6631,6 +6792,151 @@ "minimum": 1, "maximum": 32 }, + "Mcast2PhysMapping": { + "description": "Mapping from an overlay multicast group to an underlay multicast address.\n\nThe underlay address must be within the underlay multicast subnet (ff04::/64). This invariant is enforced by mapping in Nexus, not validated at this layer.", + "type": "object", + "properties": { + "group": { + "description": "Overlay multicast group address.", + "type": "string", + "format": "ip" + }, + "underlay": { + "description": "Underlay IPv6 multicast address (ff04::/64).", + "type": "string", + "format": "ipv6" + } + }, + "required": [ + "group", + "underlay" + ] + }, + "McastFilterMode": { + "description": "Filter mode for multicast source filtering.", + "oneOf": [ + { + "description": "Accept only packets from listed sources (SSM).", + "type": "string", + "enum": [ + "include" + ] + }, + { + "description": "Accept packets from all sources except those listed. With an empty sources list this is any-source multicast (ASM).", + "type": "string", + "enum": [ + "exclude" + ] + } + ] + }, + "McastForwardingEntry": { + "description": "Forwarding entry for an underlay multicast address, specifying which next hops should receive replicated packets.", + "type": "object", + "properties": { + "next_hops": { + "description": "Next hops with replication and source filter configuration.", + "type": "array", + "items": { + "$ref": "#/components/schemas/McastForwardingNextHop" + } + }, + "underlay": { + "description": "Underlay IPv6 multicast address. 
See [`Mcast2PhysMapping::underlay`].", + "type": "string", + "format": "ipv6" + } + }, + "required": [ + "next_hops", + "underlay" + ] + }, + "McastForwardingNextHop": { + "description": "A forwarding next hop with replication mode and aggregated source filter.", + "type": "object", + "properties": { + "filter": { + "description": "Aggregated source filter for this destination.", + "allOf": [ + { + "$ref": "#/components/schemas/McastSourceFilter" + } + ] + }, + "next_hop": { + "description": "Unicast IPv6 address of the destination sled.", + "type": "string", + "format": "ipv6" + }, + "replication": { + "description": "Replication mode for this next hop.", + "allOf": [ + { + "$ref": "#/components/schemas/McastReplication" + } + ] + } + }, + "required": [ + "filter", + "next_hop", + "replication" + ] + }, + "McastReplication": { + "description": "Replication mode for multicast forwarding.", + "oneOf": [ + { + "description": "Replicate to front panel ports (egress to external networks).", + "type": "string", + "enum": [ + "external" + ] + }, + { + "description": "Replicate to sled underlay ports.", + "type": "string", + "enum": [ + "underlay" + ] + }, + { + "description": "Replicate to both external and underlay ports.", + "type": "string", + "enum": [ + "both" + ] + } + ] + }, + "McastSourceFilter": { + "description": "Source filter for multicast forwarding.", + "type": "object", + "properties": { + "mode": { + "description": "Filter mode.", + "allOf": [ + { + "$ref": "#/components/schemas/McastFilterMode" + } + ] + }, + "sources": { + "description": "Source addresses to include or exclude.", + "type": "array", + "items": { + "type": "string", + "format": "ip" + } + } + }, + "required": [ + "mode", + "sources" + ] + }, "Measurement": { "description": "An RoT provided measurement which represents a digest of some component in the trusted computing base (TCB) for the attestor.", "oneOf": [ diff --git a/openapi/sled-agent/sled-agent-latest.json b/openapi/sled-agent/sled-agent-latest.json index 68fb2ddf57b..e0196fa33ef 120000 --- a/openapi/sled-agent/sled-agent-latest.json +++ b/openapi/sled-agent/sled-agent-latest.json @@ -1 +1 @@ -sled-agent-35.0.0-93533c.json \ No newline at end of file +sled-agent-36.0.0-7b7885.json \ No newline at end of file diff --git a/package-manifest.toml b/package-manifest.toml index d873d147789..b25b9c75a1b 100644 --- a/package-manifest.toml +++ b/package-manifest.toml @@ -683,10 +683,10 @@ source.repo = "maghemite" # `tools/maghemite_openapi_version`. Failing to do so will cause a failure when # building `ddm-admin-client` (which will instruct you to update # `tools/maghemite_openapi_version`). -source.commit = "4d1f20f793da102b29b914569725ebc9fdf746dd" +source.commit = "c3c3032f8bdc91d6faf2b36e05b8375a0980765c" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/maghemite/image//mg-ddm-gz.sha256.txt -source.sha256 = "2cb4a97731d55bea78b83aabbba9a43602419e49a9d3eeb214b745463388ff60" +source.sha256 = "751f94de83cf95d2215f3d910dc49bd5c90c18ec6680a9616755bd91fca3a2b1" output.type = "tarball" [package.mg-ddm] @@ -699,10 +699,10 @@ source.repo = "maghemite" # `tools/maghemite_openapi_version`. Failing to do so will cause a failure when # building `ddm-admin-client` (which will instruct you to update # `tools/maghemite_openapi_version`). 
-source.commit = "4d1f20f793da102b29b914569725ebc9fdf746dd" +source.commit = "c3c3032f8bdc91d6faf2b36e05b8375a0980765c" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/maghemite/image//mg-ddm.sha256.txt -source.sha256 = "932cc6149eb87ee9c01226a49708b34fea0479c357f1b19d779f96be40a4c729" +source.sha256 = "4c9b6cf597ec6e26c4f99de82b71482b25cabcd9dd23ccbe87229a997fb6c368" output.type = "zone" output.intermediate_only = true @@ -714,10 +714,10 @@ source.repo = "maghemite" # `tools/maghemite_openapi_version`. Failing to do so will cause a failure when # building `ddm-admin-client` (which will instruct you to update # `tools/maghemite_openapi_version`). -source.commit = "4d1f20f793da102b29b914569725ebc9fdf746dd" +source.commit = "c3c3032f8bdc91d6faf2b36e05b8375a0980765c" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/maghemite/image//mgd.sha256.txt -source.sha256 = "dd07d2ea491842cce28fd4eabc0f957f7672a75a8e4d92c31630d4332cb40ebd" +source.sha256 = "f65bf058322013feb2b5771e24046b0c6953d4e9324f8f48374caf7565845851" output.type = "zone" output.intermediate_only = true @@ -765,8 +765,8 @@ only_for_targets.image = "standard" # the other `source.*` keys. source.type = "prebuilt" source.repo = "dendrite" -source.commit = "1ddaa5d6b101fbaa2c29eca847111cbef1a272ad" -source.sha256 = "d899f9a761bb04bc9b9c88995883196dd691b758de547f7b1836344db5bd5080" +source.commit = "e10e4f5a993fe950ab1b478abb5dcbfa7aa92091" +source.sha256 = "bf93c4d2c6139dca1bf0abab39be25b20b434d998212d08fd6b2df7b015af268" output.type = "zone" output.intermediate_only = true @@ -792,8 +792,8 @@ only_for_targets.image = "standard" # the other `source.*` keys. source.type = "prebuilt" source.repo = "dendrite" -source.commit = "1ddaa5d6b101fbaa2c29eca847111cbef1a272ad" -source.sha256 = "bb0f7930f3af6679c552e3299cdac82a46866f7f3f38b665c02d5f02464ab7b3" +source.commit = "e10e4f5a993fe950ab1b478abb5dcbfa7aa92091" +source.sha256 = "841a17b2ccfc3e020c1f581f610b852339b038f250b4a3918adc3f34b87d295b" output.type = "zone" output.intermediate_only = true @@ -812,8 +812,8 @@ only_for_targets.image = "standard" # the other `source.*` keys. source.type = "prebuilt" source.repo = "dendrite" -source.commit = "1ddaa5d6b101fbaa2c29eca847111cbef1a272ad" -source.sha256 = "9e2c578302c3c11763a2a17e6d0b7a65b811ad2458b8a85c65b48fcec0133ab3" +source.commit = "e10e4f5a993fe950ab1b478abb5dcbfa7aa92091" +source.sha256 = "5ae4ab1df725365a5399d295eab84f7b4f21b8157e549d6e85c1811817156d2f" output.type = "zone" output.intermediate_only = true diff --git a/sled-agent/api/src/lib.rs b/sled-agent/api/src/lib.rs index f261f89db87..fdc8ae39200 100644 --- a/sled-agent/api/src/lib.rs +++ b/sled-agent/api/src/lib.rs @@ -19,6 +19,10 @@ use omicron_common::api::internal::{ SledIdentifiers, VirtualNetworkInterfaceHost, }, }; +use sled_agent_types_versions::latest::multicast::{ + ClearMcast2Phys, ClearMcastForwarding, Mcast2PhysMapping, + McastForwardingEntry, +}; use sled_agent_types_versions::{ latest, v1, v4, v6, v7, v9, v10, v11, v12, v14, v16, v17, v18, v20, v22, v24, v25, v26, v28, v29, v30, v31, v33, @@ -38,6 +42,7 @@ api_versions!([ // | example for the next person. 
 //  v
 // (next_int, IDENT),
+    (36, MCAST_M2P_FORWARDING),
     (35, INLINE_ROUTER_PEER_IP_ADDR),
     (34, MODIFY_SVCS_TYPES),
     (33, BOOTSTORE_SERVICE_NAT),
 ]);
@@ -631,25 +636,57 @@ pub trait SledAgentApi {
     ) -> Result<HttpResponseUpdatedNoContent, HttpError>;
 
     #[endpoint {
+        method = PUT,
+        path = "/instances/{instance_id}/multicast-group",
+        versions = VERSION_MCAST_M2P_FORWARDING..,
+    }]
+    async fn instance_join_multicast_group(
+        rqctx: RequestContext<Self::Context>,
+        path_params: Path<InstancePathParam>,
+        body: TypedBody<InstanceMulticastMembership>,
+    ) -> Result<HttpResponseUpdatedNoContent, HttpError>;
+
+    #[endpoint {
+        method = DELETE,
+        path = "/instances/{instance_id}/multicast-group",
+        versions = VERSION_MCAST_M2P_FORWARDING..,
+    }]
+    async fn instance_leave_multicast_group(
+        rqctx: RequestContext<Self::Context>,
+        path_params: Path<InstancePathParam>,
+        body: TypedBody<InstanceMulticastMembership>,
+    ) -> Result<HttpResponseUpdatedNoContent, HttpError>;
+
+    /// Join a multicast group.
+    //
+    // Deprecated. This was keyed by the active VMM's Propolis ID, while
+    // newer versions use an instance-scoped endpoint.
+    #[endpoint {
+        operation_id = "vmm_join_multicast_group",
         method = PUT,
         path = "/vmms/{propolis_id}/multicast-group",
-        versions = VERSION_MULTICAST_SUPPORT..,
+        versions = VERSION_MULTICAST_SUPPORT..VERSION_MCAST_M2P_FORWARDING,
     }]
-    async fn vmm_join_multicast_group(
+    async fn vmm_join_multicast_group_v7(
         rqctx: RequestContext<Self::Context>,
-        path_params: Path<VmmPathParam>,
-        body: TypedBody<InstanceMulticastBody>,
+        path_params: Path<VmmPathParam>,
+        body: TypedBody<v7::instance::InstanceMulticastBody>,
     ) -> Result<HttpResponseUpdatedNoContent, HttpError>;
 
+    /// Leave a multicast group.
+    //
+    // Deprecated. This was keyed by the active VMM's Propolis ID, while
+    // newer versions use an instance-scoped endpoint.
     #[endpoint {
+        operation_id = "vmm_leave_multicast_group",
         method = DELETE,
         path = "/vmms/{propolis_id}/multicast-group",
-        versions = VERSION_MULTICAST_SUPPORT..,
+        versions = VERSION_MULTICAST_SUPPORT..VERSION_MCAST_M2P_FORWARDING,
     }]
-    async fn vmm_leave_multicast_group(
+    async fn vmm_leave_multicast_group_v7(
         rqctx: RequestContext<Self::Context>,
-        path_params: Path<VmmPathParam>,
-        body: TypedBody<InstanceMulticastBody>,
+        path_params: Path<VmmPathParam>,
+        body: TypedBody<v7::instance::InstanceMulticastBody>,
     ) -> Result<HttpResponseUpdatedNoContent, HttpError>;
 
     #[endpoint {
@@ -808,6 +845,70 @@ pub trait SledAgentApi {
         rqctx: RequestContext<Self::Context>,
     ) -> Result<HttpResponseOk<Vec<VirtualNetworkInterfaceHost>>, HttpError>;
 
+    /// Set a multicast-to-physical (M2P) mapping in OPTE.
+    #[endpoint {
+        method = PUT,
+        path = "/networking/mcast-m2p",
+        versions = VERSION_MCAST_M2P_FORWARDING..,
+    }]
+    async fn set_mcast_m2p(
+        rqctx: RequestContext<Self::Context>,
+        body: TypedBody<Mcast2PhysMapping>,
+    ) -> Result<HttpResponseUpdatedNoContent, HttpError>;
+
+    /// Clear a multicast-to-physical (M2P) mapping in OPTE.
+    #[endpoint {
+        method = DELETE,
+        path = "/networking/mcast-m2p",
+        versions = VERSION_MCAST_M2P_FORWARDING..,
+    }]
+    async fn clear_mcast_m2p(
+        rqctx: RequestContext<Self::Context>,
+        body: TypedBody<ClearMcast2Phys>,
+    ) -> Result<HttpResponseUpdatedNoContent, HttpError>;
+
+    /// Set multicast forwarding entries for an underlay address.
+    #[endpoint {
+        method = PUT,
+        path = "/networking/mcast-fwd",
+        versions = VERSION_MCAST_M2P_FORWARDING..,
+    }]
+    async fn set_mcast_fwd(
+        rqctx: RequestContext<Self::Context>,
+        body: TypedBody<McastForwardingEntry>,
+    ) -> Result<HttpResponseUpdatedNoContent, HttpError>;
+
+    /// Clear multicast forwarding entries for an underlay address.
+    #[endpoint {
+        method = DELETE,
+        path = "/networking/mcast-fwd",
+        versions = VERSION_MCAST_M2P_FORWARDING..,
+    }]
+    async fn clear_mcast_fwd(
+        rqctx: RequestContext<Self::Context>,
+        body: TypedBody<ClearMcastForwarding>,
+    ) -> Result<HttpResponseUpdatedNoContent, HttpError>;
+
+    /// List M2P mappings present on this sled.
+    #[endpoint {
+        method = GET,
+        path = "/networking/mcast-m2p",
+        versions = VERSION_MCAST_M2P_FORWARDING..,
+    }]
+    async fn list_mcast_m2p(
+        rqctx: RequestContext<Self::Context>,
+    ) -> Result<HttpResponseOk<Vec<Mcast2PhysMapping>>, HttpError>;
+
+    /// List multicast forwarding entries present on this sled.
+    #[endpoint {
+        method = GET,
+        path = "/networking/mcast-fwd",
+        versions = VERSION_MCAST_M2P_FORWARDING..,
+    }]
+    async fn list_mcast_fwd(
+        rqctx: RequestContext<Self::Context>,
+    ) -> Result<HttpResponseOk<Vec<McastForwardingEntry>>, HttpError>;
+
     #[endpoint {
         method = POST,
         path = "/switch-ports",
diff --git a/sled-agent/src/http_entrypoints.rs b/sled-agent/src/http_entrypoints.rs
index 2eb525d3e53..0c49cf4c67d 100644
--- a/sled-agent/src/http_entrypoints.rs
+++ b/sled-agent/src/http_entrypoints.rs
@@ -45,12 +45,17 @@ use sled_agent_types::early_networking::EarlyNetworkConfigEnvelope;
 use sled_agent_types::firewall_rules::VpcFirewallRulesEnsureBody;
 use sled_agent_types::instance::SledVmmState;
 use sled_agent_types::instance::{
-    InstanceEnsureBody, InstanceExternalIpBody, InstanceMulticastBody,
-    VmmIssueDiskSnapshotRequestBody, VmmIssueDiskSnapshotRequestPathParam,
-    VmmIssueDiskSnapshotRequestResponse, VmmPathParam, VmmPutStateBody,
-    VmmPutStateResponse, VmmUnregisterResponse, VpcPathParam,
+    InstanceEnsureBody, InstanceExternalIpBody, InstanceMulticastMembership,
+    InstancePathParam, VmmIssueDiskSnapshotRequestBody,
+    VmmIssueDiskSnapshotRequestPathParam, VmmIssueDiskSnapshotRequestResponse,
+    VmmPathParam, VmmPutStateBody, VmmPutStateResponse, VmmUnregisterResponse,
+    VpcPathParam,
 };
 use sled_agent_types::inventory::{Inventory, OmicronSledConfig};
+use sled_agent_types::multicast::{
+    ClearMcast2Phys, ClearMcastForwarding, Mcast2PhysMapping,
+    McastForwardingEntry,
+};
 use sled_agent_types::probes::ProbeSet;
 use sled_agent_types::rot::{
     Attestation, CertificateChain, MeasurementLog, Nonce, RotPathParams,
@@ -79,7 +84,7 @@ use trust_quorum_types::messages::{
 use trust_quorum_types::status::{CommitStatus, CoordinatorStatus, NodeStatus};
 
 // Fixed identifiers for prior versions only
-use sled_agent_types_versions::{v1, v20, v25, v26, v30, v33};
+use sled_agent_types_versions::{v1, v7, v20, v25, v26, v30, v33};
 use sled_diagnostics::{
     SledDiagnosticsCommandHttpOutput, SledDiagnosticsQueryOutput,
 };
@@ -707,33 +712,96 @@ impl SledAgentApi for SledAgentImpl {
             .await
     }
 
-    async fn vmm_join_multicast_group(
+    async fn instance_join_multicast_group(
         rqctx: RequestContext<Self::Context>,
-        path_params: Path<VmmPathParam>,
-        body: TypedBody<InstanceMulticastBody>,
+        path_params: Path<InstancePathParam>,
+        body: TypedBody<InstanceMulticastMembership>,
     ) -> Result<HttpResponseUpdatedNoContent, HttpError> {
         let sa = rqctx.context();
-        let id = path_params.into_inner().propolis_id;
-        let body_args = body.into_inner();
+        let instance_id = path_params.into_inner().instance_id;
+        let membership = body.into_inner();
         sa.latencies()
             .instrument_dropshot_handler(&rqctx, async {
-                sa.instance_join_multicast_group(id, &body_args).await?;
+                sa.instance_join_multicast_group(instance_id, &membership)
+                    .await?;
                 Ok(HttpResponseUpdatedNoContent())
             })
             .await
     }
 
-    async fn vmm_leave_multicast_group(
+    async fn instance_leave_multicast_group(
         rqctx: RequestContext<Self::Context>,
-        path_params: Path<VmmPathParam>,
-        body: TypedBody<InstanceMulticastBody>,
+        path_params: Path<InstancePathParam>,
+        body: TypedBody<InstanceMulticastMembership>,
     ) -> Result<HttpResponseUpdatedNoContent, HttpError> {
         let sa = rqctx.context();
-        let id = path_params.into_inner().propolis_id;
-        let body_args = body.into_inner();
+        let instance_id = path_params.into_inner().instance_id;
+        let membership = body.into_inner();
+        sa.latencies()
+            .instrument_dropshot_handler(&rqctx, async {
+                sa.instance_leave_multicast_group(instance_id, &membership)
+                    .await?;
+                Ok(HttpResponseUpdatedNoContent())
+            })
+            .await
+    }
+
+    async fn vmm_join_multicast_group_v7(
+        rqctx: RequestContext<Self::Context>,
+        path_params: Path<VmmPathParam>,
+        body: TypedBody<v7::instance::InstanceMulticastBody>,
+    ) -> Result<HttpResponseUpdatedNoContent, HttpError> {
+        let sa = rqctx.context();
+        let propolis_id = path_params.into_inner().propolis_id;
+        let membership = match body.into_inner() {
+            v7::instance::InstanceMulticastBody::Join(m) => m,
+            v7::instance::InstanceMulticastBody::Leave(_) => {
+                return Err(HttpError::for_bad_request(
+                    None,
+                    "Join endpoint cannot process Leave operations".to_string(),
+                ));
+            }
+        };
+        sa.latencies()
+            .instrument_dropshot_handler(&rqctx, async {
+                let Some(instance_id) =
+                    sa.instance_id_for_propolis(propolis_id).await?
+                else {
+                    // No registered VMM means no OPTE port to update.
+                    return Ok(HttpResponseUpdatedNoContent());
+                };
+                sa.instance_join_multicast_group(instance_id, &membership)
+                    .await?;
+                Ok(HttpResponseUpdatedNoContent())
+            })
+            .await
+    }
+
+    async fn vmm_leave_multicast_group_v7(
+        rqctx: RequestContext<Self::Context>,
+        path_params: Path<VmmPathParam>,
+        body: TypedBody<v7::instance::InstanceMulticastBody>,
+    ) -> Result<HttpResponseUpdatedNoContent, HttpError> {
+        let sa = rqctx.context();
+        let propolis_id = path_params.into_inner().propolis_id;
+        let membership = match body.into_inner() {
+            v7::instance::InstanceMulticastBody::Leave(m) => m,
+            v7::instance::InstanceMulticastBody::Join(_) => {
+                return Err(HttpError::for_bad_request(
+                    None,
+                    "Leave endpoint cannot process Join operations".to_string(),
+                ));
+            }
+        };
         sa.latencies()
             .instrument_dropshot_handler(&rqctx, async {
-                sa.instance_leave_multicast_group(id, &body_args).await?;
+                let Some(instance_id) =
+                    sa.instance_id_for_propolis(propolis_id).await?
+                else {
+                    return Ok(HttpResponseUpdatedNoContent());
+                };
+                sa.instance_leave_multicast_group(instance_id, &membership)
+                    .await?;
                 Ok(HttpResponseUpdatedNoContent())
             })
             .await
@@ -932,6 +1000,86 @@ impl SledAgentApi for SledAgentImpl {
             .await
     }
 
+    async fn set_mcast_m2p(
+        rqctx: RequestContext<Self::Context>,
+        body: TypedBody<Mcast2PhysMapping>,
+    ) -> Result<HttpResponseUpdatedNoContent, HttpError> {
+        let sa = rqctx.context();
+        let body_args = body.into_inner();
+        sa.latencies()
+            .instrument_dropshot_handler(&rqctx, async {
+                sa.set_mcast_m2p(&body_args).await.map_err(Error::from)?;
+                Ok(HttpResponseUpdatedNoContent())
+            })
+            .await
+    }
+
+    async fn clear_mcast_m2p(
+        rqctx: RequestContext<Self::Context>,
+        body: TypedBody<ClearMcast2Phys>,
+    ) -> Result<HttpResponseUpdatedNoContent, HttpError> {
+        let sa = rqctx.context();
+        let body_args = body.into_inner();
+        sa.latencies()
+            .instrument_dropshot_handler(&rqctx, async {
+                sa.clear_mcast_m2p(&body_args).await.map_err(Error::from)?;
+                Ok(HttpResponseUpdatedNoContent())
+            })
+            .await
+    }
+
+    async fn set_mcast_fwd(
+        rqctx: RequestContext<Self::Context>,
+        body: TypedBody<McastForwardingEntry>,
+    ) -> Result<HttpResponseUpdatedNoContent, HttpError> {
+        let sa = rqctx.context();
+        let body_args = body.into_inner();
+        sa.latencies()
+            .instrument_dropshot_handler(&rqctx, async {
+                sa.set_mcast_fwd(&body_args).await.map_err(Error::from)?;
+                Ok(HttpResponseUpdatedNoContent())
+            })
+            .await
+    }
+
+    async fn clear_mcast_fwd(
+        rqctx: RequestContext<Self::Context>,
+        body: TypedBody<ClearMcastForwarding>,
+    ) -> Result<HttpResponseUpdatedNoContent, HttpError> {
+        let sa = rqctx.context();
+        let body_args = body.into_inner();
+        sa.latencies()
+            .instrument_dropshot_handler(&rqctx, async {
+                sa.clear_mcast_fwd(&body_args).await.map_err(Error::from)?;
+                Ok(HttpResponseUpdatedNoContent())
+            })
+            .await
+    }
+
+    async fn list_mcast_m2p(
+        rqctx: RequestContext<Self::Context>,
+    ) -> Result<HttpResponseOk<Vec<Mcast2PhysMapping>>, HttpError> {
+        let sa = rqctx.context();
+        sa.latencies()
+            .instrument_dropshot_handler(&rqctx, async {
+                let m2p = sa.list_mcast_m2p().await.map_err(Error::from)?;
+                Ok(HttpResponseOk(m2p))
+            })
+            .await
+    }
+
+    async fn list_mcast_fwd(
+        rqctx: RequestContext<Self::Context>,
+    ) -> Result<HttpResponseOk<Vec<McastForwardingEntry>>, HttpError> {
+        let sa = rqctx.context();
+        sa.latencies()
+            .instrument_dropshot_handler(&rqctx, async {
+                let fwd = sa.list_mcast_fwd().await.map_err(Error::from)?;
+                Ok(HttpResponseOk(fwd))
+            })
+            .await
+    }
+
     async fn uplink_ensure(
         rqctx: RequestContext<Self::Context>,
         body: TypedBody<SwitchPorts>,
diff --git a/sled-agent/src/instance.rs
b/sled-agent/src/instance.rs
index a009e7f3843..bf70e3e9448 100644
--- a/sled-agent/src/instance.rs
+++ b/sled-agent/src/instance.rs
@@ -2325,7 +2325,11 @@ impl InstanceRunner {
         // for them.
         let mut opte_ports = Vec::with_capacity(self.requested_nics.len());
         let mut opte_port_names = Vec::with_capacity(self.requested_nics.len());
+        let mcast_cfg = self.multicast_group_cfgs();
         for nic in self.requested_nics.iter() {
+            // Multicast subscriptions target the primary NIC only.
+            // See the TODO on ensure_multicast_groups.
+            let groups: &[_] = if nic.primary { &mcast_cfg } else { &[] };
             let port = self.port_manager.create_port(PortCreateParams {
                 nic,
                 external_ips: &self.external_ips,
@@ -2337,6 +2341,7 @@ impl InstanceRunner {
                     .copied()
                     .map(Into::into)
                     .collect(),
+                multicast_groups: groups,
             })?;
             opte_port_names.push(port.0.name().to_string());
             opte_ports.push(port);
@@ -2618,12 +2623,13 @@ impl InstanceRunner {
         &mut self,
         membership: &InstanceMulticastMembership,
     ) -> Result<(), Error> {
-        // Similar logic to add_external_ip - save state for rollback
+        // Save pre-call state so rollback restores exactly what was
+        // present, mirroring add_external_ip's old_config pattern.
+        let old_groups = self.multicast_groups.clone();
         let out = self.join_multicast_group_inner(membership).await;
         if out.is_err() {
-            // Rollback state on error
-            self.multicast_groups.retain(|m| m != membership);
+            self.multicast_groups = old_groups;
         }
         out
     }
@@ -2632,14 +2638,13 @@ impl InstanceRunner {
         &mut self,
         membership: &InstanceMulticastMembership,
     ) -> Result<(), Error> {
-        // Similar logic to delete_external_ip - save state for rollback
+        // Save pre-call state so rollback restores exactly what was
+        // present, mirroring delete_external_ip's old_config pattern.
+        let old_groups = self.multicast_groups.clone();
         let out = self.leave_multicast_group_inner(membership).await;
         if out.is_err() {
-            // Rollback state on error - readd the membership if it was removed
-            if !self.multicast_groups.contains(membership) {
-                self.multicast_groups.push(membership.clone());
-            }
+            self.multicast_groups = old_groups;
         }
         out
     }
@@ -2648,48 +2653,54 @@ impl InstanceRunner {
         self.refresh_multicast_groups_inner()
     }
 
-    async fn join_multicast_group_inner(
-        &mut self,
-        membership: &InstanceMulticastMembership,
-    ) -> Result<(), Error> {
-        // Check for duplicate membership (idempotency)
-        if self.multicast_groups.contains(membership) {
-            return Ok(());
-        }
-
-        // Add to local state
-        self.multicast_groups.push(membership.clone());
+    /// Convert `InstanceMulticastMembership` list to OPTE
+    /// `MulticastGroupCfg` list.
+    fn multicast_group_cfgs(
+        &self,
+    ) -> Vec<illumos_utils::opte::MulticastGroupCfg> {
+        self.multicast_groups
+            .iter()
+            .map(|m| illumos_utils::opte::MulticastGroupCfg {
+                group_ip: m.group_ip,
+                sources: m.sources.clone(),
+            })
+            .collect()
+    }
 
-        // Update OPTE configuration
+    /// Sync the current multicast group memberships to OPTE via the
+    /// port manager.
+    ///
+    // TODO: subscriptions target the primary NIC only.
+    // InstanceMulticastMembership carries no NIC identifier, same as
+    // external IPs and attached subnets (though not firewall rules,
+    // which fan out across all VPC ports by VNI). If per-NIC multicast
+    // is needed, the membership type needs a NIC field and both this
+    // function and setup_propolis_zone must be updated.
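If the TODO above is ever acted on, the membership type would need to carry a NIC identifier. A hypothetical shape, purely illustrative and not part of this change (the type name and the `nic_id` field are invented):

    use std::net::IpAddr;
    use uuid::Uuid;

    // Hypothetical per-NIC variant of InstanceMulticastMembership.
    struct PerNicMulticastMembership {
        group_ip: IpAddr,
        sources: Vec<IpAddr>,
        nic_id: Uuid, // which of the instance's NICs to subscribe
    }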
+ fn ensure_multicast_groups(&self) -> Result<(), Error> { let Some(primary_nic) = self.primary_nic() else { return Err(Error::Opte(illumos_utils::opte::Error::NoPrimaryNic)); }; - // Convert InstanceMulticastMembership to MulticastGroupCfg - let multicast_cfg: Vec = self - .multicast_groups - .iter() - .map(|membership| illumos_utils::opte::MulticastGroupCfg { - group_ip: membership.group_ip, - sources: membership.sources.clone(), - }) - .collect(); - - // Validate multicast configuration with OPTE self.port_manager.multicast_groups_ensure( primary_nic.id, primary_nic.kind, - &multicast_cfg, + &self.multicast_group_cfgs(), )?; - // TODO: Configure underlay multicast group addresses on the zone's vNIC. - // This should add the multicast group addresses to the zone's network - // interface so it can receive underlay multicast traffic (physical - // network layer). Rack-wide dataplane forwarding is handled by the - // RPW reconciler + DPD. - // See also: port_manager.rs multicast_groups_ensure() TODO about - // configuring OPTE port-level multicast group membership. + Ok(()) + } + async fn join_multicast_group_inner( + &mut self, + membership: &InstanceMulticastMembership, + ) -> Result<(), Error> { + // Idempotent -> skip if already subscribed. + if self.multicast_groups.contains(membership) { + return Ok(()); + } + + self.multicast_groups.push(membership.clone()); + self.ensure_multicast_groups()?; Ok(()) } @@ -2697,56 +2708,12 @@ impl InstanceRunner { &mut self, membership: &InstanceMulticastMembership, ) -> Result<(), Error> { - // Remove from local state self.multicast_groups.retain(|m| m != membership); - - // Update OPTE configuration - let Some(primary_nic) = self.primary_nic() else { - return Err(Error::Opte(illumos_utils::opte::Error::NoPrimaryNic)); - }; - - // Convert InstanceMulticastMembership to MulticastGroupCfg - let multicast_cfg: Vec = self - .multicast_groups - .iter() - .map(|membership| illumos_utils::opte::MulticastGroupCfg { - group_ip: membership.group_ip, - sources: membership.sources.clone(), - }) - .collect(); - - self.port_manager.multicast_groups_ensure( - primary_nic.id, - primary_nic.kind, - &multicast_cfg, - )?; - - Ok(()) + self.ensure_multicast_groups() } fn refresh_multicast_groups_inner(&mut self) -> Result<(), Error> { - // Update OPTE configuration - let Some(primary_nic) = self.primary_nic() else { - return Err(Error::Opte(illumos_utils::opte::Error::NoPrimaryNic)); - }; - - // Convert InstanceMulticastMembership to MulticastGroupCfg - let multicast_cfg: Vec = self - .multicast_groups - .iter() - .map(|membership| illumos_utils::opte::MulticastGroupCfg { - group_ip: membership.group_ip, - sources: membership.sources.clone(), - }) - .collect(); - - self.port_manager.multicast_groups_ensure( - primary_nic.id, - primary_nic.kind, - &multicast_cfg, - )?; - - Ok(()) + self.ensure_multicast_groups() } } @@ -3036,6 +3003,7 @@ mod tests { let port_manager = PortManager::new( log.new(o!("component" => "PortManager")), Ipv6Addr::new(0xfd00, 0x1de, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01), + &[], ); let cleanup_context = CleanupContext::default(); diff --git a/sled-agent/src/instance_manager.rs b/sled-agent/src/instance_manager.rs index f110a379470..012d0a45df0 100644 --- a/sled-agent/src/instance_manager.rs +++ b/sled-agent/src/instance_manager.rs @@ -18,7 +18,7 @@ use illumos_utils::opte::PortManager; use illumos_utils::running_zone::ZoneBuilderFactory; use omicron_common::api::external::ByteCount; use omicron_common::api::internal::shared::SledIdentifiers; -use 
omicron_uuid_kinds::PropolisUuid;
+use omicron_uuid_kinds::{InstanceUuid, PropolisUuid};
 use oxnet::IpNet;
 use sled_agent_config_reconciler::AvailableDatasetsReceiver;
 use sled_agent_config_reconciler::CurrentlyManagedZpoolsReceiver;
@@ -43,6 +43,9 @@ pub enum Error {
     #[error("VMM with ID {0} not found")]
     NoSuchVmm(PropolisUuid),
 
+    #[error("No active VMM for instance {0}")]
+    NoActiveVmmForInstance(InstanceUuid),
+
     #[error("OPTE port management error")]
     Opte(#[from] illumos_utils::opte::Error),
 
@@ -303,17 +306,22 @@ impl InstanceManager {
         rx.await?
     }
 
-    pub async fn join_multicast_group(
+    /// Subscribe an instance's active VMM OPTE port to a multicast group.
+    ///
+    /// The active Propolis ID is resolved inside the manager's run loop so
+    /// that the lookup and the OPTE dispatch are serialized with other
+    /// per-instance state changes.
+    pub async fn join_multicast_group_by_instance(
         &self,
-        propolis_id: PropolisUuid,
-        multicast_body: &InstanceMulticastBody,
+        instance_id: InstanceUuid,
+        membership: &InstanceMulticastMembership,
     ) -> Result<(), Error> {
         let (tx, rx) = oneshot::channel();
         self.inner
             .tx
-            .send(InstanceManagerRequest::JoinMulticastGroup {
-                propolis_id,
-                multicast_body: multicast_body.clone(),
+            .send(InstanceManagerRequest::JoinMulticastGroupByInstance {
+                instance_id,
+                membership: membership.clone(),
                 tx,
             })
             .await
@@ -322,17 +330,22 @@ impl InstanceManager {
         rx.await?
     }
 
-    pub async fn leave_multicast_group(
+    /// Unsubscribe an instance's active VMM OPTE port from a multicast group.
+    ///
+    /// The active Propolis ID is resolved inside the manager's run loop so
+    /// that the lookup and the OPTE dispatch are serialized with other
+    /// per-instance state changes.
+    pub async fn leave_multicast_group_by_instance(
         &self,
-        propolis_id: PropolisUuid,
-        multicast_body: &InstanceMulticastBody,
+        instance_id: InstanceUuid,
+        membership: &InstanceMulticastMembership,
     ) -> Result<(), Error> {
         let (tx, rx) = oneshot::channel();
         self.inner
             .tx
-            .send(InstanceManagerRequest::LeaveMulticastGroup {
-                propolis_id,
-                multicast_body: multicast_body.clone(),
+            .send(InstanceManagerRequest::LeaveMulticastGroupByInstance {
+                instance_id,
+                membership: membership.clone(),
                 tx,
             })
             .await
@@ -341,6 +354,27 @@ impl InstanceManager {
         rx.await?
     }
 
+    /// Resolve a Propolis ID to its registered instance ID.
+    ///
+    /// # Returns
+    ///
+    /// `Ok(None)` if no instance is registered with that Propolis ID.
+    pub async fn instance_id_for_propolis(
+        &self,
+        propolis_id: PropolisUuid,
+    ) -> Result<Option<InstanceUuid>, Error> {
+        let (tx, rx) = oneshot::channel();
+        self.inner
+            .tx
+            .send(InstanceManagerRequest::LookupInstanceForPropolis {
+                propolis_id,
+                tx,
+            })
+            .await
+            .map_err(|_| Error::FailedSendInstanceManagerClosed)?;
+        rx.await?
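One subtlety in the `rx.await?` tail above: two error layers are involved, since the oneshot itself can fail separately from the operation it reports on. A short annotation of the types in play (this assumes the module's `Error` has a conversion from the oneshot receive error, which the `?` requires):

    // rx.await  : Result<Result<Option<InstanceUuid>, Error>, RecvError>
    // rx.await? : Result<Option<InstanceUuid>, Error>
    //
    // The `?` maps a dropped reply channel into Error via From, and the
    // inner Result becomes the function's return value.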
+    }
+
     /// Returns the last-set size of the reservoir
     pub fn reservoir_size(&self) -> ByteCount {
         self.inner.vmm_reservoir_manager.reservoir_size()
     }
@@ -482,16 +516,20 @@ enum InstanceManagerRequest {
     RefreshExternalIps {
         tx: oneshot::Sender<Result<(), Error>>,
     },
-    JoinMulticastGroup {
-        propolis_id: PropolisUuid,
-        multicast_body: InstanceMulticastBody,
+    JoinMulticastGroupByInstance {
+        instance_id: InstanceUuid,
+        membership: InstanceMulticastMembership,
         tx: oneshot::Sender<Result<(), Error>>,
     },
-    LeaveMulticastGroup {
-        propolis_id: PropolisUuid,
-        multicast_body: InstanceMulticastBody,
+    LeaveMulticastGroupByInstance {
+        instance_id: InstanceUuid,
+        membership: InstanceMulticastMembership,
         tx: oneshot::Sender<Result<(), Error>>,
     },
+    LookupInstanceForPropolis {
+        propolis_id: PropolisUuid,
+        tx: oneshot::Sender<Result<Option<InstanceUuid>, Error>>,
+    },
     GetState {
         propolis_id: PropolisUuid,
         tx: oneshot::Sender<Result<SledVmmState, Error>>,
@@ -630,11 +668,14 @@ impl InstanceManagerRunner {
                 Some(RefreshExternalIps { tx }) => {
                     self.refresh_external_ips(tx)
                 },
-                Some(JoinMulticastGroup { propolis_id, multicast_body, tx }) => {
-                    self.join_multicast_group(tx, propolis_id, &multicast_body)
-                },
-                Some(LeaveMulticastGroup { propolis_id, multicast_body, tx }) => {
-                    self.leave_multicast_group(tx, propolis_id, &multicast_body)
+                Some(JoinMulticastGroupByInstance { instance_id, membership, tx }) => {
+                    self.join_multicast_group_by_instance(tx, instance_id, &membership)
+                }
+                Some(LeaveMulticastGroupByInstance { instance_id, membership, tx }) => {
+                    self.leave_multicast_group_by_instance(tx, instance_id, &membership)
+                }
+                Some(LookupInstanceForPropolis { propolis_id, tx }) => {
+                    self.lookup_instance_for_propolis(tx, propolis_id)
                 }
                 Some(GetState { propolis_id, tx }) => {
                     // TODO(eliza): it could potentially be nice to
@@ -903,48 +944,66 @@ impl InstanceManagerRunner {
         Ok(())
     }
 
-    fn join_multicast_group(
+    /// Resolve the active VMM for `instance_id` and forward a join to its
+    /// instance task. The lookup runs inside this dispatcher loop, so it is
+    /// serialized with `EnsureRegistered`, `EnsureUnregistered`, and other
+    /// state changes for the same instance. If no active VMM is registered,
+    /// the call is a no-op: there is no OPTE port to update.
+    fn join_multicast_group_by_instance(
         &self,
         tx: oneshot::Sender<Result<(), Error>>,
-        propolis_id: PropolisUuid,
-        multicast_body: &InstanceMulticastBody,
+        instance_id: InstanceUuid,
+        membership: &InstanceMulticastMembership,
     ) -> Result<(), Error> {
-        let Some(instance) = self.get_propolis(propolis_id) else {
-            return Err(Error::NoSuchVmm(propolis_id));
+        let Some(instance) = self.find_instance(instance_id) else {
+            return tx.send(Ok(())).map_err(|_| Error::FailedSendClientClosed);
        };
-
-        match multicast_body {
-            InstanceMulticastBody::Join(membership) => {
-                instance.join_multicast_group(tx, membership)?;
-            }
-            InstanceMulticastBody::Leave(membership) => {
-                instance.leave_multicast_group(tx, membership)?;
-            }
-        }
+        instance.join_multicast_group(tx, membership)?;
         Ok(())
     }
 
-    fn leave_multicast_group(
+    /// Resolve the active VMM for `instance_id` and forward a leave to its
+    /// instance task. See [`Self::join_multicast_group_by_instance`] for the
+    /// serialization and no-active-VMM behavior.
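All of the `InstanceManagerRequest` plumbing above follows the command-with-reply-channel pattern: each request carries a oneshot sender, and the runner handles requests one at a time, which is what serializes these lookups with other per-instance state changes. A self-contained miniature of the pattern (tokio assumed; names illustrative):

    use tokio::sync::{mpsc, oneshot};

    enum Cmd {
        // Each command carries a oneshot sender for its reply.
        Ping { tx: oneshot::Sender<&'static str> },
    }

    async fn dispatcher(mut rx: mpsc::Receiver<Cmd>) {
        // One command at a time: handling is serialized by construction.
        while let Some(cmd) = rx.recv().await {
            match cmd {
                Cmd::Ping { tx } => {
                    let _ = tx.send("pong"); // ignore a caller that went away
                }
            }
        }
    }

The `leave` implementation follows immediately below.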
+    fn leave_multicast_group_by_instance(
         &self,
         tx: oneshot::Sender<Result<(), Error>>,
-        propolis_id: PropolisUuid,
-        multicast_body: &InstanceMulticastBody,
+        instance_id: InstanceUuid,
+        membership: &InstanceMulticastMembership,
     ) -> Result<(), Error> {
-        let Some(instance) = self.get_propolis(propolis_id) else {
-            return Err(Error::NoSuchVmm(propolis_id));
+        let Some(instance) = self.find_instance(instance_id) else {
+            return tx.send(Ok(())).map_err(|_| Error::FailedSendClientClosed);
         };
-
-        match multicast_body {
-            InstanceMulticastBody::Join(membership) => {
-                instance.join_multicast_group(tx, membership)?;
-            }
-            InstanceMulticastBody::Leave(membership) => {
-                instance.leave_multicast_group(tx, membership)?;
-            }
-        }
+        instance.leave_multicast_group(tx, membership)?;
         Ok(())
     }
 
+    /// Locate the active VMM whose instance ID matches `instance_id`.
+    ///
+    /// The instance manager indexes by Propolis ID, so this is a linear
+    /// scan over the active jobs. The dispatcher serializes calls, so the
+    /// scan runs without any external lock contention.
+    fn find_instance(&self, instance_id: InstanceUuid) -> Option<&Instance> {
+        self.jobs.values().find(|i| i.id() == instance_id)
+    }
+
+    /// Look up the instance ID for a registered Propolis ID.
+    ///
+    /// Runs inside the dispatcher loop, so the lookup is serialized with
+    /// other per-instance state changes.
+    ///
+    /// # Returns
+    ///
+    /// `Ok(None)` if no instance is registered with that Propolis ID.
+    fn lookup_instance_for_propolis(
+        &self,
+        tx: oneshot::Sender<Result<Option<InstanceUuid>, Error>>,
+        propolis_id: PropolisUuid,
+    ) -> Result<(), Error> {
+        let result = self.get_propolis(propolis_id).map(|i| i.id());
+        tx.send(Ok(result)).map_err(|_| Error::FailedSendClientClosed)
+    }
+
     fn get_instance_state(
         &self,
         tx: oneshot::Sender<Result<SledVmmState, Error>>,
diff --git a/sled-agent/src/probe_manager.rs b/sled-agent/src/probe_manager.rs
index 2f2d5421204..38c0d0eba06 100644
--- a/sled-agent/src/probe_manager.rs
+++ b/sled-agent/src/probe_manager.rs
@@ -382,6 +382,7 @@ impl ProbeManagerInner {
                 // but probes are supposed to mimic instances as closely as
                 // possible. We should consider if we want to support them here.
attached_subnets: vec![], + multicast_groups: &[], })?; let installed_zone = ZoneBuilderFactory::new() diff --git a/sled-agent/src/rack_setup/plan/service.rs b/sled-agent/src/rack_setup/plan/service.rs index 8cc6b6dd63a..aaf21f37d9b 100644 --- a/sled-agent/src/rack_setup/plan/service.rs +++ b/sled-agent/src/rack_setup/plan/service.rs @@ -29,10 +29,10 @@ use nexus_types::deployment::{ }; use nexus_types::external_api::sled::SledState; use omicron_common::address::{ - CP_SERVICES_RESERVED_ADDRESSES, DENDRITE_PORT, DNS_HTTP_PORT, DNS_PORT, - Ipv6Subnet, MGD_PORT, MGS_PORT, NEXUS_INTERNAL_PORT, NEXUS_LOCKSTEP_PORT, - NTP_PORT, NUM_SOURCE_NAT_PORTS, REPO_DEPOT_PORT, ReservedRackSubnet, - SLED_PREFIX, SLED_RESERVED_ADDRESSES, get_sled_address, + CP_SERVICES_RESERVED_ADDRESSES, DDMD_PORT, DENDRITE_PORT, DNS_HTTP_PORT, + DNS_PORT, Ipv6Subnet, MGD_PORT, MGS_PORT, NEXUS_INTERNAL_PORT, + NEXUS_LOCKSTEP_PORT, NTP_PORT, NUM_SOURCE_NAT_PORTS, REPO_DEPOT_PORT, + ReservedRackSubnet, SLED_PREFIX, SLED_RESERVED_ADDRESSES, get_sled_address, get_switch_zone_address, }; use omicron_common::api::external::{Generation, MacAddr, Vni}; @@ -341,6 +341,7 @@ impl Plan { DENDRITE_PORT, MGS_PORT, MGD_PORT, + DDMD_PORT, ) .unwrap(); } diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index 82d2fc23a4e..c2993101644 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -1179,6 +1179,7 @@ impl ServiceManager { dhcp_config: DhcpCfg::default(), // Services do not use attached subnets, only instances. attached_subnets: vec![], + multicast_groups: &[], }) .map_err(|err| Error::ServicePortCreation { service: zone_kind, diff --git a/sled-agent/src/sim/collection.rs b/sled-agent/src/sim/collection.rs index 84a31190cb8..935566bb242 100644 --- a/sled-agent/src/sim/collection.rs +++ b/sled-agent/src/sim/collection.rs @@ -236,18 +236,15 @@ impl SimCollection { while should_step { let (new_state, to_destroy) = { - // The object must be present in `objects` because it only gets - // removed when it comes to rest in the "Destroyed" state, but - // we can only get here if there's an asynchronous state - // transition desired. - // // We do as little as possible with the lock held. In // particular, we want to finish this work before calling out to // notify the nexus. let mut objects = self.objects.lock().await; + + // The object may already have been destroyed and removed by a + // concurrent poke (e.g., sim_step racing with an explicit poke + // from a test). In that case there is nothing left to do. let Some(mut object) = objects.remove(&id) else { - // Instance was already removed (e.g., destroyed by a - // concurrent transition). Nothing left to do. 
break; }; object.transition_finish(); diff --git a/sled-agent/src/sim/http_entrypoints.rs b/sled-agent/src/sim/http_entrypoints.rs index 8e65a9a4574..7e6c47eaa31 100644 --- a/sled-agent/src/sim/http_entrypoints.rs +++ b/sled-agent/src/sim/http_entrypoints.rs @@ -56,12 +56,17 @@ use sled_agent_types::early_networking::EarlyNetworkConfigEnvelope; use sled_agent_types::firewall_rules::VpcFirewallRulesEnsureBody; use sled_agent_types::instance::SledVmmState; use sled_agent_types::instance::{ - InstanceEnsureBody, InstanceExternalIpBody, InstanceMulticastBody, - VmmIssueDiskSnapshotRequestBody, VmmIssueDiskSnapshotRequestPathParam, - VmmIssueDiskSnapshotRequestResponse, VmmPathParam, VmmPutStateBody, - VmmPutStateResponse, VmmUnregisterResponse, VpcPathParam, + InstanceEnsureBody, InstanceExternalIpBody, InstanceMulticastMembership, + InstancePathParam, VmmIssueDiskSnapshotRequestBody, + VmmIssueDiskSnapshotRequestPathParam, VmmIssueDiskSnapshotRequestResponse, + VmmPathParam, VmmPutStateBody, VmmPutStateResponse, VmmUnregisterResponse, + VpcPathParam, }; use sled_agent_types::inventory::{Inventory, OmicronSledConfig}; +use sled_agent_types::multicast::ClearMcast2Phys; +use sled_agent_types::multicast::ClearMcastForwarding; +use sled_agent_types::multicast::Mcast2PhysMapping; +use sled_agent_types::multicast::McastForwardingEntry; use sled_agent_types::probes::ProbeSet; use sled_agent_types::rot::{ Attestation, CertificateChain, MeasurementLog, Nonce, RotPathParams, @@ -84,6 +89,7 @@ use sled_agent_types::zone_bundle::{ use sled_hardware_types::BaseboardId; // Fixed identifiers for prior versions only use sled_agent_types_versions::v1; +use sled_agent_types_versions::v7; use sled_agent_types_versions::v20; use sled_agent_types_versions::v25; use sled_agent_types_versions::v26; @@ -188,56 +194,46 @@ impl SledAgentApi for SledAgentSimImpl { Ok(HttpResponseUpdatedNoContent()) } - async fn vmm_join_multicast_group( + async fn instance_join_multicast_group( rqctx: RequestContext, - path_params: Path, - body: TypedBody, + path_params: Path, + body: TypedBody, ) -> Result { let sa = rqctx.context(); - let propolis_id = path_params.into_inner().propolis_id; - let body_args = body.into_inner(); - - match body_args { - InstanceMulticastBody::Join(membership) => { - sa.instance_join_multicast_group(propolis_id, &membership) - .await?; - } - InstanceMulticastBody::Leave(_) => { - // This endpoint is for joining - reject leave operations - return Err(HttpError::for_bad_request( - None, - "Join endpoint cannot process Leave operations".to_string(), - )); - } - } - + let instance_id = path_params.into_inner().instance_id; + let membership = body.into_inner(); + sa.instance_join_multicast_group(instance_id, &membership).await?; Ok(HttpResponseUpdatedNoContent()) } - async fn vmm_leave_multicast_group( + async fn instance_leave_multicast_group( rqctx: RequestContext, - path_params: Path, - body: TypedBody, + path_params: Path, + body: TypedBody, ) -> Result { let sa = rqctx.context(); - let propolis_id = path_params.into_inner().propolis_id; - let body_args = body.into_inner(); + let instance_id = path_params.into_inner().instance_id; + let membership = body.into_inner(); + sa.instance_leave_multicast_group(instance_id, &membership).await?; + Ok(HttpResponseUpdatedNoContent()) + } - match body_args { - InstanceMulticastBody::Leave(membership) => { - sa.instance_leave_multicast_group(propolis_id, &membership) - .await?; - } - InstanceMulticastBody::Join(_) => { - // This endpoint is for leaving - reject join 
-        match body_args {
-            InstanceMulticastBody::Leave(membership) => {
-                sa.instance_leave_multicast_group(propolis_id, &membership)
-                    .await?;
-            }
-            InstanceMulticastBody::Join(_) => {
-                // This endpoint is for leaving - reject join operations
-                return Err(HttpError::for_bad_request(
-                    None,
-                    "Leave endpoint cannot process Join operations".to_string(),
-                ));
-            }
-        }
+    // v7 shims exist on the trait for spec compatibility. The sim has no
+    // callers for them.
+    async fn vmm_join_multicast_group_v7(
+        _rqctx: RequestContext<Self::Context>,
+        _path_params: Path<VmmPathParam>,
+        _body: TypedBody<v7::instance::InstanceMulticastBody>,
+    ) -> Result<HttpResponseUpdatedNoContent, HttpError> {
+        unimplemented!()
+    }
 
-        Ok(HttpResponseUpdatedNoContent())
+    async fn vmm_leave_multicast_group_v7(
+        _rqctx: RequestContext<Self::Context>,
+        _path_params: Path<VmmPathParam>,
+        _body: TypedBody<v7::instance::InstanceMulticastBody>,
+    ) -> Result<HttpResponseUpdatedNoContent, HttpError> {
+        unimplemented!()
     }
 
     async fn disk_put(
@@ -390,6 +386,66 @@
         Ok(HttpResponseOk(vnics))
     }
 
+    async fn set_mcast_m2p(
+        rqctx: RequestContext<Self::Context>,
+        body: TypedBody<Mcast2PhysMapping>,
+    ) -> Result<HttpResponseUpdatedNoContent, HttpError> {
+        let sa = rqctx.context();
+        sa.set_mcast_m2p(&body.into_inner())
+            .map_err(|e| HttpError::for_internal_error(e.to_string()))?;
+        Ok(HttpResponseUpdatedNoContent())
+    }
+
+    async fn clear_mcast_m2p(
+        rqctx: RequestContext<Self::Context>,
+        body: TypedBody<ClearMcast2Phys>,
+    ) -> Result<HttpResponseUpdatedNoContent, HttpError> {
+        let sa = rqctx.context();
+        sa.clear_mcast_m2p(&body.into_inner())
+            .map_err(|e| HttpError::for_internal_error(e.to_string()))?;
+        Ok(HttpResponseUpdatedNoContent())
+    }
+
+    async fn set_mcast_fwd(
+        rqctx: RequestContext<Self::Context>,
+        body: TypedBody<McastForwardingEntry>,
+    ) -> Result<HttpResponseUpdatedNoContent, HttpError> {
+        let sa = rqctx.context();
+        sa.set_mcast_fwd(&body.into_inner())
+            .map_err(|e| HttpError::for_internal_error(e.to_string()))?;
+        Ok(HttpResponseUpdatedNoContent())
+    }
+
+    async fn clear_mcast_fwd(
+        rqctx: RequestContext<Self::Context>,
+        body: TypedBody<ClearMcastForwarding>,
+    ) -> Result<HttpResponseUpdatedNoContent, HttpError> {
+        let sa = rqctx.context();
+        sa.clear_mcast_fwd(&body.into_inner())
+            .map_err(|e| HttpError::for_internal_error(e.to_string()))?;
+        Ok(HttpResponseUpdatedNoContent())
+    }
+
+    async fn list_mcast_m2p(
+        rqctx: RequestContext<Self::Context>,
+    ) -> Result<HttpResponseOk<Vec<Mcast2PhysMapping>>, HttpError> {
+        let sa = rqctx.context();
+        let m2p = sa
+            .list_mcast_m2p()
+            .map_err(|e| HttpError::for_internal_error(e.to_string()))?;
+        Ok(HttpResponseOk(m2p))
+    }
+
+    async fn list_mcast_fwd(
+        rqctx: RequestContext<Self::Context>,
+    ) -> Result<HttpResponseOk<Vec<McastForwardingEntry>>, HttpError> {
+        let sa = rqctx.context();
+        let fwd = sa
+            .list_mcast_fwd()
+            .map_err(|e| HttpError::for_internal_error(e.to_string()))?;
+        Ok(HttpResponseOk(fwd))
+    }
+
     async fn uplink_ensure(
         _rqctx: RequestContext<Self::Context>,
         _body: TypedBody<SwitchPorts>,
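// Editor's round-trip sketch (illustrative, not part of the patch): the sim
// endpoints above are thin wrappers over in-memory maps, so a set/list/clear
// cycle exercises them end to end. `sa` is the simulated `SledAgent` from the
// next file and `Error` its error type; the addresses are hypothetical. Only
// `McastForwardingNextHop` derives `PartialEq`, so the assertion compares
// next hops rather than whole entries.
fn mcast_fwd_round_trip(sa: &SledAgent) -> Result<(), Error> {
    let entry = McastForwardingEntry {
        underlay: "ff04::1:2".parse().unwrap(),
        next_hops: vec![McastForwardingNextHop {
            next_hop: "fd00:1122:3344:101::1".parse().unwrap(),
            replication: McastReplication::Underlay,
            filter: McastSourceFilter::default(),
        }],
    };
    sa.set_mcast_fwd(&entry)?;
    assert_eq!(sa.list_mcast_fwd()?[0].next_hops, entry.next_hops);
    sa.clear_mcast_fwd(&ClearMcastForwarding { underlay: entry.underlay })?;
    assert!(sa.list_mcast_fwd()?.is_empty());
    Ok(())
}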
diff --git a/sled-agent/src/sim/sled_agent.rs b/sled-agent/src/sim/sled_agent.rs
index 384a1ed6ea7..5f8f1446b1c 100644
--- a/sled-agent/src/sim/sled_agent.rs
+++ b/sled-agent/src/sim/sled_agent.rs
@@ -38,8 +38,8 @@ use omicron_common::disk::{
     DisksManagementResult, OmicronPhysicalDisksConfig,
 };
 use omicron_uuid_kinds::{
-    DatasetUuid, GenericUuid, PhysicalDiskUuid, PropolisUuid, SledUuid,
-    SupportBundleUuid, ZpoolUuid,
+    DatasetUuid, GenericUuid, InstanceUuid, PhysicalDiskUuid, PropolisUuid,
+    SledUuid, SupportBundleUuid, ZpoolUuid,
 };
 use oxnet::{IpNet, Ipv6Net};
 use propolis_client::instance_spec::FileStorageBackend;
@@ -66,6 +66,10 @@ use sled_agent_types::inventory::{
     OmicronFileSourceResolverInventory, OmicronSledConfig, OmicronZonesConfig,
     SingleMeasurementInventory, SledRole, ZpoolHealth,
 };
+use sled_agent_types::multicast::{
+    ClearMcast2Phys, ClearMcastForwarding, Mcast2PhysMapping,
+    McastForwardingEntry, McastForwardingNextHop,
+};
 use sled_agent_types::support_bundle::SupportBundleMetadata;
 use sled_agent_types::system_networking::SystemNetworkingConfig;
 
@@ -96,6 +100,8 @@ pub struct SledAgent {
     pub nexus_client: Arc<NexusClient>,
     pub simulated_upstairs: Arc<SimulatedUpstairs>,
     pub v2p_mappings: Mutex<HashSet<VirtualNetworkInterfaceHost>>,
+    pub m2p_mappings: Mutex<HashSet<(IpAddr, Ipv6Addr)>>,
+    pub mcast_fwd: Mutex<HashMap<Ipv6Addr, Vec<McastForwardingNextHop>>>,
     mock_propolis: futures::lock::Mutex<
         Option<(propolis_mock_server::Server, PropolisClient)>,
     >,
@@ -105,9 +111,9 @@
     /// subnets attached to instances.
     pub attached_subnets: Mutex<HashMap<PropolisUuid, HashSet<IpNet>>>,
 
-    /// multicast group memberships for instances
-    pub multicast_groups:
-        Mutex<HashMap<PropolisUuid, HashSet<InstanceMulticastMembership>>>,
+    /// multicast group memberships, keyed by instance.
+    pub instance_multicast_groups:
+        Mutex<HashMap<InstanceUuid, HashSet<InstanceMulticastMembership>>>,
     pub vpc_routes: Mutex<HashMap<RouterId, RouteSet>>,
     config: Config,
     fake_zones: Mutex<OmicronZonesConfig>,
@@ -188,9 +194,11 @@
             nexus_client,
             simulated_upstairs,
             v2p_mappings: Mutex::new(HashSet::new()),
+            m2p_mappings: Mutex::new(HashSet::new()),
+            mcast_fwd: Mutex::new(HashMap::new()),
             external_ips: Mutex::new(HashMap::new()),
             attached_subnets: Mutex::new(HashMap::new()),
-            multicast_groups: Mutex::new(HashMap::new()),
+            instance_multicast_groups: Mutex::new(HashMap::new()),
             vpc_routes: Mutex::new(HashMap::new()),
             mock_propolis: futures::lock::Mutex::new(None),
             config: config.clone(),
@@ -676,6 +684,58 @@
         Ok(Vec::from_iter(v2p_mappings.clone()))
     }
 
+    pub fn set_mcast_m2p(&self, req: &Mcast2PhysMapping) -> Result<(), Error> {
+        let mut m2p = self.m2p_mappings.lock().unwrap();
+        m2p.insert((req.group, req.underlay));
+        Ok(())
+    }
+
+    pub fn clear_mcast_m2p(&self, req: &ClearMcast2Phys) -> Result<(), Error> {
+        let mut m2p = self.m2p_mappings.lock().unwrap();
+        m2p.remove(&(req.group, req.underlay));
+        Ok(())
+    }
+
+    pub fn set_mcast_fwd(
+        &self,
+        req: &McastForwardingEntry,
+    ) -> Result<(), Error> {
+        let mut fwd = self.mcast_fwd.lock().unwrap();
+        fwd.insert(req.underlay, req.next_hops.clone());
+        Ok(())
+    }
+
+    pub fn clear_mcast_fwd(
+        &self,
+        req: &ClearMcastForwarding,
+    ) -> Result<(), Error> {
+        let mut fwd = self.mcast_fwd.lock().unwrap();
+        fwd.remove(&req.underlay);
+        Ok(())
+    }
+
+    pub fn list_mcast_m2p(&self) -> Result<Vec<Mcast2PhysMapping>, Error> {
+        let m2p = self.m2p_mappings.lock().unwrap();
+        Ok(m2p
+            .iter()
+            .map(|(group, underlay)| Mcast2PhysMapping {
+                group: *group,
+                underlay: *underlay,
+            })
+            .collect())
+    }
+
+    pub fn list_mcast_fwd(&self) -> Result<Vec<McastForwardingEntry>, Error> {
+        let fwd = self.mcast_fwd.lock().unwrap();
+        Ok(fwd
+            .iter()
+            .map(|(underlay, next_hops)| McastForwardingEntry {
+                underlay: *underlay,
+                next_hops: next_hops.clone(),
+            })
+            .collect())
+    }
+
     pub async fn instance_put_external_ip(
         &self,
         propolis_id: PropolisUuid,
@@ -801,39 +861,21 @@
     pub async fn instance_join_multicast_group(
         &self,
-        propolis_id: PropolisUuid,
+        instance_id: InstanceUuid,
         membership: &InstanceMulticastMembership,
     ) -> Result<(), Error> {
-        if !self.vmms.contains_key(&propolis_id.into_untyped_uuid()).await {
-            return Err(Error::internal_error(
-                "can't join multicast group for VMM that's not registered",
-            ));
-        }
-
-        let mut groups = self.multicast_groups.lock().unwrap();
-        let my_groups = groups.entry(propolis_id).or_default();
-
-        my_groups.insert(membership.clone());
-
+        let mut groups = self.instance_multicast_groups.lock().unwrap();
+        groups.entry(instance_id).or_default().insert(membership.clone());
         Ok(())
     }
 
     pub async fn instance_leave_multicast_group(
         &self,
-        propolis_id: PropolisUuid,
+        instance_id: InstanceUuid,
         membership: &InstanceMulticastMembership,
     ) -> Result<(), Error> {
-        if !self.vmms.contains_key(&propolis_id.into_untyped_uuid()).await {
-            return Err(Error::internal_error(
-                "can't leave multicast group for VMM that's not registered",
-            ));
-        }
-
-        let mut groups = self.multicast_groups.lock().unwrap();
-        let my_groups = groups.entry(propolis_id).or_default();
-
-        my_groups.remove(membership);
-
+        let mut groups =
self.instance_multicast_groups.lock().unwrap(); + groups.entry(instance_id).or_default().remove(membership); Ok(()) } diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index e0be3e87daa..17ab7857c1b 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -56,7 +56,7 @@ use omicron_common::api::internal::shared::{ use omicron_common::zpool_name::ZpoolName; use omicron_ddm_admin_client::Client as DdmAdminClient; use omicron_uuid_kinds::{ - GenericUuid, MupdateOverrideUuid, PropolisUuid, SledUuid, + GenericUuid, InstanceUuid, MupdateOverrideUuid, PropolisUuid, SledUuid, }; use oximeter_instruments::http::LatencyTracker; use oxnet::IpNet; @@ -75,11 +75,15 @@ use sled_agent_types::disk::DiskStateRequested; use sled_agent_types::early_networking::EarlyNetworkConfigEnvelope; use sled_agent_types::instance::ResolvedVpcFirewallRule; use sled_agent_types::instance::{ - InstanceEnsureBody, InstanceExternalIpBody, InstanceMulticastBody, + InstanceEnsureBody, InstanceExternalIpBody, InstanceMulticastMembership, SledVmmState, VmmPutStateResponse, VmmStateRequested, VmmUnregisterResponse, }; use sled_agent_types::inventory::{Inventory, OmicronSledConfig, SledRole}; +use sled_agent_types::multicast::{ + ClearMcast2Phys, ClearMcastForwarding, Mcast2PhysMapping, + McastForwardingEntry, +}; use sled_agent_types::probes::ProbeCreate; use sled_agent_types::resolvable_files::{ PreparedOmicronZone, RemoveMupdateOverrideResult, ResolverStatus, @@ -411,7 +415,6 @@ struct SledAgentInner { // A handle to the trust quorum. trust_quorum: trust_quorum::NodeTaskHandle, - // A handle to the hardware monitor. hardware_monitor: HardwareMonitorHandle, @@ -580,6 +583,7 @@ impl SledAgent { let port_manager = PortManager::new( parent_log.new(o!("component" => "PortManager")), *sled_address.ip(), + &underlay_nics, ); // The VMM reservoir is configured with respect to what's left after @@ -1034,30 +1038,56 @@ impl SledAgent { .map_err(|e| Error::Instance(e)) } + /// Subscribe an instance's active VMM OPTE port to a multicast group. + /// + /// The active Propolis ID is resolved inside the instance manager so + /// that resolution and dispatch are serialized with other per-instance + /// state changes. A no-active-VMM result is treated as a successful + /// no-op since the OPTE port no longer exists. pub async fn instance_join_multicast_group( &self, - propolis_id: PropolisUuid, - multicast_body: &InstanceMulticastBody, + instance_id: InstanceUuid, + membership: &InstanceMulticastMembership, ) -> Result<(), Error> { self.inner .instances - .join_multicast_group(propolis_id, multicast_body) + .join_multicast_group_by_instance(instance_id, membership) .await .map_err(|e| Error::Instance(e)) } + /// Unsubscribe an instance's active VMM OPTE port from a multicast group. + /// + /// See [`Self::instance_join_multicast_group`] for the resolution and + /// no-active-VMM semantics. pub async fn instance_leave_multicast_group( &self, - propolis_id: PropolisUuid, - multicast_body: &InstanceMulticastBody, + instance_id: InstanceUuid, + membership: &InstanceMulticastMembership, ) -> Result<(), Error> { self.inner .instances - .leave_multicast_group(propolis_id, multicast_body) + .leave_multicast_group_by_instance(instance_id, membership) .await .map_err(|e| Error::Instance(e)) } + /// Resolve a Propolis ID to its registered instance ID. + /// + /// # Returns + /// + /// `Ok(None)` if no instance is registered with that Propolis ID. 
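+    ///
+    /// Editor's sketch of the intended translation for VMM-keyed callers
+    /// (hypothetical; the real v7 shim lives in the HTTP layer):
+    ///
+    /// ```ignore
+    /// let Some(instance_id) =
+    ///     sled_agent.instance_id_for_propolis(propolis_id).await?
+    /// else {
+    ///     return Ok(()); // VMM gone; the membership change is a no-op
+    /// };
+    /// sled_agent.instance_join_multicast_group(instance_id, &membership).await?;
+    /// ```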
+    pub async fn instance_id_for_propolis(
+        &self,
+        propolis_id: PropolisUuid,
+    ) -> Result<Option<InstanceUuid>, Error> {
+        self.inner
+            .instances
+            .instance_id_for_propolis(propolis_id)
+            .await
+            .map_err(Error::Instance)
+    }
+
     /// Returns the state of the instance with the provided ID.
     pub async fn instance_get_state(
         &self,
@@ -1138,6 +1168,52 @@
             .map_err(Error::from)
     }
 
+    /// Install a multicast overlay-to-underlay (M2P) mapping in OPTE.
+    pub async fn set_mcast_m2p(
+        &self,
+        req: &Mcast2PhysMapping,
+    ) -> Result<(), Error> {
+        self.inner.port_manager.set_mcast_m2p(req).map_err(Error::from)
+    }
+
+    /// Remove a multicast overlay-to-underlay (M2P) mapping from OPTE.
+    pub async fn clear_mcast_m2p(
+        &self,
+        req: &ClearMcast2Phys,
+    ) -> Result<(), Error> {
+        self.inner.port_manager.clear_mcast_m2p(req).map_err(Error::from)
+    }
+
+    /// Set multicast forwarding next hops for an underlay group address.
+    pub async fn set_mcast_fwd(
+        &self,
+        req: &McastForwardingEntry,
+    ) -> Result<(), Error> {
+        self.inner.port_manager.set_mcast_fwd(req).map_err(Error::from)
+    }
+
+    /// Remove multicast forwarding entries for an underlay group address.
+    pub async fn clear_mcast_fwd(
+        &self,
+        req: &ClearMcastForwarding,
+    ) -> Result<(), Error> {
+        self.inner.port_manager.clear_mcast_fwd(req).map_err(Error::from)
+    }
+
+    /// List all multicast M2P mappings from OPTE.
+    pub async fn list_mcast_m2p(
+        &self,
+    ) -> Result<Vec<Mcast2PhysMapping>, Error> {
+        self.inner.port_manager.list_mcast_m2p().map_err(Error::from)
+    }
+
+    /// List all multicast forwarding entries from OPTE.
+    pub async fn list_mcast_fwd(
+        &self,
+    ) -> Result<Vec<McastForwardingEntry>, Error> {
+        self.inner.port_manager.list_mcast_fwd().map_err(Error::from)
+    }
+
     pub async fn ensure_scrimlet_host_ports(
         &self,
         uplinks: Vec<HostPortConfig>,
diff --git a/sled-agent/types/src/lib.rs b/sled-agent/types/src/lib.rs
index 2d87bbc1761..a1885b4aa66 100644
--- a/sled-agent/types/src/lib.rs
+++ b/sled-agent/types/src/lib.rs
@@ -16,6 +16,7 @@ pub mod early_networking;
 pub mod firewall_rules;
 pub mod instance;
 pub mod inventory;
+pub mod multicast;
 pub mod probes;
 pub mod rack_init;
 pub mod resolvable_files;
diff --git a/sled-agent/types/src/multicast.rs b/sled-agent/types/src/multicast.rs
new file mode 100644
index 00000000000..27e95a0d94c
--- /dev/null
+++ b/sled-agent/types/src/multicast.rs
@@ -0,0 +1,7 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+//! Multicast networking types for the Sled Agent API.
+ +pub use sled_agent_types_versions::latest::multicast::*; diff --git a/sled-agent/types/versions/src/latest.rs b/sled-agent/types/versions/src/latest.rs index a89d8fedf2b..d988af59722 100644 --- a/sled-agent/types/versions/src/latest.rs +++ b/sled-agent/types/versions/src/latest.rs @@ -89,6 +89,18 @@ pub mod firewall_rules { pub use crate::v31::firewall_rules::VpcFirewallRulesEnsureBody; } +pub mod multicast { + pub use crate::v36::multicast::ClearMcast2Phys; + pub use crate::v36::multicast::ClearMcastForwarding; + pub use crate::v36::multicast::Mcast2PhysMapping; + pub use crate::v36::multicast::McastFilterMode; + pub use crate::v36::multicast::McastForwardingEntry; + pub use crate::v36::multicast::McastForwardingNextHop; + pub use crate::v36::multicast::McastReplication; + pub use crate::v36::multicast::McastSourceFilter; + pub use crate::v36::multicast::MulticastGroupCfg; +} + pub mod instance { pub use crate::v1::instance::InstanceExternalIpBody; pub use crate::v1::instance::InstanceMetadata; @@ -121,6 +133,8 @@ pub mod instance { pub use crate::v32::instance::ExternalIpv6Config; pub use crate::v32::instance::InstanceEnsureBody; pub use crate::v32::instance::InstanceSledLocalConfig; + + pub use crate::v36::instance::InstancePathParam; } pub mod inventory { diff --git a/sled-agent/types/versions/src/lib.rs b/sled-agent/types/versions/src/lib.rs index 69a6e70fcd9..5a2e2fe7cb3 100644 --- a/sled-agent/types/versions/src/lib.rs +++ b/sled-agent/types/versions/src/lib.rs @@ -79,6 +79,8 @@ pub mod v32; pub mod v33; #[path = "modify_svcs_types/mod.rs"] pub mod v34; +#[path = "mcast_m2p_forwarding/mod.rs"] +pub mod v36; #[path = "add_nexus_lockstep_port_to_inventory/mod.rs"] pub mod v4; #[path = "add_probe_put_endpoint/mod.rs"] diff --git a/sled-agent/types/versions/src/mcast_m2p_forwarding/instance.rs b/sled-agent/types/versions/src/mcast_m2p_forwarding/instance.rs new file mode 100644 index 00000000000..15d0b48417b --- /dev/null +++ b/sled-agent/types/versions/src/mcast_m2p_forwarding/instance.rs @@ -0,0 +1,13 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use omicron_uuid_kinds::InstanceUuid; +use schemars::JsonSchema; +use serde::Deserialize; + +/// Path parameters for an instance-scoped request. +#[derive(Deserialize, JsonSchema)] +pub struct InstancePathParam { + pub instance_id: InstanceUuid, +} diff --git a/sled-agent/types/versions/src/mcast_m2p_forwarding/mod.rs b/sled-agent/types/versions/src/mcast_m2p_forwarding/mod.rs new file mode 100644 index 00000000000..6914c2d856b --- /dev/null +++ b/sled-agent/types/versions/src/mcast_m2p_forwarding/mod.rs @@ -0,0 +1,11 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Version `MCAST_M2P_FORWARDING` of the Sled Agent API. +//! +//! Adds multicast-to-physical mapping and forwarding types, and moves the +//! multicast subscription endpoints from VMM-keyed to instance-keyed. 
+
+pub mod instance;
+pub mod multicast;
diff --git a/sled-agent/types/versions/src/mcast_m2p_forwarding/multicast.rs b/sled-agent/types/versions/src/mcast_m2p_forwarding/multicast.rs
new file mode 100644
index 00000000000..5c2247c1159
--- /dev/null
+++ b/sled-agent/types/versions/src/mcast_m2p_forwarding/multicast.rs
@@ -0,0 +1,132 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+// Copyright 2026 Oxide Computer Company
+
+//! Multicast networking types for the sled-agent API.
+//!
+//! These types support overlay-to-underlay multicast mapping and
+//! multicast forwarding configuration via OPTE. The underlay address
+//! space is ff04::/64, a subset of admin-local scope per
+//! [RFC 7346](https://www.rfc-editor.org/rfc/rfc7346).
+
+use std::net::IpAddr;
+use std::net::Ipv6Addr;
+
+use schemars::JsonSchema;
+use serde::Deserialize;
+use serde::Serialize;
+
+/// Mapping from an overlay multicast group to an underlay multicast
+/// address.
+///
+/// The underlay address must be within the underlay multicast subnet
+/// (ff04::/64). This invariant is enforced by the mapping logic in
+/// Nexus, not validated at this layer.
+#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)]
+pub struct Mcast2PhysMapping {
+    /// Overlay multicast group address.
+    pub group: IpAddr,
+    /// Underlay IPv6 multicast address (ff04::/64).
+    pub underlay: Ipv6Addr,
+}
+
+/// Clear a mapping from an overlay multicast group to an underlay
+/// multicast address.
+#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)]
+pub struct ClearMcast2Phys {
+    /// Overlay multicast group address.
+    pub group: IpAddr,
+    /// Underlay IPv6 multicast address. See [`Mcast2PhysMapping::underlay`].
+    pub underlay: Ipv6Addr,
+}
+
+/// Forwarding entry for an underlay multicast address, specifying
+/// which next hops should receive replicated packets.
+#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)]
+pub struct McastForwardingEntry {
+    /// Underlay IPv6 multicast address. See [`Mcast2PhysMapping::underlay`].
+    pub underlay: Ipv6Addr,
+    /// Next hops with replication and source filter configuration.
+    pub next_hops: Vec<McastForwardingNextHop>,
+}
+
+/// Clear all forwarding entries for an underlay multicast address.
+#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)]
+pub struct ClearMcastForwarding {
+    /// Underlay IPv6 multicast address. See [`Mcast2PhysMapping::underlay`].
+    pub underlay: Ipv6Addr,
+}
+
+/// A forwarding next hop with replication mode and aggregated
+/// source filter.
+#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq)]
+pub struct McastForwardingNextHop {
+    /// Unicast IPv6 address of the destination sled.
+    pub next_hop: Ipv6Addr,
+    /// Replication mode for this next hop.
+    pub replication: McastReplication,
+    /// Aggregated source filter for this destination.
+    pub filter: McastSourceFilter,
+}
+
+/// Replication mode for multicast forwarding.
+#[derive(
+    Clone, Copy, Debug, Deserialize, Serialize, JsonSchema, PartialEq, Eq,
+)]
+#[serde(rename_all = "snake_case")]
+pub enum McastReplication {
+    /// Replicate to front panel ports (egress to external networks).
+    External,
+    /// Replicate to sled underlay ports.
+    Underlay,
+    /// Replicate to both external and underlay ports.
+    Both,
+}
+
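+// Editor's illustration (hypothetical addresses, not part of the patch): an
+// overlay group carried on an admin-local (ff04::/64) underlay group, as the
+// Nexus mapping would allocate it, with the matching clear request:
+//
+//     let mapping = Mcast2PhysMapping {
+//         group: "239.1.2.3".parse().unwrap(),
+//         underlay: "ff04::1:2:3".parse().unwrap(),
+//     };
+//     let clear = ClearMcast2Phys {
+//         group: mapping.group,
+//         underlay: mapping.underlay,
+//     };
+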
+/// Source filter for multicast forwarding.
+#[derive(
+    Clone, Debug, Default, Deserialize, Serialize, JsonSchema, PartialEq,
+)]
+pub struct McastSourceFilter {
+    /// Filter mode.
+    pub mode: McastFilterMode,
+    /// Source addresses to include or exclude.
+    pub sources: Vec<IpAddr>,
+}
+
+/// Filter mode for multicast source filtering.
+#[derive(
+    Clone,
+    Copy,
+    Debug,
+    Default,
+    Deserialize,
+    Serialize,
+    JsonSchema,
+    PartialEq,
+    Eq,
+)]
+#[serde(rename_all = "snake_case")]
+pub enum McastFilterMode {
+    /// Accept only packets from listed sources (SSM).
+    Include,
+    /// Accept packets from all sources except those listed.
+    /// With an empty sources list this is any-source multicast (ASM).
+    #[default]
+    Exclude,
+}
+
+/// Declarative multicast group subscription for an OPTE port.
+///
+/// Represents a single group membership with optional source filtering.
+/// Empty `sources` means any-source multicast (ASM) and non-empty means
+/// source-specific multicast (SSM).
+#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq, Eq)]
+pub struct MulticastGroupCfg {
+    /// The multicast group IP address (IPv4 or IPv6).
+    pub group_ip: IpAddr,
+    /// Source addresses for source-filtered multicast.
+    pub sources: Vec<IpAddr>,
+}
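// Editor's illustration (hypothetical values, not part of the patch): ASM
// vs. SSM subscriptions as an OPTE port would express them with
// `MulticastGroupCfg`.
fn example_subscriptions() -> Vec<MulticastGroupCfg> {
    vec![
        // Any-source multicast: accept traffic for the group from all senders.
        MulticastGroupCfg {
            group_ip: "239.10.0.1".parse().unwrap(),
            sources: vec![],
        },
        // Source-specific multicast: accept only the listed sender.
        MulticastGroupCfg {
            group_ip: "232.1.1.1".parse().unwrap(),
            sources: vec!["10.0.0.5".parse().unwrap()],
        },
    ]
}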
diff --git a/smf/nexus/multi-sled/config-partial.toml b/smf/nexus/multi-sled/config-partial.toml
index 7a43386e375..9f36f737dde 100644
--- a/smf/nexus/multi-sled/config-partial.toml
+++ b/smf/nexus/multi-sled/config-partial.toml
@@ -112,9 +112,6 @@ fm.rendezvous_period_secs = 300
 probe_distributor.period_secs = 60
 multicast_reconciler.period_secs = 60
 trust_quorum.period_secs = 60
-# TTL for sled-to-backplane-port mapping cache
-# Default: 3600 seconds (1 hour) - detects new sleds and inventory changes
-# multicast_reconciler.sled_cache_ttl_secs = 3600
 # TTL for backplane topology cache (static platform configuration)
 # Default: 86400 seconds (24 hours) - refreshed on-demand when validation fails
 # multicast_reconciler.backplane_cache_ttl_secs = 86400
diff --git a/smf/nexus/single-sled/config-partial.toml b/smf/nexus/single-sled/config-partial.toml
index 11863e1c681..01f32525e24 100644
--- a/smf/nexus/single-sled/config-partial.toml
+++ b/smf/nexus/single-sled/config-partial.toml
@@ -112,9 +112,6 @@ fm.rendezvous_period_secs = 300
 probe_distributor.period_secs = 60
 trust_quorum.period_secs = 60
 multicast_reconciler.period_secs = 60
-# TTL for sled-to-backplane-port mapping cache
-# Default: 3600 seconds (1 hour) - detects new sleds and inventory changes
-# multicast_reconciler.sled_cache_ttl_secs = 3600
 # TTL for backplane topology cache (static platform configuration)
 # Default: 86400 seconds (24 hours) - refreshed on-demand when validation fails
 # multicast_reconciler.backplane_cache_ttl_secs = 86400
diff --git a/test-utils/Cargo.toml b/test-utils/Cargo.toml
index ad4b8d303c7..a53a72af2b2 100644
--- a/test-utils/Cargo.toml
+++ b/test-utils/Cargo.toml
@@ -26,6 +26,8 @@ pem.workspace = true
 regex.workspace = true
 ring.workspace = true
 rustls.workspace = true
+schemars.workspace = true
+serde.workspace = true
 slog.workspace = true
 subprocess.workspace = true
 tempfile.workspace = true
diff --git a/test-utils/src/dev/maghemite.rs b/test-utils/src/dev/maghemite.rs
index 4c2d85df3ee..e3880c3a622 100644
--- a/test-utils/src/dev/maghemite.rs
+++ b/test-utils/src/dev/maghemite.rs
@@ -163,6 +163,167 @@ async fn find_mgd_port_in_log(logfile: String) -> Result<u16> {
     }
 }
 
+/// Simulated DDM admin API instance for integration tests.
+///
+/// `ddmd` only runs on real switch zones (requires `libnet`,
+/// `opte-ioctl`, and physical network interfaces), so it is not
+/// available in the test toolchain like `mgd` is. This provides a
+/// dropshot server implementing the DDM admin API endpoints that
+/// omicron consumes, backed by in-memory state.
+///
+/// Peers can be provided at construction time and updated at runtime
+/// via `set_peers`.
+pub struct DdmInstance {
+    pub port: u16,
+    server: Option<dropshot::HttpServer<DdmSimContext>>,
+}
+
+/// Server-side peer info type for the DDM sim.
+///
+/// Mirrors the wire format of the progenitor-generated
+/// `ddm_admin_client::types::PeerInfo` but derives `JsonSchema` so
+/// dropshot can serve it directly.
+#[derive(
+    Clone, Debug, serde::Serialize, serde::Deserialize, schemars::JsonSchema,
+)]
+pub struct SimPeerInfo {
+    pub addr: std::net::Ipv6Addr,
+    pub host: String,
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub if_name: Option<String>,
+    pub kind: i64,
+    pub status: SimPeerStatus,
+}
+
+#[derive(
+    Clone,
+    Debug,
+    serde::Serialize,
+    serde::Deserialize,
+    schemars::JsonSchema,
+    PartialEq,
+)]
+pub enum SimPeerStatus {
+    NoContact,
+    Active,
+    Expired,
+}
+
+pub type PeerMap = std::collections::HashMap<String, SimPeerInfo>;
+
+pub struct DdmSimContext {
+    peers: std::sync::Mutex<PeerMap>,
+}
+
+/// Configuration for starting a `DdmInstance`.
+pub struct DdmInstanceConfig {
+    /// Initial peers to serve from `GET /peers`.
+    pub initial_peers: PeerMap,
+    /// Port to bind on (0 for auto-assign).
+    pub port: u16,
+    /// Logger for the dropshot server.
+    pub log: slog::Logger,
+}
+
+impl Default for DdmInstanceConfig {
+    fn default() -> Self {
+        Self {
+            initial_peers: PeerMap::new(),
+            port: 0,
+            log: slog::Logger::root(slog::Discard, slog::o!()),
+        }
+    }
+}
+
+impl DdmInstance {
+    /// Start a DDM sim server with default configuration (no peers,
+    /// auto-assigned port).
+    pub async fn start() -> Result<Self> {
+        Self::start_with_config(DdmInstanceConfig::default()).await
+    }
+
+    /// Start a DDM sim server with the provided configuration.
+    pub async fn start_with_config(
+        config: DdmInstanceConfig,
+    ) -> Result<Self> {
+        let context = DdmSimContext {
+            peers: std::sync::Mutex::new(config.initial_peers),
+        };
+
+        let dropshot_config = dropshot::ConfigDropshot {
+            bind_address: std::net::SocketAddr::V6(
+                std::net::SocketAddrV6::new(
+                    std::net::Ipv6Addr::LOCALHOST,
+                    config.port,
+                    0,
+                    0,
+                ),
+            ),
+            ..Default::default()
+        };
+
+        let mut api = dropshot::ApiDescription::new();
+        api.register(ddm_get_peers).unwrap();
+
+        let server = dropshot::HttpServerStarter::new(
+            &dropshot_config,
+            api,
+            context,
+            &config.log,
+        )
+        .map_err(|e| anyhow::anyhow!("failed to start DDM sim server: {e}"))?
+        .start();
+
+        let port = server.local_addr().port();
+        Ok(Self { port, server: Some(server) })
+    }
+
+    /// Replace the peers returned by `GET /peers`.
+    pub fn set_peers(&self, peers: PeerMap) {
+        let server = self.server.as_ref().expect("DDM sim server not running");
+        *server.app_private().peers.lock().unwrap() = peers;
+    }
+
+    pub async fn cleanup(&mut self) {
+        if let Some(server) = self.server.take() {
+            server.close().await.expect("failed to close DDM sim server");
+        }
+    }
+}
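+
+// Editor's usage sketch (hypothetical test, not part of the patch): start
+// the sim with one peer, point a DDM admin client at
+// `http://[::1]:{port}/peers`, then swap the peer set at runtime.
+//
+//     let mut peers = PeerMap::new();
+//     peers.insert(
+//         "sled0".to_string(),
+//         sim_peer_info(
+//             "fd00:1122:3344:101::1".parse().unwrap(),
+//             "sled0",
+//             "tfportrear0_0",
+//             0,
+//             SimPeerStatus::Active,
+//         ),
+//     );
+//     let mut ddm = DdmInstance::start_with_config(DdmInstanceConfig {
+//         initial_peers: peers,
+//         ..Default::default()
+//     })
+//     .await?;
+//     ddm.set_peers(PeerMap::new()); // now serves an empty peer map
+//     ddm.cleanup().await;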
+
+/// Build a `SimPeerInfo` for a sled connected through the specified
+/// switch port interface.
+///
+/// `host` identifies the peer in DDM (typically the sled hostname).
+/// `if_name` is the switch port interface (e.g., `"tfportrear0_0"`).
+/// `kind` is the DDM router kind (0 = server, 1 = transit).
+pub fn sim_peer_info(
+    addr: std::net::Ipv6Addr,
+    host: &str,
+    if_name: &str,
+    kind: i64,
+    status: SimPeerStatus,
+) -> SimPeerInfo {
+    SimPeerInfo {
+        addr,
+        host: host.to_string(),
+        if_name: Some(if_name.to_string()),
+        kind,
+        status,
+    }
+}
+
+#[dropshot::endpoint {
+    method = GET,
+    path = "/peers",
+}]
+async fn ddm_get_peers(
+    rqctx: dropshot::RequestContext<DdmSimContext>,
+) -> Result<dropshot::HttpResponseOk<PeerMap>, dropshot::HttpError> {
+    let peers = rqctx.context().peers.lock().unwrap().clone();
+    Ok(dropshot::HttpResponseOk(peers))
+}
+
 #[cfg(test)]
 mod tests {
     use super::find_mgd_port_in_log;
diff --git a/tools/dendrite_stub_checksums b/tools/dendrite_stub_checksums
index 58203e10a2b..9839d02f01f 100644
--- a/tools/dendrite_stub_checksums
+++ b/tools/dendrite_stub_checksums
@@ -1,3 +1,3 @@
-CIDL_SHA256_ILLUMOS="d899f9a761bb04bc9b9c88995883196dd691b758de547f7b1836344db5bd5080"
-CIDL_SHA256_LINUX_DPD="642b4c99fd9ac3ed7ee8785b2716135c2fe8347812d22bd9f53bbdd3ef9d5e0d"
+CIDL_SHA256_ILLUMOS="bf93c4d2c6139dca1bf0abab39be25b20b434d998212d08fd6b2df7b015af268"
+CIDL_SHA256_LINUX_DPD="090cb4aa86b674e8cc69ccc5f8d7e7cd146641d6d18bdecff89cc330ab956ab4"
 CIDL_SHA256_LINUX_SWADM="26c52328b5db50a77f903579521c7e6a89d36e851ba9ecf536aa6db749a7c0e4"
diff --git a/tools/dendrite_version b/tools/dendrite_version
index db21e2d6820..884f50ff594 100644
--- a/tools/dendrite_version
+++ b/tools/dendrite_version
@@ -1 +1 @@
-COMMIT="1ddaa5d6b101fbaa2c29eca847111cbef1a272ad"
+COMMIT="e10e4f5a993fe950ab1b478abb5dcbfa7aa92091"
diff --git a/tools/install_opte.sh b/tools/install_opte.sh
index d56523764d9..1f649ec473d 100755
--- a/tools/install_opte.sh
+++ b/tools/install_opte.sh
@@ -51,6 +51,14 @@ fi
 # Grab the version of the opte package to install
 OPTE_VERSION="$(cat "$OMICRON_TOP/tools/opte_version")"
 
+# Check for an OPTE override. When set, the desired OPTE version isn't
+# published to the helios pkg repo yet, so we download and install directly
+# from the override p5p built by OPTE CI on buildomat.
+source "$OMICRON_TOP/tools/opte_version_override"
+if [[ "x$OPTE_COMMIT" != "x" ]]; then
+    echo "OPTE override active: installing from p5p for commit $OPTE_COMMIT"
+fi
+
 OMICRON_FROZEN_PKG_COMMENT="OMICRON-PINNED-PACKAGE"
 
 # Once we install, we mark the package as frozen at that particular version.
@@ -71,16 +79,36 @@ if PKG_FROZEN=$(pkg freeze | grep driver/network/opte); then
     pfexec pkg unfreeze driver/network/opte
 fi
 
-# Actually install the xde kernel module and opteadm tool
-RC=0
-pfexec pkg install -v pkg://helios-dev/driver/network/opte@"$OPTE_VERSION" || RC=$?
-if [[ "$RC" -eq 0 ]]; then
-    echo "xde driver installed successfully"
-elif [[ "$RC" -eq 4 ]]; then
-    echo "Correct xde driver already installed"
+if [[ "x$OPTE_COMMIT" != "x" ]]; then
+    # Install from the override p5p archive built by OPTE CI.
+    P5P_URL="https://buildomat.eng.oxide.computer/public/file/oxidecomputer/opte/repo/$OPTE_COMMIT/opte.p5p"
+    P5P_PATH="/tmp/opte-override.p5p"
+    echo "Downloading override p5p from $P5P_URL"
+    curl -fL -o "$P5P_PATH" "$P5P_URL"
+
+    RC=0
+    pfexec pkg install -g "$P5P_PATH" "driver/network/opte@$OPTE_VERSION" || RC=$?
+    if [[ "$RC" -eq 0 ]]; then
+        echo "xde driver installed from override p5p"
+    elif [[ "$RC" -eq 4 ]]; then
+        echo "Correct xde driver already installed"
+    else
+        echo "Installing xde driver from override p5p failed"
+        exit "$RC"
+    fi
+    rm -f "$P5P_PATH"
 else
-    echo "Installing xde driver failed"
-    exit "$RC"
+    # Install the published version from the helios pkg repo.
+ RC=0 + pfexec pkg install -v pkg://helios-dev/driver/network/opte@"$OPTE_VERSION" || RC=$? + if [[ "$RC" -eq 0 ]]; then + echo "xde driver installed successfully" + elif [[ "$RC" -eq 4 ]]; then + echo "Correct xde driver already installed" + else + echo "Installing xde driver failed" + exit "$RC" + fi fi RC=0 @@ -97,13 +125,3 @@ if [[ "$RC" -ne 0 ]]; then echo "The \`opteadm\` administration tool is not on your path." echo "You may add \"/opt/oxide/opte/bin\" to your path to access it." fi - -source $OMICRON_TOP/tools/opte_version_override - -if [[ "x$OPTE_COMMIT" != "x" ]]; then - set +x - curl -fOL https://buildomat.eng.oxide.computer/public/file/oxidecomputer/opte/module/$OPTE_COMMIT/xde - pfexec rem_drv xde || true - pfexec mv xde /kernel/drv/amd64/xde - pfexec add_drv xde || true -fi diff --git a/tools/maghemite_ddm_openapi_version b/tools/maghemite_ddm_openapi_version index e1553c6f6af..7580a9c8471 100644 --- a/tools/maghemite_ddm_openapi_version +++ b/tools/maghemite_ddm_openapi_version @@ -1 +1 @@ -COMMIT="4d1f20f793da102b29b914569725ebc9fdf746dd" +COMMIT="c3c3032f8bdc91d6faf2b36e05b8375a0980765c" diff --git a/tools/maghemite_mg_openapi_version b/tools/maghemite_mg_openapi_version index e1553c6f6af..7580a9c8471 100644 --- a/tools/maghemite_mg_openapi_version +++ b/tools/maghemite_mg_openapi_version @@ -1 +1 @@ -COMMIT="4d1f20f793da102b29b914569725ebc9fdf746dd" +COMMIT="c3c3032f8bdc91d6faf2b36e05b8375a0980765c" diff --git a/tools/maghemite_mgd_checksums b/tools/maghemite_mgd_checksums index 36429923417..9ff2d430404 100644 --- a/tools/maghemite_mgd_checksums +++ b/tools/maghemite_mgd_checksums @@ -1,2 +1,2 @@ -CIDL_SHA256="dd07d2ea491842cce28fd4eabc0f957f7672a75a8e4d92c31630d4332cb40ebd" -MGD_LINUX_SHA256="64aadc95e3e9bbd8aa70fa15aada3667932e252d1b4dd253ecc76f2d8552d6cd" \ No newline at end of file +CIDL_SHA256="f65bf058322013feb2b5771e24046b0c6953d4e9324f8f48374caf7565845851" +MGD_LINUX_SHA256="98037bfe6718840beccab1e3f9d063406e2aabfe4a5b548d7301a2a9637042a8" \ No newline at end of file diff --git a/tools/opte_version_override b/tools/opte_version_override index 8d57f7ae9f4..66995188d0d 100644 --- a/tools/opte_version_override +++ b/tools/opte_version_override @@ -1,5 +1,18 @@ #!/usr/bin/env bash -# only set this if you want to override the version of opte/xde installed by the -# install_opte.sh script -OPTE_COMMIT="" +# Override for using an unpublished OPTE version. When OPTE_COMMIT is set, +# the override p5p package is downloaded from buildomat and used instead of +# the version published in the helios pkg repo. The p5p is built by the +# opte-p5p buildomat job and published at: +# https://buildomat.eng.oxide.computer/public/file/oxidecomputer/opte/repo/{commit}/opte.p5p +# +# Consumers: +# - install_opte.sh installs directly from the override p5p +# - releng image builds use extra_packages with the p5p as a pkg source +# - deploy.sh installs from the override p5p on the running system +# - ci_check_opte_ver.sh skips version consistency checks +# +# To activate: set OPTE_COMMIT to the git commit hash of the OPTE build. +# +# To deactivate (once the new version is published): set OPTE_COMMIT to "". +OPTE_COMMIT="c570ac2126dbbebbd8e98e73b580c5be6b7e460e"