itpplasma · krystophny · Jun 14, 2026 · Jun 14, 2026 · Jun 14, 2026 · Jun 14, 2026
diff --git a/.github/workflows/benchmarks.yaml b/.github/workflows/benchmarks.yaml
@@ -36,7 +36,11 @@ jobs:
           pytest benchmarks/test_benchmarks.py --benchmark-json=benchmark_results.json
 
       - name: Compare benchmark result
-        if: github.event_name == 'pull_request'
+        # Skip the result upload/compare for fork PRs: their GITHUB_TOKEN is
+        # read-only, so comment-on-alert/auto-push hit 'Resource not accessible
+        # by integration'. The benchmarks still run above; only the write-back
+        # is skipped for forks.
+        if: github.event_name == 'pull_request' && github.event.pull_request.head.repo.full_name == github.repository
         uses: benchmark-action/github-action-benchmark@v1.21.0
         with:
           tool: "pytest"

diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml
@@ -50,15 +50,33 @@ jobs:
         run: |
           # install VMEC++ deps as well as VMEC2000 deps (we need to import VMEC2000 in a test)
           sudo apt-get update && sudo apt-get install -y build-essential cmake libnetcdf-dev liblapack-dev libomp-dev
+      - name: Cache the VMEC2000 wheel
+        if: ${{ matrix.os == 'ubuntu-22.04' && matrix.python-version == '3.10' }}
+        uses: actions/cache@v4
+        with:
+          path: /tmp/vmec2000-wheel
+          # Keyed on the pinned VMEC2000 source commit: rebuild only when it changes.
+          key: vmec2000-728af8bd6c79-cp310-ubuntu22.04
       - name: Also install VMEC2000 (only on Ubuntu 22.04)
         if: ${{ matrix.os == 'ubuntu-22.04' && matrix.python-version == '3.10' }}
         run: |
-          # mpi4py is needed for VMEC2000
-          sudo apt-get install -y libopenmpi-dev
+          sudo apt-get install -y libopenmpi-dev libnetcdff-dev libscalapack-mpi-dev \
+            libopenblas-dev ninja-build
           python -m pip install mpi4py
-          # custom wheel for VMEC2000, needed for some VMEC++/VMEC2000 compatibility tests
-          # NOTE: this wheel is only guaranteed to work on Ubuntu 22.04
-          python -m pip install vmec@https://anaconda.org/eguiraud-pf/vmec/0.0.6/download/vmec-0.0.6-cp310-cp310-linux_x86_64.whl
+          if ! ls /tmp/vmec2000-wheel/vmec-*.whl >/dev/null 2>&1; then
+            # Build with the SYSTEM cmake + ninja; do NOT pip-install the
+            # cmake/ninja wheels (they shadow the system cmake that
+            # scikit-build-core's editable vmecpp rebuild records).
+            python -m pip install numpy scikit-build f90wrap setuptools wheel
+            git clone https://github.com/hiddenSymmetries/VMEC2000.git /tmp/VMEC2000
+            git -C /tmp/VMEC2000 checkout 728af8bd6c796b36a0aa85fe298e507791e57c6e
+            cp /tmp/VMEC2000/cmake/machines/ubuntu.json /tmp/VMEC2000/cmake_config_file.json
+            LD_LIBRARY_PATH=/usr/lib/x86_64-linux-gnu \
+              python -m pip wheel /tmp/VMEC2000 --no-build-isolation -w /tmp/vmec2000-wheel
+          fi
+          python -m pip install /tmp/vmec2000-wheel/vmec-*.whl
+          # fail loudly here if the binding is still broken, not in the test step
+          python -c "import vmec; print('VMEC2000 import OK')"
       - name: Install package
         run: |
           # on Ubuntu we would not need this, but on MacOS we need to point CMake to gfortran-14 and gcc-14

diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -66,10 +66,9 @@ find_package(LAPACK REQUIRED)
 FetchContent_Declare(
   abseil-cpp
   GIT_REPOSITORY https://github.com/abseil/abseil-cpp.git
-  # 20260107.1 LTS: required for Clang >= 21, where the 2024 pin fails to
-  # compile (absl::Nonnull SFINAE in absl/strings/ascii.cc). Clang is the
-  # compiler used for the Enzyme autodiff build.
-  GIT_TAG 20260107.1
+  # 20260107.1 LTS: older abseil fails to compile under Clang >= 21 (the
+  # Enzyme build) on absl::Nonnull SFINAE in absl/strings/ascii.cc.
+  GIT_TAG 255c84dadd029fd8ad25c5efb5933e47beaa00c7
   GIT_SHALLOW TRUE
 )
 FetchContent_Declare(

diff --git a/src/vmecpp/cpp/vmecpp/common/enzyme/local_force_hessian_test.cc b/src/vmecpp/cpp/vmecpp/common/enzyme/local_force_hessian_test.cc
@@ -5,14 +5,18 @@
 
 // Exact Hessian of VMEC's local force map via composed autodiff.
 //
-// The six shared force-chain kernels (Jacobian, metric, B^contra, B_cov,
-// magnetic pressure, MHD force density) compose into the local map
+// The shared force-chain kernels (Jacobian, metric, B^contra, B_cov, magnetic
+// pressure, MHD force density, and the hybrid lambda force) compose into the
+// local map
 //   g : real-space geometry -> real-space force density,
-// the nonlinear core of VMEC's force. The full MHD force is Tᵀ . g . T with the
+// the nonlinear core of VMEC's force. The full force is Tᵀ . g . T with the
 // linear spectral transforms T, Tᵀ; the exact force Hessian-vector product is
-// therefore Tᵀ . J_g . T . v, and J_g is what Enzyme provides here.
+// therefore Tᵀ . J_g . T . v, and J_g is what Enzyme provides here. The
+// remaining augmented term, the spectral-condensation constraint force, also
+// carries a linear Fourier bandpass and is validated end-to-end against the
+// finite-difference HVP in the pybind exact-HVP path.
 //
-// This test composes the six production kernels over flat buffers and takes the
+// This test composes the production kernels over flat buffers and takes the
 // Jacobian of g by forward and reverse mode, checks both against central finite
 // differences and against each other, and times one forward Jacobian-vector
 // pass against the two force evaluations a finite-difference Hessian-vector
@@ -30,29 +34,33 @@
 #include "vmecpp/vmec/ideal_mhd_model/bco_kernel.h"
 #include "vmecpp/vmec/ideal_mhd_model/bcontra_kernel.h"
 #include "vmecpp/vmec/ideal_mhd_model/jacobian_kernel.h"
+#include "vmecpp/vmec/ideal_mhd_model/lambda_force_kernel.h"
 #include "vmecpp/vmec/ideal_mhd_model/metric_kernel.h"
 #include "vmecpp/vmec/ideal_mhd_model/mhdforce_kernel.h"
 #include "vmecpp/vmec/ideal_mhd_model/pressure_kernel.h"
 
 // Problem dimensions and the constant (non-differentiated) context.
 struct Ctx {
-  int nZnT, nsH;       // half-grid surfaces; full-grid has nsH + 1
-  int nFull, nHalf;    // (nsH+1)*nZnT, nsH*nZnT
-  const double* sqrtSF;  // [nsH+1]
-  const double* sqrtSH;  // [nsH]
-  const double* chipH;   // [nsH]
-  const double* presH;   // [nsH]
+  int nZnT, nsH;                 // half-grid surfaces; full-grid has nsH + 1
+  int nFull, nHalf;              // (nsH+1)*nZnT, nsH*nZnT
+  const double* sqrtSF;          // [nsH+1]
+  const double* sqrtSH;          // [nsH]
+  const double* chipH;           // [nsH]
+  const double* presH;           // [nsH]
+  const double* radialBlending;  // [nsH+1]
   double deltaS;
+  double lamscale;
   bool lthreed;
 };
 
-// 16 geometry blocks (each nFull) packed into geom; 12 force blocks (each nHalf)
-// in force; everything else is intermediate work sliced from work.
-enum { kGeomBlocks = 16, kForceBlocks = 12 };
+// 16 geometry blocks (each nFull) packed into geom; force blocks (each nHalf):
+// 12 MHD force-density blocks + 4 lambda-force blocks (blmn_e/o, clmn_e/o).
+// Everything else is intermediate work sliced from work.
+enum { kGeomBlocks = 16, kForceBlocks = 16 };
 
-// g: geometry -> force density, composing the six shared kernels.
+// g: geometry -> force density, composing the MHD and lambda-force kernels.
 __attribute__((noinline)) void LocalForce(const double* geom, double* work,
-                                           double* force, const Ctx* c) {
+                                          double* force, const Ctx* c) {
   const int nF = c->nFull;
   const int nH = c->nHalf;
   const int nZnT = c->nZnT, nsH = c->nsH;
@@ -75,30 +83,45 @@ __attribute__((noinline)) void LocalForce(const double* geom, double* work,
   const double* lvo = geom + 15 * nF;
   // half-grid intermediates from work
   double* p = work;
-  double* r12 = p; p += nH;
-  double* ru12 = p; p += nH;
-  double* zu12 = p; p += nH;
-  double* rs = p; p += nH;
-  double* zs = p; p += nH;
-  double* tau = p; p += nH;
-  double* gsqrt = p; p += nH;
-  double* guu = p; p += nH;
-  double* guv = p; p += nH;
-  double* gvv = p; p += nH;
-  double* bsupu = p; p += nH;
-  double* bsupv = p; p += nH;
-  double* bsubu = p; p += nH;
-  double* bsubv = p; p += nH;
-  double* tp = p; p += nH;
+  double* r12 = p;
+  p += nH;
+  double* ru12 = p;
+  p += nH;
+  double* zu12 = p;
+  p += nH;
+  double* rs = p;
+  p += nH;
+  double* zs = p;
+  p += nH;
+  double* tau = p;
+  p += nH;
+  double* gsqrt = p;
+  p += nH;
+  double* guu = p;
+  p += nH;
+  double* guv = p;
+  p += nH;
+  double* gvv = p;
+  p += nH;
+  double* bsupu = p;
+  p += nH;
+  double* bsupv = p;
+  p += nH;
+  double* bsubu = p;
+  p += nH;
+  double* bsubv = p;
+  p += nH;
+  double* tp = p;
+  p += nH;
   // per-nZnT scratch for the force kernel (26 blocks)
   double* sc = p;  // 26 * nZnT
 
-  vmecpp::ComputeHalfGridJacobian(r1e, r1o, z1e, z1o, rue, ruo, zue, zuo, c->sqrtSH,
-                          c->deltaS, /*dSHalfDsInterp=*/0.25, nZnT, 0, 0, nsH,
-                          r12, ru12, zu12, rs, zs, tau);
-  vmecpp::ComputeMetricElements(r1e, r1o, rue, ruo, zue, zuo, rve, rvo, zve, zvo,
-                                tau, r12, c->sqrtSF, c->sqrtSH, c->lthreed, nZnT,
-                                0, 0, nsH, gsqrt, guu, guv, gvv);
+  vmecpp::ComputeHalfGridJacobian(
+      r1e, r1o, z1e, z1o, rue, ruo, zue, zuo, c->sqrtSH, c->deltaS,
+      /*dSHalfDsInterp=*/0.25, nZnT, 0, 0, nsH, r12, ru12, zu12, rs, zs, tau);
+  vmecpp::ComputeMetricElements(r1e, r1o, rue, ruo, zue, zuo, rve, rvo, zve,
+                                zvo, tau, r12, c->sqrtSF, c->sqrtSH, c->lthreed,
+                                nZnT, 0, 0, nsH, gsqrt, guu, guv, gvv);
   vmecpp::ComputeBsupContra(lue, luo, lve, lvo, gsqrt, c->sqrtSH, c->lthreed,
                             nZnT, 0, 0, nsH, bsupu, bsupv);
   for (int jH = 0; jH < nsH; ++jH) {
@@ -113,19 +136,67 @@ __attribute__((noinline)) void LocalForce(const double* geom, double* work,
     for (int kl = 0; kl < nZnT; ++kl) tp[jH * nZnT + kl] += c->presH[jH];
   }
   double* s = sc;
-  double* P_i = s; s += nZnT; double* rup_i = s; s += nZnT;
-  double* zup_i = s; s += nZnT; double* rsp_i = s; s += nZnT;
-  double* zsp_i = s; s += nZnT; double* taup_i = s; s += nZnT;
-  double* gbubu_i = s; s += nZnT; double* gbubv_i = s; s += nZnT;
-  double* gbvbv_i = s; s += nZnT; double* P_o = s; s += nZnT;
-  double* rup_o = s; s += nZnT; double* zup_o = s; s += nZnT;
-  double* rsp_o = s; s += nZnT; double* zsp_o = s; s += nZnT;
-  double* taup_o = s; s += nZnT; double* gbubu_o = s; s += nZnT;
-  double* gbubv_o = s; s += nZnT; double* gbvbv_o = s; s += nZnT;
-  double* P_avg = s; s += nZnT; double* P_wavg = s; s += nZnT;
-  double* gbubu_avg = s; s += nZnT; double* gbubu_wavg = s; s += nZnT;
-  double* gbvbv_avg = s; s += nZnT; double* gbvbv_wavg = s; s += nZnT;
-  double* gbubv_avg = s; s += nZnT; double* gbubv_wavg = s; s += nZnT;
+  double* P_i = s;
+  s += nZnT;
+  double* rup_i = s;
+  s += nZnT;
+  double* zup_i = s;
+  s += nZnT;
+  double* rsp_i = s;
+  s += nZnT;
+  double* zsp_i = s;
+  s += nZnT;
+  double* taup_i = s;
+  s += nZnT;
+  double* gbubu_i = s;
+  s += nZnT;
+  double* gbubv_i = s;
+  s += nZnT;
+  double* gbvbv_i = s;
+  s += nZnT;
+  double* P_o = s;
+  s += nZnT;
+  double* rup_o = s;
+  s += nZnT;
+  double* zup_o = s;
+  s += nZnT;
+  double* rsp_o = s;
+  s += nZnT;
+  double* zsp_o = s;
+  s += nZnT;
+  double* taup_o = s;
+  s += nZnT;
+  double* gbubu_o = s;
+  s += nZnT;
+  double* gbubv_o = s;
+  s += nZnT;
+  double* gbvbv_o = s;
+  s += nZnT;
+  double* P_avg = s;
+  s += nZnT;
+  double* P_wavg = s;
+  s += nZnT;
+  double* gbubu_avg = s;
+  s += nZnT;
+  double* gbubu_wavg = s;
+  s += nZnT;
+  double* gbvbv_avg = s;
+  s += nZnT;
+  double* gbvbv_wavg = s;
+  s += nZnT;
+  double* gbubv_avg = s;
+  s += nZnT;
+  double* gbubv_wavg = s;
+  s += nZnT;
+  // lambda-force radial-sweep scratch (carried inside half-grid point)
+  double* bsubu_i = s;
+  s += nZnT;
+  double* bsubv_i = s;
+  s += nZnT;
+  double* gvv_gsqrt_i = s;
+  s += nZnT;
+  double* guv_bsupu_i = s;
+  s += nZnT;
   double* armn_e = force + 0 * nH;
   double* armn_o = force + 1 * nH;
   double* azmn_e = force + 2 * nH;
@@ -139,17 +210,28 @@ __attribute__((noinline)) void LocalForce(const double* geom, double* work,
   double* czmn_e = force + 10 * nH;
   double* czmn_o = force + 11 * nH;
   vmecpp::ComputeMHDForceDensity(
-      r1e, r1o, rue, ruo, zue, zuo, z1o, rve, rvo, zve, zvo, r12, ru12, zu12, rs,
-      zs, tau, tp, gsqrt, bsupu, bsupv, c->sqrtSF, c->sqrtSH, P_i, rup_i, zup_i,
-      rsp_i, zsp_i, taup_i, gbubu_i, gbubv_i, gbvbv_i, P_o, rup_o, zup_o, rsp_o,
-      zsp_o, taup_o, gbubu_o, gbubv_o, gbvbv_o, P_avg, P_wavg, gbubu_avg,
+      r1e, r1o, rue, ruo, zue, zuo, z1o, rve, rvo, zve, zvo, r12, ru12, zu12,
+      rs, zs, tau, tp, gsqrt, bsupu, bsupv, c->sqrtSF, c->sqrtSH, P_i, rup_i,
+      zup_i, rsp_i, zsp_i, taup_i, gbubu_i, gbubv_i, gbvbv_i, P_o, rup_o, zup_o,
+      rsp_o, zsp_o, taup_o, gbubu_o, gbubv_o, gbvbv_o, P_avg, P_wavg, gbubu_avg,
       gbubu_wavg, gbvbv_avg, gbvbv_wavg, gbubv_avg, gbubv_wavg, c->deltaS, nZnT,
       /*nsMinF=*/0, 0, 0, nsH, /*jMaxRZ=*/nsH, c->lthreed, armn_e, armn_o,
       azmn_e, azmn_o, brmn_e, brmn_o, bzmn_e, bzmn_o, crmn_e, crmn_o, czmn_e,
       czmn_o);
+  // lambda force (blmn_e/o, clmn_e/o) from the shared kernel
+  double* blmn_e = force + 12 * nH;
+  double* blmn_o = force + 13 * nH;
+  double* clmn_e = force + 14 * nH;
+  double* clmn_o = force + 15 * nH;
+  vmecpp::ComputeHybridLambdaForce(
+      bsubu, bsubv, gvv, gsqrt, guv, bsupu, lue, luo, c->sqrtSH, c->sqrtSF,
+      c->radialBlending, c->lamscale, c->lthreed, nZnT, /*nsMinF=*/0,
+      /*nsMinF1=*/0, /*nsMinH=*/0, nsH, /*nsMaxFIncludingLcfs=*/nsH, bsubu_i,
+      bsubv_i, gvv_gsqrt_i, guv_bsupu_i, blmn_e, blmn_o, clmn_e, clmn_o);
 }
 
-// Scalar objective L = 0.5 ||force||^2; work and force are caller-owned scratch.
+// Scalar objective L = 0.5 ||force||^2; work and force are caller-owned
+// scratch.
 __attribute__((noinline)) double Loss(const double* geom, double* work,
                                       double* force, const Ctx* c) {
   LocalForce(geom, work, force, c);
@@ -167,11 +249,16 @@ int main() {
   c.nFull = (nsH + 1) * nZnT;
   c.nHalf = nsH * nZnT;
   c.deltaS = 0.1;
+  c.lamscale = 0.7;
   c.lthreed = true;
   std::mt19937 rng(3);
   std::uniform_real_distribution<double> d(0.5, 1.5), s(-1.0, 1.0);
-  std::vector<double> sqrtSF(nsH + 1), sqrtSH(nsH), chipH(nsH), presH(nsH);
-  for (int j = 0; j <= nsH; ++j) sqrtSF[j] = std::sqrt(0.05 + 0.9 * j / nsH);
+  std::vector<double> sqrtSF(nsH + 1), sqrtSH(nsH), chipH(nsH), presH(nsH),
+      radialBlending(nsH + 1);
+  for (int j = 0; j <= nsH; ++j) {
+    sqrtSF[j] = std::sqrt(0.05 + 0.9 * j / nsH);
+    radialBlending[j] = 0.3 + 0.4 * j / nsH;
+  }
   for (int j = 0; j < nsH; ++j) {
     sqrtSH[j] = std::sqrt(0.05 + 0.9 * (j + 0.5) / nsH);
     chipH[j] = 0.3 + 0.1 * j;
@@ -181,9 +268,10 @@ int main() {
   c.sqrtSH = sqrtSH.data();
   c.chipH = chipH.data();
   c.presH = presH.data();
+  c.radialBlending = radialBlending.data();
 
   const int nG = kGeomBlocks * c.nFull;
-  const int nW = 15 * c.nHalf + 26 * nZnT;
+  const int nW = 15 * c.nHalf + 30 * nZnT;
   const int nFc = kForceBlocks * c.nHalf;
   std::vector<double> geom(nG), v(nG);
   for (double& x : geom) x = d(rng);
@@ -206,17 +294,19 @@ int main() {
     gp[i] = geom[i] + h * v[i];
     gm[i] = geom[i] - h * v[i];
   }
-  const double dfd =
-      (Loss(gp.data(), w2.data(), f2.data(), &c) -
-       Loss(gm.data(), w2.data(), f2.data(), &c)) /
-      (2 * h);
-  const double drev = std::inner_product(grev.begin(), grev.end(), v.begin(), 0.0);
+  const double dfd = (Loss(gp.data(), w2.data(), f2.data(), &c) -
+                      Loss(gm.data(), w2.data(), f2.data(), &c)) /
+                     (2 * h);
+  const double drev =
+      std::inner_product(grev.begin(), grev.end(), v.begin(), 0.0);
   const double scale = std::fabs(dfd) + 1e-300;
 
-  printf("exact Hessian of VMEC local force map (composed 6 kernels)\n");
+  printf("exact Hessian of VMEC local force map (MHD + lambda kernels)\n");
   printf("  geom dofs=%d  force outputs=%d\n", nG, nFc);
-  printf("  reverse dL.v vs finite-diff : %.2e\n", std::fabs(drev - dfd) / scale);
-  printf("  forward dL.v vs finite-diff : %.2e\n", std::fabs(dfwd - dfd) / scale);
+  printf("  reverse dL.v vs finite-diff : %.2e\n",
+         std::fabs(drev - dfd) / scale);
+  printf("  forward dL.v vs finite-diff : %.2e\n",
+         std::fabs(dfwd - dfd) / scale);
   printf("  forward / reverse agreement : %.2e\n",
          std::fabs(dfwd - drev) / (std::fabs(drev) + 1e-300));
 
@@ -225,7 +315,8 @@ int main() {
   for (int r = 0; r < reps; ++r) {
     volatile double q = __enzyme_fwddiff<double>(
         (void*)Loss, enzyme_dup, geom.data(), v.data(), enzyme_dup, work.data(),
-        dwork.data(), enzyme_dup, force.data(), dforce.data(), enzyme_const, &c);
+        dwork.data(), enzyme_dup, force.data(), dforce.data(), enzyme_const,
+        &c);
     (void)q;
   }
   auto t1 = std::chrono::steady_clock::now();