diff --git a/.github/workflows/benchmarks.yaml b/.github/workflows/benchmarks.yaml index c2f3fdf4e..54326ec6a 100644 --- a/.github/workflows/benchmarks.yaml +++ b/.github/workflows/benchmarks.yaml @@ -36,7 +36,11 @@ jobs: pytest benchmarks/test_benchmarks.py --benchmark-json=benchmark_results.json - name: Compare benchmark result - if: github.event_name == 'pull_request' + # Skip the result upload/compare for fork PRs: their GITHUB_TOKEN is + # read-only, so comment-on-alert/auto-push hit 'Resource not accessible + # by integration'. The benchmarks still run above; only the write-back + # is skipped for forks. + if: github.event_name == 'pull_request' && github.event.pull_request.head.repo.full_name == github.repository uses: benchmark-action/github-action-benchmark@v1.21.0 with: tool: "pytest" diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml index a22e0cbdc..0f68ab6e9 100644 --- a/.github/workflows/tests.yaml +++ b/.github/workflows/tests.yaml @@ -50,15 +50,33 @@ jobs: run: | # install VMEC++ deps as well as VMEC2000 deps (we need to import VMEC2000 in a test) sudo apt-get update && sudo apt-get install -y build-essential cmake libnetcdf-dev liblapack-dev libomp-dev + - name: Cache the VMEC2000 wheel + if: ${{ matrix.os == 'ubuntu-22.04' && matrix.python-version == '3.10' }} + uses: actions/cache@v4 + with: + path: /tmp/vmec2000-wheel + # Keyed on the pinned VMEC2000 source commit: rebuild only when it changes. + key: vmec2000-728af8bd6c79-cp310-ubuntu22.04 - name: Also install VMEC2000 (only on Ubuntu 22.04) if: ${{ matrix.os == 'ubuntu-22.04' && matrix.python-version == '3.10' }} run: | - # mpi4py is needed for VMEC2000 - sudo apt-get install -y libopenmpi-dev + sudo apt-get install -y libopenmpi-dev libnetcdff-dev libscalapack-mpi-dev \ + libopenblas-dev ninja-build python -m pip install mpi4py - # custom wheel for VMEC2000, needed for some VMEC++/VMEC2000 compatibility tests - # NOTE: this wheel is only guaranteed to work on Ubuntu 22.04 - python -m pip install vmec@https://anaconda.org/eguiraud-pf/vmec/0.0.6/download/vmec-0.0.6-cp310-cp310-linux_x86_64.whl + if ! ls /tmp/vmec2000-wheel/vmec-*.whl >/dev/null 2>&1; then + # Build with the SYSTEM cmake + ninja; do NOT pip-install the + # cmake/ninja wheels (they shadow the system cmake that + # scikit-build-core's editable vmecpp rebuild records). + python -m pip install numpy scikit-build f90wrap setuptools wheel + git clone https://github.com/hiddenSymmetries/VMEC2000.git /tmp/VMEC2000 + git -C /tmp/VMEC2000 checkout 728af8bd6c796b36a0aa85fe298e507791e57c6e + cp /tmp/VMEC2000/cmake/machines/ubuntu.json /tmp/VMEC2000/cmake_config_file.json + LD_LIBRARY_PATH=/usr/lib/x86_64-linux-gnu \ + python -m pip wheel /tmp/VMEC2000 --no-build-isolation -w /tmp/vmec2000-wheel + fi + python -m pip install /tmp/vmec2000-wheel/vmec-*.whl + # fail loudly here if the binding is still broken, not in the test step + python -c "import vmec; print('VMEC2000 import OK')" - name: Install package run: | # on Ubuntu we would not need this, but on MacOS we need to point CMake to gfortran-14 and gcc-14 diff --git a/CMakeLists.txt b/CMakeLists.txt index 813c0fe9c..b4b64d23f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -66,10 +66,9 @@ find_package(LAPACK REQUIRED) FetchContent_Declare( abseil-cpp GIT_REPOSITORY https://github.com/abseil/abseil-cpp.git - # 20260107.1 LTS: required for Clang >= 21, where the 2024 pin fails to - # compile (absl::Nonnull SFINAE in absl/strings/ascii.cc). Clang is the - # compiler used for the Enzyme autodiff build. - GIT_TAG 20260107.1 + # 20260107.1 LTS: older abseil fails to compile under Clang >= 21 (the + # Enzyme build) on absl::Nonnull SFINAE in absl/strings/ascii.cc. + GIT_TAG 255c84dadd029fd8ad25c5efb5933e47beaa00c7 GIT_SHALLOW TRUE ) FetchContent_Declare( diff --git a/src/vmecpp/cpp/vmecpp/common/enzyme/local_force_hessian_test.cc b/src/vmecpp/cpp/vmecpp/common/enzyme/local_force_hessian_test.cc index 082d04030..84cf939fb 100644 --- a/src/vmecpp/cpp/vmecpp/common/enzyme/local_force_hessian_test.cc +++ b/src/vmecpp/cpp/vmecpp/common/enzyme/local_force_hessian_test.cc @@ -36,8 +36,8 @@ // Problem dimensions and the constant (non-differentiated) context. struct Ctx { - int nZnT, nsH; // half-grid surfaces; full-grid has nsH + 1 - int nFull, nHalf; // (nsH+1)*nZnT, nsH*nZnT + int nZnT, nsH; // half-grid surfaces; full-grid has nsH + 1 + int nFull, nHalf; // (nsH+1)*nZnT, nsH*nZnT const double* sqrtSF; // [nsH+1] const double* sqrtSH; // [nsH] const double* chipH; // [nsH] @@ -46,13 +46,13 @@ struct Ctx { bool lthreed; }; -// 16 geometry blocks (each nFull) packed into geom; 12 force blocks (each nHalf) -// in force; everything else is intermediate work sliced from work. +// 16 geometry blocks (each nFull) packed into geom; 12 force blocks (each +// nHalf) in force; everything else is intermediate work sliced from work. enum { kGeomBlocks = 16, kForceBlocks = 12 }; // g: geometry -> force density, composing the six shared kernels. __attribute__((noinline)) void LocalForce(const double* geom, double* work, - double* force, const Ctx* c) { + double* force, const Ctx* c) { const int nF = c->nFull; const int nH = c->nHalf; const int nZnT = c->nZnT, nsH = c->nsH; @@ -75,30 +75,45 @@ __attribute__((noinline)) void LocalForce(const double* geom, double* work, const double* lvo = geom + 15 * nF; // half-grid intermediates from work double* p = work; - double* r12 = p; p += nH; - double* ru12 = p; p += nH; - double* zu12 = p; p += nH; - double* rs = p; p += nH; - double* zs = p; p += nH; - double* tau = p; p += nH; - double* gsqrt = p; p += nH; - double* guu = p; p += nH; - double* guv = p; p += nH; - double* gvv = p; p += nH; - double* bsupu = p; p += nH; - double* bsupv = p; p += nH; - double* bsubu = p; p += nH; - double* bsubv = p; p += nH; - double* tp = p; p += nH; + double* r12 = p; + p += nH; + double* ru12 = p; + p += nH; + double* zu12 = p; + p += nH; + double* rs = p; + p += nH; + double* zs = p; + p += nH; + double* tau = p; + p += nH; + double* gsqrt = p; + p += nH; + double* guu = p; + p += nH; + double* guv = p; + p += nH; + double* gvv = p; + p += nH; + double* bsupu = p; + p += nH; + double* bsupv = p; + p += nH; + double* bsubu = p; + p += nH; + double* bsubv = p; + p += nH; + double* tp = p; + p += nH; // per-nZnT scratch for the force kernel (26 blocks) double* sc = p; // 26 * nZnT - vmecpp::ComputeHalfGridJacobian(r1e, r1o, z1e, z1o, rue, ruo, zue, zuo, c->sqrtSH, - c->deltaS, /*dSHalfDsInterp=*/0.25, nZnT, 0, 0, nsH, - r12, ru12, zu12, rs, zs, tau); - vmecpp::ComputeMetricElements(r1e, r1o, rue, ruo, zue, zuo, rve, rvo, zve, zvo, - tau, r12, c->sqrtSF, c->sqrtSH, c->lthreed, nZnT, - 0, 0, nsH, gsqrt, guu, guv, gvv); + vmecpp::ComputeHalfGridJacobian( + r1e, r1o, z1e, z1o, rue, ruo, zue, zuo, c->sqrtSH, c->deltaS, + /*dSHalfDsInterp=*/0.25, nZnT, 0, 0, nsH, r12, ru12, zu12, rs, zs, tau); + vmecpp::ComputeMetricElements(r1e, r1o, rue, ruo, zue, zuo, rve, rvo, zve, + zvo, tau, r12, c->sqrtSF, c->sqrtSH, c->lthreed, + nZnT, 0, 0, nsH, gsqrt, guu, guv, gvv); vmecpp::ComputeBsupContra(lue, luo, lve, lvo, gsqrt, c->sqrtSH, c->lthreed, nZnT, 0, 0, nsH, bsupu, bsupv); for (int jH = 0; jH < nsH; ++jH) { @@ -113,19 +128,58 @@ __attribute__((noinline)) void LocalForce(const double* geom, double* work, for (int kl = 0; kl < nZnT; ++kl) tp[jH * nZnT + kl] += c->presH[jH]; } double* s = sc; - double* P_i = s; s += nZnT; double* rup_i = s; s += nZnT; - double* zup_i = s; s += nZnT; double* rsp_i = s; s += nZnT; - double* zsp_i = s; s += nZnT; double* taup_i = s; s += nZnT; - double* gbubu_i = s; s += nZnT; double* gbubv_i = s; s += nZnT; - double* gbvbv_i = s; s += nZnT; double* P_o = s; s += nZnT; - double* rup_o = s; s += nZnT; double* zup_o = s; s += nZnT; - double* rsp_o = s; s += nZnT; double* zsp_o = s; s += nZnT; - double* taup_o = s; s += nZnT; double* gbubu_o = s; s += nZnT; - double* gbubv_o = s; s += nZnT; double* gbvbv_o = s; s += nZnT; - double* P_avg = s; s += nZnT; double* P_wavg = s; s += nZnT; - double* gbubu_avg = s; s += nZnT; double* gbubu_wavg = s; s += nZnT; - double* gbvbv_avg = s; s += nZnT; double* gbvbv_wavg = s; s += nZnT; - double* gbubv_avg = s; s += nZnT; double* gbubv_wavg = s; s += nZnT; + double* P_i = s; + s += nZnT; + double* rup_i = s; + s += nZnT; + double* zup_i = s; + s += nZnT; + double* rsp_i = s; + s += nZnT; + double* zsp_i = s; + s += nZnT; + double* taup_i = s; + s += nZnT; + double* gbubu_i = s; + s += nZnT; + double* gbubv_i = s; + s += nZnT; + double* gbvbv_i = s; + s += nZnT; + double* P_o = s; + s += nZnT; + double* rup_o = s; + s += nZnT; + double* zup_o = s; + s += nZnT; + double* rsp_o = s; + s += nZnT; + double* zsp_o = s; + s += nZnT; + double* taup_o = s; + s += nZnT; + double* gbubu_o = s; + s += nZnT; + double* gbubv_o = s; + s += nZnT; + double* gbvbv_o = s; + s += nZnT; + double* P_avg = s; + s += nZnT; + double* P_wavg = s; + s += nZnT; + double* gbubu_avg = s; + s += nZnT; + double* gbubu_wavg = s; + s += nZnT; + double* gbvbv_avg = s; + s += nZnT; + double* gbvbv_wavg = s; + s += nZnT; + double* gbubv_avg = s; + s += nZnT; + double* gbubv_wavg = s; + s += nZnT; double* armn_e = force + 0 * nH; double* armn_o = force + 1 * nH; double* azmn_e = force + 2 * nH; @@ -139,17 +193,18 @@ __attribute__((noinline)) void LocalForce(const double* geom, double* work, double* czmn_e = force + 10 * nH; double* czmn_o = force + 11 * nH; vmecpp::ComputeMHDForceDensity( - r1e, r1o, rue, ruo, zue, zuo, z1o, rve, rvo, zve, zvo, r12, ru12, zu12, rs, - zs, tau, tp, gsqrt, bsupu, bsupv, c->sqrtSF, c->sqrtSH, P_i, rup_i, zup_i, - rsp_i, zsp_i, taup_i, gbubu_i, gbubv_i, gbvbv_i, P_o, rup_o, zup_o, rsp_o, - zsp_o, taup_o, gbubu_o, gbubv_o, gbvbv_o, P_avg, P_wavg, gbubu_avg, + r1e, r1o, rue, ruo, zue, zuo, z1o, rve, rvo, zve, zvo, r12, ru12, zu12, + rs, zs, tau, tp, gsqrt, bsupu, bsupv, c->sqrtSF, c->sqrtSH, P_i, rup_i, + zup_i, rsp_i, zsp_i, taup_i, gbubu_i, gbubv_i, gbvbv_i, P_o, rup_o, zup_o, + rsp_o, zsp_o, taup_o, gbubu_o, gbubv_o, gbvbv_o, P_avg, P_wavg, gbubu_avg, gbubu_wavg, gbvbv_avg, gbvbv_wavg, gbubv_avg, gbubv_wavg, c->deltaS, nZnT, /*nsMinF=*/0, 0, 0, nsH, /*jMaxRZ=*/nsH, c->lthreed, armn_e, armn_o, azmn_e, azmn_o, brmn_e, brmn_o, bzmn_e, bzmn_o, crmn_e, crmn_o, czmn_e, czmn_o); } -// Scalar objective L = 0.5 ||force||^2; work and force are caller-owned scratch. +// Scalar objective L = 0.5 ||force||^2; work and force are caller-owned +// scratch. __attribute__((noinline)) double Loss(const double* geom, double* work, double* force, const Ctx* c) { LocalForce(geom, work, force, c); @@ -206,17 +261,19 @@ int main() { gp[i] = geom[i] + h * v[i]; gm[i] = geom[i] - h * v[i]; } - const double dfd = - (Loss(gp.data(), w2.data(), f2.data(), &c) - - Loss(gm.data(), w2.data(), f2.data(), &c)) / - (2 * h); - const double drev = std::inner_product(grev.begin(), grev.end(), v.begin(), 0.0); + const double dfd = (Loss(gp.data(), w2.data(), f2.data(), &c) - + Loss(gm.data(), w2.data(), f2.data(), &c)) / + (2 * h); + const double drev = + std::inner_product(grev.begin(), grev.end(), v.begin(), 0.0); const double scale = std::fabs(dfd) + 1e-300; printf("exact Hessian of VMEC local force map (composed 6 kernels)\n"); printf(" geom dofs=%d force outputs=%d\n", nG, nFc); - printf(" reverse dL.v vs finite-diff : %.2e\n", std::fabs(drev - dfd) / scale); - printf(" forward dL.v vs finite-diff : %.2e\n", std::fabs(dfwd - dfd) / scale); + printf(" reverse dL.v vs finite-diff : %.2e\n", + std::fabs(drev - dfd) / scale); + printf(" forward dL.v vs finite-diff : %.2e\n", + std::fabs(dfwd - dfd) / scale); printf(" forward / reverse agreement : %.2e\n", std::fabs(dfwd - drev) / (std::fabs(drev) + 1e-300)); @@ -225,7 +282,8 @@ int main() { for (int r = 0; r < reps; ++r) { volatile double q = __enzyme_fwddiff( (void*)Loss, enzyme_dup, geom.data(), v.data(), enzyme_dup, work.data(), - dwork.data(), enzyme_dup, force.data(), dforce.data(), enzyme_const, &c); + dwork.data(), enzyme_dup, force.data(), dforce.data(), enzyme_const, + &c); (void)q; } auto t1 = std::chrono::steady_clock::now(); diff --git a/src/vmecpp/cpp/vmecpp/vmec/ideal_mhd_model/BUILD.bazel b/src/vmecpp/cpp/vmecpp/vmec/ideal_mhd_model/BUILD.bazel index 5fca72477..e8df6160b 100644 --- a/src/vmecpp/cpp/vmecpp/vmec/ideal_mhd_model/BUILD.bazel +++ b/src/vmecpp/cpp/vmecpp/vmec/ideal_mhd_model/BUILD.bazel @@ -82,7 +82,16 @@ cc_test( cc_library( name = "ideal_mhd_model", srcs = ["ideal_mhd_model.cc"], - hdrs = ["ideal_mhd_model.h", "dft_data.h"], +# The shared force-chain kernels and the autodiff composition/JVP header are + # header-only and included by ideal_mhd_model.cc; declare whatever exists on + # this branch so the bazel sandbox can see them. The Enzyme JVP entry point + # (exact_force_jvp.cc) is built only by the CMake VMECPP_ENABLE_ENZYME path, + # not by bazel; its use in ideal_mhd_model.cc is guarded by that define. + hdrs = ["ideal_mhd_model.h", "dft_data.h"] + glob([ + "*_kernel.h", + "local_force_composition.h", + "exact_force_jvp.h", + ]), visibility = ["//visibility:public"], deps = [ ":dft_toroidal", diff --git a/src/vmecpp/cpp/vmecpp/vmec/ideal_mhd_model/constraint_force_kernel.h b/src/vmecpp/cpp/vmecpp/vmec/ideal_mhd_model/constraint_force_kernel.h new file mode 100644 index 000000000..b5078a6f1 --- /dev/null +++ b/src/vmecpp/cpp/vmecpp/vmec/ideal_mhd_model/constraint_force_kernel.h @@ -0,0 +1,68 @@ +// SPDX-FileCopyrightText: 2024-present Proxima Fusion GmbH +// +// +// SPDX-License-Identifier: MIT +#ifndef VMECPP_VMEC_IDEAL_MHD_MODEL_CONSTRAINT_FORCE_KERNEL_H_ +#define VMECPP_VMEC_IDEAL_MHD_MODEL_CONSTRAINT_FORCE_KERNEL_H_ + +#include + +namespace vmecpp { + +// The two local (non-transform) pieces of VMEC's spectral-condensation +// constraint force. The Fourier-space bandpass between them is the separate +// shared free function deAliasConstraintForce. All full-grid buffers are +// indexed (jF-nsMinF)*nZnT except sqrtSF, which is indexed (jF-nsMinF1). +// Allocation-free, shared between the solver and the Enzyme autodiff path. + +// Effective constraint force gConEff = (rCon - rCon0) ru + (zCon - zCon0) zu. +// No constraint on the axis (it has no poloidal angle), so the axis surface is +// skipped, matching deAliasConstraintForce. +inline void ComputeEffectiveConstraintForce( + const double* rCon, const double* rCon0, const double* zCon, + const double* zCon0, const double* ruFull, const double* zuFull, int nZnT, + int nsMinF, int nsMaxFIncludingLcfs, double* gConEff) { + int jMin = 0; + if (nsMinF == 0) { + jMin = 1; + } + for (int jF = std::max(jMin, nsMinF); jF < nsMaxFIncludingLcfs; ++jF) { + for (int kl = 0; kl < nZnT; ++kl) { + int idx_kl = (jF - nsMinF) * nZnT + kl; + gConEff[idx_kl] = (rCon[idx_kl] - rCon0[idx_kl]) * ruFull[idx_kl] + + (zCon[idx_kl] - zCon0[idx_kl]) * zuFull[idx_kl]; + } // kl + } // jF +} + +// Add the bandpass-filtered constraint force gCon back into the MHD R/Z forces +// (brmn, bzmn) and write the constraint-force outputs (frcon, fzcon). +inline void AddConstraintForces( + const double* rCon, const double* rCon0, const double* zCon, + const double* zCon0, const double* ruFull, const double* zuFull, + const double* gCon, const double* sqrtSF, int nZnT, int nsMinF, int nsMinF1, + int nsMaxF, double* brmn_e, double* brmn_o, double* bzmn_e, double* bzmn_o, + double* frcon_e, double* frcon_o, double* fzcon_e, double* fzcon_o) { + for (int jF = nsMinF; jF < nsMaxF; ++jF) { + for (int kl = 0; kl < nZnT; ++kl) { + int idx_kl = (jF - nsMinF) * nZnT + kl; + + double brcon = (rCon[idx_kl] - rCon0[idx_kl]) * gCon[idx_kl]; + double bzcon = (zCon[idx_kl] - zCon0[idx_kl]) * gCon[idx_kl]; + + brmn_e[idx_kl] += brcon; + bzmn_e[idx_kl] += bzcon; + brmn_o[idx_kl] += brcon * sqrtSF[jF - nsMinF1]; + bzmn_o[idx_kl] += bzcon * sqrtSF[jF - nsMinF1]; + + frcon_e[idx_kl] = ruFull[idx_kl] * gCon[idx_kl]; + fzcon_e[idx_kl] = zuFull[idx_kl] * gCon[idx_kl]; + frcon_o[idx_kl] = frcon_e[idx_kl] * sqrtSF[jF - nsMinF1]; + fzcon_o[idx_kl] = fzcon_e[idx_kl] * sqrtSF[jF - nsMinF1]; + } // kl + } // jF +} + +} // namespace vmecpp + +#endif // VMECPP_VMEC_IDEAL_MHD_MODEL_CONSTRAINT_FORCE_KERNEL_H_ diff --git a/src/vmecpp/cpp/vmecpp/vmec/ideal_mhd_model/dft_toroidal.cc b/src/vmecpp/cpp/vmecpp/vmec/ideal_mhd_model/dft_toroidal.cc index fb3a6e2db..279386d9e 100644 --- a/src/vmecpp/cpp/vmecpp/vmec/ideal_mhd_model/dft_toroidal.cc +++ b/src/vmecpp/cpp/vmecpp/vmec/ideal_mhd_model/dft_toroidal.cc @@ -34,6 +34,11 @@ void ForcesToFourier3DSymmFastPoloidal( // axis lambda stays zero (no contribution from any m) const int jMinL = 1; + // Reused scratch for the assembled R/Z forces: sized on first use, so the + // inner loop allocates nothing. Per-thread safe via the stack frame, since + // each OpenMP thread runs this on its own radial slice. + Eigen::VectorXd tempR_seg, tempZ_seg; + for (int jF = rp.nsMinF; jF < jMaxRZ; ++jF) { const int mmax = jF == 0 ? 1 : s.mpol; for (int m = 0; m < mmax; ++m) { @@ -93,10 +98,9 @@ void ForcesToFourier3DSymmFastPoloidal( double zmksc_n = -czmn_seg.dot(sinmui_seg); // Assemble effective R and Z forces from MHD and spectral condensation - // contributions. Materialize to avoid re-evaluation in each dot product, - // reusing per-thread scratch so the inner loop allocates nothing (and - // stays Enzyme-differentiable). Same arithmetic as a fresh .eval(). - thread_local Eigen::VectorXd tempR_seg, tempZ_seg; + // contributions. Materialize to avoid re-evaluation in each dot + // product, reusing the function-scope scratch so the inner loop + // allocates nothing. Same arithmetic as a fresh .eval(). tempR_seg.noalias() = armn_seg + xmpq[m] * frcon_seg; tempZ_seg.noalias() = azmn_seg + xmpq[m] * fzcon_seg; diff --git a/src/vmecpp/cpp/vmecpp/vmec/ideal_mhd_model/ideal_mhd_model.cc b/src/vmecpp/cpp/vmecpp/vmec/ideal_mhd_model/ideal_mhd_model.cc index 8fc57ad30..1232d6e41 100644 --- a/src/vmecpp/cpp/vmecpp/vmec/ideal_mhd_model/ideal_mhd_model.cc +++ b/src/vmecpp/cpp/vmecpp/vmec/ideal_mhd_model/ideal_mhd_model.cc @@ -22,6 +22,7 @@ #include "vmecpp/vmec/handover_storage/handover_storage.h" #include "vmecpp/vmec/ideal_mhd_model/bco_kernel.h" #include "vmecpp/vmec/ideal_mhd_model/bcontra_kernel.h" +#include "vmecpp/vmec/ideal_mhd_model/constraint_force_kernel.h" #include "vmecpp/vmec/ideal_mhd_model/jacobian_kernel.h" #include "vmecpp/vmec/ideal_mhd_model/lambda_force_kernel.h" #include "vmecpp/vmec/ideal_mhd_model/metric_kernel.h" @@ -1607,8 +1608,8 @@ void IdealMhdModel::hybridLambdaForce() { #pragma omp barrier #endif // _OPENMP - // Lambda force on the full grid via the shared kernel (lambda_force_kernel.h), - // also used by the Enzyme autodiff path. + // Lambda force on the full grid via the shared kernel + // (lambda_force_kernel.h), also used by the Enzyme autodiff path. ComputeHybridLambdaForce( bsubu.data(), bsubv.data(), gvv.data(), gsqrt.data(), guv.data(), bsupu.data(), lu_e.data(), lu_o.data(), m_p_.sqrtSH.data(), @@ -2043,21 +2044,12 @@ absl::Status IdealMhdModel::constraintForceMultiplier() { } void IdealMhdModel::effectiveConstraintForce() { - // gConEff - - // no constraint on axis --> has no poloidal angle - int jMin = 0; - if (r_.nsMinF == 0) { - jMin = 1; - } - - for (int jF = std::max(jMin, r_.nsMinF); jF < r_.nsMaxFIncludingLcfs; ++jF) { - for (int kl = 0; kl < s_.nZnT; ++kl) { - int idx_kl = (jF - r_.nsMinF) * s_.nZnT + kl; - gConEff[idx_kl] = (rCon[idx_kl] - rCon0[idx_kl]) * ruFull[idx_kl] + - (zCon[idx_kl] - zCon0[idx_kl]) * zuFull[idx_kl]; - } // kl - } // jF + // gConEff via the shared kernel (constraint_force_kernel.h), also used by the + // Enzyme autodiff path. + ComputeEffectiveConstraintForce(rCon.data(), rCon0.data(), zCon.data(), + zCon0.data(), ruFull.data(), zuFull.data(), + s_.nZnT, r_.nsMinF, r_.nsMaxFIncludingLcfs, + gConEff.data()); } // perform Fourier-space bandpass filtering of constraint force @@ -2088,24 +2080,15 @@ void IdealMhdModel::assembleTotalForces() { } } - for (int jF = r_.nsMinF; jF < r_.nsMaxF; ++jF) { - for (int kl = 0; kl < s_.nZnT; ++kl) { - int idx_kl = (jF - r_.nsMinF) * s_.nZnT + kl; - - double brcon = (rCon[idx_kl] - rCon0[idx_kl]) * gCon[idx_kl]; - double bzcon = (zCon[idx_kl] - zCon0[idx_kl]) * gCon[idx_kl]; - - brmn_e[idx_kl] += brcon; - bzmn_e[idx_kl] += bzcon; - brmn_o[idx_kl] += brcon * m_p_.sqrtSF[jF - r_.nsMinF1]; - bzmn_o[idx_kl] += bzcon * m_p_.sqrtSF[jF - r_.nsMinF1]; - - frcon_e[idx_kl] = ruFull[idx_kl] * gCon[idx_kl]; - fzcon_e[idx_kl] = zuFull[idx_kl] * gCon[idx_kl]; - frcon_o[idx_kl] = frcon_e[idx_kl] * m_p_.sqrtSF[jF - r_.nsMinF1]; - fzcon_o[idx_kl] = fzcon_e[idx_kl] * m_p_.sqrtSF[jF - r_.nsMinF1]; - } - } + // add the bandpass-filtered constraint force to the MHD R/Z forces and write + // the constraint outputs via the shared kernel (constraint_force_kernel.h), + // also used by the Enzyme autodiff path. + AddConstraintForces(rCon.data(), rCon0.data(), zCon.data(), zCon0.data(), + ruFull.data(), zuFull.data(), gCon.data(), + m_p_.sqrtSF.data(), s_.nZnT, r_.nsMinF, r_.nsMinF1, + r_.nsMaxF, brmn_e.data(), brmn_o.data(), bzmn_e.data(), + bzmn_o.data(), frcon_e.data(), frcon_o.data(), + fzcon_e.data(), fzcon_o.data()); } void IdealMhdModel::forcesToFourier(FourierForces& m_physical_f) { diff --git a/src/vmecpp/cpp/vmecpp/vmec/ideal_mhd_model/jacobian_kernel.h b/src/vmecpp/cpp/vmecpp/vmec/ideal_mhd_model/jacobian_kernel.h index 11cc24ecb..79ce3a39c 100644 --- a/src/vmecpp/cpp/vmecpp/vmec/ideal_mhd_model/jacobian_kernel.h +++ b/src/vmecpp/cpp/vmecpp/vmec/ideal_mhd_model/jacobian_kernel.h @@ -23,11 +23,14 @@ namespace vmecpp { // half-grid; sqrtSH is indexed jH - nsMinH. The half-grid point jH sits between // full-grid surfaces jH (inside) and jH + 1 (outside). inline void ComputeHalfGridJacobian( - const double* r1e, const double* r1o, const double* z1e, const double* z1o, - const double* rue, const double* ruo, const double* zue, const double* zuo, - const double* sqrtSH, double deltaS, double dSHalfDsInterp, int nZnT, - int nsMinF1, int nsMinH, int nsMaxH, double* r12, double* ru12, - double* zu12, double* rs, double* zs, double* tau) { + const double* __restrict r1e, const double* __restrict r1o, + const double* __restrict z1e, const double* __restrict z1o, + const double* __restrict rue, const double* __restrict ruo, + const double* __restrict zue, const double* __restrict zuo, + const double* __restrict sqrtSH, double deltaS, double dSHalfDsInterp, + int nZnT, int nsMinF1, int nsMinH, int nsMaxH, double* __restrict r12, + double* __restrict ru12, double* __restrict zu12, double* __restrict rs, + double* __restrict zs, double* __restrict tau) { for (int jH = nsMinH; jH < nsMaxH; ++jH) { const double sH = sqrtSH[jH - nsMinH]; for (int kl = 0; kl < nZnT; ++kl) { diff --git a/src/vmecpp/cpp/vmecpp/vmec/ideal_mhd_model/lambda_force_kernel.h b/src/vmecpp/cpp/vmecpp/vmec/ideal_mhd_model/lambda_force_kernel.h index be7d54599..105df3730 100644 --- a/src/vmecpp/cpp/vmecpp/vmec/ideal_mhd_model/lambda_force_kernel.h +++ b/src/vmecpp/cpp/vmecpp/vmec/ideal_mhd_model/lambda_force_kernel.h @@ -9,11 +9,11 @@ namespace vmecpp { // Hybrid lambda force on the full grid: blmn (and clmn in 3D). The covariant // field bsubv is interpolated from the half grid two ways (a plain average and -// an alternative from gvv/gsqrt times lambda plus, in 3D, guv*bsupu) and blended -// with the radialBlending profile. The radial sweep carries the inside half-grid -// point in scratch (bsubu_i, bsubv_i, gvv_gsqrt_i, guv_bsupu_i, each nZnT) and -// shifts it outward each surface. Allocation-free over flat buffers, shared -// between IdealMhdModel::hybridLambdaForce and the Enzyme autodiff path. +// an alternative from gvv/gsqrt times lambda plus, in 3D, guv*bsupu) and +// blended with the radialBlending profile. The radial sweep carries the inside +// half-grid point in scratch (bsubu_i, bsubv_i, gvv_gsqrt_i, guv_bsupu_i, each +// nZnT) and shifts it outward each surface. Allocation-free over flat buffers, +// shared between IdealMhdModel::hybridLambdaForce and the Enzyme autodiff path. // // Half-grid inputs (index (jH-nsMinH)*nZnT): bsubu, bsubv, gvv, gsqrt, guv, // bsupu. Full-grid lambda (index (jF-nsMinF1)*nZnT): lu_e, lu_o. Profiles: @@ -72,8 +72,8 @@ inline void ComputeHybridLambdaForce( } } - double gvv_gsqrt_lu_e = - 0.5 * (gvv_gsqrt_i[kl] + gvv_gsqrt_o) * lu_e[(jF - nsMinF1) * nZnT + kl]; + double gvv_gsqrt_lu_e = 0.5 * (gvv_gsqrt_i[kl] + gvv_gsqrt_o) * + lu_e[(jF - nsMinF1) * nZnT + kl]; double gvv_gsqrt_lu_o = 0.5 * (gvv_gsqrt_i[kl] * sqrtSHi + gvv_gsqrt_o * sqrtSHo) * lu_o[(jF - nsMinF1) * nZnT + kl]; diff --git a/src/vmecpp/cpp/vmecpp/vmec/ideal_mhd_model/metric_kernel.h b/src/vmecpp/cpp/vmecpp/vmec/ideal_mhd_model/metric_kernel.h index ede5caacd..d014ec606 100644 --- a/src/vmecpp/cpp/vmecpp/vmec/ideal_mhd_model/metric_kernel.h +++ b/src/vmecpp/cpp/vmecpp/vmec/ideal_mhd_model/metric_kernel.h @@ -14,12 +14,16 @@ namespace vmecpp { // IdealMhdModel::computeMetricElements and the Enzyme autodiff path. Same // indexing conventions as jacobian_kernel.h. sqrtSF is indexed jF - nsMinF1. inline void ComputeMetricElements( - const double* r1e, const double* r1o, const double* rue, const double* ruo, - const double* zue, const double* zuo, const double* rve, const double* rvo, - const double* zve, const double* zvo, const double* tau, const double* r12, - const double* sqrtSF, const double* sqrtSH, bool lthreed, int nZnT, - int nsMinF1, int nsMinH, int nsMaxH, double* gsqrt, double* guu, - double* guv, double* gvv) { + const double* __restrict r1e, const double* __restrict r1o, + const double* __restrict rue, const double* __restrict ruo, + const double* __restrict zue, const double* __restrict zuo, + const double* __restrict rve, const double* __restrict rvo, + const double* __restrict zve, const double* __restrict zvo, + const double* __restrict tau, const double* __restrict r12, + const double* __restrict sqrtSF, const double* __restrict sqrtSH, + bool lthreed, int nZnT, int nsMinF1, int nsMinH, int nsMaxH, + double* __restrict gsqrt, double* __restrict guu, double* __restrict guv, + double* __restrict gvv) { for (int jH = nsMinH; jH < nsMaxH; ++jH) { const double sF_i = sqrtSF[jH - nsMinF1] * sqrtSF[jH - nsMinF1]; const double sF_o = sqrtSF[jH + 1 - nsMinF1] * sqrtSF[jH + 1 - nsMinF1]; diff --git a/tests/test_simsopt_compat.py b/tests/test_simsopt_compat.py index 609dffea2..1443d8cbc 100644 --- a/tests/test_simsopt_compat.py +++ b/tests/test_simsopt_compat.py @@ -303,6 +303,11 @@ def test_ensure_vmec2000_input_from_vmecpp_input(): if varname[1:-1] == "axis_": # these are called differently in VMEC2000, e.g. raxis_c -> raxis_cc varname_vmec2000 = f"{varname[:-1]}c{varname[-1]}" + if not hasattr(vmec2000.indata, varname_vmec2000): + # vmecpp-only field (e.g. free_boundary_method) with no counterpart + # in the legacy VMEC2000 INDATA namelist; not part of the common + # subset under test. + continue vmec2000_var = getattr(vmec2000.indata, varname_vmec2000) if vmecpp_var is None: