Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion .github/workflows/benchmarks.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,11 @@ jobs:
pytest benchmarks/test_benchmarks.py --benchmark-json=benchmark_results.json
- name: Compare benchmark result
if: github.event_name == 'pull_request'
# Skip the result upload/compare for fork PRs: their GITHUB_TOKEN is
# read-only, so comment-on-alert/auto-push hit 'Resource not accessible
# by integration'. The benchmarks still run above; only the write-back
# is skipped for forks.
if: github.event_name == 'pull_request' && github.event.pull_request.head.repo.full_name == github.repository
uses: benchmark-action/github-action-benchmark@v1.21.0
with:
tool: "pytest"
Expand Down
28 changes: 23 additions & 5 deletions .github/workflows/tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -50,15 +50,33 @@ jobs:
run: |
# install VMEC++ deps as well as VMEC2000 deps (we need to import VMEC2000 in a test)
sudo apt-get update && sudo apt-get install -y build-essential cmake libnetcdf-dev liblapack-dev libomp-dev
- name: Cache the VMEC2000 wheel
if: ${{ matrix.os == 'ubuntu-22.04' && matrix.python-version == '3.10' }}
uses: actions/cache@v4
with:
path: /tmp/vmec2000-wheel
# Keyed on the pinned VMEC2000 source commit: rebuild only when it changes.
key: vmec2000-728af8bd6c79-cp310-ubuntu22.04
- name: Also install VMEC2000 (only on Ubuntu 22.04)
if: ${{ matrix.os == 'ubuntu-22.04' && matrix.python-version == '3.10' }}
run: |
# mpi4py is needed for VMEC2000
sudo apt-get install -y libopenmpi-dev
sudo apt-get install -y libopenmpi-dev libnetcdff-dev libscalapack-mpi-dev \
libopenblas-dev ninja-build
python -m pip install mpi4py
# custom wheel for VMEC2000, needed for some VMEC++/VMEC2000 compatibility tests
# NOTE: this wheel is only guaranteed to work on Ubuntu 22.04
python -m pip install vmec@https://anaconda.org/eguiraud-pf/vmec/0.0.6/download/vmec-0.0.6-cp310-cp310-linux_x86_64.whl
if ! ls /tmp/vmec2000-wheel/vmec-*.whl >/dev/null 2>&1; then
# Build with the SYSTEM cmake + ninja; do NOT pip-install the
# cmake/ninja wheels (they shadow the system cmake that
# scikit-build-core's editable vmecpp rebuild records).
python -m pip install numpy scikit-build f90wrap setuptools wheel
git clone https://github.com/hiddenSymmetries/VMEC2000.git /tmp/VMEC2000
git -C /tmp/VMEC2000 checkout 728af8bd6c796b36a0aa85fe298e507791e57c6e
cp /tmp/VMEC2000/cmake/machines/ubuntu.json /tmp/VMEC2000/cmake_config_file.json
LD_LIBRARY_PATH=/usr/lib/x86_64-linux-gnu \
python -m pip wheel /tmp/VMEC2000 --no-build-isolation -w /tmp/vmec2000-wheel
fi
python -m pip install /tmp/vmec2000-wheel/vmec-*.whl
# fail loudly here if the binding is still broken, not in the test step
python -c "import vmec; print('VMEC2000 import OK')"
- name: Install package
run: |
# on Ubuntu we would not need this, but on MacOS we need to point CMake to gfortran-14 and gcc-14
Expand Down
7 changes: 3 additions & 4 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -66,10 +66,9 @@ find_package(LAPACK REQUIRED)
FetchContent_Declare(
abseil-cpp
GIT_REPOSITORY https://github.com/abseil/abseil-cpp.git
# 20260107.1 LTS: required for Clang >= 21, where the 2024 pin fails to
# compile (absl::Nonnull SFINAE in absl/strings/ascii.cc). Clang is the
# compiler used for the Enzyme autodiff build.
GIT_TAG 20260107.1
# 20260107.1 LTS: older abseil fails to compile under Clang >= 21 (the
# Enzyme build) on absl::Nonnull SFINAE in absl/strings/ascii.cc.
GIT_TAG 255c84dadd029fd8ad25c5efb5933e47beaa00c7
GIT_SHALLOW TRUE
)
FetchContent_Declare(
Expand Down
225 changes: 158 additions & 67 deletions src/vmecpp/cpp/vmecpp/common/enzyme/local_force_hessian_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,18 @@

// Exact Hessian of VMEC's local force map via composed autodiff.
//
// The six shared force-chain kernels (Jacobian, metric, B^contra, B_cov,
// magnetic pressure, MHD force density) compose into the local map
// The shared force-chain kernels (Jacobian, metric, B^contra, B_cov, magnetic
// pressure, MHD force density, and the hybrid lambda force) compose into the
// local map
// g : real-space geometry -> real-space force density,
// the nonlinear core of VMEC's force. The full MHD force is Tᵀ . g . T with the
// the nonlinear core of VMEC's force. The full force is Tᵀ . g . T with the
// linear spectral transforms T, Tᵀ; the exact force Hessian-vector product is
// therefore Tᵀ . J_g . T . v, and J_g is what Enzyme provides here.
// therefore Tᵀ . J_g . T . v, and J_g is what Enzyme provides here. The
// remaining augmented term, the spectral-condensation constraint force, also
// carries a linear Fourier bandpass and is validated end-to-end against the
// finite-difference HVP in the pybind exact-HVP path.
//
// This test composes the six production kernels over flat buffers and takes the
// This test composes the production kernels over flat buffers and takes the
// Jacobian of g by forward and reverse mode, checks both against central finite
// differences and against each other, and times one forward Jacobian-vector
// pass against the two force evaluations a finite-difference Hessian-vector
Expand All @@ -30,29 +34,33 @@
#include "vmecpp/vmec/ideal_mhd_model/bco_kernel.h"
#include "vmecpp/vmec/ideal_mhd_model/bcontra_kernel.h"
#include "vmecpp/vmec/ideal_mhd_model/jacobian_kernel.h"
#include "vmecpp/vmec/ideal_mhd_model/lambda_force_kernel.h"
#include "vmecpp/vmec/ideal_mhd_model/metric_kernel.h"
#include "vmecpp/vmec/ideal_mhd_model/mhdforce_kernel.h"
#include "vmecpp/vmec/ideal_mhd_model/pressure_kernel.h"

// Problem dimensions and the constant (non-differentiated) context.
struct Ctx {
int nZnT, nsH; // half-grid surfaces; full-grid has nsH + 1
int nFull, nHalf; // (nsH+1)*nZnT, nsH*nZnT
const double* sqrtSF; // [nsH+1]
const double* sqrtSH; // [nsH]
const double* chipH; // [nsH]
const double* presH; // [nsH]
int nZnT, nsH; // half-grid surfaces; full-grid has nsH + 1
int nFull, nHalf; // (nsH+1)*nZnT, nsH*nZnT
const double* sqrtSF; // [nsH+1]
const double* sqrtSH; // [nsH]
const double* chipH; // [nsH]
const double* presH; // [nsH]
const double* radialBlending; // [nsH+1]
double deltaS;
double lamscale;
bool lthreed;
};

// 16 geometry blocks (each nFull) packed into geom; 12 force blocks (each nHalf)
// in force; everything else is intermediate work sliced from work.
enum { kGeomBlocks = 16, kForceBlocks = 12 };
// 16 geometry blocks (each nFull) packed into geom; force blocks (each nHalf):
// 12 MHD force-density blocks + 4 lambda-force blocks (blmn_e/o, clmn_e/o).
// Everything else is intermediate work sliced from work.
enum { kGeomBlocks = 16, kForceBlocks = 16 };

// g: geometry -> force density, composing the six shared kernels.
// g: geometry -> force density, composing the MHD and lambda-force kernels.
__attribute__((noinline)) void LocalForce(const double* geom, double* work,
double* force, const Ctx* c) {
double* force, const Ctx* c) {
const int nF = c->nFull;
const int nH = c->nHalf;
const int nZnT = c->nZnT, nsH = c->nsH;
Expand All @@ -75,30 +83,45 @@ __attribute__((noinline)) void LocalForce(const double* geom, double* work,
const double* lvo = geom + 15 * nF;
// half-grid intermediates from work
double* p = work;
double* r12 = p; p += nH;
double* ru12 = p; p += nH;
double* zu12 = p; p += nH;
double* rs = p; p += nH;
double* zs = p; p += nH;
double* tau = p; p += nH;
double* gsqrt = p; p += nH;
double* guu = p; p += nH;
double* guv = p; p += nH;
double* gvv = p; p += nH;
double* bsupu = p; p += nH;
double* bsupv = p; p += nH;
double* bsubu = p; p += nH;
double* bsubv = p; p += nH;
double* tp = p; p += nH;
double* r12 = p;
p += nH;
double* ru12 = p;
p += nH;
double* zu12 = p;
p += nH;
double* rs = p;
p += nH;
double* zs = p;
p += nH;
double* tau = p;
p += nH;
double* gsqrt = p;
p += nH;
double* guu = p;
p += nH;
double* guv = p;
p += nH;
double* gvv = p;
p += nH;
double* bsupu = p;
p += nH;
double* bsupv = p;
p += nH;
double* bsubu = p;
p += nH;
double* bsubv = p;
p += nH;
double* tp = p;
p += nH;
// per-nZnT scratch for the force kernel (26 blocks)
double* sc = p; // 26 * nZnT

vmecpp::ComputeHalfGridJacobian(r1e, r1o, z1e, z1o, rue, ruo, zue, zuo, c->sqrtSH,
c->deltaS, /*dSHalfDsInterp=*/0.25, nZnT, 0, 0, nsH,
r12, ru12, zu12, rs, zs, tau);
vmecpp::ComputeMetricElements(r1e, r1o, rue, ruo, zue, zuo, rve, rvo, zve, zvo,
tau, r12, c->sqrtSF, c->sqrtSH, c->lthreed, nZnT,
0, 0, nsH, gsqrt, guu, guv, gvv);
vmecpp::ComputeHalfGridJacobian(
r1e, r1o, z1e, z1o, rue, ruo, zue, zuo, c->sqrtSH, c->deltaS,
/*dSHalfDsInterp=*/0.25, nZnT, 0, 0, nsH, r12, ru12, zu12, rs, zs, tau);
vmecpp::ComputeMetricElements(r1e, r1o, rue, ruo, zue, zuo, rve, rvo, zve,
zvo, tau, r12, c->sqrtSF, c->sqrtSH, c->lthreed,
nZnT, 0, 0, nsH, gsqrt, guu, guv, gvv);
vmecpp::ComputeBsupContra(lue, luo, lve, lvo, gsqrt, c->sqrtSH, c->lthreed,
nZnT, 0, 0, nsH, bsupu, bsupv);
for (int jH = 0; jH < nsH; ++jH) {
Expand All @@ -113,19 +136,67 @@ __attribute__((noinline)) void LocalForce(const double* geom, double* work,
for (int kl = 0; kl < nZnT; ++kl) tp[jH * nZnT + kl] += c->presH[jH];
}
double* s = sc;
double* P_i = s; s += nZnT; double* rup_i = s; s += nZnT;
double* zup_i = s; s += nZnT; double* rsp_i = s; s += nZnT;
double* zsp_i = s; s += nZnT; double* taup_i = s; s += nZnT;
double* gbubu_i = s; s += nZnT; double* gbubv_i = s; s += nZnT;
double* gbvbv_i = s; s += nZnT; double* P_o = s; s += nZnT;
double* rup_o = s; s += nZnT; double* zup_o = s; s += nZnT;
double* rsp_o = s; s += nZnT; double* zsp_o = s; s += nZnT;
double* taup_o = s; s += nZnT; double* gbubu_o = s; s += nZnT;
double* gbubv_o = s; s += nZnT; double* gbvbv_o = s; s += nZnT;
double* P_avg = s; s += nZnT; double* P_wavg = s; s += nZnT;
double* gbubu_avg = s; s += nZnT; double* gbubu_wavg = s; s += nZnT;
double* gbvbv_avg = s; s += nZnT; double* gbvbv_wavg = s; s += nZnT;
double* gbubv_avg = s; s += nZnT; double* gbubv_wavg = s; s += nZnT;
double* P_i = s;
s += nZnT;
double* rup_i = s;
s += nZnT;
double* zup_i = s;
s += nZnT;
double* rsp_i = s;
s += nZnT;
double* zsp_i = s;
s += nZnT;
double* taup_i = s;
s += nZnT;
double* gbubu_i = s;
s += nZnT;
double* gbubv_i = s;
s += nZnT;
double* gbvbv_i = s;
s += nZnT;
double* P_o = s;
s += nZnT;
double* rup_o = s;
s += nZnT;
double* zup_o = s;
s += nZnT;
double* rsp_o = s;
s += nZnT;
double* zsp_o = s;
s += nZnT;
double* taup_o = s;
s += nZnT;
double* gbubu_o = s;
s += nZnT;
double* gbubv_o = s;
s += nZnT;
double* gbvbv_o = s;
s += nZnT;
double* P_avg = s;
s += nZnT;
double* P_wavg = s;
s += nZnT;
double* gbubu_avg = s;
s += nZnT;
double* gbubu_wavg = s;
s += nZnT;
double* gbvbv_avg = s;
s += nZnT;
double* gbvbv_wavg = s;
s += nZnT;
double* gbubv_avg = s;
s += nZnT;
double* gbubv_wavg = s;
s += nZnT;
// lambda-force radial-sweep scratch (carried inside half-grid point)
double* bsubu_i = s;
s += nZnT;
double* bsubv_i = s;
s += nZnT;
double* gvv_gsqrt_i = s;
s += nZnT;
double* guv_bsupu_i = s;
s += nZnT;
double* armn_e = force + 0 * nH;
double* armn_o = force + 1 * nH;
double* azmn_e = force + 2 * nH;
Expand All @@ -139,17 +210,28 @@ __attribute__((noinline)) void LocalForce(const double* geom, double* work,
double* czmn_e = force + 10 * nH;
double* czmn_o = force + 11 * nH;
vmecpp::ComputeMHDForceDensity(
r1e, r1o, rue, ruo, zue, zuo, z1o, rve, rvo, zve, zvo, r12, ru12, zu12, rs,
zs, tau, tp, gsqrt, bsupu, bsupv, c->sqrtSF, c->sqrtSH, P_i, rup_i, zup_i,
rsp_i, zsp_i, taup_i, gbubu_i, gbubv_i, gbvbv_i, P_o, rup_o, zup_o, rsp_o,
zsp_o, taup_o, gbubu_o, gbubv_o, gbvbv_o, P_avg, P_wavg, gbubu_avg,
r1e, r1o, rue, ruo, zue, zuo, z1o, rve, rvo, zve, zvo, r12, ru12, zu12,
rs, zs, tau, tp, gsqrt, bsupu, bsupv, c->sqrtSF, c->sqrtSH, P_i, rup_i,
zup_i, rsp_i, zsp_i, taup_i, gbubu_i, gbubv_i, gbvbv_i, P_o, rup_o, zup_o,
rsp_o, zsp_o, taup_o, gbubu_o, gbubv_o, gbvbv_o, P_avg, P_wavg, gbubu_avg,
gbubu_wavg, gbvbv_avg, gbvbv_wavg, gbubv_avg, gbubv_wavg, c->deltaS, nZnT,
/*nsMinF=*/0, 0, 0, nsH, /*jMaxRZ=*/nsH, c->lthreed, armn_e, armn_o,
azmn_e, azmn_o, brmn_e, brmn_o, bzmn_e, bzmn_o, crmn_e, crmn_o, czmn_e,
czmn_o);
// lambda force (blmn_e/o, clmn_e/o) from the shared kernel
double* blmn_e = force + 12 * nH;
double* blmn_o = force + 13 * nH;
double* clmn_e = force + 14 * nH;
double* clmn_o = force + 15 * nH;
vmecpp::ComputeHybridLambdaForce(
bsubu, bsubv, gvv, gsqrt, guv, bsupu, lue, luo, c->sqrtSH, c->sqrtSF,
c->radialBlending, c->lamscale, c->lthreed, nZnT, /*nsMinF=*/0,
/*nsMinF1=*/0, /*nsMinH=*/0, nsH, /*nsMaxFIncludingLcfs=*/nsH, bsubu_i,
bsubv_i, gvv_gsqrt_i, guv_bsupu_i, blmn_e, blmn_o, clmn_e, clmn_o);
}

// Scalar objective L = 0.5 ||force||^2; work and force are caller-owned scratch.
// Scalar objective L = 0.5 ||force||^2; work and force are caller-owned
// scratch.
__attribute__((noinline)) double Loss(const double* geom, double* work,
double* force, const Ctx* c) {
LocalForce(geom, work, force, c);
Expand All @@ -167,11 +249,16 @@ int main() {
c.nFull = (nsH + 1) * nZnT;
c.nHalf = nsH * nZnT;
c.deltaS = 0.1;
c.lamscale = 0.7;
c.lthreed = true;
std::mt19937 rng(3);
std::uniform_real_distribution<double> d(0.5, 1.5), s(-1.0, 1.0);
std::vector<double> sqrtSF(nsH + 1), sqrtSH(nsH), chipH(nsH), presH(nsH);
for (int j = 0; j <= nsH; ++j) sqrtSF[j] = std::sqrt(0.05 + 0.9 * j / nsH);
std::vector<double> sqrtSF(nsH + 1), sqrtSH(nsH), chipH(nsH), presH(nsH),
radialBlending(nsH + 1);
for (int j = 0; j <= nsH; ++j) {
sqrtSF[j] = std::sqrt(0.05 + 0.9 * j / nsH);
radialBlending[j] = 0.3 + 0.4 * j / nsH;
}
for (int j = 0; j < nsH; ++j) {
sqrtSH[j] = std::sqrt(0.05 + 0.9 * (j + 0.5) / nsH);
chipH[j] = 0.3 + 0.1 * j;
Expand All @@ -181,9 +268,10 @@ int main() {
c.sqrtSH = sqrtSH.data();
c.chipH = chipH.data();
c.presH = presH.data();
c.radialBlending = radialBlending.data();

const int nG = kGeomBlocks * c.nFull;
const int nW = 15 * c.nHalf + 26 * nZnT;
const int nW = 15 * c.nHalf + 30 * nZnT;
const int nFc = kForceBlocks * c.nHalf;
std::vector<double> geom(nG), v(nG);
for (double& x : geom) x = d(rng);
Expand All @@ -206,17 +294,19 @@ int main() {
gp[i] = geom[i] + h * v[i];
gm[i] = geom[i] - h * v[i];
}
const double dfd =
(Loss(gp.data(), w2.data(), f2.data(), &c) -
Loss(gm.data(), w2.data(), f2.data(), &c)) /
(2 * h);
const double drev = std::inner_product(grev.begin(), grev.end(), v.begin(), 0.0);
const double dfd = (Loss(gp.data(), w2.data(), f2.data(), &c) -
Loss(gm.data(), w2.data(), f2.data(), &c)) /
(2 * h);
const double drev =
std::inner_product(grev.begin(), grev.end(), v.begin(), 0.0);
const double scale = std::fabs(dfd) + 1e-300;

printf("exact Hessian of VMEC local force map (composed 6 kernels)\n");
printf("exact Hessian of VMEC local force map (MHD + lambda kernels)\n");
printf(" geom dofs=%d force outputs=%d\n", nG, nFc);
printf(" reverse dL.v vs finite-diff : %.2e\n", std::fabs(drev - dfd) / scale);
printf(" forward dL.v vs finite-diff : %.2e\n", std::fabs(dfwd - dfd) / scale);
printf(" reverse dL.v vs finite-diff : %.2e\n",
std::fabs(drev - dfd) / scale);
printf(" forward dL.v vs finite-diff : %.2e\n",
std::fabs(dfwd - dfd) / scale);
printf(" forward / reverse agreement : %.2e\n",
std::fabs(dfwd - drev) / (std::fabs(drev) + 1e-300));

Expand All @@ -225,7 +315,8 @@ int main() {
for (int r = 0; r < reps; ++r) {
volatile double q = __enzyme_fwddiff<double>(
(void*)Loss, enzyme_dup, geom.data(), v.data(), enzyme_dup, work.data(),
dwork.data(), enzyme_dup, force.data(), dforce.data(), enzyme_const, &c);
dwork.data(), enzyme_dup, force.data(), dforce.data(), enzyme_const,
&c);
(void)q;
}
auto t1 = std::chrono::steady_clock::now();
Expand Down
Loading