Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion .github/workflows/benchmarks.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,11 @@ jobs:
pytest benchmarks/test_benchmarks.py --benchmark-json=benchmark_results.json
- name: Compare benchmark result
if: github.event_name == 'pull_request'
# Skip the result upload/compare for fork PRs: their GITHUB_TOKEN is
# read-only, so comment-on-alert/auto-push hit 'Resource not accessible
# by integration'. The benchmarks still run above; only the write-back
# is skipped for forks.
if: github.event_name == 'pull_request' && github.event.pull_request.head.repo.full_name == github.repository
uses: benchmark-action/github-action-benchmark@v1.21.0
with:
tool: "pytest"
Expand Down
28 changes: 23 additions & 5 deletions .github/workflows/tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -50,15 +50,33 @@ jobs:
run: |
# install VMEC++ deps as well as VMEC2000 deps (we need to import VMEC2000 in a test)
sudo apt-get update && sudo apt-get install -y build-essential cmake libnetcdf-dev liblapack-dev libomp-dev
- name: Cache the VMEC2000 wheel
if: ${{ matrix.os == 'ubuntu-22.04' && matrix.python-version == '3.10' }}
uses: actions/cache@v4
with:
path: /tmp/vmec2000-wheel
# Keyed on the pinned VMEC2000 source commit: rebuild only when it changes.
key: vmec2000-728af8bd6c79-cp310-ubuntu22.04
- name: Also install VMEC2000 (only on Ubuntu 22.04)
if: ${{ matrix.os == 'ubuntu-22.04' && matrix.python-version == '3.10' }}
run: |
# mpi4py is needed for VMEC2000
sudo apt-get install -y libopenmpi-dev
sudo apt-get install -y libopenmpi-dev libnetcdff-dev libscalapack-mpi-dev \
libopenblas-dev ninja-build
python -m pip install mpi4py
# custom wheel for VMEC2000, needed for some VMEC++/VMEC2000 compatibility tests
# NOTE: this wheel is only guaranteed to work on Ubuntu 22.04
python -m pip install vmec@https://anaconda.org/eguiraud-pf/vmec/0.0.6/download/vmec-0.0.6-cp310-cp310-linux_x86_64.whl
if ! ls /tmp/vmec2000-wheel/vmec-*.whl >/dev/null 2>&1; then
# Build with the SYSTEM cmake + ninja; do NOT pip-install the
# cmake/ninja wheels (they shadow the system cmake that
# scikit-build-core's editable vmecpp rebuild records).
python -m pip install numpy scikit-build f90wrap setuptools wheel
git clone https://github.com/hiddenSymmetries/VMEC2000.git /tmp/VMEC2000
git -C /tmp/VMEC2000 checkout 728af8bd6c796b36a0aa85fe298e507791e57c6e
cp /tmp/VMEC2000/cmake/machines/ubuntu.json /tmp/VMEC2000/cmake_config_file.json
LD_LIBRARY_PATH=/usr/lib/x86_64-linux-gnu \
python -m pip wheel /tmp/VMEC2000 --no-build-isolation -w /tmp/vmec2000-wheel
fi
python -m pip install /tmp/vmec2000-wheel/vmec-*.whl
# fail loudly here if the binding is still broken, not in the test step
python -c "import vmec; print('VMEC2000 import OK')"
- name: Install package
run: |
# on Ubuntu we would not need this, but on MacOS we need to point CMake to gfortran-14 and gcc-14
Expand Down
16 changes: 12 additions & 4 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -66,10 +66,9 @@ find_package(LAPACK REQUIRED)
FetchContent_Declare(
abseil-cpp
GIT_REPOSITORY https://github.com/abseil/abseil-cpp.git
# 20260107.1 LTS: required for Clang >= 21, where the 2024 pin fails to
# compile (absl::Nonnull SFINAE in absl/strings/ascii.cc). Clang is the
# compiler used for the Enzyme autodiff build.
GIT_TAG 20260107.1
# 20260107.1 LTS: older abseil fails to compile under Clang >= 21 (the
# Enzyme build) on absl::Nonnull SFINAE in absl/strings/ascii.cc.
GIT_TAG 255c84dadd029fd8ad25c5efb5933e47beaa00c7
GIT_SHALLOW TRUE
)
FetchContent_Declare(
Expand Down Expand Up @@ -205,4 +204,13 @@ if(VMECPP_ENABLE_ENZYME)
target_compile_options(enzyme_smoke_test PRIVATE
-O2 -fplugin=${VMECPP_ENZYME_PLUGIN})
add_test(NAME enzyme_smoke COMMAND enzyme_smoke_test)

# Exact autodiff (forward and reverse) of a real VMEC nonlinear kernel: the
# half-grid Jacobian. Self-contained over flat buffers; checks Jv and J^T u
# against finite differences and against each other (adjoint identity).
add_executable(jacobian_kernel_autodiff_test
${PROJECT_SOURCE_DIR}/src/vmecpp/cpp/vmecpp/common/enzyme/jacobian_kernel_autodiff_test.cc)
target_compile_options(jacobian_kernel_autodiff_test PRIVATE
-O2 -fplugin=${VMECPP_ENZYME_PLUGIN})
add_test(NAME jacobian_kernel_autodiff COMMAND jacobian_kernel_autodiff_test)
endif()
172 changes: 172 additions & 0 deletions src/vmecpp/cpp/vmecpp/common/enzyme/jacobian_kernel_autodiff_test.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,172 @@
// SPDX-FileCopyrightText: 2024-present Proxima Fusion GmbH
// <info@proximafusion.com>
//
// SPDX-License-Identifier: MIT

// Exact autodiff of a real VMEC nonlinear kernel, forward vs reverse mode.
//
// JacobianKernel reproduces IdealMhdModel::computeJacobian: it maps the
// full-grid geometry (R, Z and their poloidal derivatives, even/odd parity) to
// the half-grid metric quantities r12, ru12, zu12, rs, zs and the Jacobian tau.
// tau is nonlinear in the geometry (products ru12*zs - rs*zu12, ...), so its
// Jacobian is a genuine building block of the MHD force Hessian (the force is
// the gradient of VMEC's functional; chain rule composes this kernel's Jacobian
// with the linear spectral transforms to give the Hessian-vector product).
//
// The kernel is written allocation-free over flat buffers (scalar locals,
// Eigen-free), which is the form Enzyme differentiates. We take:
// * a Jacobian-vector product J v by forward mode (__enzyme_fwddiff), and
// * a vector-Jacobian product J^T u by reverse mode (__enzyme_autodiff),
// check both against central finite differences, and time them so the
// forward/reverse trade-off is explicit.

#include <algorithm>
#include <chrono>
#include <cmath>
#include <cstdio>
#include <numeric>
#include <random>
#include <vector>

#include "vmecpp/common/enzyme/enzyme.h"
#include "vmecpp/vmec/ideal_mhd_model/jacobian_kernel.h"

// Types used as Enzyme intrinsic arguments need external linkage (an anonymous
// namespace type cannot instantiate the __enzyme_* templates).

// JacobianKernel is the shared production kernel ComputeHalfGridJacobian
// (jacobian_kernel.h); differentiated here exactly as the solver uses it.

constexpr int kNgeom = 8; // r1e r1o z1e z1o rue ruo zue zuo
constexpr int kNout = 6; // r12 ru12 zu12 rs zs tau

struct Problem {
int nZnT, nsH, n_in, n_out;
std::vector<double> geom; // kNgeom blocks of (nsH+1)*nZnT
std::vector<double> sqrtSH;
double deltaS, dSHalfDsInterp;
};

Problem MakeProblem(int nZnT, int nsH) {
Problem p{nZnT, nsH, kNgeom * (nsH + 1) * nZnT, kNout * nsH * nZnT, {}, {},
0.0, 0.0};
std::mt19937 rng(7);
std::uniform_real_distribution<double> d(0.5, 1.5);
p.geom.resize(p.n_in);
for (double& x : p.geom) x = d(rng);
p.sqrtSH.resize(nsH);
for (int j = 0; j < nsH; ++j) p.sqrtSH[j] = std::sqrt(0.05 + 0.9 * j / nsH);
p.deltaS = 0.1;
p.dSHalfDsInterp = 0.25;
return p;
}

// Evaluate the kernel from a flat geometry buffer into a flat output buffer.
__attribute__((noinline)) void Eval(const double* geom, const Problem& p,
double* out) {
const int s = (p.nsH + 1) * p.nZnT;
const int o = p.nsH * p.nZnT;
vmecpp::ComputeHalfGridJacobian(
geom, geom + s, geom + 2 * s, geom + 3 * s, geom + 4 * s, geom + 5 * s,
geom + 6 * s, geom + 7 * s, p.sqrtSH.data(), p.deltaS, p.dSHalfDsInterp,
p.nZnT, /*nsMinF1=*/0, /*nsMinH=*/0,
/*nsMaxH=*/p.nsH, out, out + o, out + 2 * o, out + 3 * o, out + 4 * o,
out + 5 * o);
}

double MaxAbs(const std::vector<double>& a, const std::vector<double>& b) {
double m = 0.0;
for (size_t i = 0; i < a.size(); ++i)
m = std::fmax(m, std::fabs(a[i] - b[i]));
return m;
}

// Scalar objective L(geom) = 0.5 * sum of squares of all kernel outputs. out is
// caller-owned scratch (an intermediate of the differentiated call). This is
// the scalar-over-buffers form Enzyme differentiates in both modes.
__attribute__((noinline)) double Loss(const double* geom, double* out,
const Problem* p) {
Eval(geom, *p, out);
double s = 0.0;
for (int i = 0; i < p->n_out; ++i) s += 0.5 * out[i] * out[i];
return s;
}

int main() {
const Problem p = MakeProblem(/*nZnT=*/32, /*nsH=*/12);
std::mt19937 rng(11);
std::uniform_real_distribution<double> dist(-1.0, 1.0);
std::vector<double> v(p.n_in);
for (double& x : v) x = dist(rng);

// Reverse mode: full gradient dL/dgeom in one pass.
std::vector<double> g_rev(p.n_in, 0.0), out_s(p.n_out, 0.0),
out_sh(p.n_out, 0.0);
__enzyme_autodiff((void*)Loss, enzyme_dup, p.geom.data(), g_rev.data(),
enzyme_dup, out_s.data(), out_sh.data(), enzyme_const, &p);

// Forward mode: directional derivative dL . v in one pass.
std::vector<double> out_t(p.n_out, 0.0);
const double d_fwd = __enzyme_fwddiff<double>(
(void*)Loss, enzyme_dup, p.geom.data(), v.data(), enzyme_dup,
out_s.data(), out_t.data(), enzyme_const, &p);

// Central finite differences of the directional derivative.
const double h = 1e-6;
std::vector<double> gp(p.geom), gm(p.geom), os(p.n_out);
for (int i = 0; i < p.n_in; ++i) {
gp[i] = p.geom[i] + h * v[i];
gm[i] = p.geom[i] - h * v[i];
}
const double d_fd =
(Loss(gp.data(), os.data(), &p) - Loss(gm.data(), os.data(), &p)) /
(2 * h);
const double d_rev =
std::inner_product(g_rev.begin(), g_rev.end(), v.begin(), 0.0);

const double scale = std::fabs(d_fd) + 1e-300;
printf("exact autodiff of VMEC Jacobian kernel (n_in=%d n_out=%d)\n", p.n_in,
p.n_out);
printf(" reverse dL.v = %.10e (rel err vs FD %.2e)\n", d_rev,
std::fabs(d_rev - d_fd) / scale);
printf(" forward dL.v = %.10e (rel err vs FD %.2e)\n", d_fwd,
std::fabs(d_fwd - d_fd) / scale);
printf(" forward/reverse agreement: %.2e\n",
std::fabs(d_fwd - d_rev) / (std::fabs(d_rev) + 1e-300));

// Performance: reverse returns the whole gradient (n_in) in one pass; forward
// returns one directional derivative per pass. For a scalar objective reverse
// wins; for a single Jacobian/Hessian-vector product forward is the cheaper
// primitive. Time both.
const int reps = 2000;
auto t0 = std::chrono::steady_clock::now();
for (int r = 0; r < reps; ++r) {
std::fill(g_rev.begin(), g_rev.end(), 0.0);
__enzyme_autodiff((void*)Loss, enzyme_dup, p.geom.data(), g_rev.data(),
enzyme_dup, out_s.data(), out_sh.data(), enzyme_const,
&p);
}
auto t1 = std::chrono::steady_clock::now();
for (int r = 0; r < reps; ++r) {
out_t.assign(p.n_out, 0.0);
volatile double s = __enzyme_fwddiff<double>(
(void*)Loss, enzyme_dup, p.geom.data(), v.data(), enzyme_dup,
out_s.data(), out_t.data(), enzyme_const, &p);
(void)s;
}
auto t2 = std::chrono::steady_clock::now();
const double us_rev =
std::chrono::duration<double, std::micro>(t1 - t0).count() / reps;
const double us_fwd =
std::chrono::duration<double, std::micro>(t2 - t1).count() / reps;
printf(
" performance: reverse %.2f us/pass (full gradient), forward %.2f "
"us/pass (one direction)\n",
us_rev, us_fwd);

const bool ok = std::fabs(d_rev - d_fd) < 1e-5 * scale &&
std::fabs(d_fwd - d_fd) < 1e-5 * scale &&
std::fabs(d_fwd - d_rev) < 1e-9 * (std::fabs(d_rev) + 1e-300);
printf("%s\n", ok ? "PASS" : "FAIL");
return ok ? 0 : 1;
}
11 changes: 10 additions & 1 deletion src/vmecpp/cpp/vmecpp/vmec/ideal_mhd_model/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,16 @@ cc_test(
cc_library(
name = "ideal_mhd_model",
srcs = ["ideal_mhd_model.cc"],
hdrs = ["ideal_mhd_model.h", "dft_data.h"],
# The shared force-chain kernels and the autodiff composition/JVP header are
# header-only and included by ideal_mhd_model.cc; declare whatever exists on
# this branch so the bazel sandbox can see them. The Enzyme JVP entry point
# (exact_force_jvp.cc) is built only by the CMake VMECPP_ENABLE_ENZYME path,
# not by bazel; its use in ideal_mhd_model.cc is guarded by that define.
hdrs = ["ideal_mhd_model.h", "dft_data.h"] + glob([
"*_kernel.h",
"local_force_composition.h",
"exact_force_jvp.h",
]),
visibility = ["//visibility:public"],
deps = [
":dft_toroidal",
Expand Down
106 changes: 21 additions & 85 deletions src/vmecpp/cpp/vmecpp/vmec/ideal_mhd_model/ideal_mhd_model.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include "vmecpp/common/util/util.h"
#include "vmecpp/vmec/fourier_geometry/fourier_geometry.h"
#include "vmecpp/vmec/handover_storage/handover_storage.h"
#include "vmecpp/vmec/ideal_mhd_model/jacobian_kernel.h"
#include "vmecpp/vmec/radial_partitioning/radial_partitioning.h"
#include "vmecpp/vmec/radial_profiles/radial_profiles.h"
#include "vmecpp/vmec/vmec_constants/vmec_algorithm_constants.h"
Expand Down Expand Up @@ -1194,95 +1195,30 @@ void IdealMhdModel::rzConIntoVolume() {
}

void IdealMhdModel::computeJacobian() {
// r12, ru12, zu12, rs, zs, tau

// Half-grid r12, ru12, zu12, rs, zs and the Jacobian tau. The arithmetic
// lives in the shared, allocation-free kernel (jacobian_kernel.h) so the
// solver and the Enzyme autodiff test use one implementation.
ComputeHalfGridJacobian(
r1_e.data(), r1_o.data(), z1_e.data(), z1_o.data(), ru_e.data(),
ru_o.data(), zu_e.data(), zu_o.data(), m_p_.sqrtSH.data(), m_fc_.deltaS,
dSHalfDsInterp, s_.nZnT, r_.nsMinF1, r_.nsMinH, r_.nsMaxH, r12.data(),
ru12.data(), zu12.data(), rs.data(), zs.data(), tau.data());

// Jacobian sign check: same running min/max over tau as before, now scanned
// after the kernel (identical values, hence identical verdict).
double minTau = 0.0;
double maxTau = 0.0;

// contributions from full-grid surface _i_nside j-th half-grid surface
int j0 = r_.nsMinF1;
for (int kl = 0; kl < s_.nZnT; ++kl) {
m_ls_.r1e_i[kl] = r1_e[(j0 - r_.nsMinF1) * s_.nZnT + kl];
m_ls_.r1o_i[kl] = r1_o[(j0 - r_.nsMinF1) * s_.nZnT + kl];
m_ls_.z1e_i[kl] = z1_e[(j0 - r_.nsMinF1) * s_.nZnT + kl];
m_ls_.z1o_i[kl] = z1_o[(j0 - r_.nsMinF1) * s_.nZnT + kl];
m_ls_.rue_i[kl] = ru_e[(j0 - r_.nsMinF1) * s_.nZnT + kl];
m_ls_.ruo_i[kl] = ru_o[(j0 - r_.nsMinF1) * s_.nZnT + kl];
m_ls_.zue_i[kl] = zu_e[(j0 - r_.nsMinF1) * s_.nZnT + kl];
m_ls_.zuo_i[kl] = zu_o[(j0 - r_.nsMinF1) * s_.nZnT + kl];
const int nTau = (r_.nsMaxH - r_.nsMinH) * s_.nZnT;
for (int i = 0; i < nTau; ++i) {
const double t = tau[i];
if (t < minTau || minTau == 0.0) {
minTau = t;
}
if (t > maxTau || maxTau == 0.0) {
maxTau = t;
}
}

for (int jH = r_.nsMinH; jH < r_.nsMaxH; ++jH) {
// sqrt(s) on j-th half-grid pos
double sqrtSH = m_p_.sqrtSH[jH - r_.nsMinH];

for (int kl = 0; kl < s_.nZnT; ++kl) {
// contributions from full-grid surface _o_utside j-th half-grid surface
double r1e_o = r1_e[(jH + 1 - r_.nsMinF1) * s_.nZnT + kl];
double r1o_o = r1_o[(jH + 1 - r_.nsMinF1) * s_.nZnT + kl];
double z1e_o = z1_e[(jH + 1 - r_.nsMinF1) * s_.nZnT + kl];
double z1o_o = z1_o[(jH + 1 - r_.nsMinF1) * s_.nZnT + kl];
double rue_o = ru_e[(jH + 1 - r_.nsMinF1) * s_.nZnT + kl];
double ruo_o = ru_o[(jH + 1 - r_.nsMinF1) * s_.nZnT + kl];
double zue_o = zu_e[(jH + 1 - r_.nsMinF1) * s_.nZnT + kl];
double zuo_o = zu_o[(jH + 1 - r_.nsMinF1) * s_.nZnT + kl];

int iHalf = (jH - r_.nsMinH) * s_.nZnT + kl;

// R on half-grid
r12[iHalf] = 0.5 * ((m_ls_.r1e_i[kl] + r1e_o) +
sqrtSH * (m_ls_.r1o_i[kl] + r1o_o));

// dRdTheta on half-grid
ru12[iHalf] = 0.5 * ((m_ls_.rue_i[kl] + rue_o) +
sqrtSH * (m_ls_.ruo_i[kl] + ruo_o));

// dZdTheta on half-grid
zu12[iHalf] = 0.5 * ((m_ls_.zue_i[kl] + zue_o) +
sqrtSH * (m_ls_.zuo_i[kl] + zuo_o));

// \tilde{dRds} on half-grid
rs[iHalf] =
((r1e_o - m_ls_.r1e_i[kl]) + sqrtSH * (r1o_o - m_ls_.r1o_i[kl])) /
m_fc_.deltaS;

// \tilde{dZds} on half-grid
zs[iHalf] =
((z1e_o - m_ls_.z1e_i[kl]) + sqrtSH * (z1o_o - m_ls_.z1o_i[kl])) /
m_fc_.deltaS;

// sqrt(g)/R on half-grid: assemble as governed by product rule
double tau1 = ru12[iHalf] * zs[iHalf] - rs[iHalf] * zu12[iHalf];
double tau2 = ruo_o * z1o_o + m_ls_.ruo_i[kl] * m_ls_.z1o_i[kl] -
zuo_o * r1o_o - m_ls_.zuo_i[kl] * m_ls_.r1o_i[kl] +
(rue_o * z1o_o + m_ls_.rue_i[kl] * m_ls_.z1o_i[kl] -
zue_o * r1o_o - m_ls_.zue_i[kl] * m_ls_.r1o_i[kl]) /
sqrtSH;
double tau_val = tau1 + dSHalfDsInterp * tau2;

if (tau_val < minTau || minTau == 0.0) {
minTau = tau_val;
}
if (tau_val > maxTau || maxTau == 0.0) {
maxTau = tau_val;
}

tau[iHalf] = tau_val;

// hand over to next iteration of radial loop
// --> what was outside in this loop iteration will be inside for next
// half-grid location
m_ls_.r1e_i[kl] = r1e_o;
m_ls_.r1o_i[kl] = r1o_o;
m_ls_.z1e_i[kl] = z1e_o;
m_ls_.z1o_i[kl] = z1o_o;
m_ls_.rue_i[kl] = rue_o;
m_ls_.ruo_i[kl] = ruo_o;
m_ls_.zue_i[kl] = zue_o;
m_ls_.zuo_i[kl] = zuo_o;
} // kl
} // j

bool localBadJacobian =
(minTau * maxTau < 0.0) || !std::isfinite(minTau * maxTau);

Expand Down
Loading
Loading