Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion .github/workflows/benchmarks.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,11 @@ jobs:
pytest benchmarks/test_benchmarks.py --benchmark-json=benchmark_results.json
- name: Compare benchmark result
if: github.event_name == 'pull_request'
# Skip the result upload/compare for fork PRs: their GITHUB_TOKEN is
# read-only, so comment-on-alert/auto-push hit 'Resource not accessible
# by integration'. The benchmarks still run above; only the write-back
# is skipped for forks.
if: github.event_name == 'pull_request' && github.event.pull_request.head.repo.full_name == github.repository
uses: benchmark-action/github-action-benchmark@v1.21.0
with:
tool: "pytest"
Expand Down
28 changes: 23 additions & 5 deletions .github/workflows/tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -50,15 +50,33 @@ jobs:
run: |
# install VMEC++ deps as well as VMEC2000 deps (we need to import VMEC2000 in a test)
sudo apt-get update && sudo apt-get install -y build-essential cmake libnetcdf-dev liblapack-dev libomp-dev
- name: Cache the VMEC2000 wheel
if: ${{ matrix.os == 'ubuntu-22.04' && matrix.python-version == '3.10' }}
uses: actions/cache@v4
with:
path: /tmp/vmec2000-wheel
# Keyed on the pinned VMEC2000 source commit: rebuild only when it changes.
key: vmec2000-728af8bd6c79-cp310-ubuntu22.04
- name: Also install VMEC2000 (only on Ubuntu 22.04)
if: ${{ matrix.os == 'ubuntu-22.04' && matrix.python-version == '3.10' }}
run: |
# mpi4py is needed for VMEC2000
sudo apt-get install -y libopenmpi-dev
sudo apt-get install -y libopenmpi-dev libnetcdff-dev libscalapack-mpi-dev \
libopenblas-dev ninja-build
python -m pip install mpi4py
# custom wheel for VMEC2000, needed for some VMEC++/VMEC2000 compatibility tests
# NOTE: this wheel is only guaranteed to work on Ubuntu 22.04
python -m pip install vmec@https://anaconda.org/eguiraud-pf/vmec/0.0.6/download/vmec-0.0.6-cp310-cp310-linux_x86_64.whl
if ! ls /tmp/vmec2000-wheel/vmec-*.whl >/dev/null 2>&1; then
# Build with the SYSTEM cmake + ninja; do NOT pip-install the
# cmake/ninja wheels (they shadow the system cmake that
# scikit-build-core's editable vmecpp rebuild records).
python -m pip install numpy scikit-build f90wrap setuptools wheel
git clone https://github.com/hiddenSymmetries/VMEC2000.git /tmp/VMEC2000
git -C /tmp/VMEC2000 checkout 728af8bd6c796b36a0aa85fe298e507791e57c6e
cp /tmp/VMEC2000/cmake/machines/ubuntu.json /tmp/VMEC2000/cmake_config_file.json
LD_LIBRARY_PATH=/usr/lib/x86_64-linux-gnu \
python -m pip wheel /tmp/VMEC2000 --no-build-isolation -w /tmp/vmec2000-wheel
fi
python -m pip install /tmp/vmec2000-wheel/vmec-*.whl
# fail loudly here if the binding is still broken, not in the test step
python -c "import vmec; print('VMEC2000 import OK')"
- name: Install package
run: |
# on Ubuntu we would not need this, but on MacOS we need to point CMake to gfortran-14 and gcc-14
Expand Down
7 changes: 3 additions & 4 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -66,10 +66,9 @@ find_package(LAPACK REQUIRED)
FetchContent_Declare(
abseil-cpp
GIT_REPOSITORY https://github.com/abseil/abseil-cpp.git
# 20260107.1 LTS: required for Clang >= 21, where the 2024 pin fails to
# compile (absl::Nonnull SFINAE in absl/strings/ascii.cc). Clang is the
# compiler used for the Enzyme autodiff build.
GIT_TAG 20260107.1
# 20260107.1 LTS: older abseil fails to compile under Clang >= 21 (the
# Enzyme build) on absl::Nonnull SFINAE in absl/strings/ascii.cc.
GIT_TAG 255c84dadd029fd8ad25c5efb5933e47beaa00c7
GIT_SHALLOW TRUE
)
FetchContent_Declare(
Expand Down
162 changes: 110 additions & 52 deletions src/vmecpp/cpp/vmecpp/common/enzyme/local_force_hessian_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,8 @@

// Problem dimensions and the constant (non-differentiated) context.
struct Ctx {
int nZnT, nsH; // half-grid surfaces; full-grid has nsH + 1
int nFull, nHalf; // (nsH+1)*nZnT, nsH*nZnT
int nZnT, nsH; // half-grid surfaces; full-grid has nsH + 1
int nFull, nHalf; // (nsH+1)*nZnT, nsH*nZnT
const double* sqrtSF; // [nsH+1]
const double* sqrtSH; // [nsH]
const double* chipH; // [nsH]
Expand All @@ -46,13 +46,13 @@ struct Ctx {
bool lthreed;
};

// 16 geometry blocks (each nFull) packed into geom; 12 force blocks (each nHalf)
// in force; everything else is intermediate work sliced from work.
// 16 geometry blocks (each nFull) packed into geom; 12 force blocks (each
// nHalf) in force; everything else is intermediate work sliced from work.
enum { kGeomBlocks = 16, kForceBlocks = 12 };

// g: geometry -> force density, composing the six shared kernels.
__attribute__((noinline)) void LocalForce(const double* geom, double* work,
double* force, const Ctx* c) {
double* force, const Ctx* c) {
const int nF = c->nFull;
const int nH = c->nHalf;
const int nZnT = c->nZnT, nsH = c->nsH;
Expand All @@ -75,30 +75,45 @@ __attribute__((noinline)) void LocalForce(const double* geom, double* work,
const double* lvo = geom + 15 * nF;
// half-grid intermediates from work
double* p = work;
double* r12 = p; p += nH;
double* ru12 = p; p += nH;
double* zu12 = p; p += nH;
double* rs = p; p += nH;
double* zs = p; p += nH;
double* tau = p; p += nH;
double* gsqrt = p; p += nH;
double* guu = p; p += nH;
double* guv = p; p += nH;
double* gvv = p; p += nH;
double* bsupu = p; p += nH;
double* bsupv = p; p += nH;
double* bsubu = p; p += nH;
double* bsubv = p; p += nH;
double* tp = p; p += nH;
double* r12 = p;
p += nH;
double* ru12 = p;
p += nH;
double* zu12 = p;
p += nH;
double* rs = p;
p += nH;
double* zs = p;
p += nH;
double* tau = p;
p += nH;
double* gsqrt = p;
p += nH;
double* guu = p;
p += nH;
double* guv = p;
p += nH;
double* gvv = p;
p += nH;
double* bsupu = p;
p += nH;
double* bsupv = p;
p += nH;
double* bsubu = p;
p += nH;
double* bsubv = p;
p += nH;
double* tp = p;
p += nH;
// per-nZnT scratch for the force kernel (26 blocks)
double* sc = p; // 26 * nZnT

vmecpp::ComputeHalfGridJacobian(r1e, r1o, z1e, z1o, rue, ruo, zue, zuo, c->sqrtSH,
c->deltaS, /*dSHalfDsInterp=*/0.25, nZnT, 0, 0, nsH,
r12, ru12, zu12, rs, zs, tau);
vmecpp::ComputeMetricElements(r1e, r1o, rue, ruo, zue, zuo, rve, rvo, zve, zvo,
tau, r12, c->sqrtSF, c->sqrtSH, c->lthreed, nZnT,
0, 0, nsH, gsqrt, guu, guv, gvv);
vmecpp::ComputeHalfGridJacobian(
r1e, r1o, z1e, z1o, rue, ruo, zue, zuo, c->sqrtSH, c->deltaS,
/*dSHalfDsInterp=*/0.25, nZnT, 0, 0, nsH, r12, ru12, zu12, rs, zs, tau);
vmecpp::ComputeMetricElements(r1e, r1o, rue, ruo, zue, zuo, rve, rvo, zve,
zvo, tau, r12, c->sqrtSF, c->sqrtSH, c->lthreed,
nZnT, 0, 0, nsH, gsqrt, guu, guv, gvv);
vmecpp::ComputeBsupContra(lue, luo, lve, lvo, gsqrt, c->sqrtSH, c->lthreed,
nZnT, 0, 0, nsH, bsupu, bsupv);
for (int jH = 0; jH < nsH; ++jH) {
Expand All @@ -113,19 +128,58 @@ __attribute__((noinline)) void LocalForce(const double* geom, double* work,
for (int kl = 0; kl < nZnT; ++kl) tp[jH * nZnT + kl] += c->presH[jH];
}
double* s = sc;
double* P_i = s; s += nZnT; double* rup_i = s; s += nZnT;
double* zup_i = s; s += nZnT; double* rsp_i = s; s += nZnT;
double* zsp_i = s; s += nZnT; double* taup_i = s; s += nZnT;
double* gbubu_i = s; s += nZnT; double* gbubv_i = s; s += nZnT;
double* gbvbv_i = s; s += nZnT; double* P_o = s; s += nZnT;
double* rup_o = s; s += nZnT; double* zup_o = s; s += nZnT;
double* rsp_o = s; s += nZnT; double* zsp_o = s; s += nZnT;
double* taup_o = s; s += nZnT; double* gbubu_o = s; s += nZnT;
double* gbubv_o = s; s += nZnT; double* gbvbv_o = s; s += nZnT;
double* P_avg = s; s += nZnT; double* P_wavg = s; s += nZnT;
double* gbubu_avg = s; s += nZnT; double* gbubu_wavg = s; s += nZnT;
double* gbvbv_avg = s; s += nZnT; double* gbvbv_wavg = s; s += nZnT;
double* gbubv_avg = s; s += nZnT; double* gbubv_wavg = s; s += nZnT;
double* P_i = s;
s += nZnT;
double* rup_i = s;
s += nZnT;
double* zup_i = s;
s += nZnT;
double* rsp_i = s;
s += nZnT;
double* zsp_i = s;
s += nZnT;
double* taup_i = s;
s += nZnT;
double* gbubu_i = s;
s += nZnT;
double* gbubv_i = s;
s += nZnT;
double* gbvbv_i = s;
s += nZnT;
double* P_o = s;
s += nZnT;
double* rup_o = s;
s += nZnT;
double* zup_o = s;
s += nZnT;
double* rsp_o = s;
s += nZnT;
double* zsp_o = s;
s += nZnT;
double* taup_o = s;
s += nZnT;
double* gbubu_o = s;
s += nZnT;
double* gbubv_o = s;
s += nZnT;
double* gbvbv_o = s;
s += nZnT;
double* P_avg = s;
s += nZnT;
double* P_wavg = s;
s += nZnT;
double* gbubu_avg = s;
s += nZnT;
double* gbubu_wavg = s;
s += nZnT;
double* gbvbv_avg = s;
s += nZnT;
double* gbvbv_wavg = s;
s += nZnT;
double* gbubv_avg = s;
s += nZnT;
double* gbubv_wavg = s;
s += nZnT;
double* armn_e = force + 0 * nH;
double* armn_o = force + 1 * nH;
double* azmn_e = force + 2 * nH;
Expand All @@ -139,17 +193,18 @@ __attribute__((noinline)) void LocalForce(const double* geom, double* work,
double* czmn_e = force + 10 * nH;
double* czmn_o = force + 11 * nH;
vmecpp::ComputeMHDForceDensity(
r1e, r1o, rue, ruo, zue, zuo, z1o, rve, rvo, zve, zvo, r12, ru12, zu12, rs,
zs, tau, tp, gsqrt, bsupu, bsupv, c->sqrtSF, c->sqrtSH, P_i, rup_i, zup_i,
rsp_i, zsp_i, taup_i, gbubu_i, gbubv_i, gbvbv_i, P_o, rup_o, zup_o, rsp_o,
zsp_o, taup_o, gbubu_o, gbubv_o, gbvbv_o, P_avg, P_wavg, gbubu_avg,
r1e, r1o, rue, ruo, zue, zuo, z1o, rve, rvo, zve, zvo, r12, ru12, zu12,
rs, zs, tau, tp, gsqrt, bsupu, bsupv, c->sqrtSF, c->sqrtSH, P_i, rup_i,
zup_i, rsp_i, zsp_i, taup_i, gbubu_i, gbubv_i, gbvbv_i, P_o, rup_o, zup_o,
rsp_o, zsp_o, taup_o, gbubu_o, gbubv_o, gbvbv_o, P_avg, P_wavg, gbubu_avg,
gbubu_wavg, gbvbv_avg, gbvbv_wavg, gbubv_avg, gbubv_wavg, c->deltaS, nZnT,
/*nsMinF=*/0, 0, 0, nsH, /*jMaxRZ=*/nsH, c->lthreed, armn_e, armn_o,
azmn_e, azmn_o, brmn_e, brmn_o, bzmn_e, bzmn_o, crmn_e, crmn_o, czmn_e,
czmn_o);
}

// Scalar objective L = 0.5 ||force||^2; work and force are caller-owned scratch.
// Scalar objective L = 0.5 ||force||^2; work and force are caller-owned
// scratch.
__attribute__((noinline)) double Loss(const double* geom, double* work,
double* force, const Ctx* c) {
LocalForce(geom, work, force, c);
Expand Down Expand Up @@ -206,17 +261,19 @@ int main() {
gp[i] = geom[i] + h * v[i];
gm[i] = geom[i] - h * v[i];
}
const double dfd =
(Loss(gp.data(), w2.data(), f2.data(), &c) -
Loss(gm.data(), w2.data(), f2.data(), &c)) /
(2 * h);
const double drev = std::inner_product(grev.begin(), grev.end(), v.begin(), 0.0);
const double dfd = (Loss(gp.data(), w2.data(), f2.data(), &c) -
Loss(gm.data(), w2.data(), f2.data(), &c)) /
(2 * h);
const double drev =
std::inner_product(grev.begin(), grev.end(), v.begin(), 0.0);
const double scale = std::fabs(dfd) + 1e-300;

printf("exact Hessian of VMEC local force map (composed 6 kernels)\n");
printf(" geom dofs=%d force outputs=%d\n", nG, nFc);
printf(" reverse dL.v vs finite-diff : %.2e\n", std::fabs(drev - dfd) / scale);
printf(" forward dL.v vs finite-diff : %.2e\n", std::fabs(dfwd - dfd) / scale);
printf(" reverse dL.v vs finite-diff : %.2e\n",
std::fabs(drev - dfd) / scale);
printf(" forward dL.v vs finite-diff : %.2e\n",
std::fabs(dfwd - dfd) / scale);
printf(" forward / reverse agreement : %.2e\n",
std::fabs(dfwd - drev) / (std::fabs(drev) + 1e-300));

Expand All @@ -225,7 +282,8 @@ int main() {
for (int r = 0; r < reps; ++r) {
volatile double q = __enzyme_fwddiff<double>(
(void*)Loss, enzyme_dup, geom.data(), v.data(), enzyme_dup, work.data(),
dwork.data(), enzyme_dup, force.data(), dforce.data(), enzyme_const, &c);
dwork.data(), enzyme_dup, force.data(), dforce.data(), enzyme_const,
&c);
(void)q;
}
auto t1 = std::chrono::steady_clock::now();
Expand Down
11 changes: 10 additions & 1 deletion src/vmecpp/cpp/vmecpp/vmec/ideal_mhd_model/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,16 @@ cc_test(
cc_library(
name = "ideal_mhd_model",
srcs = ["ideal_mhd_model.cc"],
hdrs = ["ideal_mhd_model.h", "dft_data.h"],
# The shared force-chain kernels and the autodiff composition/JVP header are
# header-only and included by ideal_mhd_model.cc; declare whatever exists on
# this branch so the bazel sandbox can see them. The Enzyme JVP entry point
# (exact_force_jvp.cc) is built only by the CMake VMECPP_ENABLE_ENZYME path,
# not by bazel; its use in ideal_mhd_model.cc is guarded by that define.
hdrs = ["ideal_mhd_model.h", "dft_data.h"] + glob([
"*_kernel.h",
"local_force_composition.h",
"exact_force_jvp.h",
]),
visibility = ["//visibility:public"],
deps = [
":dft_toroidal",
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
// SPDX-FileCopyrightText: 2024-present Proxima Fusion GmbH
// <info@proximafusion.com>
//
// SPDX-License-Identifier: MIT
#ifndef VMECPP_VMEC_IDEAL_MHD_MODEL_CONSTRAINT_FORCE_KERNEL_H_
#define VMECPP_VMEC_IDEAL_MHD_MODEL_CONSTRAINT_FORCE_KERNEL_H_

#include <algorithm>

namespace vmecpp {

// The two local (non-transform) pieces of VMEC's spectral-condensation
// constraint force. The Fourier-space bandpass between them is the separate
// shared free function deAliasConstraintForce. All full-grid buffers are
// indexed (jF-nsMinF)*nZnT except sqrtSF, which is indexed (jF-nsMinF1).
// Allocation-free, shared between the solver and the Enzyme autodiff path.

// Effective constraint force gConEff = (rCon - rCon0) ru + (zCon - zCon0) zu.
// No constraint on the axis (it has no poloidal angle), so the axis surface is
// skipped, matching deAliasConstraintForce.
inline void ComputeEffectiveConstraintForce(
const double* rCon, const double* rCon0, const double* zCon,
const double* zCon0, const double* ruFull, const double* zuFull, int nZnT,
int nsMinF, int nsMaxFIncludingLcfs, double* gConEff) {
int jMin = 0;
if (nsMinF == 0) {
jMin = 1;
}
for (int jF = std::max(jMin, nsMinF); jF < nsMaxFIncludingLcfs; ++jF) {
for (int kl = 0; kl < nZnT; ++kl) {
int idx_kl = (jF - nsMinF) * nZnT + kl;
gConEff[idx_kl] = (rCon[idx_kl] - rCon0[idx_kl]) * ruFull[idx_kl] +
(zCon[idx_kl] - zCon0[idx_kl]) * zuFull[idx_kl];
} // kl
} // jF
}

// Add the bandpass-filtered constraint force gCon back into the MHD R/Z forces
// (brmn, bzmn) and write the constraint-force outputs (frcon, fzcon).
inline void AddConstraintForces(
const double* rCon, const double* rCon0, const double* zCon,
const double* zCon0, const double* ruFull, const double* zuFull,
const double* gCon, const double* sqrtSF, int nZnT, int nsMinF, int nsMinF1,
int nsMaxF, double* brmn_e, double* brmn_o, double* bzmn_e, double* bzmn_o,
double* frcon_e, double* frcon_o, double* fzcon_e, double* fzcon_o) {
for (int jF = nsMinF; jF < nsMaxF; ++jF) {
for (int kl = 0; kl < nZnT; ++kl) {
int idx_kl = (jF - nsMinF) * nZnT + kl;

double brcon = (rCon[idx_kl] - rCon0[idx_kl]) * gCon[idx_kl];
double bzcon = (zCon[idx_kl] - zCon0[idx_kl]) * gCon[idx_kl];

brmn_e[idx_kl] += brcon;
bzmn_e[idx_kl] += bzcon;
brmn_o[idx_kl] += brcon * sqrtSF[jF - nsMinF1];
bzmn_o[idx_kl] += bzcon * sqrtSF[jF - nsMinF1];

frcon_e[idx_kl] = ruFull[idx_kl] * gCon[idx_kl];
fzcon_e[idx_kl] = zuFull[idx_kl] * gCon[idx_kl];
frcon_o[idx_kl] = frcon_e[idx_kl] * sqrtSF[jF - nsMinF1];
fzcon_o[idx_kl] = fzcon_e[idx_kl] * sqrtSF[jF - nsMinF1];
} // kl
} // jF
}

} // namespace vmecpp

#endif // VMECPP_VMEC_IDEAL_MHD_MODEL_CONSTRAINT_FORCE_KERNEL_H_
7 changes: 4 additions & 3 deletions src/vmecpp/cpp/vmecpp/vmec/ideal_mhd_model/dft_toroidal.cc
Original file line number Diff line number Diff line change
Expand Up @@ -93,9 +93,10 @@ void ForcesToFourier3DSymmFastPoloidal(
double zmksc_n = -czmn_seg.dot(sinmui_seg);

// Assemble effective R and Z forces from MHD and spectral condensation
// contributions. Materialize to avoid re-evaluation in each dot product,
// reusing per-thread scratch so the inner loop allocates nothing (and
// stays Enzyme-differentiable). Same arithmetic as a fresh .eval().
// contributions. Materialize to avoid re-evaluation in each dot
// product, reusing per-thread scratch so the inner loop allocates
// nothing (and stays Enzyme-differentiable). Same arithmetic as a fresh
// .eval().
thread_local Eigen::VectorXd tempR_seg, tempZ_seg;
tempR_seg.noalias() = armn_seg + xmpq[m] * frcon_seg;
tempZ_seg.noalias() = azmn_seg + xmpq[m] * fzcon_seg;
Expand Down
Loading
Loading