Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion .github/workflows/benchmarks.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,11 @@ jobs:
pytest benchmarks/test_benchmarks.py --benchmark-json=benchmark_results.json
- name: Compare benchmark result
if: github.event_name == 'pull_request'
# Skip the result upload/compare for fork PRs: their GITHUB_TOKEN is
# read-only, so comment-on-alert/auto-push hit 'Resource not accessible
# by integration'. The benchmarks still run above; only the write-back
# is skipped for forks.
if: github.event_name == 'pull_request' && github.event.pull_request.head.repo.full_name == github.repository
uses: benchmark-action/github-action-benchmark@v1.21.0
with:
tool: "pytest"
Expand Down
28 changes: 23 additions & 5 deletions .github/workflows/tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -50,15 +50,33 @@ jobs:
run: |
# install VMEC++ deps as well as VMEC2000 deps (we need to import VMEC2000 in a test)
sudo apt-get update && sudo apt-get install -y build-essential cmake libnetcdf-dev liblapack-dev libomp-dev
- name: Cache the VMEC2000 wheel
if: ${{ matrix.os == 'ubuntu-22.04' && matrix.python-version == '3.10' }}
uses: actions/cache@v4
with:
path: /tmp/vmec2000-wheel
# Keyed on the pinned VMEC2000 source commit: rebuild only when it changes.
key: vmec2000-728af8bd6c79-cp310-ubuntu22.04
- name: Also install VMEC2000 (only on Ubuntu 22.04)
if: ${{ matrix.os == 'ubuntu-22.04' && matrix.python-version == '3.10' }}
run: |
# mpi4py is needed for VMEC2000
sudo apt-get install -y libopenmpi-dev
sudo apt-get install -y libopenmpi-dev libnetcdff-dev libscalapack-mpi-dev \
libopenblas-dev ninja-build
python -m pip install mpi4py
# custom wheel for VMEC2000, needed for some VMEC++/VMEC2000 compatibility tests
# NOTE: this wheel is only guaranteed to work on Ubuntu 22.04
python -m pip install vmec@https://anaconda.org/eguiraud-pf/vmec/0.0.6/download/vmec-0.0.6-cp310-cp310-linux_x86_64.whl
if ! ls /tmp/vmec2000-wheel/vmec-*.whl >/dev/null 2>&1; then
# Build with the SYSTEM cmake + ninja; do NOT pip-install the
# cmake/ninja wheels (they shadow the system cmake that
# scikit-build-core's editable vmecpp rebuild records).
python -m pip install numpy scikit-build f90wrap setuptools wheel
git clone https://github.com/hiddenSymmetries/VMEC2000.git /tmp/VMEC2000
git -C /tmp/VMEC2000 checkout 728af8bd6c796b36a0aa85fe298e507791e57c6e
cp /tmp/VMEC2000/cmake/machines/ubuntu.json /tmp/VMEC2000/cmake_config_file.json
LD_LIBRARY_PATH=/usr/lib/x86_64-linux-gnu \
python -m pip wheel /tmp/VMEC2000 --no-build-isolation -w /tmp/vmec2000-wheel
fi
python -m pip install /tmp/vmec2000-wheel/vmec-*.whl
# fail loudly here if the binding is still broken, not in the test step
python -c "import vmec; print('VMEC2000 import OK')"
- name: Install package
run: |
# on Ubuntu we would not need this, but on MacOS we need to point CMake to gfortran-14 and gcc-14
Expand Down
7 changes: 3 additions & 4 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -66,10 +66,9 @@ find_package(LAPACK REQUIRED)
FetchContent_Declare(
abseil-cpp
GIT_REPOSITORY https://github.com/abseil/abseil-cpp.git
# 20260107.1 LTS: required for Clang >= 21, where the 2024 pin fails to
# compile (absl::Nonnull SFINAE in absl/strings/ascii.cc). Clang is the
# compiler used for the Enzyme autodiff build.
GIT_TAG 20260107.1
# 20260107.1 LTS: older abseil fails to compile under Clang >= 21 (the
# Enzyme build) on absl::Nonnull SFINAE in absl/strings/ascii.cc.
GIT_TAG 255c84dadd029fd8ad25c5efb5933e47beaa00c7
GIT_SHALLOW TRUE
)
FetchContent_Declare(
Expand Down
11 changes: 10 additions & 1 deletion src/vmecpp/cpp/vmecpp/vmec/ideal_mhd_model/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,16 @@ cc_test(
cc_library(
name = "ideal_mhd_model",
srcs = ["ideal_mhd_model.cc"],
hdrs = ["ideal_mhd_model.h", "dft_data.h"],
# The shared force-chain kernels and the autodiff composition/JVP header are
# header-only and included by ideal_mhd_model.cc; declare whatever exists on
# this branch so the bazel sandbox can see them. The Enzyme JVP entry point
# (exact_force_jvp.cc) is built only by the CMake VMECPP_ENABLE_ENZYME path,
# not by bazel; its use in ideal_mhd_model.cc is guarded by that define.
hdrs = ["ideal_mhd_model.h", "dft_data.h"] + glob([
"*_kernel.h",
"local_force_composition.h",
"exact_force_jvp.h",
]),
visibility = ["//visibility:public"],
deps = [
":dft_toroidal",
Expand Down
135 changes: 10 additions & 125 deletions src/vmecpp/cpp/vmecpp/vmec/ideal_mhd_model/ideal_mhd_model.cc
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#include "vmecpp/vmec/fourier_geometry/fourier_geometry.h"
#include "vmecpp/vmec/handover_storage/handover_storage.h"
#include "vmecpp/vmec/ideal_mhd_model/jacobian_kernel.h"
#include "vmecpp/vmec/ideal_mhd_model/metric_kernel.h"
#include "vmecpp/vmec/radial_partitioning/radial_partitioning.h"
#include "vmecpp/vmec/radial_profiles/radial_profiles.h"
#include "vmecpp/vmec/vmec_constants/vmec_algorithm_constants.h"
Expand Down Expand Up @@ -1236,131 +1237,15 @@ void IdealMhdModel::computeJacobian() {
}

void IdealMhdModel::computeMetricElements() {
// gsqrt
// guu, guv, gvv

// contributions from full-grid surface _i_nside j-th half-grid surface
int j0 = r_.nsMinF1;
for (int kl = 0; kl < s_.nZnT; ++kl) {
m_ls_.r1e_i[kl] = r1_e[(j0 - r_.nsMinF1) * s_.nZnT + kl];
m_ls_.r1o_i[kl] = r1_o[(j0 - r_.nsMinF1) * s_.nZnT + kl];
m_ls_.z1e_i[kl] = z1_e[(j0 - r_.nsMinF1) * s_.nZnT + kl];
m_ls_.z1o_i[kl] = z1_o[(j0 - r_.nsMinF1) * s_.nZnT + kl];
m_ls_.rue_i[kl] = ru_e[(j0 - r_.nsMinF1) * s_.nZnT + kl];
m_ls_.ruo_i[kl] = ru_o[(j0 - r_.nsMinF1) * s_.nZnT + kl];
m_ls_.zue_i[kl] = zu_e[(j0 - r_.nsMinF1) * s_.nZnT + kl];
m_ls_.zuo_i[kl] = zu_o[(j0 - r_.nsMinF1) * s_.nZnT + kl];
if (s_.lthreed) {
m_ls_.rve_i[kl] = rv_e[(j0 - r_.nsMinF1) * s_.nZnT + kl];
m_ls_.rvo_i[kl] = rv_o[(j0 - r_.nsMinF1) * s_.nZnT + kl];
m_ls_.zve_i[kl] = zv_e[(j0 - r_.nsMinF1) * s_.nZnT + kl];
m_ls_.zvo_i[kl] = zv_o[(j0 - r_.nsMinF1) * s_.nZnT + kl];
}
}

// s on inner full-grid pos
double sF_i =
m_p_.sqrtSF[r_.nsMinH - r_.nsMinF1] * m_p_.sqrtSF[r_.nsMinH - r_.nsMinF1];

for (int jH = r_.nsMinH; jH < r_.nsMaxH; ++jH) {
// s on outside full-grid pos
double sF_o =
m_p_.sqrtSF[jH + 1 - r_.nsMinF1] * m_p_.sqrtSF[jH + 1 - r_.nsMinF1];

// sqrt(s) on j-th half-grid pos
double sqrtSH = m_p_.sqrtSH[jH - r_.nsMinH];

for (int kl = 0; kl < s_.nZnT; ++kl) {
int iHalf = (jH - r_.nsMinH) * s_.nZnT + kl;

// Re-use this loop to compute Jacobian gsqrt=tau*R
// only tau needed to be checked for a sign change,
// so skip the last part where gsqrt is computed
// if a sign changed happened by computing it only here
// (which will only be reached when tau did not change sign).
gsqrt[iHalf] = tau[iHalf] * r12[iHalf];

// contributions from full-grid surface _o_utside j-th half-grid surface
double r1e_o = r1_e[(jH + 1 - r_.nsMinF1) * s_.nZnT + kl];
double r1o_o = r1_o[(jH + 1 - r_.nsMinF1) * s_.nZnT + kl];
double rue_o = ru_e[(jH + 1 - r_.nsMinF1) * s_.nZnT + kl];
double ruo_o = ru_o[(jH + 1 - r_.nsMinF1) * s_.nZnT + kl];
double zue_o = zu_e[(jH + 1 - r_.nsMinF1) * s_.nZnT + kl];
double zuo_o = zu_o[(jH + 1 - r_.nsMinF1) * s_.nZnT + kl];

// g_{\theta,\theta} is needed for both 2D and 3D cases
guu[iHalf] = 0.5 * ((m_ls_.rue_i[kl] * m_ls_.rue_i[kl] +
m_ls_.zue_i[kl] * m_ls_.zue_i[kl]) +
(rue_o * rue_o + zue_o * zue_o) +
sF_i * (m_ls_.ruo_i[kl] * m_ls_.ruo_i[kl] +
m_ls_.zuo_i[kl] * m_ls_.zuo_i[kl]) +
sF_o * (ruo_o * ruo_o + zuo_o * zuo_o)) +
sqrtSH * ((m_ls_.rue_i[kl] * m_ls_.ruo_i[kl] +
m_ls_.zue_i[kl] * m_ls_.zuo_i[kl]) +
(rue_o * ruo_o + zue_o * zuo_o));

// g_{\zeta,\zeta} reduces to R^2 in the 2D case, so compute this always
gvv[iHalf] = 0.5 * (m_ls_.r1e_i[kl] * m_ls_.r1e_i[kl] + r1e_o * r1e_o +
sF_i * m_ls_.r1o_i[kl] * m_ls_.r1o_i[kl] +
sF_o * r1o_o * r1o_o) +
sqrtSH * (m_ls_.r1e_i[kl] * m_ls_.r1o_i[kl] + r1e_o * r1o_o);

if (s_.lthreed) {
double rve_o = rv_e[(jH + 1 - r_.nsMinF1) * s_.nZnT + kl];
double rvo_o = rv_o[(jH + 1 - r_.nsMinF1) * s_.nZnT + kl];
double zve_o = zv_e[(jH + 1 - r_.nsMinF1) * s_.nZnT + kl];
double zvo_o = zv_o[(jH + 1 - r_.nsMinF1) * s_.nZnT + kl];

// g_{\theta,\zeta} is only needed for the 3D case
guv[iHalf] = 0.5 * ((m_ls_.rue_i[kl] * m_ls_.rve_i[kl] +
m_ls_.zue_i[kl] * m_ls_.zve_i[kl]) +
(rue_o * rve_o + zue_o * zve_o) +
sF_i * (m_ls_.ruo_i[kl] * m_ls_.rvo_i[kl] +
m_ls_.zuo_i[kl] * m_ls_.zvo_i[kl]) +
sF_o * (ruo_o * rvo_o + zuo_o * zvo_o) +
sqrtSH * ((m_ls_.rue_i[kl] * m_ls_.rvo_i[kl] +
m_ls_.zue_i[kl] * m_ls_.zvo_i[kl]) +
(rue_o * rvo_o + zue_o * zvo_o) +
(m_ls_.rve_i[kl] * m_ls_.ruo_i[kl] +
m_ls_.zve_i[kl] * m_ls_.zuo_i[kl]) +
(rve_o * ruo_o + zve_o * zuo_o)));

// compute remaining contribution for 3D to g_{\zeta,\zeta}
gvv[iHalf] += 0.5 * ((m_ls_.rve_i[kl] * m_ls_.rve_i[kl] +
m_ls_.zve_i[kl] * m_ls_.zve_i[kl]) +
(rve_o * rve_o + zve_o * zve_o) +
sF_i * (m_ls_.rvo_i[kl] * m_ls_.rvo_i[kl] +
m_ls_.zvo_i[kl] * m_ls_.zvo_i[kl]) +
sF_o * (rvo_o * rvo_o + zvo_o * zvo_o)) +
sqrtSH * ((m_ls_.rve_i[kl] * m_ls_.rvo_i[kl] +
m_ls_.zve_i[kl] * m_ls_.zvo_i[kl]) +
(rve_o * rvo_o + zve_o * zvo_o));

// hand over to next iteration of radial loop
// --> what was outside in this loop iteration will be inside for next
// half-grid location
m_ls_.rve_i[kl] = rve_o;
m_ls_.rvo_i[kl] = rvo_o;
m_ls_.zve_i[kl] = zve_o;
m_ls_.zvo_i[kl] = zvo_o;
}

// hand over to next iteration of radial loop
// --> what was outside in this loop iteration will be inside for next
// half-grid location
m_ls_.r1e_i[kl] = r1e_o;
m_ls_.r1o_i[kl] = r1o_o;
m_ls_.rue_i[kl] = rue_o;
m_ls_.ruo_i[kl] = ruo_o;
m_ls_.zue_i[kl] = zue_o;
m_ls_.zuo_i[kl] = zuo_o;
} // kl

// hand over to next iteration of radial loop
// --> what was outside in this loop iteration will be inside for next
// half-grid location
sF_i = sF_o;
} // jH
// gsqrt = tau * r12, and the metric elements guu, guv, gvv. Arithmetic in the
// shared, allocation-free kernel (metric_kernel.h), used by both the solver
// and the Enzyme autodiff path.
ComputeMetricElements(r1_e.data(), r1_o.data(), ru_e.data(), ru_o.data(),
zu_e.data(), zu_o.data(), rv_e.data(), rv_o.data(),
zv_e.data(), zv_o.data(), tau.data(), r12.data(),
m_p_.sqrtSF.data(), m_p_.sqrtSH.data(), s_.lthreed,
s_.nZnT, r_.nsMinF1, r_.nsMinH, r_.nsMaxH, gsqrt.data(),
guu.data(), guv.data(), gvv.data());
}

/**
Expand Down
13 changes: 8 additions & 5 deletions src/vmecpp/cpp/vmecpp/vmec/ideal_mhd_model/jacobian_kernel.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,14 @@ namespace vmecpp {
// half-grid; sqrtSH is indexed jH - nsMinH. The half-grid point jH sits between
// full-grid surfaces jH (inside) and jH + 1 (outside).
inline void ComputeHalfGridJacobian(
const double* r1e, const double* r1o, const double* z1e, const double* z1o,
const double* rue, const double* ruo, const double* zue, const double* zuo,
const double* sqrtSH, double deltaS, double dSHalfDsInterp, int nZnT,
int nsMinF1, int nsMinH, int nsMaxH, double* r12, double* ru12,
double* zu12, double* rs, double* zs, double* tau) {
const double* __restrict r1e, const double* __restrict r1o,
const double* __restrict z1e, const double* __restrict z1o,
const double* __restrict rue, const double* __restrict ruo,
const double* __restrict zue, const double* __restrict zuo,
const double* __restrict sqrtSH, double deltaS, double dSHalfDsInterp,
int nZnT, int nsMinF1, int nsMinH, int nsMaxH, double* __restrict r12,
double* __restrict ru12, double* __restrict zu12, double* __restrict rs,
double* __restrict zs, double* __restrict tau) {
for (int jH = nsMinH; jH < nsMaxH; ++jH) {
const double sH = sqrtSH[jH - nsMinH];
for (int kl = 0; kl < nZnT; ++kl) {
Expand Down
84 changes: 84 additions & 0 deletions src/vmecpp/cpp/vmecpp/vmec/ideal_mhd_model/metric_kernel.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
// SPDX-FileCopyrightText: 2024-present Proxima Fusion GmbH
// <info@proximafusion.com>
//
// SPDX-License-Identifier: MIT
#ifndef VMECPP_VMEC_IDEAL_MHD_MODEL_METRIC_KERNEL_H_
#define VMECPP_VMEC_IDEAL_MHD_MODEL_METRIC_KERNEL_H_

namespace vmecpp {

// Half-grid metric kernel: gsqrt = tau * r12 and the metric elements guu, guv,
// gvv from the full-grid geometry (and the Jacobian tau, r12 from
// ComputeHalfGridJacobian). guv and the 3D part of gvv are computed only when
// lthreed. Shared, allocation-free over flat buffers, between
// IdealMhdModel::computeMetricElements and the Enzyme autodiff path. Same
// indexing conventions as jacobian_kernel.h. sqrtSF is indexed jF - nsMinF1.
inline void ComputeMetricElements(
const double* __restrict r1e, const double* __restrict r1o,
const double* __restrict rue, const double* __restrict ruo,
const double* __restrict zue, const double* __restrict zuo,
const double* __restrict rve, const double* __restrict rvo,
const double* __restrict zve, const double* __restrict zvo,
const double* __restrict tau, const double* __restrict r12,
const double* __restrict sqrtSF, const double* __restrict sqrtSH,
bool lthreed, int nZnT, int nsMinF1, int nsMinH, int nsMaxH,
double* __restrict gsqrt, double* __restrict guu, double* __restrict guv,
double* __restrict gvv) {
for (int jH = nsMinH; jH < nsMaxH; ++jH) {
const double sF_i = sqrtSF[jH - nsMinF1] * sqrtSF[jH - nsMinF1];
const double sF_o = sqrtSF[jH + 1 - nsMinF1] * sqrtSF[jH + 1 - nsMinF1];
const double sH = sqrtSH[jH - nsMinH];
for (int kl = 0; kl < nZnT; ++kl) {
const int i_in = (jH - nsMinF1) * nZnT + kl;
const int i_out = (jH + 1 - nsMinF1) * nZnT + kl;
const int ih = (jH - nsMinH) * nZnT + kl;

const double r1e_i = r1e[i_in], r1e_o = r1e[i_out];
const double r1o_i = r1o[i_in], r1o_o = r1o[i_out];
const double rue_i = rue[i_in], rue_o = rue[i_out];
const double ruo_i = ruo[i_in], ruo_o = ruo[i_out];
const double zue_i = zue[i_in], zue_o = zue[i_out];
const double zuo_i = zuo[i_in], zuo_o = zuo[i_out];

gsqrt[ih] = tau[ih] * r12[ih];

guu[ih] = 0.5 * ((rue_i * rue_i + zue_i * zue_i) +
(rue_o * rue_o + zue_o * zue_o) +
sF_i * (ruo_i * ruo_i + zuo_i * zuo_i) +
sF_o * (ruo_o * ruo_o + zuo_o * zuo_o)) +
sH * ((rue_i * ruo_i + zue_i * zuo_i) +
(rue_o * ruo_o + zue_o * zuo_o));

gvv[ih] = 0.5 * (r1e_i * r1e_i + r1e_o * r1e_o + sF_i * r1o_i * r1o_i +
sF_o * r1o_o * r1o_o) +
sH * (r1e_i * r1o_i + r1e_o * r1o_o);

if (lthreed) {
const double rve_i = rve[i_in], rve_o = rve[i_out];
const double rvo_i = rvo[i_in], rvo_o = rvo[i_out];
const double zve_i = zve[i_in], zve_o = zve[i_out];
const double zvo_i = zvo[i_in], zvo_o = zvo[i_out];

guv[ih] = 0.5 * ((rue_i * rve_i + zue_i * zve_i) +
(rue_o * rve_o + zue_o * zve_o) +
sF_i * (ruo_i * rvo_i + zuo_i * zvo_i) +
sF_o * (ruo_o * rvo_o + zuo_o * zvo_o) +
sH * ((rue_i * rvo_i + zue_i * zvo_i) +
(rue_o * rvo_o + zue_o * zvo_o) +
(rve_i * ruo_i + zve_i * zuo_i) +
(rve_o * ruo_o + zve_o * zuo_o)));

gvv[ih] += 0.5 * ((rve_i * rve_i + zve_i * zve_i) +
(rve_o * rve_o + zve_o * zve_o) +
sF_i * (rvo_i * rvo_i + zvo_i * zvo_i) +
sF_o * (rvo_o * rvo_o + zvo_o * zvo_o)) +
sH * ((rve_i * rvo_i + zve_i * zvo_i) +
(rve_o * rvo_o + zve_o * zvo_o));
}
}
}
}

} // namespace vmecpp

#endif // VMECPP_VMEC_IDEAL_MHD_MODEL_METRIC_KERNEL_H_
15 changes: 10 additions & 5 deletions src/vmecpp/cpp/vmecpp/vmec/output_quantities/output_quantities.cc
Original file line number Diff line number Diff line change
Expand Up @@ -5168,7 +5168,12 @@ vmecpp::WOutFileContents vmecpp::ComputeWOutFileContents(

void vmecpp::CompareWOut(const WOutFileContents& test_wout,
const WOutFileContents& expected_wout,
double tolerance, bool check_equal_niter) {
double tolerance, bool check_equal_niter,
double current_density_tolerance) {
// jcuru, jcurv compare looser only if the caller opts in; otherwise
// tolerance.
const double current_tolerance =
current_density_tolerance > 0.0 ? current_density_tolerance : tolerance;
CHECK_EQ(test_wout.signgs, expected_wout.signgs);
CHECK_EQ(test_wout.gamma, expected_wout.gamma);
CHECK_EQ(test_wout.pcurr_type, expected_wout.pcurr_type);
Expand Down Expand Up @@ -5250,11 +5255,11 @@ void vmecpp::CompareWOut(const WOutFileContents& test_wout,
IsCloseRelAbs(expected_wout.phipf[jF], test_wout.phipf[jF], tolerance));
CHECK(
IsCloseRelAbs(expected_wout.chipf[jF], test_wout.chipf[jF], tolerance));
CHECK(
IsCloseRelAbs(expected_wout.jcuru[jF], test_wout.jcuru[jF], tolerance))
CHECK(IsCloseRelAbs(expected_wout.jcuru[jF], test_wout.jcuru[jF],
current_tolerance))
<< "jF = " << jF;
CHECK(
IsCloseRelAbs(expected_wout.jcurv[jF], test_wout.jcurv[jF], tolerance))
CHECK(IsCloseRelAbs(expected_wout.jcurv[jF], test_wout.jcurv[jF],
current_tolerance))
<< "jF = " << jF;
CHECK(
IsCloseRelAbs(expected_wout.specw[jF], test_wout.specw[jF], tolerance));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1521,10 +1521,15 @@ WOutFileContents ComputeWOutFileContents(
// Compare the contents of a test wout object against a reference wout object,
// exiting with an error in case of mismatches.
// The comparison is performed using the specified tolerance in the "relabs"
// metric.
// metric. The current densities jcuru, jcurv are curl(B) quantities that
// amplify the rounding of the converged state; under vectorized/optimized
// builds they do not reproduce to the same tolerance as the profiles across
// machines. Pass current_density_tolerance > 0 to compare those two with a
// looser bound while keeping every other quantity at tolerance.
void CompareWOut(const WOutFileContents& test_wout,
const WOutFileContents& expected_wout, double tolerance,
bool check_equal_niter = true);
bool check_equal_niter = true,
double current_density_tolerance = 0.0);
} // namespace vmecpp

#endif // VMECPP_VMEC_OUTPUT_QUANTITIES_OUTPUT_QUANTITIES_H_
Loading
Loading