Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
15 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion .github/workflows/benchmarks.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,11 @@ jobs:
pytest benchmarks/test_benchmarks.py --benchmark-json=benchmark_results.json

- name: Compare benchmark result
if: github.event_name == 'pull_request'
# Skip the result upload/compare for fork PRs: their GITHUB_TOKEN is
# read-only, so comment-on-alert/auto-push hit 'Resource not accessible
# by integration'. The benchmarks still run above; only the write-back
# is skipped for forks.
if: github.event_name == 'pull_request' && github.event.pull_request.head.repo.full_name == github.repository
uses: benchmark-action/github-action-benchmark@v1.21.0
with:
tool: "pytest"
Expand Down
28 changes: 23 additions & 5 deletions .github/workflows/tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -50,15 +50,33 @@ jobs:
run: |
# install VMEC++ deps as well as VMEC2000 deps (we need to import VMEC2000 in a test)
sudo apt-get update && sudo apt-get install -y build-essential cmake libnetcdf-dev liblapack-dev libomp-dev
- name: Cache the VMEC2000 wheel
if: ${{ matrix.os == 'ubuntu-22.04' && matrix.python-version == '3.10' }}
uses: actions/cache@v4
with:
path: /tmp/vmec2000-wheel
# Keyed on the pinned VMEC2000 source commit: rebuild only when it changes.
key: vmec2000-728af8bd6c79-cp310-ubuntu22.04
- name: Also install VMEC2000 (only on Ubuntu 22.04)
if: ${{ matrix.os == 'ubuntu-22.04' && matrix.python-version == '3.10' }}
run: |
# mpi4py is needed for VMEC2000
sudo apt-get install -y libopenmpi-dev
sudo apt-get install -y libopenmpi-dev libnetcdff-dev libscalapack-mpi-dev \
libopenblas-dev ninja-build
python -m pip install mpi4py
# custom wheel for VMEC2000, needed for some VMEC++/VMEC2000 compatibility tests
# NOTE: this wheel is only guaranteed to work on Ubuntu 22.04
python -m pip install vmec@https://anaconda.org/eguiraud-pf/vmec/0.0.6/download/vmec-0.0.6-cp310-cp310-linux_x86_64.whl
if ! ls /tmp/vmec2000-wheel/vmec-*.whl >/dev/null 2>&1; then
# Build with the SYSTEM cmake + ninja; do NOT pip-install the
# cmake/ninja wheels (they shadow the system cmake that
# scikit-build-core's editable vmecpp rebuild records).
python -m pip install numpy scikit-build f90wrap setuptools wheel
git clone https://github.com/hiddenSymmetries/VMEC2000.git /tmp/VMEC2000
git -C /tmp/VMEC2000 checkout 728af8bd6c796b36a0aa85fe298e507791e57c6e
cp /tmp/VMEC2000/cmake/machines/ubuntu.json /tmp/VMEC2000/cmake_config_file.json
LD_LIBRARY_PATH=/usr/lib/x86_64-linux-gnu \
python -m pip wheel /tmp/VMEC2000 --no-build-isolation -w /tmp/vmec2000-wheel
fi
python -m pip install /tmp/vmec2000-wheel/vmec-*.whl
# fail loudly here if the binding is still broken, not in the test step
python -c "import vmec; print('VMEC2000 import OK')"
- name: Install package
run: |
# on Ubuntu we would not need this, but on MacOS we need to point CMake to gfortran-14 and gcc-14
Expand Down
7 changes: 3 additions & 4 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -66,10 +66,9 @@ find_package(LAPACK REQUIRED)
FetchContent_Declare(
abseil-cpp
GIT_REPOSITORY https://github.com/abseil/abseil-cpp.git
# 20260107.1 LTS: required for Clang >= 21, where the 2024 pin fails to
# compile (absl::Nonnull SFINAE in absl/strings/ascii.cc). Clang is the
# compiler used for the Enzyme autodiff build.
GIT_TAG 20260107.1
# 20260107.1 LTS: older abseil fails to compile under Clang >= 21 (the
# Enzyme build) on absl::Nonnull SFINAE in absl/strings/ascii.cc.
GIT_TAG 255c84dadd029fd8ad25c5efb5933e47beaa00c7
GIT_SHALLOW TRUE
)
FetchContent_Declare(
Expand Down
11 changes: 10 additions & 1 deletion src/vmecpp/cpp/vmecpp/vmec/ideal_mhd_model/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,16 @@ cc_test(
cc_library(
name = "ideal_mhd_model",
srcs = ["ideal_mhd_model.cc"],
hdrs = ["ideal_mhd_model.h", "dft_data.h"],
# The shared force-chain kernels and the autodiff composition/JVP header are
# header-only and included by ideal_mhd_model.cc; declare whatever exists on
# this branch so the bazel sandbox can see them. The Enzyme JVP entry point
# (exact_force_jvp.cc) is built only by the CMake VMECPP_ENABLE_ENZYME path,
# not by bazel; its use in ideal_mhd_model.cc is guarded by that define.
hdrs = ["ideal_mhd_model.h", "dft_data.h"] + glob([
"*_kernel.h",
"local_force_composition.h",
"exact_force_jvp.h",
]),
visibility = ["//visibility:public"],
deps = [
":dft_toroidal",
Expand Down
14 changes: 9 additions & 5 deletions src/vmecpp/cpp/vmecpp/vmec/ideal_mhd_model/dft_toroidal.cc
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,11 @@ void ForcesToFourier3DSymmFastPoloidal(
// axis lambda stays zero (no contribution from any m)
const int jMinL = 1;

// Reused scratch for the assembled R/Z forces: sized on first use, so the
// inner loop allocates nothing. Per-thread safe via the stack frame, since
// each OpenMP thread runs this on its own radial slice.
Eigen::VectorXd tempR_seg, tempZ_seg;

for (int jF = rp.nsMinF; jF < jMaxRZ; ++jF) {
const int mmax = jF == 0 ? 1 : s.mpol;
for (int m = 0; m < mmax; ++m) {
Expand Down Expand Up @@ -94,11 +99,10 @@ void ForcesToFourier3DSymmFastPoloidal(

// Assemble effective R and Z forces from MHD and spectral condensation
// contributions. Materialize to avoid re-evaluation in each dot
// product.
const Eigen::VectorXd tempR_seg =
(armn_seg + xmpq[m] * frcon_seg).eval();
const Eigen::VectorXd tempZ_seg =
(azmn_seg + xmpq[m] * fzcon_seg).eval();
// product, reusing the function-scope scratch so the inner loop
// allocates nothing. Same arithmetic as a fresh .eval().
tempR_seg.noalias() = armn_seg + xmpq[m] * frcon_seg;
tempZ_seg.noalias() = azmn_seg + xmpq[m] * fzcon_seg;

double rmkcc = tempR_seg.dot(cosmui_seg) + brmn_seg.dot(sinmumi_seg);
double rmkss = tempR_seg.dot(sinmui_seg) + brmn_seg.dot(cosmumi_seg);
Expand Down
222 changes: 22 additions & 200 deletions src/vmecpp/cpp/vmecpp/vmec/ideal_mhd_model/ideal_mhd_model.cc
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
#include "vmecpp/vmec/ideal_mhd_model/bcontra_kernel.h"
#include "vmecpp/vmec/ideal_mhd_model/jacobian_kernel.h"
#include "vmecpp/vmec/ideal_mhd_model/metric_kernel.h"
#include "vmecpp/vmec/ideal_mhd_model/mhdforce_kernel.h"
#include "vmecpp/vmec/ideal_mhd_model/pressure_kernel.h"
#include "vmecpp/vmec/radial_partitioning/radial_partitioning.h"
#include "vmecpp/vmec/radial_profiles/radial_profiles.h"
Expand Down Expand Up @@ -1806,206 +1807,27 @@ void IdealMhdModel::computeMHDForces() {
if (m_fc_.lfreeb) {
jMaxRZ = std::min(r_.nsMaxF, m_fc_.ns);
}

// obtain first inside point
// stuff gets divided by sqrtSHi, so cannot be 0
double sqrtSHi = 1.0;
if (r_.nsMinF > 0) {
// for the rel-0-th forces full-grid point, the rel-0-th half-grid point is
// inside
int j0 = r_.nsMinH;
for (int kl = 0; kl < s_.nZnT; ++kl) {
int iHalf = (j0 - r_.nsMinH) * s_.nZnT + kl;
m_ls_.P_i[kl] = r12[iHalf] * totalPressure[iHalf];
m_ls_.rup_i[kl] = ru12[iHalf] * m_ls_.P_i[kl];
m_ls_.zup_i[kl] = zu12[iHalf] * m_ls_.P_i[kl];
m_ls_.rsp_i[kl] = rs[iHalf] * m_ls_.P_i[kl];
m_ls_.zsp_i[kl] = zs[iHalf] * m_ls_.P_i[kl];
m_ls_.taup_i[kl] = tau[iHalf] * totalPressure[iHalf];
m_ls_.gbubu_i[kl] = gsqrt[iHalf] * bsupu[iHalf] * bsupu[iHalf];
m_ls_.gbubv_i[kl] = gsqrt[iHalf] * bsupu[iHalf] * bsupv[iHalf];
m_ls_.gbvbv_i[kl] = gsqrt[iHalf] * bsupv[iHalf] * bsupv[iHalf];
} // kl
sqrtSHi = m_p_.sqrtSH[j0 - r_.nsMinH];
} else {
// defaults to 0: no contribution from half-grid point inside the axis
m_ls_.P_i.setZero();
m_ls_.rup_i.setZero();
m_ls_.zup_i.setZero();
m_ls_.rsp_i.setZero();
m_ls_.zsp_i.setZero();
m_ls_.taup_i.setZero();
m_ls_.gbubu_i.setZero();
m_ls_.gbubv_i.setZero();
m_ls_.gbvbv_i.setZero();
}

Eigen::VectorXd P_o =
Eigen::VectorXd::Zero(s_.nZnT); // r12 * totalPressure = P
Eigen::VectorXd rup_o = Eigen::VectorXd::Zero(s_.nZnT); // ru12 * P
Eigen::VectorXd zup_o = Eigen::VectorXd::Zero(s_.nZnT); // zu12 * P
Eigen::VectorXd rsp_o = Eigen::VectorXd::Zero(s_.nZnT); // rs * P
Eigen::VectorXd zsp_o = Eigen::VectorXd::Zero(s_.nZnT); // zs * P
Eigen::VectorXd taup_o = Eigen::VectorXd::Zero(s_.nZnT); // tau * P
Eigen::VectorXd gbubu_o =
Eigen::VectorXd::Zero(s_.nZnT); // gsqrt * bsupu * bsupu
Eigen::VectorXd gbubv_o =
Eigen::VectorXd::Zero(s_.nZnT); // gsqrt * bsupu * bsupv
Eigen::VectorXd gbvbv_o =
Eigen::VectorXd::Zero(s_.nZnT); // gsqrt * bsupv * bsupv

for (int jF = r_.nsMinF; jF < jMaxRZ; ++jF) {
const double sFull =
m_p_.sqrtSF[jF - r_.nsMinF1] * m_p_.sqrtSF[jF - r_.nsMinF1];
// stuff gets divided by sqrtSHo, so cannot be 0
double sqrtSHo = 1.0;
if (jF < r_.nsMaxH) {
sqrtSHo = m_p_.sqrtSH[jF - r_.nsMinH];
}

if (jF < r_.nsMaxH) {
const int iHalf_base = (jF - r_.nsMinH) * s_.nZnT;
for (int kl = 0; kl < s_.nZnT; ++kl) {
// obtain next outside point
// defaults to 0: no contribution from half-grid point outside LCFS
int iHalf = iHalf_base + kl;
P_o[kl] = r12[iHalf] * totalPressure[iHalf];
rup_o[kl] = ru12[iHalf] * P_o[kl];
zup_o[kl] = zu12[iHalf] * P_o[kl];
rsp_o[kl] = rs[iHalf] * P_o[kl];
zsp_o[kl] = zs[iHalf] * P_o[kl];
taup_o[kl] = tau[iHalf] * totalPressure[iHalf];
} // kl

for (int kl = 0; kl < s_.nZnT; ++kl) {
// obtain next outside point
// defaults to 0: no contribution from half-grid point outside LCFS
int iHalf = iHalf_base + kl;
gbubu_o[kl] = gsqrt[iHalf] * bsupu[iHalf] * bsupu[iHalf];
gbubv_o[kl] = gsqrt[iHalf] * bsupu[iHalf] * bsupv[iHalf];
gbvbv_o[kl] = gsqrt[iHalf] * bsupv[iHalf] * bsupv[iHalf];
} // kl
} else {
P_o.setZero();
rup_o.setZero();
zup_o.setZero();
rsp_o.setZero();
zsp_o.setZero();
taup_o.setZero();
gbubu_o.setZero();
gbubv_o.setZero();
gbvbv_o.setZero();
}

// Segment views into geometry and force arrays for this surface
const int nZnT = s_.nZnT;
const int g_off = (jF - r_.nsMinF1) * nZnT;
const int f_off = (jF - r_.nsMinF) * nZnT;
const auto r1e = r1_e.segment(g_off, nZnT);
const auto r1o = r1_o.segment(g_off, nZnT);
const auto rue = ru_e.segment(g_off, nZnT);
const auto ruo = ru_o.segment(g_off, nZnT);
const auto zue = zu_e.segment(g_off, nZnT);
const auto zuo = zu_o.segment(g_off, nZnT);
const auto z1o = z1_o.segment(g_off, nZnT);

// Pre-compute common sub-expressions (averages and weighted averages)
const double invDS = 1.0 / m_fc_.deltaS;
const double invSHo = 1.0 / sqrtSHo;
const double invSHi = 1.0 / sqrtSHi;
const Eigen::VectorXd P_avg = 0.5 * (P_o + m_ls_.P_i);
const Eigen::VectorXd P_wavg = 0.5 * (P_o * invSHo + m_ls_.P_i * invSHi);

const Eigen::VectorXd gbubu_avg = 0.5 * (gbubu_o + m_ls_.gbubu_i);
const Eigen::VectorXd gbubu_wavg =
0.5 * (gbubu_o * sqrtSHo + m_ls_.gbubu_i * sqrtSHi);
const Eigen::VectorXd gbvbv_avg = 0.5 * (gbvbv_o + m_ls_.gbvbv_i);
const Eigen::VectorXd gbvbv_wavg =
0.5 * (gbvbv_o * sqrtSHo + m_ls_.gbvbv_i * sqrtSHi);

// A_R force
armn_e.segment(f_off, nZnT) =
(zup_o - m_ls_.zup_i) * invDS + 0.5 * (taup_o + m_ls_.taup_i) -
gbvbv_avg.cwiseProduct(r1e) - gbvbv_wavg.cwiseProduct(r1o);
armn_o.segment(f_off, nZnT) =
(zup_o * sqrtSHo - m_ls_.zup_i * sqrtSHi) * invDS -
0.5 * P_wavg.cwiseProduct(zue) - 0.5 * P_avg.cwiseProduct(zuo) +
0.5 * (taup_o * sqrtSHo + m_ls_.taup_i * sqrtSHi) -
gbvbv_wavg.cwiseProduct(r1e) - gbvbv_avg.cwiseProduct(r1o) * sFull;

// A_Z force
azmn_e.segment(f_off, nZnT) = -(rup_o - m_ls_.rup_i) * invDS;
azmn_o.segment(f_off, nZnT) =
-(rup_o * sqrtSHo - m_ls_.rup_i * sqrtSHi) * invDS +
0.5 * P_wavg.cwiseProduct(rue) + 0.5 * P_avg.cwiseProduct(ruo);

// B_R force
brmn_e.segment(f_off, nZnT) =
0.5 * (zsp_o + m_ls_.zsp_i) + 0.5 * P_wavg.cwiseProduct(z1o) -
gbubu_avg.cwiseProduct(rue) - gbubu_wavg.cwiseProduct(ruo);
brmn_o.segment(f_off, nZnT) =
0.5 * (zsp_o * sqrtSHo + m_ls_.zsp_i * sqrtSHi) +
0.5 * P_avg.cwiseProduct(z1o) - gbubu_wavg.cwiseProduct(rue) -
gbubu_avg.cwiseProduct(ruo) * sFull;

// B_Z force
bzmn_e.segment(f_off, nZnT) =
-0.5 * (rsp_o + m_ls_.rsp_i) - 0.5 * P_wavg.cwiseProduct(r1o) -
gbubu_avg.cwiseProduct(zue) - gbubu_wavg.cwiseProduct(zuo);
bzmn_o.segment(f_off, nZnT) =
-0.5 * (rsp_o * sqrtSHo + m_ls_.rsp_i * sqrtSHi) -
0.5 * P_avg.cwiseProduct(r1o) - gbubu_wavg.cwiseProduct(zue) -
gbubu_avg.cwiseProduct(zuo) * sFull;

if (s_.lthreed) {
const Eigen::VectorXd gbubv_avg = 0.5 * (gbubv_o + m_ls_.gbubv_i);
const Eigen::VectorXd gbubv_wavg =
0.5 * (gbubv_o * sqrtSHo + m_ls_.gbubv_i * sqrtSHi);
const auto rve = rv_e.segment(g_off, nZnT);
const auto rvo = rv_o.segment(g_off, nZnT);
const auto zve = zv_e.segment(g_off, nZnT);
const auto zvo = zv_o.segment(g_off, nZnT);

// 3D contributions to B_R and B_Z
brmn_e.segment(f_off, nZnT) -=
gbubv_avg.cwiseProduct(rve) + gbubv_wavg.cwiseProduct(rvo);
brmn_o.segment(f_off, nZnT) -=
gbubv_wavg.cwiseProduct(rve) + gbubv_avg.cwiseProduct(rvo) * sFull;
bzmn_e.segment(f_off, nZnT) -=
gbubv_avg.cwiseProduct(zve) + gbubv_wavg.cwiseProduct(zvo);
bzmn_o.segment(f_off, nZnT) -=
gbubv_wavg.cwiseProduct(zve) + gbubv_avg.cwiseProduct(zvo) * sFull;

// C_R force
crmn_e.segment(f_off, nZnT) =
gbubv_avg.cwiseProduct(rue) + gbubv_wavg.cwiseProduct(ruo) +
gbvbv_avg.cwiseProduct(rve) + gbvbv_wavg.cwiseProduct(rvo);
crmn_o.segment(f_off, nZnT) =
gbubv_wavg.cwiseProduct(rue) + gbubv_avg.cwiseProduct(ruo) * sFull +
gbvbv_wavg.cwiseProduct(rve) + gbvbv_avg.cwiseProduct(rvo) * sFull;

// C_Z force
czmn_e.segment(f_off, nZnT) =
gbubv_avg.cwiseProduct(zue) + gbubv_wavg.cwiseProduct(zuo) +
gbvbv_avg.cwiseProduct(zve) + gbvbv_wavg.cwiseProduct(zvo);
czmn_o.segment(f_off, nZnT) =
gbubv_wavg.cwiseProduct(zue) + gbubv_avg.cwiseProduct(zuo) * sFull +
gbvbv_wavg.cwiseProduct(zve) + gbvbv_avg.cwiseProduct(zvo) * sFull;
} // lthreed

// shift to next point
m_ls_.P_i = P_o;
m_ls_.rup_i = rup_o;
m_ls_.zup_i = zup_o;
m_ls_.rsp_i = rsp_o;
m_ls_.zsp_i = zsp_o;
m_ls_.taup_i = taup_o;
m_ls_.gbubu_i = gbubu_o;
m_ls_.gbubv_i = gbubv_o;
m_ls_.gbvbv_i = gbvbv_o;

sqrtSHi = sqrtSHo;
} // jF
// Real-space MHD force-density assembly in the shared, allocation-free kernel
// (mhdforce_kernel.h), used by both the solver and the Enzyme autodiff path.
ComputeMHDForceDensity(
r1_e.data(), r1_o.data(), ru_e.data(), ru_o.data(), zu_e.data(),
zu_o.data(), z1_o.data(), rv_e.data(), rv_o.data(), zv_e.data(),
zv_o.data(), r12.data(), ru12.data(), zu12.data(), rs.data(), zs.data(),
tau.data(), totalPressure.data(), gsqrt.data(), bsupu.data(),
bsupv.data(), m_p_.sqrtSF.data(), m_p_.sqrtSH.data(), m_ls_.P_i.data(),
m_ls_.rup_i.data(), m_ls_.zup_i.data(), m_ls_.rsp_i.data(),
m_ls_.zsp_i.data(), m_ls_.taup_i.data(), m_ls_.gbubu_i.data(),
m_ls_.gbubv_i.data(), m_ls_.gbvbv_i.data(), m_ls_.P_o.data(),
m_ls_.rup_o.data(), m_ls_.zup_o.data(), m_ls_.rsp_o.data(),
m_ls_.zsp_o.data(), m_ls_.taup_o.data(), m_ls_.gbubu_o.data(),
m_ls_.gbubv_o.data(), m_ls_.gbvbv_o.data(), m_ls_.P_avg.data(),
m_ls_.P_wavg.data(), m_ls_.gbubu_avg.data(), m_ls_.gbubu_wavg.data(),
m_ls_.gbvbv_avg.data(), m_ls_.gbvbv_wavg.data(), m_ls_.gbubv_avg.data(),
m_ls_.gbubv_wavg.data(), m_fc_.deltaS, s_.nZnT, r_.nsMinF, r_.nsMinF1,
r_.nsMinH, r_.nsMaxH, jMaxRZ, s_.lthreed, armn_e.data(), armn_o.data(),
azmn_e.data(), azmn_o.data(), brmn_e.data(), brmn_o.data(), bzmn_e.data(),
bzmn_o.data(), crmn_e.data(), crmn_o.data(), czmn_e.data(),
czmn_o.data());
}

bool IdealMhdModel::shouldUpdateRadialPreconditioner(int iter1,
Expand Down
13 changes: 8 additions & 5 deletions src/vmecpp/cpp/vmecpp/vmec/ideal_mhd_model/jacobian_kernel.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,14 @@ namespace vmecpp {
// half-grid; sqrtSH is indexed jH - nsMinH. The half-grid point jH sits between
// full-grid surfaces jH (inside) and jH + 1 (outside).
inline void ComputeHalfGridJacobian(
const double* r1e, const double* r1o, const double* z1e, const double* z1o,
const double* rue, const double* ruo, const double* zue, const double* zuo,
const double* sqrtSH, double deltaS, double dSHalfDsInterp, int nZnT,
int nsMinF1, int nsMinH, int nsMaxH, double* r12, double* ru12,
double* zu12, double* rs, double* zs, double* tau) {
const double* __restrict r1e, const double* __restrict r1o,
const double* __restrict z1e, const double* __restrict z1o,
const double* __restrict rue, const double* __restrict ruo,
const double* __restrict zue, const double* __restrict zuo,
const double* __restrict sqrtSH, double deltaS, double dSHalfDsInterp,
int nZnT, int nsMinF1, int nsMinH, int nsMaxH, double* __restrict r12,
double* __restrict ru12, double* __restrict zu12, double* __restrict rs,
double* __restrict zs, double* __restrict tau) {
for (int jH = nsMinH; jH < nsMaxH; ++jH) {
const double sH = sqrtSH[jH - nsMinH];
for (int kl = 0; kl < nZnT; ++kl) {
Expand Down
16 changes: 10 additions & 6 deletions src/vmecpp/cpp/vmecpp/vmec/ideal_mhd_model/metric_kernel.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,16 @@ namespace vmecpp {
// IdealMhdModel::computeMetricElements and the Enzyme autodiff path. Same
// indexing conventions as jacobian_kernel.h. sqrtSF is indexed jF - nsMinF1.
inline void ComputeMetricElements(
const double* r1e, const double* r1o, const double* rue, const double* ruo,
const double* zue, const double* zuo, const double* rve, const double* rvo,
const double* zve, const double* zvo, const double* tau, const double* r12,
const double* sqrtSF, const double* sqrtSH, bool lthreed, int nZnT,
int nsMinF1, int nsMinH, int nsMaxH, double* gsqrt, double* guu,
double* guv, double* gvv) {
const double* __restrict r1e, const double* __restrict r1o,
const double* __restrict rue, const double* __restrict ruo,
const double* __restrict zue, const double* __restrict zuo,
const double* __restrict rve, const double* __restrict rvo,
const double* __restrict zve, const double* __restrict zvo,
const double* __restrict tau, const double* __restrict r12,
const double* __restrict sqrtSF, const double* __restrict sqrtSH,
bool lthreed, int nZnT, int nsMinF1, int nsMinH, int nsMaxH,
double* __restrict gsqrt, double* __restrict guu, double* __restrict guv,
double* __restrict gvv) {
for (int jH = nsMinH; jH < nsMaxH; ++jH) {
const double sF_i = sqrtSF[jH - nsMinF1] * sqrtSF[jH - nsMinF1];
const double sF_o = sqrtSF[jH + 1 - nsMinF1] * sqrtSF[jH + 1 - nsMinF1];
Expand Down
Loading
Loading