From de34df7706dd75e3ddc0e3646e553125c1164db5 Mon Sep 17 00:00:00 2001 From: Paul Zimmerman Date: Thu, 7 May 2026 15:28:08 -0400 Subject: [PATCH 1/4] various updates integrals.cpp now has a long-range barrier potential option also various changes to support Gaussian basis operations --- include/becke.h | 13 +- include/cintwrapper.h | 13 + include/gauss.h | 22 +- include/integrals.h | 4 +- src/integrals/becke.cpp | 753 +++++++++++++++++++------------- src/integrals/gauss.cpp | 540 +++++++++++++++++------ src/integrals/integrals.cpp | 60 ++- src/integrals/integrals_aux.cpp | 58 ++- src/libcintw/cintwrapper.cpp | 394 +++++++++++++++-- 9 files changed, 1359 insertions(+), 498 deletions(-) diff --git a/include/becke.h b/include/becke.h index 54d349a..71b2362 100644 --- a/include/becke.h +++ b/include/becke.h @@ -17,6 +17,10 @@ void atomic_domain_cell_wt(const int ta, const float alpha, const float beta, co void becke_weight_nc(int natoms, float* grid1, float* wt1, double* coords); void becke_weight_nc(int natoms, float* grid1, float* wt1, float* coordsf); +void becke_weight_nc(const int mu_order, const int natoms, float* Rs, const int gc, const int gs, float* grid1, float* wt1, int* atno, float* coords); +void becke_weight_ncg(const int mu_order, const int natoms, float* Rs, const int gc, const int gs, float* grid1, float* wt1, int* atno, float* coords); +void becke_weight_ncgd(const int mu_order, const int natoms, double* Rs, const int gc, const int gs, double* grid1, double* wt1, int* atno, double* coords); +void becke_weight_nc(const int mu_order, const int natoms, double* Rs, const int gc, const int gs, double* grid1, double* wt1, int* atno, double* coords); void becke_weight_3c(int gs, float* grid1, float* wt1, float* grid2, float* wt2, float* grid3, float* wt3, int Z1, int Z2, int Z3, float A2, float B2, float C2, float A3, float B3, float C3); @@ -25,10 +29,15 @@ void becke_weight_2c(int gs, float* grid1, float* wt1, float* grid2, float* wt2, void becke_weight_2d(int gs, double* grid1, double* wt1, double* grid2, double* wt2, double zeta1, double zeta2, double A2, double B2, double C2); +float becke_ar(float r1, float r2); +double becke_ard(double r1, double r2); + void get_becke_grid_full(float alpha, int natoms, int* atno, float* coords, int nrad, int nang, float* ang_g, float* ang_w, const int gc, float* grid, float* wt); +void get_becke_grid_full(int mu_order, float alpha, int natoms, int* atno, float* coords, int nrad, int nang, float* ang_g, float* ang_w, const int gc, float* grid, float* wt); void get_becke_grid_full(int natoms, int* atno, double* coords, int nrad, int nang, float* ang_g, float* ang_w, const int gc, float* grid, float* wt); void get_becke_grid_full(int natoms, int* atno, double* coords, int nrad, int nang, double* ang_g, double* ang_w, const int gc, float* grid, float* wt); void get_becke_grid_full(int natoms, int* atno, double* coords, int nrad, int nang, double* ang_g, double* ang_w, const int gc, double* grid, double* wt); +void get_becke_grid_full(int mu_order, int natoms, int* atno, double* coords, int nrad, int nang, double* ang_g, double* ang_w, const int gc, double* grid, double* wt); void get_becke_grid_sparse(double rmax, int natoms, int* atno, double* coords, int nrad, int nang, double* ang_g, double* ang_w, const int gc, double* grid, double* wt); void compute_rho(int natoms, int* atno, double* coords, vector > &basis, double* Pao, int nrad, int gsa, float* grid, double* rho, double* drho, int prl); @@ -87,8 +96,8 @@ void density_in_basis2(int natoms, int* atno, double* coords, vector > basis, double* jCA, int gs, float* grid, float* wt, double* TL, int prl); -void wf_to_grid_gh_ke_2(int natoms, int* atno, double* coords, vector > basis, int nbas, int nenv, int N, int* atm, int* bas, double* env, - double* jCA, int gs, float* grid, float* wt, double* Td, int prl); -void integrate_hole_para_gh(double* rdm, bool full_rdm, bool hfx_on, int Nc, int No, int M, int natoms, int* atno, double* coords, int gs, int gsb, vector >& basis, - double* Pao, double* Pmo, double* jCA, float* grid, float* gridb, float* wt, float* wtb, float* rho, float* vxch, int prl); + +void wf_to_grid_gh(bool divide_by_rho, bool calc_rho, int natoms, int* atno, double* coords, vector > basis, + double* Pao, double* gfao, int gs, double* grid, double* wt, + double* rho, double* gf, double* Td, double* TL, int prl); +//void wf_to_grid_gh_ke(int natoms, int* atno, double* coords, vector > basis, double* jCA, int gs, float* grid, float* wt, double* TL, int prl); +//void wf_to_grid_gh_ke_2(int natoms, int* atno, double* coords, vector > basis, int nbas, int nenv, int N, int* atm, int* bas, double* env, +// double* jCA, int gs, float* grid, float* wt, double* Td, int prl); +//void integrate_hole_para_gh(double* rdm, bool full_rdm, bool hfx_on, int Nc, int No, int M, int natoms, int* atno, double* coords, int gs, int gsb, vector >& basis, +// double* Pao, double* Pmo, double* jCA, float* grid, float* gridb, float* wt, float* wtb, float* rho, float* vxch, int prl); #endif diff --git a/include/integrals.h b/include/integrals.h index ae15936..3b1b342 100644 --- a/include/integrals.h +++ b/include/integrals.h @@ -78,7 +78,8 @@ void compute_Enp(int natoms, int* atno, float* coords, vector > & void compute_Enp_para(int ngpu, int natoms, int* atno, float* coords, vector > &basis, int nrad, int nang, double* ang_g0, double* ang_w0, double* En, double* pVp, int prl); void compute_Enp_para(int ngpu, int natoms, int* atno, float* coords, vector > &basis, int nrad, int nang, double* ang_g0, double* ang_w0, float* En, float* pVp, int prl); -//electric fields in x,y,z (centered at origin) +//electric fields in x,y,z (centered at origin) and confining potential +void compute_Exyz(double rconf, double pconf, int natoms, int* atno, double* coords, bool gbasis, vector > &basis, int nrad, int nang, double* ang_g, double* ang_w, double* S, double* E, int prl); void compute_Exyz(int natoms, int* atno, double* coords, bool gbasis, vector > &basis, int nrad, int nang, double* ang_g, double* ang_w, double* S, double* E, int prl); void compute_ST(int natoms, int* atno, float* coords, vector > &basis, int nrad, int nang, double* ang_g0, double* ang_w0, double* S, double* T, int prl); @@ -151,6 +152,7 @@ int get_imax_n2ip(int Nmax, int natoms, int N, vector >& basis, v int find_center_of_grid(float Z1, int nrad); void get_angular_grid(int size_ang, double* ang_g, double* ang_w); +void get_angular_grid(int size_ang, float* ang_g, float* ang_w); void acc_assign(int size, float* vec, float v1); void acc_assign(int size, double* vec, double v1); void acc_assign(int tid, int size, double* vec, double v1); diff --git a/src/integrals/becke.cpp b/src/integrals/becke.cpp index 39e3b0e..d489847 100644 --- a/src/integrals/becke.cpp +++ b/src/integrals/becke.cpp @@ -23,7 +23,7 @@ void print_vec(int gsa, float* grid, float* A, float* B); using namespace std; -//#pragma acc routine seq +#pragma acc routine seq int get_atom_grid(int n, int natoms, int gsa) { int f1 = gsa/natoms; @@ -37,7 +37,7 @@ int get_atom_grid(int n, int natoms, int gsa) double sigmoid(double v1) { - double val = 1./(1.f+exp(-v1)); + double val = 1./(1.+exp(-v1)); return 1. - val; } @@ -60,6 +60,8 @@ float tanhh(float a) void atomic_domain_cell_wt(const int ta, const float alpha, const float beta, const int natoms, const int gc, const int gs, double* grid1, double* wt1, int* atno, double* coords0) { + int mu_order = 3; + int gsa = gs*natoms; if (natoms==1) { @@ -112,6 +114,7 @@ void atomic_domain_cell_wt(const int ta, const float alpha, const float beta, co int nat2 = natoms*natoms; double Ra[nat2]; + for (int i=0;i0.) { @@ -221,8 +227,8 @@ void atomic_domain_cell_wt(const int ta, const float alpha, const float beta, co //"b" terms - //const double nu0 = beta*oR12; - const double nu0 = beta; + //const double nu0 = beta; + const double nu0 = 0.; if (alpha>0.) { //mu12 = r1-r2; //sigmoid not dependent on R12 @@ -246,8 +252,8 @@ void atomic_domain_cell_wt(const int ta, const float alpha, const float beta, co nu12 = mu12 + b12*omm12; nu21 = -mu12 + b21*omm12; - f12 = bf3d(nu12); - f21 = bf3d(nu21); + f12 = bf3d(mu_order,nu12); + f21 = bf3d(mu_order,nu21); } fpb[i*natoms+j] = f12; @@ -348,8 +354,9 @@ void atomic_domain_cell_wt(const int ta, const float alpha, const float beta, co } //n-center grid wts (float) -void becke_weight_nc(const int natoms, const int gc, const int gs, float* grid1, float* wt1, int* atno, float* coords) +void becke_weight_nc(const int mu_order, const int natoms, float* Rs, const int gc, const int gs, float* grid1, float* wt1, int* atno, float* coords) { + if (mu_order<1) { printf(" mu_order is zero. no Becke wts \n"); return; } if (natoms<2) return; int gsa = gs*natoms; @@ -376,8 +383,8 @@ void becke_weight_nc(const int natoms, const int gc, const int gs, float* grid1, for (int j=0;j1000) { printf("\n ERROR: static array allocation in becke_weight_ncg has a problem (natoms: %2i) \n",natoms); return; } + + int gsa = gs*natoms; + int nat2 = natoms*natoms; + + double Ra[nat2]; + double aij[nat2]; + #pragma acc enter data create(Ra[0:nat2],aij[0:nat2]) + + //atomic positions + #pragma acc parallel loop collapse(2) present(Ra[0:nat2],coords[0:3*natoms]) + for (int i=0;i1000) { printf("\n ERROR: static array allocation in becke_weight_ncg has a problem (natoms: %2i) \n",natoms); return; } + + int gsa = gs*natoms; + int nat2 = natoms*natoms; + + float Ra[nat2]; + float aij[nat2]; + #pragma acc enter data create(Ra[0:nat2],aij[0:nat2]) + + //atomic positions + #pragma acc parallel loop collapse(2) present(Ra[0:nat2],coords[0:3*natoms]) + for (int i=0;iZmax) + Zmax = atno[n]; + float* Rs = new float[Zmax+1](); + for (int m=0;mZmax) + Zmax = atno[n]; + double* Rs = new double[Zmax+1](); + for (int m=0;m > &basis, bool need_wt, int gsa, float* grid, float* wt, double* vxc, double* vxcs, double* fxc, int prl) -{ - if (!gbasis) - { printf("\n ERROR shouldn't be here in compute_fxc (Gaussian basis version) \n"); exit(-1); } - - if (prl>1) printf(" compute_fxc: Gaussian basis \n"); - - //int gsa = gs*natoms; - int gsa6 = gsa*6; - - int N = basis.size(); - int N2 = N*N; - int* n2i = new int[natoms]; - int imaxN = get_imax_n2i(natoms,N,basis,n2i); - - int iN = imaxN; - float** val1 = new float*[iN]; - float** val2 = new float*[iN]; - for (int i=0;i0) s1 = n2i[m-1]; int s2 = n2i[m]; - - float Z1 = (float)atno[m]; - double A1 = coords[3*m+0]; double B1 = coords[3*m+1]; double C1 = coords[3*m+2]; - - copy_grid(gsa,grid1,grid); - recenter_grid_zero(gsa,grid1,-A1,-B1,-C1); - - //#pragma acc update self(grid1[0:gsa6]) - - #pragma acc parallel loop collapse(2) present(val1[0:iN][0:gsa]) - for (int i1=0;i1 basis1 = basis[i1]; - int l1 = basis1[1]; int m1 = basis1[2]; int ng = basis1[3]; - int in = ig + ng; //index to find norm - - float* valm = val1[ii1]; - for (int j=0;j0) s3 = n2i[n-1]; int s4 = n2i[n]; - - float Z2 = (float)atno[m]; - double A2 = coords[3*n+0]; double B2 = coords[3*n+1]; double C2 = coords[3*n+2]; - - #pragma acc parallel loop collapse(2) present(val2[0:iN][0:gsa]) - for (int i2=0;i2 basis2 = basis[i2]; - int l2 = basis2[1]; int m2 = basis2[2]; int ng = basis2[3]; - int in = ig + ng; //index to find norm - - float* valn = val2[ii2]; - for (int j=0;j2) - { - printf("\n fxc: \n"); - print_square(N,fxc); - } - - #pragma acc exit data delete(grid1[0:gsa6],grid2[0:gsa6]) - #pragma acc exit data delete(val0[0:gsa],val1[0:iN][0:gsa],val2[0:iN][0:gsa]) - - delete [] grid1; - delete [] grid2; - delete [] n2i; - - delete [] val0; - for (int i=0;i > &basis, bool need_wt, bool gga, double* Pao, int gs, double* grid, double* wt, double* vxc, double* vxcs, double* fxc, int prl) @@ -1387,6 +1489,10 @@ void compute_fxcd(bool gbasis, int natoms, int* atno, double* coords, vector0) s1 = n2i[m-1]; int s2 = n2i[m]; - float Z1 = (float)atno[m]; double A1 = coords[3*m+0]; double B1 = coords[3*m+1]; double C1 = coords[3*m+2]; copy_grid(gs,grid1,grid); @@ -1489,14 +1594,11 @@ void compute_fxcd(bool gbasis, int natoms, int* atno, double* coords, vector0) s3 = n2i[n-1]; int s4 = n2i[n]; - float Z2 = (float)atno[m]; + //float Z2 = (float)atno[n]; double A2 = coords[3*n+0]; double B2 = coords[3*n+1]; double C2 = coords[3*n+2]; #pragma acc parallel loop collapse(2) present(val2[0:iN][0:gs]) @@ -1564,14 +1667,11 @@ void compute_fxcd(bool gbasis, int natoms, int* atno, double* coords, vector0) s1 = n2i[m-1]; int s2 = n2i[m]; - float Z1 = (float)atno[m]; double A1 = coords[3*m+0]; double B1 = coords[3*m+1]; double C1 = coords[3*m+2]; copy_grid(gs,grid1,grid); @@ -1642,7 +1742,6 @@ void compute_fxcd(bool gbasis, int natoms, int* atno, double* coords, vector0) s3 = n2i[n-1]; int s4 = n2i[n]; - float Z2 = (float)atno[m]; + //float Z2 = (float)atno[n]; double A2 = coords[3*n+0]; double B2 = coords[3*n+1]; double C2 = coords[3*n+2]; #pragma acc parallel loop collapse(2) present(val2[0:iN][0:gs]) @@ -1924,7 +2023,6 @@ void compute_fxc(bool gbasis, int natoms, int* atno, double* coords, vector0) s1 = n2i[m-1]; int s2 = n2i[m]; - float Z1 = (float)atno[m]; double A1 = coords[3*m+0]; double B1 = coords[3*m+1]; double C1 = coords[3*m+2]; copy_grid(gs,grid1,grid); @@ -1999,7 +2097,7 @@ void compute_fxc(bool gbasis, int natoms, int* atno, double* coords, vector0) s3 = n2i[n-1]; int s4 = n2i[n]; - float Z2 = (float)atno[m]; + //float Z2 = (float)atno[n]; double A2 = coords[3*n+0]; double B2 = coords[3*n+1]; double C2 = coords[3*n+2]; #pragma acc parallel loop collapse(2) present(val2[0:iN][0:gs]) @@ -2088,7 +2186,6 @@ void compute_fxc(bool gbasis, int natoms, int* atno, double* coords, vector0) s1 = n2i[m-1]; int s2 = n2i[m]; - float Z1 = (float)atno[m]; double A1 = coords[3*m+0]; double B1 = coords[3*m+1]; double C1 = coords[3*m+2]; copy_grid(gs,grid1,grid); @@ -2184,7 +2281,7 @@ void compute_fxc(bool gbasis, int natoms, int* atno, double* coords, vector0) s3 = n2i[n-1]; int s4 = n2i[n]; - float Z2 = (float)atno[m]; + //float Z2 = (float)atno[n]; double A2 = coords[3*n+0]; double B2 = coords[3*n+1]; double C2 = coords[3*n+2]; #pragma acc parallel loop collapse(2) present(val2[0:iN][0:gs]) @@ -2695,7 +2792,6 @@ void compute_rho(bool gbasis, int natoms, int* atno, double* coords, vector0) s1 = n2i[m-1]; int s2 = n2i[m]; - float Z1 = (float)atno[m]; double A1 = coords[3*m+0]; double B1 = coords[3*m+1]; double C1 = coords[3*m+2]; copy_grid(gsa,grid1,grid); @@ -2782,7 +2878,7 @@ void compute_rho(bool gbasis, int natoms, int* atno, double* coords, vector0) s3 = n2i[n-1]; int s4 = n2i[n]; - float Z2 = (float)atno[m]; + //float Z2 = (float)atno[n]; double A2 = coords[3*n+0]; double B2 = coords[3*n+1]; double C2 = coords[3*n+2]; #pragma acc parallel loop collapse(2) present(val2[0:iN][0:gsa]) @@ -3024,7 +3120,6 @@ void compute_lap_hessg(bool gbasis, int natoms, int* atno, double* coords, vecto //working on this block of the matrix int s1 = 0; if (m>0) s1 = n2i[m-1]; int s2 = n2i[m]; - double Z1 = (double)atno[m]; double A1 = coords[3*m+0]; double B1 = coords[3*m+1]; double C1 = coords[3*m+2]; if (lowmem) @@ -3123,7 +3218,6 @@ void compute_lap_hessg(bool gbasis, int natoms, int* atno, double* coords, vecto { int s3 = 0; if (n>0) s3 = n2i[n-1]; int s4 = n2i[n]; - double Z2 = (double)atno[m]; double A2 = coords[3*n+0]; double B2 = coords[3*n+1]; double C2 = coords[3*n+2]; #pragma acc parallel loop collapse(2) present(val2[0:iN][0:gsaa]) @@ -3347,7 +3441,6 @@ void compute_rhodg(bool gbasis, int natoms, int* atno, double* coords, vector0) s1 = n2i[m-1]; int s2 = n2i[m]; - double Z1 = (double)atno[m]; double A1 = coords[3*m+0]; double B1 = coords[3*m+1]; double C1 = coords[3*m+2]; copy_grid(gsa,grid1,grid); @@ -3434,7 +3527,6 @@ void compute_rhodg(bool gbasis, int natoms, int* atno, double* coords, vector0) s3 = n2i[n-1]; int s4 = n2i[n]; - double Z2 = (double)atno[m]; double A2 = coords[3*n+0]; double B2 = coords[3*n+1]; double C2 = coords[3*n+2]; #pragma acc parallel loop collapse(2) present(val2[0:iN][0:gsa]) @@ -3650,7 +3742,6 @@ void compute_rho(int natoms, int* atno, double* coords, vector > //working on this block of the matrix int s1 = 0; if (m>0) s1 = n2i[m-1]; int s2 = n2i[m]; - float Z1 = (float)atno[m]; double A1 = coords[3*m+0]; double B1 = coords[3*m+1]; double C1 = coords[3*m+2]; copy_grid(gsa,grid1,grid); @@ -3767,7 +3858,7 @@ void compute_rho(int natoms, int* atno, double* coords, vector > { int s3 = 0; if (n>0) s3 = n2i[n-1]; int s4 = n2i[n]; - float Z2 = (float)atno[m]; + //float Z2 = (float)atno[n]; double A2 = coords[3*n+0]; double B2 = coords[3*n+1]; double C2 = coords[3*n+2]; #pragma acc parallel loop collapse(2) present(val2[0:iN][0:gsa]) @@ -4033,7 +4124,6 @@ void compute_rhod(int natoms, int* atno, double* coords, vector > //working on this block of the matrix int s1 = 0; if (m>0) s1 = n2i[m-1]; int s2 = n2i[m]; - double Z1 = (double)atno[m]; double A1 = coords[3*m+0]; double B1 = coords[3*m+1]; double C1 = coords[3*m+2]; copy_grid(gsa,grid1,grid); @@ -4115,7 +4205,6 @@ void compute_rhod(int natoms, int* atno, double* coords, vector > { int s3 = 0; if (n>0) s3 = n2i[n-1]; int s4 = n2i[n]; - double Z2 = (double)atno[m]; double A2 = coords[3*n+0]; double B2 = coords[3*n+1]; double C2 = coords[3*n+2]; #pragma acc parallel loop collapse(2) present(val2[0:iN][0:gsa]) @@ -4368,7 +4457,6 @@ void compute_lap_hess(int natoms, int* atno, double* coords, vector0) s1 = n2i[m-1]; int s2 = n2i[m]; - double Z1 = (double)atno[m]; double A1 = coords[3*m+0]; double B1 = coords[3*m+1]; double C1 = coords[3*m+2]; copy_grid(gsa,grid1,grid); @@ -4513,7 +4601,6 @@ void compute_lap_hess(int natoms, int* atno, double* coords, vector0) s3 = n2i[n-1]; int s4 = n2i[n]; - double Z2 = (double)atno[m]; double A2 = coords[3*n+0]; double B2 = coords[3*n+1]; double C2 = coords[3*n+2]; #pragma acc parallel loop collapse(2) present(val2[0:iN][0:gsa]) @@ -6077,7 +6164,7 @@ void compute_fxc(int natoms, int* atno, double* coords, vector > //working on this block of the matrix int s1 = 0; if (m>0) s1 = n2i[m-1]; int s2 = n2i[m]; - float Z1 = (float)atno[m]; + //float Z1 = (float)atno[m]; double A1 = coords[3*m+0]; double B1 = coords[3*m+1]; double C1 = coords[3*m+2]; copy_grid(gs,grid1,grid); @@ -6132,7 +6219,7 @@ void compute_fxc(int natoms, int* atno, double* coords, vector > { int s3 = 0; if (n>0) s3 = n2i[n-1]; int s4 = n2i[n]; - float Z2 = (float)atno[m]; + //float Z2 = (float)atno[n]; double A2 = coords[3*n+0]; double B2 = coords[3*n+1]; double C2 = coords[3*n+2]; #pragma acc parallel loop collapse(2) present(val2[0:iN][0:gs]) @@ -6312,7 +6399,7 @@ void compute_fxc(int natoms, int* atno, double* coords, vector > { int s3 = 0; if (n>0) s3 = n2i[n-1]; int s4 = n2i[n]; - float Z2 = (float)atno[m]; + //float Z2 = (float)atno[n]; double A2 = coords[3*n+0]; double B2 = coords[3*n+1]; double C2 = coords[3*n+2]; #pragma acc parallel loop collapse(2) present(val2[0:iN][0:gs]) @@ -6578,7 +6665,7 @@ void compute_fxcd(int natoms, int* atno, double* coords, vector > //working on this block of the matrix int s1 = 0; if (m>0) s1 = n2i[m-1]; int s2 = n2i[m]; - double Z1 = (double)atno[m]; + //double Z1 = (double)atno[m]; double A1 = coords[3*m+0]; double B1 = coords[3*m+1]; double C1 = coords[3*m+2]; copy_grid(gs,grid1,grid); @@ -6633,7 +6720,6 @@ void compute_fxcd(int natoms, int* atno, double* coords, vector > { int s3 = 0; if (n>0) s3 = n2i[n-1]; int s4 = n2i[n]; - double Z2 = (double)atno[m]; double A2 = coords[3*n+0]; double B2 = coords[3*n+1]; double C2 = coords[3*n+2]; #pragma acc parallel loop collapse(2) present(val2[0:iN][0:gs]) @@ -6702,7 +6788,7 @@ void compute_fxcd(int natoms, int* atno, double* coords, vector > //working on this block of the matrix int s1 = 0; if (m>0) s1 = n2i[m-1]; int s2 = n2i[m]; - double Z1 = (double)atno[m]; + //double Z1 = (double)atno[m]; double A1 = coords[3*m+0]; double B1 = coords[3*m+1]; double C1 = coords[3*m+2]; copy_grid(gs,grid1,grid); @@ -6793,7 +6879,6 @@ void compute_fxcd(int natoms, int* atno, double* coords, vector > { int s3 = 0; if (n>0) s3 = n2i[n-1]; int s4 = n2i[n]; - double Z2 = (double)atno[m]; double A2 = coords[3*n+0]; double B2 = coords[3*n+1]; double C2 = coords[3*n+2]; #pragma acc parallel loop collapse(2) present(val2[0:iN][0:gs]) @@ -7023,7 +7108,6 @@ void compute_dft_grad(int natoms, int* atno, double* coords, vector0) s1 = n2i[m-1]; int s2 = n2i[m]; - float Z1 = (float)atno[m]; double A1 = coords[3*m+0]; double B1 = coords[3*m+1]; double C1 = coords[3*m+2]; copy_grid(gsa,grid1,grid); @@ -7099,7 +7183,7 @@ void compute_dft_grad(int natoms, int* atno, double* coords, vector0) s3 = n2i[n-1]; int s4 = n2i[n]; - float Z2 = (float)atno[m]; + //float Z2 = (float)atno[n]; double A2 = coords[3*n+0]; double B2 = coords[3*n+1]; double C2 = coords[3*n+2]; #pragma acc parallel loop collapse(2) present(val2[0:iN][0:gsa]) @@ -7516,3 +7600,60 @@ void test_becke_weight(int natoms, int* atno, double* coords, vector5) { printf("\n ERROR: l>6 in eval_vgh \n"); exit(-1); } + + #if 1 + double normf = 2.*PI/(2.*l+1.)*norm; + double norm1 = pow(zt,-0.5*(2.*l+3.)); + double norm2 = pow(zt,-1.-0.5*l); + double gf1 = 0.5*(2.*l+3.); + double gf2 = 1.-0.5*l; + + #pragma acc parallel loop present(grid[0:gsa6],val[0:gsa]) + for (int j=0;j >& basis, double* Pao, double* Pmo, double* jCA, float* grid, float* gridb, float* wt, float* wtb, float* rho, float* vxch, int prl) { @@ -440,216 +492,402 @@ void integrate_hole_para_gh(double* rdm, bool full_rdm, bool hfx_on, int Nc, int return; } +#endif -void wf_to_grid_gh_ke_2(int natoms, int* atno, double* coords, vector > basis, int nbas, int nenv, int N, int* atm, int* bas, double* env, - double* jCA, int gs, float* grid, float* wt, double* Td, int prl) +void wf_to_grid_gh(bool divide_by_rho, bool calc_rho, int natoms, int* atno, double* coords, vector > basis, + double* Pao, double* gfao, int gs, double* grid, double* wt, + double* rho, double* gf, double* Td, double* TL, int prl) { - if (prl>-1) printf(" compute_ke: Gaussian basis (testing) \n"); - printf("\n WARNING: updated eval_gh_ke ftns to new procedure \n"); + if (prl>1) printf(" wf_to_grid_gh \n"); int gsa = gs*natoms; int gsa3 = gsa*3; int gsa6 = gsa*6; - int N0 = basis.size(); - if (N0!=N) { printf("\n ERROR: mismatch in basis sizes \n"); exit(-1); } - //int N2 = N*N; + bool need_g = 1; + if (Td==NULL) need_g = 0; + bool need_gf = 1; + if (gf==NULL || gfao==NULL) need_gf = 0; + bool need_L = 1; + if (TL==NULL) need_L = 0; + + int N = basis.size(); + int N2 = N*N; + + if (prl>1) + { + printf("\n Pao: \n"); + print_square(N,Pao); + } + + //normalization convention + for (int j=0;j1) + #pragma acc enter data create(val0[0:gsa],val1[0:iN][0:gsa],val2[0:iN][0:gsa]) + if (need_g) { - printf("\n jCA: \n"); - print_square(N,jCA); + #pragma acc enter data create(val0p[0:gsa3],val1p[0:iN][0:gsa3],val2p[0:iN][0:gsa3]) + } + if (need_L) + { + #pragma acc enter data create(val1L[0:iN][0:gsa],val2L[0:iN][0:gsa]) } - #pragma acc parallel loop present(valmo[0:gsa]) + if (calc_rho) + #pragma acc parallel loop present(rho[0:gsa]) for (int j=0;j0) s1 = n2i[m-1]; int s2 = n2i[m]; - float Z1 = (float)atno[m]; + //double Z1 = (double)atno[m]; double A1 = coords[3*m+0]; double B1 = coords[3*m+1]; double C1 = coords[3*m+2]; copy_grid(gsa,grid1,grid); recenter_grid_zero(gsa,grid1,-A1,-B1,-C1); - //tricky due to non-constant jumps in loop - //#pragma omp parallel for - for (int i1=s1;i1 basis1 = basis[i1]; - int n1 = basis1[0]; int l1 = basis1[1]; int m1 = basis1[2]; int ng = basis1[3]; + int l1 = basis1[1]; int m1 = basis1[2]; int ng = basis1[3]; int in = ig + ng; //index to find norm - float* valm = val1[ii1]; - float* valL = val1L[ii1]; + double* valm = val1[ii1]; + double* valmp = val1p[ii1]; + double* valml = val1L[ii1]; for (int j=0;j0) s3 = n2i[n-1]; int s4 = n2i[n]; - float Z2 = (float)atno[n]; + //double Z2 = (double)atno[n]; double A2 = coords[3*n+0]; double B2 = coords[3*n+1]; double C2 = coords[3*n+2]; copy_grid(gsa,grid2,grid); - recenter_grid_zero(gsa,grid1,-A2,-B2,-C2); + recenter_grid_zero(gsa,grid2,-A2,-B2,-C2); + + #pragma acc parallel loop collapse(2) present(val2[0:iN][0:gsa]) + for (int i1=0;i1 basis2 = basis[i1]; + int l2 = basis2[1]; int m2 = basis2[2]; int ng = basis2[3]; + int in = ig + ng; //index to find norm + + double* valm = val2[ii1]; + double* valmp = val2p[ii1]; + double* valml = val2L[ii1]; + for (int j=0;j0 || calc_rho) + { + double dtot = 0.; + #pragma acc parallel loop present(rho[0:gsa],wt[0:gsa]) + for (int j=0;j-2) + if (divide_by_rho) { - #pragma acc update self(valmo[0:gsa]) - printf("\n MO: \n"); - print_vec(gsa,grid,valmo); + const double den = 1.e-30; + + if (need_g) + #pragma acc parallel loop present(Td[0:gsa],rho[0:gsa]) + for (int j=0;j2 && gsa<10000) + { + #pragma acc update self(gf[0:gsa]) + printf("\n gf: \n"); + print_vec(gsa,grid,gf); + } + if (prl>2 && need_g && gsa<10000) { #pragma acc update self(Td[0:gsa]) - printf("\n ke: \n"); + printf("\n Td: \n"); print_vec(gsa,grid,Td); } + //revert normalization convention + for (int j=0;j=3 + //printf(" eval_gh_spd. inc_r: %i gs: %4i l1: %i m1: %i \n",(int)include_r,gs,l1,m1); + if (l1==0) return; if (l1==1) @@ -3706,7 +3952,7 @@ void eval_ghd(int gs, double* grid, double* val1, int l1, int m1, const double n double y = grid[6*i+1]; double x2 = x*x; double y2 = y*y; - val1[i] *= -(5.*x2*x2 - 10.*x2*y2 + y2*y2)*y; + val1[i] *= (5.*x2*x2 - 10.*x2*y2 + y2*y2)*y; //changed sign } } else if (m1==-4) //h (y2 - x2)*x*y*z @@ -3719,7 +3965,7 @@ void eval_ghd(int gs, double* grid, double* val1, int l1, int m1, const double n double z = grid[6*i+2]; double x2 = x*x; double y2 = y*y; - val1[i] *= (y2 - x2)*x*y*z; + val1[i] *= -(y2 - x2)*x*y*z; //changed sign } } //ordering differed (lower should be +5) @@ -3736,7 +3982,7 @@ void eval_ghd(int gs, double* grid, double* val1, int l1, int m1, const double n double y2 = y*y; double z2 = z*z; double r2 = r*r; - val1[i] *= y * (r2*(3.*x2-y2) + 9.*z2*(y2-3.*x2)); + val1[i] *= -y * (r2*(3.*x2-y2) + 9.*z2*(y2-3.*x2)); //changed sign //val1[i] *= (x2*x2 - 10.*x2*y2 + 5.*y2*y2)*x; } } @@ -3769,7 +4015,7 @@ void eval_ghd(int gs, double* grid, double* val1, int l1, int m1, const double n double r = grid[6*i+3]; double z2 = z*z; double r2 = r*r; - val1[i] *= y * (14.*r2*z2 - 21.*z2*z2 - r2*r2); + val1[i] *= -y * (14.*r2*z2 - 21.*z2*z2 - r2*r2); //changed sign //val1[i] *= z * (x2*x2 - 6.*x2*y2 + y2*y2); } } @@ -3798,7 +4044,7 @@ void eval_ghd(int gs, double* grid, double* val1, int l1, int m1, const double n double r = grid[6*i+3]; double z2 = z*z; double r2 = r*r; - val1[i] *= x*(-r2*r2 + 14.*r2*z2 - 21.*z2*z2); + val1[i] *= -x*(-r2*r2 + 14.*r2*z2 - 21.*z2*z2); //changed sign //val1[i] *= x * (x2-3.*y2)*(x2 + y2 - 8.*z2); } } @@ -3814,7 +4060,7 @@ void eval_ghd(int gs, double* grid, double* val1, int l1, int m1, const double n double r = grid[6*i+3]; double z2 = z*z; double r2 = r*r; - val1[i] *= z * (r2 - 3.*z2)*(x-y)*(x+y); + val1[i] *= -z * (r2 - 3.*z2)*(x-y)*(x+y); //changed sign //val1[i] *= y * (x2*x2 + y2*y2 - 12.*y2*z2 + 8.*z2*z2 + 2.*x2 * (y2-6.*z2)); } } @@ -3832,7 +4078,7 @@ void eval_ghd(int gs, double* grid, double* val1, int l1, int m1, const double n double y2 = y*y; double z2 = z*z; double r2 = r*r; - val1[i] *= x * (r2*(x2-3.*y2) + 9.*z2*(3.*y2-x2)); + val1[i] *= -x * (r2*(x2-3.*y2) + 9.*z2*(3.*y2-x2)); //changed sign //val1[i] *= (x2 - y2) * (x2 + y2 - 2.*z2) * z; } } @@ -3861,7 +4107,7 @@ void eval_ghd(int gs, double* grid, double* val1, int l1, int m1, const double n double y = grid[6*i+1]; double x2 = x*x; double y2 = y*y; - val1[i] *= x * (10.*x2*y2 - x2*x2 - 5.*y2*y2); + val1[i] *= -x * (10.*x2*y2 - x2*x2 - 5.*y2*y2); //changed sign //val1[i] *= x * (x2*x2 + y2*y2 - 12.*y2*z2 + 8.*z2*z2 + 2.*x2* (y2 - 6.*z2)); } } @@ -4079,5 +4325,35 @@ void eval_ghd(int gs, double* grid, double* val1, int l1, int m1, const double n } } + if (include_r) + { + #pragma acc parallel loop present(grid[0:6*gs],val1[0:gs]) + for (int i=0;i > & return; } -void reduce_Exyz(int i1, int i2, int N, int gs, double* val1m, double* val2m, double* grid1m, double A1, double B1, double C1, double* E) +void reduce_Exyz(int i1, int i2, int N, int gs, double* val1m, double* val2m, double* grid1m, double A1, double B1, double C1, double rconf, double pconf, double* E) { int gs6 = 6*gs; int N2 = N*N; + const double a = 11.; //33, 66 also viable? + double valx = 0.; double valy = 0.; double valz = 0.; #pragma acc parallel loop present(val1m[0:gs],val2m[0:gs],grid1m[0:gs6]) reduction(+:valx,valy,valz) for (int j=0;jrconf) { xs += pconf*(r-rconf); ys += pconf*(r-rconf); zs += pconf*(r-rconf); } + + valx += val1m[j]*val2m[j]*xs; + valy += val1m[j]*val2m[j]*ys; + valz += val1m[j]*val2m[j]*zs; } #pragma acc serial present(E[0:3*N2]) @@ -4265,11 +4276,13 @@ void reduce_Exyz(int i1, int i2, int N, int gs, double* val1m, double* val2m, do return; } -void reduce_Exyz_2(int i1, int i2, int N, int gs, double* val1m, double* val1n, double* val2m, double* val2n, double* grid1m, double* grid2n, double A1, double B1, double C1, double A2, double B2, double C2, double* E) +void reduce_Exyz_2(int i1, int i2, int N, int gs, double* val1m, double* val1n, double* val2m, double* val2n, double* grid1m, double* grid2n, double A1, double B1, double C1, double A2, double B2, double C2, double rconf, double pconf, double* E) { int gs6 = 6*gs; int N2 = N*N; + const double a = 11.; + double valx = 0.; double valy = 0.; double valz = 0.; #pragma acc parallel loop present(val1m[0:gs],val1n[0:gs],val2m[0:gs],val2n[0:gs],grid1m[0:gs6],grid2n[0:gs6]) reduction(+:valx,valy,valz) for (int j=0;jrconf) { xs1 += pconf*(r1-rconf); ys1 += pconf*(r1-rconf); zs1 += pconf*(r1-rconf); } + if (r2>rconf) { xs2 += pconf*(r2-rconf); ys2 += pconf*(r2-rconf); zs2 += pconf*(r2-rconf); } + + valx += val1m[j]*val2m[j]*xs1 + val1n[j]*val2n[j]*xs2; + valy += val1m[j]*val2m[j]*ys1 + val1n[j]*val2n[j]*ys2; + valz += val1m[j]*val2m[j]*zs1 + val1n[j]*val2n[j]*zs2; } #pragma acc serial present(E[0:N2]) @@ -4297,9 +4324,9 @@ void reduce_Exyz_2(int i1, int i2, int N, int gs, double* val1m, double* val1n, //applied electric fields // x,y,z directions only // could expand this to include higher order terms -void compute_Exyz(int natoms, int* atno, double* coords, bool gbasis, vector > &basis, int nrad, int nang, double* ang_g, double* ang_w, double* S, double* E, int prl) +void compute_Exyz(double rconf, double pconf, int natoms, int* atno, double* coords, bool gbasis, vector > &basis, int nrad, int nang, double* ang_g, double* ang_w, double* S, double* E, int prl) { - if (prl>1) printf(" beginning compute_E (double precision) \n"); + if (prl>1) printf(" beginning compute_E (double precision). rpconfine: %8.5f %8.5f \n",rconf,pconf); int N = basis.size(); int N2 = N*N; @@ -4417,7 +4444,7 @@ void compute_Exyz(int natoms, int* atno, double* coords, bool gbasis, vector > &basis, int nrad, int nang, double* ang_g, double* ang_w, double* S, double* E, int prl) +{ + return compute_Exyz(0,0,natoms,atno,coords,gbasis,basis,nrad,nang,ang_g,ang_w,S,E,prl); +} + //high-precision overlap integrals void compute_Sd(int natoms, int* atno, float* coords, vector > &basis, int nrad, int nang, double* ang_g, double* ang_w, double* S, int prl) { diff --git a/src/integrals/integrals_aux.cpp b/src/integrals/integrals_aux.cpp index 63320b9..d337609 100644 --- a/src/integrals/integrals_aux.cpp +++ b/src/integrals/integrals_aux.cpp @@ -609,6 +609,24 @@ void get_angular_grid(int size_ang, double* ang_g, double* ang_w) return; } +void get_angular_grid(int size_ang, float* ang_g, float* ang_w) +{ + double* ang_gd = new double[3*size_ang]; + double* ang_wd = new double[size_ang]; + + get_angular_grid(size_ang,ang_gd,ang_wd); + + for (int j=0;j<3*size_ang;j++) + ang_g[j] = ang_gd[j]; + for (int j=0;j > setup_integrals_gsgpu(vector >& basis_aux env = inp.get_env(); map< int, basis_t > basmap = inp.basmap; + map< int, basis_t > basmap_ri = inp.basmap_ri; vector > basis; for (int n=0;n > setup_integrals_gsgpu(vector >& basis_aux } } - //just to make basis_aux's size correct basis_aux.clear(); - for (int i=0;i b1; b1.push_back(1); - basis_aux.push_back(b1); + int i1 = atno[n]; + basis_t basis1 = basmap_ri[i1]; + int Z = basis1.nuc; + int nshell = basis1.shells.size(); + + for (int j=0;j b1; for (int p=0;p<10;p++) b1.push_back(0); + b1[0] = l1+1; b1[1] = l1; b1[2] = m; + //using b1[3]==b1[4] to store # of gaussians + b1[3] = b1[4] = size1; + b1[5] = coords[3*n+0]; b1[6] = coords[3*n+1]; b1[7] = coords[3*n+2]; + b1[8] = Z; b1[9] = n; + + //zeta then normalization coeffs + for (int k=0;k0) norm1 *= norm_sh(l1,m)/n0; + b1.push_back(norm1); + } + + basis_aux.push_back(b1); + } + } } return basis; diff --git a/src/libcintw/cintwrapper.cpp b/src/libcintw/cintwrapper.cpp index db8ddd8..d845257 100644 --- a/src/libcintw/cintwrapper.cpp +++ b/src/libcintw/cintwrapper.cpp @@ -56,12 +56,14 @@ void get_vri(double** val, int gs, int N, printf(" using ri basis in get_vri \n"); } + /* //CPMZ something is off about the cache size estimate int cache_size = int1e_grids_sph(NULL, NULL, shls, atm, natm, bas, nbas+nbas_ri, env, NULL, NULL); //printf(" cache_size: %4i \n",cache_size); cache_size += gs; cache_size *= 10; //double* cache = new double[cache_size](); + */ //CPMZ just let libcint allocate for itself double* cache = NULL; @@ -271,10 +273,13 @@ void get_hcore(double *hcore, double *En, double *T, int N, int di, dj; int shls[2]; - double *tmp = new double[N * N]; + int N2 = N*N; + double *tmp = new double[N2]; - if (BT::DO_CART) { - for (int i = 0; i < nbas; i++) { + if (BT::DO_CART) + { + for (int i = 0; i < nbas; i++) + { int idx_j = 0; shls[0] = i; di = CINTcgto_cart(i, bas); @@ -297,12 +302,12 @@ void get_hcore(double *hcore, double *En, double *T, int N, idx_i += di; }// for i - for (int i = 0; i < N * N; i++) { + for (int i = 0; i < N2; i++) hcore[i] = tmp[i]; - } idx_i = 0; - for (int i = 0; i < nbas; i++) { + for (int i = 0; i < nbas; i++) + { int idx_j = 0; shls[0] = i; di = CINTcgto_cart(i, bas); @@ -325,8 +330,10 @@ void get_hcore(double *hcore, double *En, double *T, int N, idx_i += di; }// for i } // if BT::DO_CART - else { - for (int i = 0; i < nbas; i++) { + else + { + for (int i = 0; i < nbas; i++) + { int idx_j = 0; shls[0] = i; di = CINTcgto_spheric(i, bas); @@ -350,15 +357,14 @@ void get_hcore(double *hcore, double *En, double *T, int N, }// for i if (En!=NULL) - for (int i = 0; i < N * N; i++) { + for (int i = 0; i < N2; i++) En[i] = tmp[i]; - } - for (int i = 0; i < N * N; i++) { + for (int i = 0; i < N2; i++) hcore[i] = tmp[i]; - } idx_i = 0; - for (int i = 0; i < nbas; i++) { + for (int i = 0; i < nbas; i++) + { int idx_j = 0; shls[0] = i; di = CINTcgto_spheric(i, bas); @@ -383,19 +389,18 @@ void get_hcore(double *hcore, double *En, double *T, int N, } if (T!=NULL) - for (int i = 0; i < N * N; i++) { + for (int i = 0; i < N2; i++) T[i] = tmp[i]; - } - for (int i = 0; i < N * N; i++) { + for (int i = 0; i < N2; i++) hcore[i] += tmp[i]; - } delete [] tmp; } void get_hcore(double *hcore, int N, int natm, int nbas, int nenv, - int *atm, int *bas, double *env) { + int *atm, int *bas, double *env) +{ return get_hcore(hcore,NULL,NULL,N,natm,nbas,nenv,atm,bas,env); } @@ -409,8 +414,8 @@ void get_tcore(double *tcore, int N, int N2 = N*N; double *tmp = new double[N2]; - if (BT::DO_CART) { - + if (BT::DO_CART) + { idx_i = 0; for (int i = 0; i < nbas; i++) { int idx_j = 0; @@ -435,7 +440,8 @@ void get_tcore(double *tcore, int N, idx_i += di; }// for i } // if BT::DO_CART - else { + else + { idx_i = 0; for (int i = 0; i < nbas; i++) { int idx_j = 0; @@ -544,12 +550,320 @@ void gen_pvp(double *pvp, int N, pvp[i] = tmp[i]; } - delete [] tmp; + delete [] tmp; +} + +void gen_4c_overlap_m4(double* ovlp4, size_t N, + int natm, int nbas, int nenv, + int* atm, int* bas, double* env) +{ + size_t N2 = N*N; + size_t Npairs = N*(N+1)/2; + CINTOpt *no_opt = NULL; + + #define USE_PARA_4C 1 + + #if USE_PARA_4C + //when j==i, parallel execution could write to same place at the same time + int nomp = 1; + #pragma omp parallel + nomp = omp_get_num_threads(); + if (nbas Date: Wed, 13 May 2026 15:57:06 -0400 Subject: [PATCH 2/4] compiler directive for 4c ints --- src/libcintw/cintwrapper.cpp | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/libcintw/cintwrapper.cpp b/src/libcintw/cintwrapper.cpp index d845257..9072d6f 100644 --- a/src/libcintw/cintwrapper.cpp +++ b/src/libcintw/cintwrapper.cpp @@ -557,6 +557,7 @@ void gen_4c_overlap_m4(double* ovlp4, size_t N, int natm, int nbas, int nenv, int* atm, int* bas, double* env) { + #ifdef COMPILE_CINTW_4C size_t N2 = N*N; size_t Npairs = N*(N+1)/2; CINTOpt *no_opt = NULL; @@ -738,7 +739,12 @@ void gen_4c_overlap_m4(double* ovlp4, size_t N, #if USE_PARA_4C delete [] shell_offset; #endif - + + #else + printf("\n\n\n ERROR: this SlaterGPU is not set up for 4c center integrals gbasis \n"); + exit(-1); + #endif + return; } @@ -747,6 +753,7 @@ void gen_4c_overlap(double* ovlp4, size_t N, int natm, int nbas, int nenv, int* atm, int* bas, double* env) { + #ifdef COMPILE_CINTW_4C size_t N2 = N*N; size_t idxi = 0, idxj = 0, idxk = 0, idxl = 0; size_t di, dj, dk, dl; @@ -857,6 +864,10 @@ void gen_4c_overlap(double* ovlp4, size_t N, delete [] buf; + #else + printf("\n\n\n ERROR: this SlaterGPU is not set up for 4c center integrals gbasis \n"); + exit(-1); + #endif return; } From 6f90ff37009f35da65906e7d8a808c6bf6578afe Mon Sep 17 00:00:00 2001 From: Joshua Kammeraad Date: Thu, 14 May 2026 11:26:27 -0700 Subject: [PATCH 3/4] add COMPILE_CINTW_4C build option Adds a CMake variable that, when ON, defines the COMPILE_CINTW_4C preprocessor macro for the cintw target. This macro gates the bodies of gen_4c_overlap_m4 and gen_4c_overlap in cintwrapper.cpp, which require libcint's cint4c1e_* functions (compiled only with a non-default flag and affected by the known bug in issue #82). Defaults to OFF. Mirrors the existing USE_ACC env-var pattern so users can opt in via: export COMPILE_CINTW_4C=ON Co-Authored-By: Claude Opus 4.7 (1M context) --- CMakeLists.txt | 6 ++++++ src/libcintw/CMakeLists.txt | 3 +++ 2 files changed, 9 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index b6fc849..64aa9dc 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -32,6 +32,12 @@ endif() set(DO_GTO ${DO_GTO}) +set(COMPILE_CINTW_4C OFF) +if("$ENV{COMPILE_CINTW_4C}" STREQUAL "ON") + set(COMPILE_CINTW_4C ON) +endif() +message(STATUS "COMPILE_CINTW_4C: ${COMPILE_CINTW_4C}") + if(NOT CMAKE_BUILD_TYPE) set(CMAKE_BUILD_TYPE Release) endif() diff --git a/src/libcintw/CMakeLists.txt b/src/libcintw/CMakeLists.txt index defb754..ae64cfa 100644 --- a/src/libcintw/CMakeLists.txt +++ b/src/libcintw/CMakeLists.txt @@ -18,6 +18,9 @@ set(CINTW_COMP_FLAGS ${CMAKE_CXX_FLAGS} add_definitions("-w") target_compile_options(cintw PRIVATE ${CINTW_COMP_FLAGS}) +if(COMPILE_CINTW_4C) + target_compile_definitions(cintw PRIVATE COMPILE_CINTW_4C) +endif() target_include_directories(cintw PRIVATE "${CMAKE_SOURCE_DIR}/include" "${LIBCINT_PATH}/include" From f7afd4b9b717ae83adeef8752fb15cba90bc6d85 Mon Sep 17 00:00:00 2001 From: Joshua Kammeraad Date: Thu, 14 May 2026 11:27:43 -0700 Subject: [PATCH 4/4] fix indent on 4c integral compile guard The #ifdef COMPILE_CINTW_4C inside gen_4c_overlap (cintwrapper.cpp:756) was 1-space indented; the matching one inside gen_4c_overlap_m4 and both #else / #endif lines are 2-space. Normalize to match. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/libcintw/cintwrapper.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/libcintw/cintwrapper.cpp b/src/libcintw/cintwrapper.cpp index 9072d6f..1a5f565 100644 --- a/src/libcintw/cintwrapper.cpp +++ b/src/libcintw/cintwrapper.cpp @@ -753,7 +753,7 @@ void gen_4c_overlap(double* ovlp4, size_t N, int natm, int nbas, int nenv, int* atm, int* bas, double* env) { - #ifdef COMPILE_CINTW_4C + #ifdef COMPILE_CINTW_4C size_t N2 = N*N; size_t idxi = 0, idxj = 0, idxk = 0, idxl = 0; size_t di, dj, dk, dl;