BattModels · mkphuthi · Jun 8, 2023
diff --git a/benchmarks/allegro_1-gpu/allegro.def b/benchmarks/allegro_1-gpu/allegro.def
@@ -0,0 +1,108 @@
+Bootstrap: docker
+From: oraclelinux:8
+
+%post
+    # Builds the benchmark stack: CUDA 11.3, CUDA-aware OpenMPI 4.1.5,
+    # NequIP/Allegro inside Miniconda, and a Kokkos-enabled LAMMPS patched
+    # with pair_nequip and pair_allegro.
+    #
+    # libtorch and LAMMPS are installed under a fixed prefix instead of "~":
+    # "~" in %post is the build user's home, which need not be the home of
+    # whoever runs the container, so "~"-relative or per-user paths can miss
+    # the installed files. Keep this value in sync with %environment.
+    export INSTALL_PREFIX=/opt/install
+    mkdir -p "$INSTALL_PREFIX"
+
+    # Build tools
+    dnf install -y git cmake wget unzip
+
+    # CUDA
+    export distro=rhel8
+    export arch=x86_64
+    dnf config-manager --add-repo "https://developer.download.nvidia.com/compute/cuda/repos/$distro/$arch/cuda-$distro.repo"
+    dnf clean all
+    dnf install -y cuda-11-3.x86_64
+    export PATH=/usr/local/cuda-11.3/bin/:$PATH
+    export LD_LIBRARY_PATH=/usr/local/cuda-11.3/lib64/:$LD_LIBRARY_PATH
+
+    # CUDA-aware OpenMPI
+    wget https://download.open-mpi.org/release/open-mpi/v4.1/openmpi-4.1.5.tar.gz
+    tar -xzf openmpi-4.1.5.tar.gz
+    cd openmpi-4.1.5
+    ./configure --with-cuda=/usr/local/cuda-11.3 --prefix=/usr/local
+    make -j4 install
+    cd ../
+
+    # Conda
+    wget https://repo.anaconda.com/miniconda/Miniconda3-py39_4.12.0-Linux-x86_64.sh
+    bash ./Miniconda3-py39_4.12.0-Linux-x86_64.sh -b -p /opt/miniconda3
+    /opt/miniconda3/bin/conda init bash
+    # Pick up the shell setup written by "conda init" so that the conda/pip
+    # calls below and $CONDA_PREFIX resolve to /opt/miniconda3.
+    source ~/.bashrc
+
+    # Nequip
+    conda install -y python==3.9
+    conda install -y pytorch==1.11 torchvision torchaudio cudatoolkit==11.3.1 -c pytorch
+    conda install -y pyg -c pyg
+    conda install -y cudnn mkl-include
+    pip install wandb
+    git clone https://github.com/mir-group/nequip.git
+    cd nequip
+    git checkout develop
+    pip install .
+    cd ../
+    git clone https://github.com/mir-group/allegro.git
+    cd allegro
+    git checkout develop
+    pip install .
+    cd ../
+
+    # Pair-nequip and pair-allegro
+    wget https://github.com/lammps/lammps/archive/refs/tags/stable_29Sep2021.tar.gz
+    tar -xzf stable_29Sep2021.tar.gz
+    git clone https://github.com/mir-group/pair_nequip.git
+    cd pair_nequip
+    ./patch_lammps.sh ../lammps-stable_29Sep2021/
+    cd ../
+    git clone https://github.com/mir-group/pair_allegro.git
+    cd pair_allegro
+    ./patch_lammps.sh ../lammps-stable_29Sep2021/
+    cd ../
+
+    # libtorch (C++ PyTorch distribution that LAMMPS links against)
+    export CUDNN_ROOT="/opt/miniconda3/"
+    wget https://download.pytorch.org/libtorch/cu113/libtorch-cxx11-abi-shared-with-deps-1.12.0%2Bcu113.zip
+    unzip libtorch-cxx11-abi-shared-with-deps-1.12.0+cu113.zip
+    mv libtorch "$INSTALL_PREFIX/"
+    export LD_LIBRARY_PATH=$INSTALL_PREFIX/libtorch/lib/:$LD_LIBRARY_PATH
+
+    # LAMMPS
+    # NOTE(review): Kokkos_ARCH_HOPPER90 targets Hopper-class GPUs; confirm
+    # that CUDA 11.3 and the Kokkos bundled with this LAMMPS release accept it.
+    cd lammps-stable_29Sep2021
+    mkdir allegro-build && cd allegro-build
+    cmake ../cmake \
+        -DCMAKE_PREFIX_PATH="$INSTALL_PREFIX/libtorch" \
+        -DCMAKE_INSTALL_PREFIX="$INSTALL_PREFIX/nequip-lammps" \
+        -DCUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda/ \
+        -DPKG_KOKKOS=ON \
+        -DKokkos_ENABLE_CUDA=ON \
+        -DKokkos_ARCH_HOPPER90=ON \
+        -DMKL_INCLUDE_DIR="$CONDA_PREFIX/include"
+    make -j4
+    make install
+    cd ../
+
+%environment
+    # Paths below must match INSTALL_PREFIX (/opt/install) used in %post.
+    export LC_ALL=C
+    export LD_LIBRARY_PATH=/opt/install/libtorch/lib/:$LD_LIBRARY_PATH
+    export PATH=/opt/install/nequip-lammps/bin:$PATH
+    export CUDNN_ROOT="/opt/miniconda3/"
+    export PATH=/usr/local/cuda-11.3/bin/:$PATH
+    export LD_LIBRARY_PATH=/usr/local/cuda-11.3/lib64/:$LD_LIBRARY_PATH
+
+%runscript
+    # "singularity run allegro.sif <args>" forwards its arguments to LAMMPS.
+    exec lmp "$@"
diff --git a/benchmarks/allegro_1-gpu/allegro_li.lammps b/benchmarks/allegro_1-gpu/allegro_li.lammps
@@ -0,0 +1,41 @@
+# Allegro benchmark: NPT molecular dynamics of bcc lithium.
+
+variable nsteps equal 1000               # number of MD steps to run
+variable nthermo equal 5           # interval for thermo output
+
+units           metal
+atom_style      atomic
+# Set the timestep after "units": the units command resets the timestep to
+# the style's default, so a value given before it is discarded.
+timestep        0.002000
+
+neighbor        1.0 bin
+boundary        p p p
+
+# One bcc unit cell (a = 3.5), replicated below into a 10x10x10 supercell.
+lattice         bcc 3.5
+region          box block 0 1 0 1 0 1 units lattice
+create_box      1 box
+
+lattice         bcc 3.5 orient x 1 0 0 orient y 0 1 0 orient z 0 0 1
+create_atoms    1 box
+replicate       10 10 10
+
+mass            1 6.94
+
+newton on
+pair_style      allegro
+pair_coeff     * * li_f32_allegro_model.pth Li
+
+thermo_style    custom step temp pe ke etotal press vol
+thermo          ${nthermo}
+thermo_modify   norm no
+
+# dump            1 all custom ${nthermo} equil_npt_T100.0_dump.lammpstrj id type element x y z vx vy vz fx fy fz
+# dump_modify     1 element Li
+
+# NPT: temperature ramped from 10 to 100.0 (damping 0.1), isotropic
+# pressure held at 0 (damping 0.5).
+fix             1 all npt temp 10 100.0 0.1 iso 0 0 0.5
+run             ${nsteps}
+unfix           1
diff --git a/benchmarks/allegro_1-gpu/init_atoms.lmpdat b/benchmarks/allegro_1-gpu/init_atoms.lmpdat
diff --git a/benchmarks/allegro_1-gpu/li_f32_allegro_model.pth b/benchmarks/allegro_1-gpu/li_f32_allegro_model.pth
diff --git a/benchmarks/allegro_1-gpu/run.sh b/benchmarks/allegro_1-gpu/run.sh
@@ -0,0 +1,17 @@
+#!/usr/bin/env bash
+#
+# Run the Allegro/LAMMPS lithium benchmark on 1 node with 1 GPU.
+#
+# Assumes the Singularity container "allegro.sif" is in the current
+# directory and that the host has up-to-date CUDA drivers.
+set -euo pipefail
+
+readonly TOTAL_GPUS=1
+readonly GPUS_PER_NODE=1
+
+# --nv binds the host NVIDIA driver libraries and devices into the container;
+# without it the containerised LAMMPS cannot see the GPU.
+singularity exec --nv allegro.sif \
+  mpirun -np "${TOTAL_GPUS}" \
+  lmp -sf kk -k on g "${GPUS_PER_NODE}" -pk kokkos newton on neigh full \
+  -in allegro_li.lammps
diff --git a/benchmarks/allegro_4-gpu/allegro.def b/benchmarks/allegro_4-gpu/allegro.def
@@ -0,0 +1,108 @@
+Bootstrap: docker
+From: oraclelinux:8
+
+%post
+    # Builds the benchmark stack: CUDA 11.3, CUDA-aware OpenMPI 4.1.5,
+    # NequIP/Allegro inside Miniconda, and a Kokkos-enabled LAMMPS patched
+    # with pair_nequip and pair_allegro.
+    #
+    # libtorch and LAMMPS are installed under a fixed prefix instead of "~":
+    # "~" in %post is the build user's home, which need not be the home of
+    # whoever runs the container, so "~"-relative or per-user paths can miss
+    # the installed files. Keep this value in sync with %environment.
+    export INSTALL_PREFIX=/opt/install
+    mkdir -p "$INSTALL_PREFIX"
+
+    # Build tools
+    dnf install -y git cmake wget unzip
+
+    # CUDA
+    export distro=rhel8
+    export arch=x86_64
+    dnf config-manager --add-repo "https://developer.download.nvidia.com/compute/cuda/repos/$distro/$arch/cuda-$distro.repo"
+    dnf clean all
+    dnf install -y cuda-11-3.x86_64
+    export PATH=/usr/local/cuda-11.3/bin/:$PATH
+    export LD_LIBRARY_PATH=/usr/local/cuda-11.3/lib64/:$LD_LIBRARY_PATH
+
+    # CUDA-aware OpenMPI
+    wget https://download.open-mpi.org/release/open-mpi/v4.1/openmpi-4.1.5.tar.gz
+    tar -xzf openmpi-4.1.5.tar.gz
+    cd openmpi-4.1.5
+    ./configure --with-cuda=/usr/local/cuda-11.3 --prefix=/usr/local
+    make -j4 install
+    cd ../
+
+    # Conda
+    wget https://repo.anaconda.com/miniconda/Miniconda3-py39_4.12.0-Linux-x86_64.sh
+    bash ./Miniconda3-py39_4.12.0-Linux-x86_64.sh -b -p /opt/miniconda3
+    /opt/miniconda3/bin/conda init bash
+    # Pick up the shell setup written by "conda init" so that the conda/pip
+    # calls below and $CONDA_PREFIX resolve to /opt/miniconda3.
+    source ~/.bashrc
+
+    # Nequip
+    conda install -y python==3.9
+    conda install -y pytorch==1.11 torchvision torchaudio cudatoolkit==11.3.1 -c pytorch
+    conda install -y pyg -c pyg
+    conda install -y cudnn mkl-include
+    pip install wandb
+    git clone https://github.com/mir-group/nequip.git
+    cd nequip
+    git checkout develop
+    pip install .
+    cd ../
+    git clone https://github.com/mir-group/allegro.git
+    cd allegro
+    git checkout develop
+    pip install .
+    cd ../
+
+    # Pair-nequip and pair-allegro
+    wget https://github.com/lammps/lammps/archive/refs/tags/stable_29Sep2021.tar.gz
+    tar -xzf stable_29Sep2021.tar.gz
+    git clone https://github.com/mir-group/pair_nequip.git
+    cd pair_nequip
+    ./patch_lammps.sh ../lammps-stable_29Sep2021/
+    cd ../
+    git clone https://github.com/mir-group/pair_allegro.git
+    cd pair_allegro
+    ./patch_lammps.sh ../lammps-stable_29Sep2021/
+    cd ../
+
+    # libtorch (C++ PyTorch distribution that LAMMPS links against)
+    export CUDNN_ROOT="/opt/miniconda3/"
+    wget https://download.pytorch.org/libtorch/cu113/libtorch-cxx11-abi-shared-with-deps-1.12.0%2Bcu113.zip
+    unzip libtorch-cxx11-abi-shared-with-deps-1.12.0+cu113.zip
+    mv libtorch "$INSTALL_PREFIX/"
+    export LD_LIBRARY_PATH=$INSTALL_PREFIX/libtorch/lib/:$LD_LIBRARY_PATH
+
+    # LAMMPS
+    # NOTE(review): Kokkos_ARCH_HOPPER90 targets Hopper-class GPUs; confirm
+    # that CUDA 11.3 and the Kokkos bundled with this LAMMPS release accept it.
+    cd lammps-stable_29Sep2021
+    mkdir allegro-build && cd allegro-build
+    cmake ../cmake \
+        -DCMAKE_PREFIX_PATH="$INSTALL_PREFIX/libtorch" \
+        -DCMAKE_INSTALL_PREFIX="$INSTALL_PREFIX/nequip-lammps" \
+        -DCUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda/ \
+        -DPKG_KOKKOS=ON \
+        -DKokkos_ENABLE_CUDA=ON \
+        -DKokkos_ARCH_HOPPER90=ON \
+        -DMKL_INCLUDE_DIR="$CONDA_PREFIX/include"
+    make -j4
+    make install
+    cd ../
+
+%environment
+    # Paths below must match INSTALL_PREFIX (/opt/install) used in %post.
+    export LC_ALL=C
+    export LD_LIBRARY_PATH=/opt/install/libtorch/lib/:$LD_LIBRARY_PATH
+    export PATH=/opt/install/nequip-lammps/bin:$PATH
+    export CUDNN_ROOT="/opt/miniconda3/"
+    export PATH=/usr/local/cuda-11.3/bin/:$PATH
+    export LD_LIBRARY_PATH=/usr/local/cuda-11.3/lib64/:$LD_LIBRARY_PATH
+
+%runscript
+    # "singularity run allegro.sif <args>" forwards its arguments to LAMMPS.
+    exec lmp "$@"
diff --git a/benchmarks/allegro_4-gpu/allegro_li.lammps b/benchmarks/allegro_4-gpu/allegro_li.lammps
@@ -0,0 +1,41 @@
+# Allegro benchmark: NPT molecular dynamics of bcc lithium.
+
+variable nsteps equal 1000               # number of MD steps to run
+variable nthermo equal 5           # interval for thermo output
+
+units           metal
+atom_style      atomic
+# Set the timestep after "units": the units command resets the timestep to
+# the style's default, so a value given before it is discarded.
+timestep        0.002000
+
+neighbor        1.0 bin
+boundary        p p p
+
+# One bcc unit cell (a = 3.5), replicated below into a 30x30x30 supercell.
+lattice         bcc 3.5
+region          box block 0 1 0 1 0 1 units lattice
+create_box      1 box
+
+lattice         bcc 3.5 orient x 1 0 0 orient y 0 1 0 orient z 0 0 1
+create_atoms    1 box
+replicate       30 30 30
+
+mass            1 6.94
+
+newton on
+pair_style      allegro
+pair_coeff     * * li_f32_allegro_model.pth Li
+
+thermo_style    custom step temp pe ke etotal press vol
+thermo          ${nthermo}
+thermo_modify   norm no
+
+# dump            1 all custom ${nthermo} equil_npt_T100.0_dump.lammpstrj id type element x y z vx vy vz fx fy fz
+# dump_modify     1 element Li
+
+# NPT: temperature ramped from 10 to 100.0 (damping 0.1), isotropic
+# pressure held at 0 (damping 0.5).
+fix             1 all npt temp 10 100.0 0.1 iso 0 0 0.5
+run             ${nsteps}
+unfix           1
diff --git a/benchmarks/allegro_4-gpu/get_lammps_ns_per_day.py b/benchmarks/allegro_4-gpu/get_lammps_ns_per_day.py
@@ -0,0 +1,10 @@
+with open('log.lammps', 'r') as f:
+    lines = f.readlines()
+
+for line in lines:
+    if line.startswith('Performance'):
+        ns_per_day=float(line.split(' ')[1])
+        break
+
+print(f'score: {ns_per_day}')
+