Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Lammps polaris #35

Merged
merged 10 commits into from
Feb 27, 2024
Merged
3 changes: 3 additions & 0 deletions component-tests/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Tuning Individual Components of MOFA

Evaluate each part of our workflow in isolation from the rest of the application.
3 changes: 3 additions & 0 deletions component-tests/lammps-md/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Test LAMMPS MD

Run MD simulations using LAMMPS to establish tradeoffs between run length, execution time, and measured volume change.
69 changes: 69 additions & 0 deletions component-tests/lammps-md/example-mofs.json

Large diffs are not rendered by default.

129 changes: 129 additions & 0 deletions component-tests/lammps-md/run_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
"""Test LAMMPS by running a large number of MD simulations with different runtimes"""
from platform import node
import argparse
import json

from tqdm import tqdm
from ase import Atoms
import parsl
from parsl.config import Config
from parsl.app.python import PythonApp
from parsl.executors import HighThroughputExecutor
from parsl.providers import PBSProProvider
from parsl.launchers import MpiExecLauncher

from mofa.model import MOFRecord
from mofa.scoring.geometry import LatticeParameterChange
from mofa.utils.conversions import write_to_string


def test_function(mof: MOFRecord, lammps_invocation: list[str], timesteps: int) -> tuple[float, list[Atoms]]:
    """Time a single LAMMPS molecular dynamics run for one MOF

    Args:
        mof: MOF to simulate
        lammps_invocation: Command used to launch LAMMPS
        timesteps: Number of MD time steps to run
    Returns:
        - Wall-clock time spent inside the MD call (s)
        - Output of the MD run (the trajectory)
    """
    # Imports are kept inside the body
    # NOTE(review): presumably so they resolve wherever the Parsl app executes - confirm
    from pathlib import Path
    from time import perf_counter
    from mofa.simulation.lammps import LAMMPSRunner

    # Every run of the same length shares one working directory
    work_dir = Path(f'run-{timesteps}')
    work_dir.mkdir(parents=True, exist_ok=True)
    runner = LAMMPSRunner(lammps_invocation, lmp_sims_root_path=str(work_dir))

    # The third argument to the MD call is one fifth of the run length
    # NOTE(review): presumably the reporting interval - confirm against LAMMPSRunner
    interval = timesteps // 5

    # Only the MD call itself is timed; directory and runner setup are excluded
    tic = perf_counter()
    trajectory = runner.run_molecular_dynamics(mof, timesteps, interval)
    elapsed = perf_counter() - tic

    return elapsed, trajectory


if __name__ == "__main__":
    # Read the run length and the name of the compute configuration from the command line
    cli = argparse.ArgumentParser()
    cli.add_argument('--timesteps', help='Number of timesteps to run', default=1000, type=int)
    cli.add_argument('--config', help='Which compute configuration to use', default='local')
    args = cli.parse_args()

    # Select the correct configuration: a LAMMPS command line plus a matching Parsl config
    if args.config == "local":
        # Single worker on the current machine, OpenMP-accelerated LAMMPS
        lammps_cmd = ['/home/lward/Software/lammps-2Aug2023/build/lmp', '-sf', 'omp']
        local_executor = HighThroughputExecutor(max_workers=1, cpu_affinity='block')
        config = Config(executors=[local_executor])
    elif args.config == "polaris":
        # Kokkos build of LAMMPS; the flags request one GPU per LAMMPS process
        lammps_cmd = (
            '/lus/eagle/projects/ExaMol/mofa/lammps-2Aug2023/src/lmp_polaris_nvhpc_kokkos '
            '-k on g 1 -sf kk -pk kokkos neigh half neigh/qeq full newton on '
        ).split()

        # One PBS block of one node from the debug queue, with four workers and four accelerators
        polaris_launcher = MpiExecLauncher(bind_cmd="--cpu-bind", overrides="--depth=64 --ppn 1")
        polaris_provider = PBSProProvider(
            launcher=polaris_launcher,
            account='ExaMol',
            queue='debug',
            select_options="ngpus=4",
            scheduler_options="#PBS -l filesystems=home:eagle",
            worker_init="""
module load kokkos
module load nvhpc/23.3
module list
source activate /lus/eagle/projects/ExaMol/mofa/mof-generation-at-scale/env-polaris

cd $PBS_O_WORKDIR
pwd
which python
hostname
""",
            nodes_per_block=1,
            init_blocks=1,
            min_blocks=0,
            max_blocks=1,
            cpus_per_node=32,
            walltime="1:00:00",
        )
        polaris_executor = HighThroughputExecutor(
            max_workers=4,
            cpu_affinity='block-reverse',
            available_accelerators=4,
            provider=polaris_provider,
        )
        config = Config(retries=1, executors=[polaris_executor])
    else:
        raise ValueError(f'Configuration not defined: {args.config}')

    # Start Parsl and wrap the test function as a Parsl app
    parsl.load(config)
    md_app = PythonApp(test_function)

    # Submit one task per line of the example file (one JSON-encoded MOF record per line)
    pending = []
    with open('example-mofs.json') as fp:
        for line in fp:
            record = MOFRecord(**json.loads(line))
            task = md_app(record, lammps_cmd, args.timesteps)
            task.mof = record  # Keep the record with its future so the result can be scored later
            pending.append(task)

    # Collect results in submission order, appending one JSON line per MOF as each one finishes
    scorer = LatticeParameterChange()
    for task in tqdm(pending):
        runtime, traj = task.result()

        # Get the strain
        # TODO (wardlt): Simplify how we compute strain
        record = task.mof
        record.md_trajectory['uff'] = [write_to_string(frame, 'vasp') for frame in traj]
        strain = scorer.score_mof(record)

        # Store the result; the file is reopened in append mode for every record
        with open('runtimes.json', 'a') as fp:
            summary = {
                'host': node(),
                'lammps_cmd': lammps_cmd,
                'timesteps': args.timesteps,
                'mof': record.name,
                'runtime': runtime,
                'strain': strain
            }
            fp.write(f'{json.dumps(summary)}\n')
23 changes: 23 additions & 0 deletions envs/README.md
Original file line number Diff line number Diff line change
@@ -1 +1,24 @@
Environment files for different resources

## Building LAMMPS on Polaris

Follow the instructions [from ALCF for building LAMMPS with Kokkos](https://github.com/argonne-lcf/GettingStarted/tree/master/Applications/Polaris/LAMMPS). The script below downloads ALCF's `Makefile.polaris_nvhpc_kokkos` and builds that target:

```bash
#! /bin/bash
# Build LAMMPS for Polaris with the `polaris_nvhpc_kokkos` make target.
# NOTE(review): the `cd src` below implies this is started from the top of a LAMMPS source tree - confirm.

# Make the build environment
module use /soft/modulefiles
module load cudatoolkit-standalone/11.8.0
module load kokkos
# Print the loaded modules so the build log records the environment
module list

# Build
cd src
# Download the ALCF-provided Makefile for this target into MAKE/MACHINES (-c resumes a partial download)
cd MAKE/MACHINES/
wget -c https://github.com/argonne-lcf/GettingStarted/raw/master/Applications/Polaris/LAMMPS/Makefile.polaris_nvhpc_kokkos
# Return to src/ and print the directory the build runs from
cd ../..
pwd
# Left disabled. NOTE(review): presumably enables LAMMPS' "most" package set - confirm before re-enabling
#make yes-most
# Compile the target with 16 parallel jobs
make polaris_nvhpc_kokkos -j 16
```
3 changes: 3 additions & 0 deletions mofa/simulation/disable_mps_polaris.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
#!/bin/bash -l
# Stop CUDA MPS by sending the `quit` command to the MPS control program on stdin.

printf 'quit\n' | nvidia-cuda-mps-control
6 changes: 6 additions & 0 deletions mofa/simulation/enable_mps_polaris.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#!/bin/bash -l
# Start the CUDA MPS control daemon, then ask it to start a server for the current user.

# Directories handed to MPS for its pipes and logs
export CUDA_MPS_PIPE_DIRECTORY=/tmp/nvidia-mps
export CUDA_MPS_LOG_DIRECTORY=/tmp/nvidia-log

# Launch the control daemon (-d) with GPUs 0-3 visible to it
CUDA_VISIBLE_DEVICES=0,1,2,3 nvidia-cuda-mps-control -d

# Request a server owned by the invoking user's numeric uid
printf 'start_server -uid %s\n' "$(id -u)" | nvidia-cuda-mps-control
89 changes: 89 additions & 0 deletions mofa/simulation/lammps-polaris-compile.bash
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
# Environment for compiling LAMMPS on Polaris against the NVIDIA HPC SDK 23.3 install under /opt/nvidia/hpc_sdk.
# All search paths are set by hand rather than through modules, so start by unloading every module.
module purge

# Reset the search paths to fixed baselines; the SDK directories are prepended to these below
export PATH=/soft/buildtools/cmake/cmake-3.23.2/cmake-3.23.2-linux-x86_64/bin:/opt/anaconda3x/bin:/opt/clmgr/sbin:/opt/clmgr/bin:/opt/sgi/sbin:/opt/sgi/bin:/usr/local/bin:/usr/bin:/bin:/opt/c3/bin:/dbhome/db2cat/sqllib/bin:/dbhome/db2cat/sqllib/adm:/dbhome/db2cat/sqllib/misc:/dbhome/db2cat/sqllib/gskit/bin:/usr/lib/mit/bin:/usr/lib/mit/sbin:/opt/pbs/bin:/sbin:/opt/cray/pe/bin
export LD_LIBRARY_PATH=/dbhome/db2cat/sqllib/lib64:/dbhome/db2cat/sqllib/lib64/gskit:/dbhome/db2cat/sqllib/lib32
export LIBRARY_PATH=/dbhome/db2cat/sqllib/lib64:/dbhome/db2cat/sqllib/lib64/gskit:/dbhome/db2cat/sqllib/lib32
export MANPATH=/usr/share/lmod/lmod/share/man:/usr/local/man:/usr/share/man:/opt/c3/man:/opt/pbs/share/man:/opt/clmgr/man:/opt/sgi/share/man:/opt/clmgr/share/man:/opt/clmgr/lib/cm-cli/man
export CPATH=

# Executables: NVSHMEM, NCCL, OpenMPI 4.0.5, the compilers and CUDA 12.0.
# Each export prepends, so later lines take precedence (the qd/bin entry is listed twice).
export PATH=/opt/nvidia/hpc_sdk/Linux_x86_64/23.3/comm_libs/12.0/nvshmem/bin:$PATH
export PATH=/opt/nvidia/hpc_sdk/Linux_x86_64/23.3/comm_libs/12.0/nccl/bin:$PATH
export PATH=/opt/nvidia/hpc_sdk/Linux_x86_64/23.3/comm_libs/openmpi4/openmpi-4.0.5/bin:$PATH
export PATH=/opt/nvidia/hpc_sdk/Linux_x86_64/23.3/compilers/extras/qd/bin:$PATH
export PATH=/opt/nvidia/hpc_sdk/Linux_x86_64/23.3/compilers/bin:$PATH
export PATH=/opt/nvidia/hpc_sdk/Linux_x86_64/23.3/compilers/extras/qd/bin:$PATH
export PATH=/opt/nvidia/hpc_sdk/Linux_x86_64/23.3/compilers/share/llvm/bin:$PATH
export PATH=/opt/nvidia/hpc_sdk/Linux_x86_64/23.3/cuda/12.0/bin:$PATH

# Link-time library search path (the math_libs/lib64 entry is listed twice)
export LIBRARY_PATH=/opt/nvidia/hpc_sdk/Linux_x86_64/23.3/comm_libs/12.0/nvshmem/lib:$LIBRARY_PATH
export LIBRARY_PATH=/opt/nvidia/hpc_sdk/Linux_x86_64/23.3/comm_libs/12.0/nccl/lib:$LIBRARY_PATH
export LIBRARY_PATH=/opt/nvidia/hpc_sdk/Linux_x86_64/23.3/comm_libs/openmpi4/openmpi-4.0.5/lib:$LIBRARY_PATH
export LIBRARY_PATH=/opt/nvidia/hpc_sdk/Linux_x86_64/23.3/math_libs/lib64:$LIBRARY_PATH
export LIBRARY_PATH=/opt/nvidia/hpc_sdk/Linux_x86_64/23.3/compilers/lib:$LIBRARY_PATH
export LIBRARY_PATH=/opt/nvidia/hpc_sdk/Linux_x86_64/23.3/compilers/extras/qd/lib:$LIBRARY_PATH
export LIBRARY_PATH=/opt/nvidia/hpc_sdk/Linux_x86_64/23.3/cuda/12.0/extras/CUPTI/lib64:$LIBRARY_PATH
export LIBRARY_PATH=/opt/nvidia/hpc_sdk/Linux_x86_64/23.3/cuda/12.0/lib64:$LIBRARY_PATH
export LIBRARY_PATH=/opt/nvidia/hpc_sdk/Linux_x86_64/23.3/math_libs/lib64:$LIBRARY_PATH
export LIBRARY_PATH=/opt/nvidia/hpc_sdk/Linux_x86_64/23.3/math_libs/lib64/stubs:$LIBRARY_PATH

# Run-time library search path: the same SDK directories, in the same order, as LIBRARY_PATH
export LD_LIBRARY_PATH=/opt/nvidia/hpc_sdk/Linux_x86_64/23.3/comm_libs/12.0/nvshmem/lib:$LD_LIBRARY_PATH
export LD_LIBRARY_PATH=/opt/nvidia/hpc_sdk/Linux_x86_64/23.3/comm_libs/12.0/nccl/lib:$LD_LIBRARY_PATH
export LD_LIBRARY_PATH=/opt/nvidia/hpc_sdk/Linux_x86_64/23.3/comm_libs/openmpi4/openmpi-4.0.5/lib:$LD_LIBRARY_PATH
export LD_LIBRARY_PATH=/opt/nvidia/hpc_sdk/Linux_x86_64/23.3/math_libs/lib64:$LD_LIBRARY_PATH
export LD_LIBRARY_PATH=/opt/nvidia/hpc_sdk/Linux_x86_64/23.3/compilers/lib:$LD_LIBRARY_PATH
export LD_LIBRARY_PATH=/opt/nvidia/hpc_sdk/Linux_x86_64/23.3/compilers/extras/qd/lib:$LD_LIBRARY_PATH
export LD_LIBRARY_PATH=/opt/nvidia/hpc_sdk/Linux_x86_64/23.3/cuda/12.0/extras/CUPTI/lib64:$LD_LIBRARY_PATH
export LD_LIBRARY_PATH=/opt/nvidia/hpc_sdk/Linux_x86_64/23.3/cuda/12.0/lib64:$LD_LIBRARY_PATH
export LD_LIBRARY_PATH=/opt/nvidia/hpc_sdk/Linux_x86_64/23.3/math_libs/lib64:$LD_LIBRARY_PATH
export LD_LIBRARY_PATH=/opt/nvidia/hpc_sdk/Linux_x86_64/23.3/math_libs/lib64/stubs:$LD_LIBRARY_PATH

# Header search path for the same SDK components
export CPATH=/opt/nvidia/hpc_sdk/Linux_x86_64/23.3/comm_libs/12.0/nvshmem/include:$CPATH
export CPATH=/opt/nvidia/hpc_sdk/Linux_x86_64/23.3/comm_libs/12.0/nccl/include:$CPATH
export CPATH=/opt/nvidia/hpc_sdk/Linux_x86_64/23.3/comm_libs/openmpi4/openmpi-4.0.5/include:$CPATH
export CPATH=/opt/nvidia/hpc_sdk/Linux_x86_64/23.3/compilers/extras/qd/include/qd:$CPATH
export CPATH=/opt/nvidia/hpc_sdk/Linux_x86_64/23.3/compilers/include:$CPATH
export CPATH=/opt/nvidia/hpc_sdk/Linux_x86_64/23.3/cuda/12.0/include:$CPATH
export CPATH=/opt/nvidia/hpc_sdk/Linux_x86_64/23.3/math_libs/include:$CPATH

# Point OpenMPI at its install prefix inside the SDK and expose the compiler man pages
export OPAL_PREFIX=/opt/nvidia/hpc_sdk/Linux_x86_64/23.3/comm_libs/openmpi4/openmpi-4.0.5
export MANPATH=/opt/nvidia/hpc_sdk/Linux_x86_64/23.3/compilers/man:$MANPATH


# Configure and build LAMMPS with CMake inside an existing build directory.
# NOTE(review): this is a user-specific project path; a failed `cd` is not checked, so cmake/make
# would then run from whatever directory the script was started in.
cd /grand/projects/RAPINS/xiaoliyan/lmp20230802stb/build-gpu-nvhpc
# Configure: release build, nvc++/nvfortran, C++17, the GPU package with the CUDA API in double
# precision for sm_80, MPI and OpenMP enabled, plus the optional packages listed.
# NOTE(review): CMake variable names are case-sensitive and the Fortran ones are spelled
# CMAKE_Fortran_COMPILER / CMAKE_Fortran_FLAGS; the upper-case spellings below may have no effect.
# NOTE(review): nvc++ and "-std=c++17" are also passed as the C compiler and C/Fortran flags - confirm intended.
cmake ../cmake -DCMAKE_BUILD_TYPE=release \
-DCMAKE_C_COMPILER=nvc++ \
-DCMAKE_CXX_COMPILER=nvc++ \
-DCMAKE_FORTRAN_COMPILER=nvfortran \
-DCMAKE_CXX_STANDARD=17 \
-DLAMMPS_MEMALIGN=64 \
-DLAMMPS_SIZES=bigbig \
-DPKG_MISC=on \
-DPKG_ML-SNAP=on \
-DPKG_MOFFF=on \
-DFFT=KISS \
-DPKG_QEQ=on \
-DPKG_REAXFF=on \
-DPKG_PTM=on \
-DPKG_RIGID=on \
-DPKG_MOLECULE=on \
-DPKG_EXTRA-MOLECULE=on \
-DPKG_EXTRA-FIX=on \
-DPKG_KSPACE=on \
-DPKG_MANYBODY=on \
-DPKG_GRANULAR=on \
-DPKG_GPU=on \
-DGPU_API=cuda \
-DGPU_PREC=double \
-DGPU_ARCH=sm_80 \
-DGPU_DEBUG=yes \
-DCUDA_MPS_SUPPORT=yes \
-DBUILD_OMP=yes \
-DBUILD_MPI=yes \
-DCUDA_NVCC_FLAGS="-std=c++17 -Xcompiler -fopenmp --allow-unsupported-compiler" \
-DCMAKE_CXX_FLAGS="-std=c++17 -DCUDA_PROXY -fopenmp -g -O3 -mp" \
-DCMAKE_C_FLAGS="-std=c++17 -DCUDA_PROXY -fopenmp -g -O3 -mp" \
-DCMAKE_FORTRAN_FLAGS="-std=c++17 -DCUDA_PROXY -fopenmp -g -O3 -mp"

# Compile with 16 parallel jobs
make -j 16

# Path of the `lmp` executable inside the build directory, kept for reference
#LMPEXE=/grand/projects/RAPINS/xiaoliyan/lmp20230802stb/build-gpu-nvhpc/lmp
103 changes: 103 additions & 0 deletions mofa/simulation/lammps-polaris-mps.pbs
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
#!/bin/bash -l
#PBS -l select=1:system=polaris
#PBS -l place=scatter
#PBS -l walltime=01:00:00
#PBS -l filesystems=home:grand:eagle
#PBS -q debug-scaling
#PBS -A RAPINS
#PBS -N test-mps-lmp
#PBS -M xyan11@anl.gov
#PBS -m abe
#PBS -k doe
#PBS -j oe

# PBS job script: starts CUDA MPS, runs two LAMMPS instances at the same time on GPU 0,
# and prints the total wall time. The directives above request one Polaris node for one hour.

# Record the node's CPU layout and memory in the job log
lscpu
free -h
# Remove the stack size limit for everything launched from this shell
ulimit -s unlimited
# Work from the submission directory and log the job id, host and allocated nodes
echo Working directory is $PBS_O_WORKDIR
cd $PBS_O_WORKDIR
echo Jobid: $PBS_JOBID
echo Running on host `hostname`
echo Running on nodes `cat $PBS_NODEFILE`

# Run-time environment: NVIDIA HPC SDK 23.3 under /opt/nvidia/hpc_sdk, set by hand rather than through modules.
# Reset the search paths first (library and header paths start empty); the SDK directories are prepended below.
export PATH=/soft/buildtools/cmake/cmake-3.23.2/cmake-3.23.2-linux-x86_64/bin:/opt/anaconda3x/bin:/opt/clmgr/sbin:/opt/clmgr/bin:/opt/sgi/sbin:/opt/sgi/bin:/usr/local/bin:/usr/bin:/bin:/opt/c3/bin:/usr/lib/mit/bin:/usr/lib/mit/sbin:/opt/pbs/bin:/sbin:/opt/cray/pe/bin
export LD_LIBRARY_PATH=
export LIBRARY_PATH=
export MANPATH=/usr/share/lmod/lmod/share/man:/usr/local/man:/usr/share/man:/opt/c3/man:/opt/pbs/share/man:/opt/clmgr/man:/opt/sgi/share/man:/opt/clmgr/share/man:/opt/clmgr/lib/cm-cli/man
export OPAL_PREFIX=
export CPATH=

# Executables: NVSHMEM, NCCL, OpenMPI 4.0.5, the compilers and CUDA 12.0.
# Each export prepends, so later lines take precedence (the qd/bin entry is listed twice).
export PATH=/opt/nvidia/hpc_sdk/Linux_x86_64/23.3/comm_libs/12.0/nvshmem/bin:$PATH
export PATH=/opt/nvidia/hpc_sdk/Linux_x86_64/23.3/comm_libs/12.0/nccl/bin:$PATH
export PATH=/opt/nvidia/hpc_sdk/Linux_x86_64/23.3/comm_libs/openmpi4/openmpi-4.0.5/bin:$PATH
export PATH=/opt/nvidia/hpc_sdk/Linux_x86_64/23.3/compilers/extras/qd/bin:$PATH
export PATH=/opt/nvidia/hpc_sdk/Linux_x86_64/23.3/compilers/bin:$PATH
export PATH=/opt/nvidia/hpc_sdk/Linux_x86_64/23.3/compilers/extras/qd/bin:$PATH
export PATH=/opt/nvidia/hpc_sdk/Linux_x86_64/23.3/compilers/share/llvm/bin:$PATH
export PATH=/opt/nvidia/hpc_sdk/Linux_x86_64/23.3/cuda/12.0/bin:$PATH

# Link-time library search path (the math_libs/lib64 entry is listed twice)
export LIBRARY_PATH=/opt/nvidia/hpc_sdk/Linux_x86_64/23.3/comm_libs/12.0/nvshmem/lib:$LIBRARY_PATH
export LIBRARY_PATH=/opt/nvidia/hpc_sdk/Linux_x86_64/23.3/comm_libs/12.0/nccl/lib:$LIBRARY_PATH
export LIBRARY_PATH=/opt/nvidia/hpc_sdk/Linux_x86_64/23.3/comm_libs/openmpi4/openmpi-4.0.5/lib:$LIBRARY_PATH
export LIBRARY_PATH=/opt/nvidia/hpc_sdk/Linux_x86_64/23.3/math_libs/lib64:$LIBRARY_PATH
export LIBRARY_PATH=/opt/nvidia/hpc_sdk/Linux_x86_64/23.3/compilers/lib:$LIBRARY_PATH
export LIBRARY_PATH=/opt/nvidia/hpc_sdk/Linux_x86_64/23.3/compilers/extras/qd/lib:$LIBRARY_PATH
export LIBRARY_PATH=/opt/nvidia/hpc_sdk/Linux_x86_64/23.3/cuda/12.0/extras/CUPTI/lib64:$LIBRARY_PATH
export LIBRARY_PATH=/opt/nvidia/hpc_sdk/Linux_x86_64/23.3/cuda/12.0/lib64:$LIBRARY_PATH
export LIBRARY_PATH=/opt/nvidia/hpc_sdk/Linux_x86_64/23.3/math_libs/lib64:$LIBRARY_PATH
export LIBRARY_PATH=/opt/nvidia/hpc_sdk/Linux_x86_64/23.3/math_libs/lib64/stubs:$LIBRARY_PATH

# Run-time library search path: the same SDK directories, in the same order, as LIBRARY_PATH
export LD_LIBRARY_PATH=/opt/nvidia/hpc_sdk/Linux_x86_64/23.3/comm_libs/12.0/nvshmem/lib:$LD_LIBRARY_PATH
export LD_LIBRARY_PATH=/opt/nvidia/hpc_sdk/Linux_x86_64/23.3/comm_libs/12.0/nccl/lib:$LD_LIBRARY_PATH
export LD_LIBRARY_PATH=/opt/nvidia/hpc_sdk/Linux_x86_64/23.3/comm_libs/openmpi4/openmpi-4.0.5/lib:$LD_LIBRARY_PATH
export LD_LIBRARY_PATH=/opt/nvidia/hpc_sdk/Linux_x86_64/23.3/math_libs/lib64:$LD_LIBRARY_PATH
export LD_LIBRARY_PATH=/opt/nvidia/hpc_sdk/Linux_x86_64/23.3/compilers/lib:$LD_LIBRARY_PATH
export LD_LIBRARY_PATH=/opt/nvidia/hpc_sdk/Linux_x86_64/23.3/compilers/extras/qd/lib:$LD_LIBRARY_PATH
export LD_LIBRARY_PATH=/opt/nvidia/hpc_sdk/Linux_x86_64/23.3/cuda/12.0/extras/CUPTI/lib64:$LD_LIBRARY_PATH
export LD_LIBRARY_PATH=/opt/nvidia/hpc_sdk/Linux_x86_64/23.3/cuda/12.0/lib64:$LD_LIBRARY_PATH
export LD_LIBRARY_PATH=/opt/nvidia/hpc_sdk/Linux_x86_64/23.3/math_libs/lib64:$LD_LIBRARY_PATH
export LD_LIBRARY_PATH=/opt/nvidia/hpc_sdk/Linux_x86_64/23.3/math_libs/lib64/stubs:$LD_LIBRARY_PATH

# Header search path for the same SDK components
export CPATH=/opt/nvidia/hpc_sdk/Linux_x86_64/23.3/comm_libs/12.0/nvshmem/include:$CPATH
export CPATH=/opt/nvidia/hpc_sdk/Linux_x86_64/23.3/comm_libs/12.0/nccl/include:$CPATH
export CPATH=/opt/nvidia/hpc_sdk/Linux_x86_64/23.3/comm_libs/openmpi4/openmpi-4.0.5/include:$CPATH
export CPATH=/opt/nvidia/hpc_sdk/Linux_x86_64/23.3/compilers/extras/qd/include/qd:$CPATH
export CPATH=/opt/nvidia/hpc_sdk/Linux_x86_64/23.3/compilers/include:$CPATH
export CPATH=/opt/nvidia/hpc_sdk/Linux_x86_64/23.3/cuda/12.0/include:$CPATH
export CPATH=/opt/nvidia/hpc_sdk/Linux_x86_64/23.3/math_libs/include:$CPATH

# Point OpenMPI at its install prefix inside the SDK and expose the compiler man pages
export OPAL_PREFIX=/opt/nvidia/hpc_sdk/Linux_x86_64/23.3/comm_libs/openmpi4/openmpi-4.0.5
export MANPATH=/opt/nvidia/hpc_sdk/Linux_x86_64/23.3/compilers/man:$MANPATH

# Log the loaded modules for the record
module list

# Run settings: one GPU (device 0), one OpenMP thread, and one MPI rank per mpiexec launch
export NGPUS=1
export OMP_NUM_THREADS=1
export OMP_PROC_BIND=spread
export OMP_PLACES=threads
export CUDA_VISIBLE_DEVICES=0
# NOTE(review): this looks like a Cray MPICH setting while the mpiexec on PATH is the SDK's OpenMPI - confirm it is needed
export MPICH_GPU_SUPPORT_ENABLED=1
export MPIARG="mpiexec -np 1 -npernode 1"
export EXE="/grand/projects/RAPINS/xiaoliyan/lmp20230802stb/build-gpu-nvhpc/lmp"
# LAMMPS arguments: GPU suffix and package using ${NGPUS} GPU(s), reading the input file in.lmp
export EXE_ARG="-sf gpu -pk gpu ${NGPUS} -in in.lmp"

# Enable MPS on each node allocated to job
# NOTE(review): this launch is backgrounded, so the workload below may start before MPS is up - confirm that is acceptable
${MPIARG} ${PBS_O_WORKDIR}/enable_mps_polaris.sh &

# record start time
start=`date +%s`

# workload: two LAMMPS runs started back to back in the background, each from its own directory
cd ${PBS_O_WORKDIR}/inst1
${MPIARG} ${EXE} ${EXE_ARG} &
cd ${PBS_O_WORKDIR}/inst2
${MPIARG} ${EXE} ${EXE_ARG} &

# Block until every background job started above has exited (this includes the MPS start-up launch)
wait

# record end time
end=`date +%s`
echo Total MPS jobs running time was `expr $end - $start` seconds.

# Disable MPS on each node allocated to job
${MPIARG} ${PBS_O_WORKDIR}/disable_mps_polaris.sh
Loading