From 2e569edba653cc31867b9c360ccc53e3e692a496 Mon Sep 17 00:00:00 2001 From: Andy Turner Date: Sun, 16 Nov 2025 10:24:18 +0000 Subject: [PATCH 01/12] Adds R test for CirrusEX --- tests/utils/R/rscript.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/tests/utils/R/rscript.py b/tests/utils/R/rscript.py index 18b829d4..186ac622 100644 --- a/tests/utils/R/rscript.py +++ b/tests/utils/R/rscript.py @@ -29,7 +29,10 @@ class RscriptInstall(RscriptBase): """Tests installing packages with R on the login nodes""" descr = "Tests that R packages can be installed locally. Requires internet access." - valid_systems = ["archer2:login"] + valid_systems = [ + "archer2:login", + "cirrus-ex:login" + ] local = True executable_opts = ["install_benchmark_packages.R"] libs_path = None @@ -61,7 +64,12 @@ class RscriptRun(RscriptBase): Uses packages installed locally in a previous test. """ - valid_systems = ["archer2:login", "archer2:compute"] + valid_systems = [ + "archer2:login", + "archer2:compute", + "cirrus-ex:login", + "cirrus-ex:compute" + ] executable_opts = ["run_benchmark.R"] library = fixture(RscriptInstall, scope="session") libs_path = None From 12b8b895f6991c5e0da0270656128c0351a7869b Mon Sep 17 00:00:00 2001 From: Andy Turner Date: Sun, 16 Nov 2025 10:28:31 +0000 Subject: [PATCH 02/12] Adds basic xthi test --- tests/utils/xthi/xthi.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tests/utils/xthi/xthi.py b/tests/utils/xthi/xthi.py index c837714d..25df4d0b 100644 --- a/tests/utils/xthi/xthi.py +++ b/tests/utils/xthi/xthi.py @@ -13,11 +13,16 @@ class XthiCompilationTest(reframe.CompileOnlyRegressionTest): maintainers = ["k.straford@epcc.ed.ac.uk"] descr = "xthi compilation test" - valid_systems = ["archer2:login", "cirrus:login"] + valid_systems = [ + "archer2:login", + "cirrus:login", + "cirrus-ex:login" + ] valid_prog_environs = [ "PrgEnv-cray", "PrgEnv-gnu", "PrgEnv-aocc", + "PrgEnv-intel", "gcc", "intel", ] From 
9f98058e5a8f0097216e6e29a6e471bcd3fe0ed5 Mon Sep 17 00:00:00 2001 From: Andy Turner Date: Sun, 16 Nov 2025 10:34:05 +0000 Subject: [PATCH 03/12] Adds CirrusEX hetjob test --- tests/utils/xthi/hetjob.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/utils/xthi/hetjob.py b/tests/utils/xthi/hetjob.py index 2c81b2e2..6b90214e 100644 --- a/tests/utils/xthi/hetjob.py +++ b/tests/utils/xthi/hetjob.py @@ -24,7 +24,7 @@ class SharedCommWorldTest(rfm.RunOnlyRegressionTest): maintainers = ["k.stratford@epcc.ed.ac.uk"] descr = "SLURM hetjob for xthi shared MPI_COM_WORLD" - valid_systems = ["archer2:compute"] + valid_systems = ["archer2:compute", "cirrus-ex:compute"] valid_prog_environs = ["*"] modules = ["xthi"] @@ -56,7 +56,7 @@ class SharedCommWorldWithOpenMPTest(rfm.RunOnlyRegressionTest): """ descr = "SLURM hetjob for shared MPI_COM_WORLD with OpenMP" - valid_systems = ["archer2:compute"] + valid_systems = ["archer2:compute", "cirrus-ex:compute"] valid_prog_environs = ["*"] modules = ["xthi"] From cd0641fbb6dd204dba16a6c14a5cebb77c5444ff Mon Sep 17 00:00:00 2001 From: Andy Turner Date: Sun, 16 Nov 2025 10:48:32 +0000 Subject: [PATCH 04/12] Adds CirrusEX hetjob test --- tests/utils/xthi/hetjob.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/utils/xthi/hetjob.py b/tests/utils/xthi/hetjob.py index 6b90214e..77554afb 100644 --- a/tests/utils/xthi/hetjob.py +++ b/tests/utils/xthi/hetjob.py @@ -30,8 +30,8 @@ class SharedCommWorldTest(rfm.RunOnlyRegressionTest): # Utter, utter kludge # 1 + 2 nodes; 8 + 2x4 MPI tasks - hetgroup0 = "--het-group=0 --nodes=1 --ntasks=8 --ntasks-per-node=8 xthi" - hetgroup1 = "--het-group=1 --nodes=2 --ntasks=8 --ntasks-per-node=4 xthi" + hetgroup0 = "--het-group=0 --nodes=1 --ntasks=8 --ntasks-per-node=8 xthi_mpi_mp" + hetgroup1 = "--het-group=1 --nodes=2 --ntasks=8 --ntasks-per-node=4 xthi_mpi_mp" executable = hetgroup0 + " : " + hetgroup1 time_limit = "2m" @@ -64,8 +64,8 @@ class 
SharedCommWorldWithOpenMPTest(rfm.RunOnlyRegressionTest): shared_args = " --nodes=1 --ntasks=8 --tasks-per-node=8 --cpus-per-task=16" openmp0 = " --export=all,OMP_NUM_THREADS=16" openmp1 = " --export=all,OMP_NUM_THREADS=1" - hetgroup0 = "--het-group=0" + shared_args + openmp0 + " xthi" - hetgroup1 = "--het-group=1" + shared_args + openmp1 + " xthi" + hetgroup0 = "--het-group=0" + shared_args + openmp0 + " xthi_mpi_mp" + hetgroup1 = "--het-group=1" + shared_args + openmp1 + " xthi_mpi_mp" executable = hetgroup0 + " : " + hetgroup1 time_limit = "2m" From b6e40d8d131cb26eba15bfbc5f4d336bdfe96299 Mon Sep 17 00:00:00 2001 From: Andy Turner Date: Sun, 16 Nov 2025 11:22:30 +0000 Subject: [PATCH 05/12] Updates container test for Cirrus EX --- .../containers.py} | 86 +++++++++++++++++-- 1 file changed, 81 insertions(+), 5 deletions(-) rename tests/utils/{singularity/singularity.py => containers/containers.py} (54%) diff --git a/tests/utils/singularity/singularity.py b/tests/utils/containers/containers.py similarity index 54% rename from tests/utils/singularity/singularity.py rename to tests/utils/containers/containers.py index 1e10edd3..f47e15b4 100644 --- a/tests/utils/singularity/singularity.py +++ b/tests/utils/containers/containers.py @@ -1,8 +1,8 @@ #!/usr/bin/env python3 """ -Singularity OSU benchmarks +Containerised OSU benchmarks -These tests checks that singularity containers can be run with mpi. Basic performance checks are also included. +These tests checks that containers can be run with MPI. Basic performance checks are also included. 
""" import os @@ -14,9 +14,10 @@ class PullOSUContainer(rfm.RunOnlyRegressionTest): """Pull a container containing an osu benchmark""" - descr = "Pill a osu benchmark container from github " - valid_systems = ["archer2:login"] + descr = "Pull an OSU benchmark container from github " + valid_systems = ["archer2:login, cirrus-ex:login"] valid_prog_environs = ["PrgEnv-gnu"] + # On Cirrus EX, this relies on "singularity" being an alias for "apptainer" executable = "singularity" image_name = "archer2_osu" executable_opts = ["pull", f"docker://ghcr.io/epcced/epcc-reframe/{image_name}"] @@ -29,7 +30,7 @@ def validate_download(self): @rfm.simple_test -class OSUContainerTest(rfm.RunOnlyRegressionTest): +class OSUContainerTestARCHER2(rfm.RunOnlyRegressionTest): """Run the OSU benchmark in a container""" descr = "OSU benchmarks in a container" @@ -93,3 +94,78 @@ def latency_small(self): def validate_job_run(self): """Basic check that any output was produced""" return sn.assert_found("OSU MPI Allreduce Latency Test ", self.stdout) + + +@rfm.simple_test +class OSUContainerTestCirrusEX(rfm.RunOnlyRegressionTest): + """Run the OSU benchmark in a container""" + + descr = "OSU benchmarks in a container" + osu_container = fixture(PullOSUContainer, scope="session") + valid_systems = ["cirrus-ex:compute"] + valid_prog_environs = ["PrgEnv-gnu"] + num_tasks = 576 + num_tasks_per_node = 288 + num_cpus_per_task = 1 + time_limit = "10m" + + env_vars = { + "OMP_NUM_THREADS": str(num_cpus_per_task), + "OMP_PLACES": "cores", + "APPTAINERENV_LD_LIBRARY_PATH": "/opt/cray/pe/mpich/8.1.32/ofi/gnu/11.2/lib-abi-mpich:\ +/opt/cray/libfabric/1.22.0/lib64:\ +/opt/cray/pals/1.6/lib:\ +/opt/cray/pe/lib64:\ +/opt/xpmem/lib64:/lib64", + "APPTAINER_BIND": "/opt/cray,/var/spool,\ +/opt/cray/pe/mpich/8.1.32/ofi/gnu/11.2/lib-abi-mpich,\ +/etc/host.conf,/etc/libibverbs.d/mlx5.driver,\ +/etc/libnl/classid,\ +/etc/resolv.conf,\ +/opt/cray/libfabric/1.22.0/lib64/libfabric.so.1,\ +/lib64/libatomic.so.1,\ 
+/lib64/libgcc_s.so.1,/lib64/libgfortran.so.5,\ +/lib64/libquadmath.so.0,\ +/opt/cray/pals/1.6/lib/libpals.so.0,\ +/opt/cray/pe/lib64/libpmi2.so.0,\ +/opt/cray/pe/lib64/libpmi.so.0,\ +/opt/xpmem/lib64/libxpmem.so.0,\ +/run/munge/munge.socket.2,\ +/lib64/libmunge.so.2,\ +/lib64/libnl-3.so.200,\ +/lib64/libnl-genl-3.so.200,\ +/lib64/libnl-route-3.so.200,\ +/lib64/librdmacm.so.1,\ +/lib64/libcxi.so.1,\ +/lib64/libm.so.6" + } + + reference = { + "cirrus-ex:compute": {"latency_big": (2200, -0.02, 0.30, "us"), "latency_small": (8.4, -0.05, 0.30, "us")} + } + + @require_deps + def set_singularity_invoke(self): + """Builds the command to be passed to srun""" + self.executable = "apptainer" + + self.executable_opts = [ + "run", + os.path.join(self.osu_container.stagedir, self.osu_container.image_name + "_latest.sif"), + "osu_allreduce", + ] + + @performance_function("us") + def latency_big(self): + """Extract the latency from the largest size in the OSU test""" + return sn.extractsingle(r"^1048576\W+([0-9]+(?:.[0-9]+)?)", self.stdout, 1, float) + + @performance_function("us") + def latency_small(self): + """Extract the latency from the largest size in the OSU test""" + return sn.extractsingle(r"^4\W+([0-9]+(?:.[0-9]+)?)", self.stdout, 1, float) + + @sanity_function + def validate_job_run(self): + """Basic check that any output was produced""" + return sn.assert_found("OSU MPI Allreduce Latency Test ", self.stdout) From 74a789857f1fba6b44d03004ad1d62c1328323a0 Mon Sep 17 00:00:00 2001 From: Andy Turner Date: Sun, 16 Nov 2025 11:25:18 +0000 Subject: [PATCH 06/12] Fixes typo --- tests/utils/containers/containers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/utils/containers/containers.py b/tests/utils/containers/containers.py index f47e15b4..45eeec36 100644 --- a/tests/utils/containers/containers.py +++ b/tests/utils/containers/containers.py @@ -15,7 +15,7 @@ class PullOSUContainer(rfm.RunOnlyRegressionTest): """Pull a container containing an osu 
benchmark""" descr = "Pull an OSU benchmark container from github " - valid_systems = ["archer2:login, cirrus-ex:login"] + valid_systems = ["archer2:login", "cirrus-ex:login"] valid_prog_environs = ["PrgEnv-gnu"] # On Cirrus EX, this relies on "singularity" being an alias for "apptainer" executable = "singularity" From 18a61e64669a942515ac1777beeb07fc06bd14ea Mon Sep 17 00:00:00 2001 From: Andy Turner Date: Sun, 16 Nov 2025 12:04:53 +0000 Subject: [PATCH 07/12] Update image for CirrusEX --- tests/utils/containers/containers.py | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/tests/utils/containers/containers.py b/tests/utils/containers/containers.py index 45eeec36..fffb6bad 100644 --- a/tests/utils/containers/containers.py +++ b/tests/utils/containers/containers.py @@ -11,13 +11,12 @@ import reframe.utility.sanity as sn -class PullOSUContainer(rfm.RunOnlyRegressionTest): - """Pull a container containing an osu benchmark""" +class PullOSUContainerARCHER2(rfm.RunOnlyRegressionTest): + """Pull a container containing an osu benchmark - GLIBC compatible with ARCHER2 OS""" descr = "Pull an OSU benchmark container from github " - valid_systems = ["archer2:login", "cirrus-ex:login"] + valid_systems = ["archer2:login"] valid_prog_environs = ["PrgEnv-gnu"] - # On Cirrus EX, this relies on "singularity" being an alias for "apptainer" executable = "singularity" image_name = "archer2_osu" executable_opts = ["pull", f"docker://ghcr.io/epcced/epcc-reframe/{image_name}"] @@ -29,12 +28,29 @@ def validate_download(self): return sn.assert_not_found("error", self.stderr) +class PullOSUContainerCirrusEX(rfm.RunOnlyRegressionTest): + """Pull a container containing an osu benchmark - GLIBC compatible with CirrusEX OS"""" + + descr = "Pull an OSU benchmark container from github " + valid_systems = ["cirrus-ex:login"] + valid_prog_environs = ["PrgEnv-gnu"] + executable = "apptainer" + image_name = "osu-benchmarks:7.5.1" + executable_opts = 
["pull", f"docker://ghcr.io/epcced/epcc-reframe/{image_name}"] + local = True + + @sanity_function + def validate_download(self): + """Sanity Check""" + return sn.assert_not_found("error", self.stderr) + + @rfm.simple_test class OSUContainerTestARCHER2(rfm.RunOnlyRegressionTest): """Run the OSU benchmark in a container""" descr = "OSU benchmarks in a container" - osu_container = fixture(PullOSUContainer, scope="session") + osu_container = fixture(PullOSUContainerARCHER2, scope="session") valid_systems = ["archer2:compute"] valid_prog_environs = ["PrgEnv-gnu"] num_tasks = 256 @@ -101,7 +117,7 @@ class OSUContainerTestCirrusEX(rfm.RunOnlyRegressionTest): """Run the OSU benchmark in a container""" descr = "OSU benchmarks in a container" - osu_container = fixture(PullOSUContainer, scope="session") + osu_container = fixture(PullOSUContainerCirrusEX, scope="session") valid_systems = ["cirrus-ex:compute"] valid_prog_environs = ["PrgEnv-gnu"] num_tasks = 576 From 8b8ddbaf64f05ece065d7a4c27062363a96fd5e7 Mon Sep 17 00:00:00 2001 From: Andy Turner Date: Sun, 16 Nov 2025 12:09:21 +0000 Subject: [PATCH 08/12] Fixes typo --- tests/utils/containers/containers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/utils/containers/containers.py b/tests/utils/containers/containers.py index fffb6bad..10400931 100644 --- a/tests/utils/containers/containers.py +++ b/tests/utils/containers/containers.py @@ -29,7 +29,7 @@ def validate_download(self): class PullOSUContainerCirrusEX(rfm.RunOnlyRegressionTest): - """Pull a container containing an osu benchmark - GLIBC compatible with CirrusEX OS"""" + """Pull a container containing an osu benchmark - GLIBC compatible with CirrusEX OS""" descr = "Pull an OSU benchmark container from github " valid_systems = ["cirrus-ex:login"] From aafce3a9961cc414c6b4961838a0d519b93b4604 Mon Sep 17 00:00:00 2001 From: Andy Turner Date: Sun, 16 Nov 2025 12:13:23 +0000 Subject: [PATCH 09/12] Add functionality to cope with image version 
--- tests/utils/containers/containers.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/utils/containers/containers.py b/tests/utils/containers/containers.py index 10400931..624b762a 100644 --- a/tests/utils/containers/containers.py +++ b/tests/utils/containers/containers.py @@ -35,8 +35,9 @@ class PullOSUContainerCirrusEX(rfm.RunOnlyRegressionTest): valid_systems = ["cirrus-ex:login"] valid_prog_environs = ["PrgEnv-gnu"] executable = "apptainer" - image_name = "osu-benchmarks:7.5.1" - executable_opts = ["pull", f"docker://ghcr.io/epcced/epcc-reframe/{image_name}"] + image_name = "osu-benchmarks" + image_version = "7.5.1" + executable_opts = ["pull", f"docker://ghcr.io/epcced/epcc-reframe/{image_name}:{image_version}"] local = True @sanity_function @@ -167,7 +168,7 @@ def set_singularity_invoke(self): self.executable_opts = [ "run", - os.path.join(self.osu_container.stagedir, self.osu_container.image_name + "_latest.sif"), + os.path.join(self.osu_container.stagedir, self.osu_container.image_name + "_" self.osu_container.image_version + ".sif"), "osu_allreduce", ] From 79684af9a6137c7ad2b41dfdedc43396d942a06c Mon Sep 17 00:00:00 2001 From: Andy Turner Date: Sun, 16 Nov 2025 12:15:10 +0000 Subject: [PATCH 10/12] Fixes typo --- tests/utils/containers/containers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/utils/containers/containers.py b/tests/utils/containers/containers.py index 624b762a..aa5effda 100644 --- a/tests/utils/containers/containers.py +++ b/tests/utils/containers/containers.py @@ -168,7 +168,7 @@ def set_singularity_invoke(self): self.executable_opts = [ "run", - os.path.join(self.osu_container.stagedir, self.osu_container.image_name + "_" self.osu_container.image_version + ".sif"), + os.path.join(self.osu_container.stagedir, self.osu_container.image_name + "_" + self.osu_container.image_version + ".sif"), "osu_allreduce", ] From 9a9fd20edf02628951900ddffbaa96b60a28074b Mon Sep 17 00:00:00 2001 
From: Andy Turner Date: Sun, 16 Nov 2025 12:18:45 +0000 Subject: [PATCH 11/12] Updates performance numbers to match CirrusEX --- tests/utils/containers/containers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/utils/containers/containers.py b/tests/utils/containers/containers.py index aa5effda..6b0b2824 100644 --- a/tests/utils/containers/containers.py +++ b/tests/utils/containers/containers.py @@ -158,7 +158,7 @@ class OSUContainerTestCirrusEX(rfm.RunOnlyRegressionTest): } reference = { - "cirrus-ex:compute": {"latency_big": (2200, -0.02, 0.30, "us"), "latency_small": (8.4, -0.05, 0.30, "us")} + "cirrus-ex:compute": {"latency_big": (1100, -0.02, 0.30, "us"), "latency_small": (9.7, -0.05, 0.30, "us")} } @require_deps From cef0485b5711e330acf4438a0805c6afbb84c896 Mon Sep 17 00:00:00 2001 From: Andy Turner Date: Sun, 16 Nov 2025 12:52:14 +0000 Subject: [PATCH 12/12] Fixes formatting errors --- tests/utils/R/rscript.py | 12 ++---------- tests/utils/containers/containers.py | 7 +++++-- tests/utils/xthi/xthi.py | 6 +----- 3 files changed, 8 insertions(+), 17 deletions(-) diff --git a/tests/utils/R/rscript.py b/tests/utils/R/rscript.py index 186ac622..1d5a2003 100644 --- a/tests/utils/R/rscript.py +++ b/tests/utils/R/rscript.py @@ -29,10 +29,7 @@ class RscriptInstall(RscriptBase): """Tests installing packages with R on the login nodes""" descr = "Tests that R packages can be installed locally. Requires internet access." - valid_systems = [ - "archer2:login", - "cirrus-ex:login" - ] + valid_systems = ["archer2:login", "cirrus-ex:login"] local = True executable_opts = ["install_benchmark_packages.R"] libs_path = None @@ -64,12 +61,7 @@ class RscriptRun(RscriptBase): Uses packages installed locally in a previous test.
""" - valid_systems = [ - "archer2:login", - "archer2:compute", - "cirrus-ex:login", - "cirrus-ex:compute" - ] + valid_systems = ["archer2:login", "archer2:compute", "cirrus-ex:login", "cirrus-ex:compute"] executable_opts = ["run_benchmark.R"] library = fixture(RscriptInstall, scope="session") libs_path = None diff --git a/tests/utils/containers/containers.py b/tests/utils/containers/containers.py index 6b0b2824..31f5fff0 100644 --- a/tests/utils/containers/containers.py +++ b/tests/utils/containers/containers.py @@ -154,7 +154,7 @@ class OSUContainerTestCirrusEX(rfm.RunOnlyRegressionTest): /lib64/libnl-route-3.so.200,\ /lib64/librdmacm.so.1,\ /lib64/libcxi.so.1,\ -/lib64/libm.so.6" +/lib64/libm.so.6", } reference = { @@ -168,7 +168,10 @@ def set_singularity_invoke(self): self.executable_opts = [ "run", - os.path.join(self.osu_container.stagedir, self.osu_container.image_name + "_" + self.osu_container.image_version + ".sif"), + os.path.join( + self.osu_container.stagedir, + self.osu_container.image_name + "_" + self.osu_container.image_version + ".sif", + ), "osu_allreduce", ] diff --git a/tests/utils/xthi/xthi.py b/tests/utils/xthi/xthi.py index 25df4d0b..460913ab 100644 --- a/tests/utils/xthi/xthi.py +++ b/tests/utils/xthi/xthi.py @@ -13,11 +13,7 @@ class XthiCompilationTest(reframe.CompileOnlyRegressionTest): maintainers = ["k.straford@epcc.ed.ac.uk"] descr = "xthi compilation test" - valid_systems = [ - "archer2:login", - "cirrus:login", - "cirrus-ex:login" - ] + valid_systems = ["archer2:login", "cirrus:login", "cirrus-ex:login"] valid_prog_environs = [ "PrgEnv-cray", "PrgEnv-gnu",