From 86c547efba84088cb7556ccc5caadf13475354b2 Mon Sep 17 00:00:00 2001 From: Patrick Lavin Date: Tue, 3 Mar 2026 12:22:02 -0700 Subject: [PATCH 1/7] merge new mpi_comm_spawn changes --- config/sst_check_ariel_mpi.m4 | 53 ---- configure.ac | 2 - src/sst/elements/ariel/Makefile.am | 6 - src/sst/elements/ariel/api/Makefile.am | 12 - src/sst/elements/ariel/api/arielapi.c | 61 +---- src/sst/elements/ariel/api/arielapi.h | 7 + src/sst/elements/ariel/ariel_shmem.h | 2 + src/sst/elements/ariel/arielcore.cc | 13 + src/sst/elements/ariel/arielcpu.h | 7 +- src/sst/elements/ariel/configure.m4 | 6 - .../elements/ariel/frontend/pin3/fesimple.cc | 251 ++++++++++++------ .../ariel/frontend/pin3/pin3frontend.cc | 133 +++++++++- .../ariel/frontend/pin3/pin3frontend.h | 1 + src/sst/elements/ariel/mpi/.gitignore | 4 - src/sst/elements/ariel/mpi/Makefile.am | 12 - src/sst/elements/ariel/mpi/fakepin.cc | 47 ---- src/sst/elements/ariel/mpi/mpilauncher.cc | 215 --------------- src/sst/elements/ariel/tests/testMPI/Makefile | 3 + .../ariel/tests/testMPI/ariel-reduce.py | 5 +- 19 files changed, 333 insertions(+), 507 deletions(-) delete mode 100644 config/sst_check_ariel_mpi.m4 delete mode 100644 src/sst/elements/ariel/mpi/.gitignore delete mode 100644 src/sst/elements/ariel/mpi/Makefile.am delete mode 100644 src/sst/elements/ariel/mpi/fakepin.cc delete mode 100644 src/sst/elements/ariel/mpi/mpilauncher.cc diff --git a/config/sst_check_ariel_mpi.m4 b/config/sst_check_ariel_mpi.m4 deleted file mode 100644 index 27df3a2396..0000000000 --- a/config/sst_check_ariel_mpi.m4 +++ /dev/null @@ -1,53 +0,0 @@ -AC_DEFUN([SST_CHECK_ARIEL_MPI], [ - sst_check_ariel_mpi_happy="no" - - AC_ARG_ENABLE([ariel-mpi], - [AS_HELP_STRING([--enable-ariel-mpi], - [Enable MPI support in Ariel [default=no]])]) - - AS_IF([test "$enable_ariel_mpi" = "yes"], [sst_check_ariel_mpi_happy="yes"]) - - dnl Ensure Core was compiled without MPI - dnl Regrettably, this runs before we have checked whether sst-config exists, - dnl as that config file overwrites the MPICXX and MPICC variables needed by - dnl the ACX_MPI macro. We plan to remove this check altogether in the future. - AS_IF([test "$sst_check_ariel_mpi_happy" = "yes"], [ - AC_MSG_CHECKING([whether sst-core was compilied without MPI]) - sst_config_out=$(sst-config --MPI_CPPFLAGS) - if test -z "$sst_config_out"; then - AC_MSG_RESULT([yes]) - else - AC_MSG_RESULT([no]) - AC_MSG_WARN([SST-Core appears to have been compiled with MPI support. Disabling Ariel MPI support.]) - sst_check_ariel_mpi_happy="no" - fi - ]) - - dnl Find the MPI compilers and put them in MPICC and MIPCXX - AS_IF([test "$sst_check_ariel_mpi_happy" = "yes"], [ - AC_LANG_PUSH([C]) - ACX_MPI([], [sst_check_ariel_mpi_happy="no"]) - AC_LANG_POP([C]) - ]) - - AS_IF([test "$sst_check_ariel_mpi_happy" = "yes"], [ - AC_LANG_PUSH([C++]) - ACX_MPI([], [sst_check_ariel_mpi_happy="no"]) - AC_LANG_POP([C++]) - ]) - - dnl Elements will overwrite these with the values used for compiling Core. We - dnl will save them in new variables. - ARIEL_MPICC=$MPICC - ARIEL_MPICXX=$MPICXX - AS_IF([test "$sst_check_ariel_mpi_happy" = "yes"], [ - ARIEL_MPI_CFLAGS=$($MPICC -showme:compile) - ARIEL_MPI_LIBS=$($MPICC -showme:link) - ]) - - AS_IF([test "$sst_check_ariel_mpi_happy" = "yes"], [ - AC_DEFINE([ENABLE_ARIEL_MPI], [1], [Enable Ariel MPI features]) - ]) - AM_CONDITIONAL([SST_USE_ARIEL_MPI], [test "$sst_check_ariel_mpi_happy" = "yes"]) -]) - diff --git a/configure.ac b/configure.ac index 2a64b8bca6..a040ffe4fc 100644 --- a/configure.ac +++ b/configure.ac @@ -48,8 +48,6 @@ LTDL_INIT([recursive]) AC_CACHE_SAVE -SST_CHECK_ARIEL_MPI([], [AC_MSG_ERROR([Can't enable Ariel MPI])]) - SST_CORE_CHECK_INSTALL() SST_CHECK_OSX() diff --git a/src/sst/elements/ariel/Makefile.am b/src/sst/elements/ariel/Makefile.am index efae8db7ae..b694ef08ee 100644 --- a/src/sst/elements/ariel/Makefile.am +++ b/src/sst/elements/ariel/Makefile.am @@ -1,7 +1,4 @@ SUBDIRS = api -if SST_USE_ARIEL_MPI -SUBDIRS += mpi -endif AM_CPPFLAGS += \ -I$(top_srcdir)/src @@ -57,8 +54,6 @@ libariel_la_SOURCES = \ EXTRA_DIST = \ api/arielapi.c \ api/arielapi.h \ - mpi/mpilauncher.cc \ - mpi/fakepin.cc \ frontend/pin3/fesimple.cc \ frontend/simple/fesimple.cc \ frontend/simple/examples/multicore.py \ @@ -123,7 +118,6 @@ libariel_la_CPPFLAGS = \ -DARIEL_TRACE_LIB=$(libdir)/sst \ -DARIEL_TOOL_DIR="$(libexecdir)" \ -DPINTOOL_EXECUTABLE="$(PINTOOL_RUNTIME)" \ - -DMPILAUNCHER_EXECUTABLE="$(CURDIR)/mpi/mpilauncher" \ $(AM_CPPFLAGS) \ $(CPPFLAGS) diff --git a/src/sst/elements/ariel/api/Makefile.am b/src/sst/elements/ariel/api/Makefile.am index 06e94636ee..137e592c05 100644 --- a/src/sst/elements/ariel/api/Makefile.am +++ b/src/sst/elements/ariel/api/Makefile.am @@ -1,15 +1,3 @@ -if SST_USE_ARIEL_MPI -CC_LOCAL = $(ARIEL_MPICC) -CFLAGS_LOCAL = -fopenmp $(ARIEL_MPI_CFLAGS) -DENABLE_ARIEL_MPI=1 -else -CC_LOCAL = $(CC) -CFLAGS_LOCAL = -endif - -AM_CPPFLAGS += \ - $(MPI_CPPFLAGS) \ - -I$(top_srcdir)/src - lib_LTLIBRARIES = libarielapi.la libarielapi_la_SOURCES = \ diff --git a/src/sst/elements/ariel/api/arielapi.c b/src/sst/elements/ariel/api/arielapi.c index 7bcb442c34..34946ec547 100644 --- a/src/sst/elements/ariel/api/arielapi.c +++ b/src/sst/elements/ariel/api/arielapi.c @@ -17,9 +17,6 @@ #include #include #include -#ifdef ENABLE_ARIEL_MPI -#include -#endif /* These definitions are replaced during simulation */ @@ -40,6 +37,7 @@ void ariel_fence() { } uint64_t ariel_cycles() { + printf("ARIEL: ariel_cycles called in Ariel API.\n"); return 0; } @@ -51,59 +49,10 @@ void ariel_malloc_flag(int64_t id, int count, int level) { printf("ARIEL: flagging next %d mallocs at id %" PRId64 "\n", count, id); } -// To ensure that the Pintool (fesimple.cc) numbers our application's OpenMP threads -// from 0..N-1, we need to run an OpenMP parallel region before calling MPI Init. -// Otherwise, some MPI threads which aren't used for our application will be -// numbered 1 and 2. -void omp_parallel_region() { - volatile int x = 0; -#if defined(_OPENMP) -#pragma omp parallel - { -#pragma omp critical - { - x += 1; - } - } -#else - printf("ERROR: arielapi.c: libarielapi was compiled without OpenMP enabled\n"); - exit(1); -#endif +void ariel_output_stats_begin_region(const char *name) { + printf("ARIEL: Request to print statistics and begin region:%s\n", name); } -// This function only exists to get mapped by the frontend. It should only be called -// from MPI_Init or MPI_Init_thread to allow the frontend to distinguish between our -// custom versions of of those functions and the normal MPI library's versions. -void _api_mpi_init() { - printf("notifying fesimple\n"); -} - -// Custom version of MPI_Init. We override the normal version in order to call an -// OpenMP parallel region to ensure threads are numbered properly by the frontend. -int MPI_Init(int *argc, char ***argv) { -#ifdef ENABLE_ARIEL_MPI - // Communicate to the frontend that we have replaced the nomal MPI_Init with - // the one in the Ariel API - _api_mpi_init(); - omp_parallel_region(); - return PMPI_Init(argc, argv); -#else - printf("Error: arielapi.c: MPI_Init called in arielapi.c but this file was compiled without MPI.\n"); - exit(1); -#endif -} - -// Custom version of MPI_Init_thread. We override the normal verison in order to call an -// OpenMP parallel region to ensure threads are numbered properly by the frontend. -int MPI_Init_thread(int *argc, char ***argv, int required, int *provided) { -#ifdef ENABLE_ARIEL_MPI - // Communicate to the frontend that we have replaced the nomal MPI_Init_thread with - // the one in the Ariel API - _api_mpi_init(); - omp_parallel_region(); - return PMPI_Init_thread(argc, argv, required, provided); -#else - printf("Error: arielapi.c: MPI_Init_thread called in arielapi.c but this file was compiled without MPI. Please recompile the API with `CC=mpicc make`123123.\n"); - exit(1); -#endif +void ariel_output_stats_end_region(const char *name) { + printf("ARIEL: Request to print statistics and end region:%s\n", name); } diff --git a/src/sst/elements/ariel/api/arielapi.h b/src/sst/elements/ariel/api/arielapi.h index db287c8654..4c9611489e 100644 --- a/src/sst/elements/ariel/api/arielapi.h +++ b/src/sst/elements/ariel/api/arielapi.h @@ -48,6 +48,13 @@ uint64_t ariel_cycles(); /* Trigger the simulation to output statistics */ void ariel_output_stats(); +/* Trigger the simulation to output statistics along with a region name. + * This interface may be changed or removed in the future if named + * regions are added to sst-core. + */ +void ariel_output_stats_begin_region(const char *name); +void ariel_output_stats_end_region(const char *name); + /* Control which memory pool (level) the next 'count' allocations encountered should map to * */ diff --git a/src/sst/elements/ariel/ariel_shmem.h b/src/sst/elements/ariel/ariel_shmem.h index 5566f487e3..3e336077e3 100644 --- a/src/sst/elements/ariel/ariel_shmem.h +++ b/src/sst/elements/ariel/ariel_shmem.h @@ -55,6 +55,8 @@ enum ArielShmemCmd_t { ARIEL_SWITCH_POOL = 110, ARIEL_NOOP = 128, ARIEL_OUTPUT_STATS = 140, + ARIEL_OUTPUT_STATS_BEGIN_REGION = 141, + ARIEL_OUTPUT_STATS_END_REGION = 142, ARIEL_ISSUE_RTL = 150, ARIEL_FLUSHLINE_INSTRUCTION = 154, ARIEL_FENCE_INSTRUCTION = 155, diff --git a/src/sst/elements/ariel/arielcore.cc b/src/sst/elements/ariel/arielcore.cc index 90a5256fc0..d24b4e1d03 100644 --- a/src/sst/elements/ariel/arielcore.cc +++ b/src/sst/elements/ariel/arielcore.cc @@ -412,6 +412,7 @@ bool ArielCore::refillQueue() { coreID, (uint32_t) coreQ->size(), (uint32_t) maxQLength)); ArielCommand ac; + char *region_name; const bool avail = tunnel->readMessageNB(coreID, &ac); if ( !avail ) { @@ -428,6 +429,18 @@ bool ArielCore::refillQueue() { performGlobalStatisticOutput(); break; + case ARIEL_OUTPUT_STATS_BEGIN_REGION: + region_name = (char*)ac.inst.payload; + fprintf(stdout, "ARIEL_REGION_BEGIN %s %" PRIu64 "\n", region_name, getCurrentSimTime(getCoreTimeBase().toString())); + performGlobalStatisticOutput(); + break; + + case ARIEL_OUTPUT_STATS_END_REGION: + region_name = (char*)ac.inst.payload; + fprintf(stdout, "ARIEL_REGION_END %s %" PRIu64 "\n", region_name, getCurrentSimTime(getCoreTimeBase().toString())); + performGlobalStatisticOutput(); + break; + case ARIEL_START_INSTRUCTION: if(ARIEL_INST_SP_FP == ac.inst.instClass) { statFPSPIns->addData(1); diff --git a/src/sst/elements/ariel/arielcpu.h b/src/sst/elements/ariel/arielcpu.h index ad1e4f69af..2ee35f8476 100644 --- a/src/sst/elements/ariel/arielcpu.h +++ b/src/sst/elements/ariel/arielcpu.h @@ -64,10 +64,9 @@ class ArielCPU : public SST::Component { {"appstderrappend", "If appstderr is set, set this to 1 to append the file intead of overwriting", "0"}, {"launchparamcount", "Number of parameters supplied for the launch tool", "0" }, {"launchparam%(launchparamcount)d", "Set the parameter to the launcher", "" }, - {"mpimode", "Whether to use to to launch in order to trace MPI-enabled applications.", "0"}, - {"mpilauncher", "Specify a launcher to be used for MPI executables in conjuction with ", STRINGIZE(MPILAUNCHER_EXECUTABLE)}, - {"mpiranks", "Number of ranks to be launched by . Only will be traced by .", "1" }, - {"mpitracerank", "Rank to be traced by .", "0" }, + {"mpimode", "Whether to use MPI_Comm_spawn_multiple to to launch app. Use for MPI aplications. SST-Core must be compiled with MPI enabled.", "0"}, + {"mpiranks", "Number of ranks to be launched. Only will be traced by .", "1" }, + {"mpitracerank", "Rank to be traced", "0" }, {"envparamcount", "Number of environment parameters to supply to the Ariel executable, default=-1 (use SST environment)", "-1"}, {"envparamname%(envparamcount)d", "Sets the environment parameter name", ""}, {"envparamval%(envparamcount)d", "Sets the environment parameter value", ""}, diff --git a/src/sst/elements/ariel/configure.m4 b/src/sst/elements/ariel/configure.m4 index 80836d4d4d..f40675b145 100644 --- a/src/sst/elements/ariel/configure.m4 +++ b/src/sst/elements/ariel/configure.m4 @@ -11,13 +11,7 @@ AC_DEFUN([SST_ariel_CONFIG], [ # Use LIBZ SST_CHECK_LIBZ() - AC_SUBST([ARIEL_MPICC]) - AC_SUBST([ARIEL_MPICXX]) - AC_SUBST([ARIEL_MPI_CFLAGS]) - AC_SUBST([ARIEL_MPI_LIBS]) - AC_CONFIG_FILES([src/sst/elements/ariel/api/Makefile]) - AC_CONFIG_FILES([src/sst/elements/ariel/mpi/Makefile]) AS_IF([test "$sst_check_ariel" = "yes"], [$1], [$2]) ]) diff --git a/src/sst/elements/ariel/frontend/pin3/fesimple.cc b/src/sst/elements/ariel/frontend/pin3/fesimple.cc index 44a8de57f4..4d6458494c 100644 --- a/src/sst/elements/ariel/frontend/pin3/fesimple.cc +++ b/src/sst/elements/ariel/frontend/pin3/fesimple.cc @@ -22,6 +22,9 @@ #include "atomic.hpp" #include #include +#include +#include +#include #include #include @@ -142,9 +145,6 @@ struct timespec offset_tp_mono; struct timespec offset_tp_real; #endif -// MPI -int api_mpi_init_used = 0; - /****************************************************************/ /********************** SHADOW STACK ****************************/ /* Used by 'sieve' to associate mallocs to the code they */ @@ -180,6 +180,91 @@ class StackRecord { std::vector > arielStack; // Per-thread stacks + + +// Returns true iff "libmpi.so" or "libmpi_cray.so" is found +// in the call stack, indicating that the calling thread is +// currently inside an MPI library. We use this information +// to remap the mpi thread ids to come after the app threads. +static const UINT32 MAX_FRAMES = 64; +bool is_mpi_thread(CONTEXT* ctxt) { + void *buf[2048]; + + // PIN_Backtrace fills the trace array with return addresses. + // numFrames holds the number of captured frames. + UINT32 numFrames = PIN_Backtrace(ctxt, buf, sizeof(buf) / sizeof(buf[0])); + + for (UINT32 i = 0; i < numFrames; i++) { + // Get the image (module) that contains the current address. + IMG img = IMG_FindByAddress((ADDRINT)buf[i]); + + if (IMG_Valid(img)) { + std::string imgName = IMG_Name(img); + + // Check if "libmpi.so" appears in the module name. + // Also check for libmpi_cray.so, just in case + if ((imgName.find("libmpi.so") != std::string::npos) || + (imgName.find("libmpi_cray.so") != std::string::npos)) { + return true; + } + } + } + return false; +} + + +// Would be more efficient to implement as a TLS_KEY, +// but it is rarely written so false sharing should not +// be an issue +std::unordered_map remap_id; + +UINT32 next_app_thread; +UINT32 next_mpi_thread; +UINT32 num_threads; + +// Initialize data used for remapping thread IDs +VOID init_remapping_data(UINT32 core_count) { + remap_id[0] = 0; + next_app_thread = 1; + next_mpi_thread = core_count; + num_threads = 1; +} + +VOID SyscallEntry(THREADID tid, CONTEXT *ctxt, SYSCALL_STANDARD std, VOID *v) +{ + PIN_LockClient(); + ADDRINT scNo = PIN_GetSyscallNumber(ctxt, std); + + if (scNo == SYS_clone || scNo == __NR_clone) { + + PIN_GetLock(&mainLock, tid); + // Get the next thread id + UINT32 next_thread = num_threads++; + + bool is_mpi = is_mpi_thread(ctxt); + + // App threads will be numbered 0..num_cores-1 + // MPI library threads will be numbered num_cores.. + if (is_mpi) { + remap_id[next_thread] = next_mpi_thread++; + } else { + remap_id[next_thread] = next_app_thread++; + } + PIN_ReleaseLock(&mainLock); + +//#ifdef ARIEL_DEBUG + fprintf(stderr, "Caught clone syscall. Launching thread %d (MPI?: %d). Current thread map is:\n", next_thread, is_mpi); + for (const auto& pair : remap_id) { + + std::cout << "[ " << pair.first << " -> " << pair.second << " ]\n"; + } +//#endif + } + + + PIN_UnlockClient(); +} + /* Instrumentation function to be called on function calls */ VOID ariel_stack_call(THREADID thr, ADDRINT stackPtr, ADDRINT target, ADDRINT ip) { @@ -346,7 +431,7 @@ VOID WriteFlushInstructionMarker(UINT32 thr, ADDRINT ip, ADDRINT vaddr) ac.instPtr = (uint64_t) ip; ac.flushline.vaddr = (uint32_t) vaddr; - tunnel->writeMessage(thr, ac); + tunnel->writeMessage(remap_id[thr], ac); } VOID WriteFenceInstructionMarker(UINT32 thr, ADDRINT ip) @@ -355,7 +440,7 @@ VOID WriteFenceInstructionMarker(UINT32 thr, ADDRINT ip) ac.command = ARIEL_FENCE_INSTRUCTION; ac.instPtr = (uint64_t) ip; - tunnel->writeMessage(thr, ac); + tunnel->writeMessage(remap_id[thr], ac); } VOID WriteInstructionRead(ADDRINT* address, UINT32 readSize, THREADID thr, ADDRINT ip, @@ -373,7 +458,7 @@ VOID WriteInstructionRead(ADDRINT* address, UINT32 readSize, THREADID thr, ADDRI ac.inst.instClass = instClass; ac.inst.simdElemCount = simdOpWidth; - tunnel->writeMessage(thr, ac); + tunnel->writeMessage(remap_id[thr], ac); } VOID WriteInstructionWrite(ADDRINT* address, UINT32 writeSize, THREADID thr, ADDRINT ip, @@ -405,7 +490,7 @@ VOID WriteInstructionWrite(ADDRINT* address, UINT32 writeSize, THREADID thr, ADD } printf("\n"); */ - tunnel->writeMessage(thr, ac); + tunnel->writeMessage(remap_id[thr], ac); } VOID WriteStartInstructionMarker(UINT32 thr, ADDRINT ip, UINT32 instClass, UINT32 simdOpWidth) @@ -415,7 +500,7 @@ VOID WriteStartInstructionMarker(UINT32 thr, ADDRINT ip, UINT32 instClass, UINT3 ac.instPtr = (uint64_t) ip; ac.inst.simdElemCount = simdOpWidth; ac.inst.instClass = instClass; - tunnel->writeMessage(thr, ac); + tunnel->writeMessage(remap_id[thr], ac); } VOID WriteEndInstructionMarker(UINT32 thr, ADDRINT ip) @@ -423,22 +508,20 @@ VOID WriteEndInstructionMarker(UINT32 thr, ADDRINT ip) ArielCommand ac; ac.command = ARIEL_END_INSTRUCTION; ac.instPtr = (uint64_t) ip; - tunnel->writeMessage(thr, ac); + tunnel->writeMessage(remap_id[thr], ac); } VOID WriteInstructionReadWrite(THREADID thr, ADDRINT* readAddr, UINT32 readSize, ADDRINT* writeAddr, UINT32 writeSize, ADDRINT ip, UINT32 instClass, - UINT32 simdOpWidth, BOOL first, BOOL last ) + UINT32 simdOpWidth ) { if(enable_output) { - if(thr < core_count) { - if (first) - WriteStartInstructionMarker( thr, ip, instClass, simdOpWidth); + if(remap_id[thr] < core_count) { + WriteStartInstructionMarker( thr, ip, instClass, simdOpWidth); WriteInstructionRead( readAddr, readSize, thr, ip, instClass, simdOpWidth ); WriteInstructionWrite( writeAddr, writeSize, thr, ip, instClass, simdOpWidth ); - if (last) - WriteEndInstructionMarker( thr, ip ); + WriteEndInstructionMarker( thr, ip ); } } } @@ -448,7 +531,7 @@ VOID WriteInstructionReadOnly(THREADID thr, ADDRINT* readAddr, UINT32 readSize, { if(enable_output) { - if(thr < core_count) { + if(remap_id[thr] < core_count) { if (first) WriteStartInstructionMarker(thr, ip, instClass, simdOpWidth); WriteInstructionRead( readAddr, readSize, thr, ip, instClass, simdOpWidth ); @@ -462,11 +545,11 @@ VOID WriteInstructionReadOnly(THREADID thr, ADDRINT* readAddr, UINT32 readSize, VOID WriteNoOp(THREADID thr, ADDRINT ip) { if(enable_output) { - if(thr < core_count) { + if(remap_id[thr] < core_count) { ArielCommand ac; ac.command = ARIEL_NOOP; ac.instPtr = (uint64_t) ip; - tunnel->writeMessage(thr, ac); + tunnel->writeMessage(remap_id[thr], ac); } } } @@ -476,7 +559,7 @@ VOID WriteInstructionWriteOnly(THREADID thr, ADDRINT* writeAddr, UINT32 writeSiz { if(enable_output) { - if(thr < core_count) { + if(remap_id[thr] < core_count) { if (first) WriteStartInstructionMarker(thr, ip, instClass, simdOpWidth); WriteInstructionWrite(writeAddr, writeSize, thr, ip, instClass, simdOpWidth); @@ -559,31 +642,15 @@ VOID InstrumentInstruction(INS ins, VOID *v) } UINT32 operands = INS_MemoryOperandCount(ins); - if (INS_HasScatteredMemoryAccess(ins)) - operands = 0; for (UINT32 op = 0; op < operands; op++) { BOOL first = (op == 0); BOOL last = (op == (operands - 1)); - if (INS_MemoryOperandIsRead(ins, op) && INS_MemoryOperandIsWritten(ins, op)) { - USIZE opSize = INS_MemoryOperandSize(ins, op); - INS_InsertPredicatedCall(ins, IPOINT_BEFORE, (AFUNPTR) - WriteInstructionReadWrite, - IARG_THREAD_ID, - IARG_MEMORYREAD_EA, IARG_UINT32, opSize, - IARG_MEMORYWRITE_EA, IARG_UINT32, opSize, - IARG_INST_PTR, - IARG_UINT32, instClass, - IARG_UINT32, simdOpWidth, - IARG_BOOL, first, - IARG_BOOL, last, - IARG_END); - } else if (INS_MemoryOperandIsRead(ins, op)) { - USIZE opSize = INS_MemoryOperandSize(ins, op); + if (INS_MemoryOperandIsRead(ins, op)) { INS_InsertPredicatedCall(ins, IPOINT_BEFORE, (AFUNPTR) WriteInstructionReadOnly, IARG_THREAD_ID, - IARG_MEMORYREAD_EA, IARG_UINT32, opSize, + IARG_MEMORYREAD_EA, IARG_UINT32, INS_MemoryOperandSize(ins, op), IARG_INST_PTR, IARG_UINT32, instClass, IARG_UINT32, simdOpWidth, @@ -591,17 +658,17 @@ VOID InstrumentInstruction(INS ins, VOID *v) IARG_BOOL, last, IARG_END); } else { - USIZE opSize = INS_MemoryOperandSize(ins, op); INS_InsertPredicatedCall(ins, IPOINT_BEFORE, (AFUNPTR) WriteInstructionWriteOnly, IARG_THREAD_ID, - IARG_MEMORYWRITE_EA, IARG_UINT32, opSize, + IARG_MEMORYWRITE_EA, IARG_UINT32, INS_MemoryOperandSize(ins, op), IARG_INST_PTR, IARG_UINT32, instClass, IARG_UINT32, simdOpWidth, IARG_BOOL, first, IARG_BOOL, last, IARG_END); + } } @@ -819,7 +886,29 @@ void mapped_ariel_output_stats() ArielCommand ac; ac.command = ARIEL_OUTPUT_STATS; ac.instPtr = (uint64_t) 0; - tunnel->writeMessage(thr, ac); + tunnel->writeMessage(remap_id[thr], ac); +} + +void mapped_ariel_output_stats_begin_region(const char* name) +{ + THREADID thr = PIN_ThreadId(); + ArielCommand ac; + ac.command = ARIEL_OUTPUT_STATS_BEGIN_REGION; + ac.instPtr = (uint64_t) 0; + strncpy((char*)ac.inst.payload, name, ARIEL_MAX_PAYLOAD_SIZE - 1); + ac.inst.payload[ARIEL_MAX_PAYLOAD_SIZE-1]='\0'; + tunnel->writeMessage(remap_id[thr], ac); +} +void mapped_ariel_output_stats_end_region(const char* name) + +{ + THREADID thr = PIN_ThreadId(); + ArielCommand ac; + ac.command = ARIEL_OUTPUT_STATS_END_REGION; + ac.instPtr = (uint64_t) 0; + strncpy((char*)ac.inst.payload, name, ARIEL_MAX_PAYLOAD_SIZE - 1); + ac.inst.payload[ARIEL_MAX_PAYLOAD_SIZE-1]='\0'; + tunnel->writeMessage(remap_id[thr], ac); } // same effect as mapped_ariel_output_stats(), but it also sends a user-defined reference number back @@ -829,7 +918,7 @@ void mapped_ariel_output_stats_buoy(uint64_t marker) ArielCommand ac; ac.command = ARIEL_OUTPUT_STATS; ac.instPtr = (uint64_t) marker; //user the instruction pointer slot to send the marker number - tunnel->writeMessage(thr, ac); + tunnel->writeMessage(remap_id[thr], ac); } void mapped_ariel_flushline(void *virtualAddress) @@ -851,18 +940,6 @@ void mapped_ariel_fence(void *virtualAddress) WriteFenceInstructionMarker(thr, ip); } -void mapped_api_mpi_init() { - api_mpi_init_used = 1; -} - -int check_for_api_mpi_init() { - if (!api_mpi_init_used && !getenv("ARIEL_DISABLE_MPI_INIT_CHECK")) { - fprintf(stderr, "Error: fesimple.cc: The Ariel API verion of MPI_Init_{thread} was not used, which can result in errors when used in conjunction with OpenMP. Please link against the Ariel API (included in this distribution at src/sst/elements/ariel/api) or disable this message by setting the environment variable `ARIEL_DISABLE_MPI_INIT_CHECK`\n"); - exit(1); - } - return 0; -} - int ariel_mlm_memcpy(void* dest, void* source, size_t size) { #ifdef ARIEL_DEBUG fprintf(stderr, "Perform a mlm_memcpy from Ariel from %p to %p length %llu\n", @@ -880,7 +957,7 @@ int ariel_mlm_memcpy(void* dest, void* source, size_t size) { THREADID currentThread = PIN_ThreadId(); UINT32 thr = (UINT32) currentThread; - if(thr >= core_count) { + if(remap_id[thr] >= core_count) { fprintf(stderr, "Thread ID: %" PRIu32 " is greater than core count.\n", thr); exit(-4); } @@ -895,7 +972,7 @@ int ariel_mlm_memcpy(void* dest, void* source, size_t size) { ac.dma_start.dest = ariel_dest; ac.dma_start.len = length; - tunnel->writeMessage(thr, ac); + tunnel->writeMessage(remap_id[thr], ac); #ifdef ARIEL_DEBUG fprintf(stderr, "Done with ariel memcpy.\n"); @@ -923,7 +1000,7 @@ void ariel_mlm_set_pool(int new_pool) ArielCommand ac; ac.command = ARIEL_SWITCH_POOL; ac.switchPool.pool = newDefaultPool; - tunnel->writeMessage(thr, ac); + tunnel->writeMessage(remap_id[thr], ac); // Keep track of the default pool default_pool = (UINT32) new_pool; @@ -938,8 +1015,8 @@ void* ariel_mmap_mlm(int fileID, size_t size, int level) UINT32 thr = (UINT32) currentThread; #ifdef ARIEL_DEBUG - fprintf(stderr, "%u: Perform a mmap_mlm from Ariel %zu, level %d\n", - thr, size, level); + fprintf(stderr, "%u: Perform a mmap_mlm from Ariel %zu (remapped to %zu), level %d\n", + thr, remap_id[thr], size, level); #endif if(0 == size) @@ -954,8 +1031,8 @@ void* ariel_mmap_mlm(int fileID, size_t size, int level) size_t real_req_size = 4096 * (npages + ((page_diff == 0) ? 0 : 1)); #ifdef ARIEL_DEBUG - fprintf(stderr, "Requested: %llu, but expanded to: %llu (on thread: %lu) \n", - size, real_req_size, thr); + fprintf(stderr, "Requested: %llu, but expanded to: %llu (on thread: %lu (remapped to %zu)) \n", + size, real_req_size, thr, remap_id[thr]); #endif void* real_ptr = 0; @@ -975,11 +1052,11 @@ void* ariel_mmap_mlm(int fileID, size_t size, int level) std::cout<<"File ID at FESIMPLE IS : "<writeMessage(thr, ac); + tunnel->writeMessage(remap_id[thr], ac); #ifdef ARIEL_DEBUG - fprintf(stderr, "%u: Ariel mmap_mlm call allocates data at address: 0x%llx\n", - thr, (uint64_t) real_ptr); + fprintf(stderr, "%u (remapped to %zu): Ariel mmap_mlm call allocates data at address: 0x%llx\n", + thr, remap_id[thr], (uint64_t) real_ptr); #endif PIN_GetLock(&mainLock, thr); @@ -994,7 +1071,7 @@ void* ariel_mlm_malloc(size_t size, int level) { UINT32 thr = (UINT32) currentThread; #ifdef ARIEL_DEBUG - fprintf(stderr, "%u: Perform a mlm_malloc from Ariel %zu, level %d\n", thr, size, level); + fprintf(stderr, "%u: Perform a mlm_malloc from Ariel %zu (remapped to %zu) , level %d\n", thr, remap_id[thr], size, level); #endif if(0 == size) { @@ -1008,8 +1085,8 @@ void* ariel_mlm_malloc(size_t size, int level) { size_t real_req_size = 4096 * (npages + ((page_diff == 0) ? 0 : 1)); #ifdef ARIEL_DEBUG - fprintf(stderr, "Requested: %llu, but expanded to: %llu (on thread: %lu) \n", - size, real_req_size, thr); + fprintf(stderr, "Requested: %llu, but expanded to: %llu (on thread: %lu (remapped to %zu)) \n", + size, real_req_size, thr, remap_id[thr]); #endif void* real_ptr = 0; @@ -1030,11 +1107,11 @@ void* ariel_mlm_malloc(size_t size, int level) { ac.mlm_map.alloc_level = allocationLevel; } - tunnel->writeMessage(thr, ac); + tunnel->writeMessage(remap_id[thr], ac); #ifdef ARIEL_DEBUG - fprintf(stderr, "%u: Ariel mlm_malloc call allocates data at address: 0x%llx\n", - thr, (uint64_t) real_ptr); + fprintf(stderr, "%zu (remapped_to %zu): Ariel mlm_malloc call allocates data at address: 0x%llx\n", + thr, remap_id[thr], (uint64_t) real_ptr); #endif PIN_GetLock(&mainLock, thr); @@ -1049,7 +1126,7 @@ void ariel_mlm_free(void* ptr) UINT32 thr = (UINT32) currentThread; #ifdef ARIEL_DEBUG - fprintf(stderr, "Perform a mlm_free from Ariel (pointer = %p) on thread %lu\n", ptr, thr); + fprintf(stderr, "Perform a mlm_free from Ariel (pointer = %p) on thread %zu (remapped to %zu)\n", ptr, thr, remap_id[thr]); #endif bool found = false; @@ -1075,7 +1152,7 @@ void ariel_mlm_free(void* ptr) ArielCommand ac; ac.command = ARIEL_ISSUE_TLM_FREE; ac.mlm_free.vaddr = virtAddr; - tunnel->writeMessage(thr, ac); + tunnel->writeMessage(remap_id[thr], ac); } else { fprintf(stderr, "ARIEL: Call to free in Ariel did not find a matching local allocation, this memory will be leaked.\n"); @@ -1127,14 +1204,14 @@ VOID ariel_postmalloc_instrument(ADDRINT allocLocation) if (toFast[thr].count == 0) { toFast[thr].valid = false; } - tunnel->writeMessage(thr, ac); + tunnel->writeMessage(remap_id[thr], ac); } } else if (shouldOverride) { ac.mlm_map.alloc_level = overridePool; - tunnel->writeMessage(thr, ac); + tunnel->writeMessage(remap_id[thr], ac); } else if (InterceptMemAllocations.Value()) { ac.mlm_map.alloc_level = allocationLevel; - tunnel->writeMessage(thr, ac); + tunnel->writeMessage(remap_id[thr], ac); } /*printf("ARIEL: Created a malloc of size: %" PRIu64 " in Ariel\n", @@ -1152,7 +1229,7 @@ VOID ariel_postfree_instrument(ADDRINT allocLocation) ArielCommand ac; ac.command = ARIEL_ISSUE_TLM_FREE; ac.mlm_free.vaddr = virtAddr; - tunnel->writeMessage(thr, ac); + tunnel->writeMessage(remap_id[thr], ac); } void mapped_ariel_malloc_flag_fortran(int* mallocLocId, int* count, int* level) @@ -1199,7 +1276,7 @@ void ariel_start_RTL_sim(RTL_shmem_info* rtl_shmem) { THREADID thr = PIN_ThreadId(); const uint32_t thrID = (uint32_t) thr; - tunnel->writeMessage(thrID, acRtl); + tunnel->writeMessage(remap_id[thrID], acRtl); #ifdef ARIEL_DEBUG fprintf(stderr, "\nMessage to add RTL Event into Ariel Event Queue successfully delivered via ArielTunnel"); #endif @@ -1220,7 +1297,7 @@ void ariel_update_RTL_signals(RTL_shmem_info* rtl_shmem) { THREADID thr = PIN_ThreadId(); const uint32_t thrID = (uint32_t) thr; - tunnel->writeMessage(thrID, acRtl); + tunnel->writeMessage(remap_id[thrID], acRtl); #ifdef ARIEL_DEBUG fprintf(stderr, "\nMessage to add RTL Event into Ariel Event Queue to update RTL signals successfully delivered via ArielTunnel"); #endif @@ -1258,6 +1335,7 @@ VOID InstrumentRoutine(RTN rtn, VOID* args) RTN_Replace(rtn, (AFUNPTR) mapped_ariel_cycles); fprintf(stderr, "Replacement complete\n"); return; + /* } else if (RTN_Name(rtn) == "MPI_Init" || RTN_Name(rtn) == "_MPI_Init") { fprintf(stderr, "Identified routine: MPI_Init. Instrumenting.\n"); RTN_Open(rtn); @@ -1275,7 +1353,7 @@ VOID InstrumentRoutine(RTN rtn, VOID* args) RTN_Replace(rtn, (AFUNPTR) mapped_api_mpi_init); fprintf(stderr, "Replacement complete\n"); return; - return; + */ #if ! defined(__APPLE__) } else if (RTN_Name(rtn) == "clock_gettime" || RTN_Name(rtn) == "_clock_gettime" || RTN_Name(rtn) == "__clock_gettime") { @@ -1294,7 +1372,6 @@ VOID InstrumentRoutine(RTN rtn, VOID* args) RTN_Replace(rtn, (AFUNPTR) ariel_update_RTL_signals); fprintf(stderr,"Replacement complete.\n"); return; - } else if ((InterceptMemAllocations.Value() > 0) && RTN_Name(rtn) == "mlm_malloc") { // This means we want a special malloc to be used (needs a TLB map inside the virtual core) fprintf(stderr,"Identified routine: mlm_malloc, replacing with Ariel equivalent...\n"); @@ -1346,6 +1423,16 @@ VOID InstrumentRoutine(RTN rtn, VOID* args) RTN_Replace(rtn, (AFUNPTR) mapped_ariel_output_stats); fprintf(stderr, "Replacement complete\n"); return; + } else if (RTN_Name(rtn) == "ariel_output_stats_begin_region" || RTN_Name(rtn) == "_ariel_output_stats_begin_region" || RTN_Name(rtn) == "__arielfort_MOD_ariel_output_stats_begin_region") { + fprintf(stderr, "Identified routine: ariel_output_stats_begin_region, replacing with Ariel equivalent..\n"); + RTN_Replace(rtn, (AFUNPTR) mapped_ariel_output_stats_begin_region); + fprintf(stderr, "Replacement complete\n"); + return; + } else if (RTN_Name(rtn) == "ariel_output_stats_end_region" || RTN_Name(rtn) == "_ariel_output_stats_end_region" || RTN_Name(rtn) == "__arielfort_MOD_ariel_output_stats_end_region") { + fprintf(stderr, "Identified routine: ariel_output_stats_end_region, replacing with Ariel equivalent..\n"); + RTN_Replace(rtn, (AFUNPTR) mapped_ariel_output_stats_end_region); + fprintf(stderr, "Replacement complete\n"); + return; } else if (RTN_Name(rtn) == "ariel_output_stats_buoy" || RTN_Name(rtn) == "_ariel_output_stats_buoy") { fprintf(stderr, "Identified routine: ariel_output_stats_buoy, replacing with Ariel equivalent..\n"); RTN_Replace(rtn, (AFUNPTR) mapped_ariel_output_stats_buoy); @@ -1417,10 +1504,13 @@ int main(int argc, char *argv[]) { if (PIN_Init(argc, argv)) return Usage(); + + // Load the symbols ready for us to mangle functions. //PIN_InitSymbolsAlt(IFUNC_SYMBOLS); PIN_InitSymbols(); PIN_AddFiniFunction(Fini, 0); + PIN_AddSyscallEntryFunction(SyscallEntry, 0); PIN_InitLock(&mainLock); PIN_InitLock(&mallocIndexLock); @@ -1460,6 +1550,9 @@ int main(int argc, char *argv[]) } core_count = MaxCoreCount.Value(); + + init_remapping_data(core_count); + instrument_instructions = InstrumentInstructions.Value(); // Pin version specific tunnel attach diff --git a/src/sst/elements/ariel/frontend/pin3/pin3frontend.cc b/src/sst/elements/ariel/frontend/pin3/pin3frontend.cc index d47db3b2c1..500c690ac3 100644 --- a/src/sst/elements/ariel/frontend/pin3/pin3frontend.cc +++ b/src/sst/elements/ariel/frontend/pin3/pin3frontend.cc @@ -15,9 +15,7 @@ #include - #include "pin3frontend.h" - #include #if !defined(SST_COMPILE_MACOSX) #include @@ -30,10 +28,9 @@ #include #include #include - #include - #include +#include #define ARIEL_INNER_STRINGIZE(input) #input #define ARIEL_STRINGIZE(input) ARIEL_INNER_STRINGIZE(input) @@ -66,7 +63,7 @@ Pin3Frontend::Pin3Frontend(ComponentId_t id, Params& params, uint32_t cores, // Put together execute_args for fork setForkArguments(); // If mpi, use mpi launcher. Otherwise launch pin - app_name = (mpimode == 1) ? mpilauncher : applauncher; + //app_name = (mpimode == 1) ? mpilauncher : applauncher; // Remember that the list of arguments must be NULL terminated for execution //execute_args[(pin_arg_count - 1) + appargcount] = NULL; @@ -86,14 +83,130 @@ void Pin3Frontend::emergencyShutdown() { ArielFrontendCommon::emergencyShutdown(); } +int LaunchMPIChild(char** pin_command, const int ranks, const int tracerank, const char* env) +{ + + // We have one command for ranks before the traced rank, one + // for the ranks after it, and one for the traced rank itself + int cmd_count = 1; + if ( tracerank > 0 ) { + cmd_count++; + } + if ( tracerank < (ranks - 1) ) { + cmd_count++; + } + + // Ariel has already formed the entire string to launch PIN + the app. + // Find where the PIN arguments end and the app starts. The traced rank will + // launch with pin and the other ranks will launch normally. + int app_idx = -1; + for ( int i = 0; pin_command[i] != NULL; i++ ) { + if ( strcmp(pin_command[i], "--") == 0 ) { + app_idx = i + 1; + break; + } + } + + if ( app_idx == -1 ) { + return 1; + } + + int* array_of_maxprocs = (int*)malloc(sizeof(int) * cmd_count); + char** array_of_commands = (char**)malloc(sizeof(char*) * cmd_count); + char*** array_of_argv = (char***)malloc(sizeof(char**) * cmd_count); + const char** array_of_env = (const char**)malloc(sizeof(char*) * cmd_count); + + if ( cmd_count == 1 ) { + array_of_maxprocs[0] = 1; + array_of_commands[0] = pin_command[0]; + array_of_argv[0] = pin_command + 1; + } + else if ( cmd_count == 2 ) { + if ( tracerank == 0 ) { + array_of_maxprocs[0] = 1; + array_of_maxprocs[1] = ranks - 1; + + array_of_commands[0] = pin_command[0]; + array_of_commands[1] = pin_command[app_idx]; + + array_of_argv[0] = pin_command + 1; + array_of_argv[1] = pin_command + app_idx + 1; + } + else { + array_of_maxprocs[0] = ranks - 1; + array_of_maxprocs[1] = 1; + + array_of_commands[0] = pin_command[app_idx]; + array_of_commands[1] = pin_command[0]; + + array_of_argv[0] = pin_command + app_idx + 1; + array_of_argv[1] = pin_command + 1; + } + } + else if ( cmd_count == 3 ) { + array_of_maxprocs[0] = tracerank; + array_of_maxprocs[1] = 1; + array_of_maxprocs[2] = ranks - tracerank - 1; + + array_of_commands[0] = pin_command[app_idx]; + array_of_commands[1] = pin_command[0]; + array_of_commands[2] = pin_command[app_idx]; + + array_of_argv[0] = pin_command + app_idx + 1; + array_of_argv[1] = pin_command + 1; + array_of_argv[2] = pin_command + app_idx + 1; + } + + for (int i = 0; i < cmd_count; i++) { + array_of_env[i] = env; + } + + int ret = SST::Core::Interprocess::SST_MPI_Comm_spawn_multiple(cmd_count, array_of_commands, + array_of_argv, array_of_maxprocs, array_of_env); + + free(array_of_maxprocs); + free(array_of_commands); + free(array_of_argv); + free(array_of_env); + + return ret; +} + +int Pin3Frontend::forkPINChildMPI(char** args, std::map& app_env, ariel_redirect_info_t redirect_info) { + + // NOTE: Env var stuff is not yet implemented in the core + std::ostringstream envstring; + for (const auto& entry : app_env) { + envstring << entry.first << "=" << entry.second << "\n"; + } + + int ret = LaunchMPIChild( + args, + mpiranks, + mpitracerank, + envstring.str().c_str() + ); + + if (ret) { + output->fatal(CALL_INFO, 1, 0, "Non-zero return from LaunchMPIChild\n"); + } + + return 1; +} + int Pin3Frontend::forkChildProcess(const char* app, char** args, std::map& app_env, ariel_redirect_info_t redirect_info) { + // If user only wants to init the simulation then we do NOT fork the binary if(isSimulationRunModeInit()) return 0; + if (mpimode) { + return forkPINChildMPI(execute_args, execute_env, redirect_info); + } + int next_arg_index = 0; int next_line_index = 0; @@ -257,6 +370,8 @@ int Pin3Frontend::forkChildProcess(const char* app, char** args, } + + void Pin3Frontend::setForkArguments() { // Set all the arguments that will be passed to fork in `execute_args` @@ -267,22 +382,19 @@ void Pin3Frontend::setForkArguments() { // // MPI: We need one argument for the launcher, one for the number of ranks, // and one for the rank to trace - uint32_t mpi_arg_count = 0; - if (mpimode == 1) - mpi_arg_count = 3; // PIN: magic number 37 + the arguments for pin const uint32_t pin_arg_count = 37 + launch_param_count; // Allocate - execute_args = (char**) malloc(sizeof(char*) * (mpi_arg_count + - pin_arg_count + appargcount)); + execute_args = (char**) malloc(sizeof(char*) * (pin_arg_count + appargcount)); uint32_t arg = 0; // Track current arg // Next, set the arguments. // Start with MPI + /* if (mpimode == 1) { // Prepend mpilauncher to execute_args output->verbose(CALL_INFO, 1, 0, "Processing mpilauncher arguments...\n"); @@ -304,6 +416,7 @@ void Pin3Frontend::setForkArguments() { snprintf(execute_args[arg], mpitracerank_str_size, "%s", mpitracerank_str.c_str()); arg++; } + */ // Pin + pin arguments: // pin -follow_execv [other launch args] -t arieltool diff --git a/src/sst/elements/ariel/frontend/pin3/pin3frontend.h b/src/sst/elements/ariel/frontend/pin3/pin3frontend.h index f131d07c6b..743f71e34f 100644 --- a/src/sst/elements/ariel/frontend/pin3/pin3frontend.h +++ b/src/sst/elements/ariel/frontend/pin3/pin3frontend.h @@ -84,6 +84,7 @@ class Pin3Frontend : public ArielFrontendCommon { int forkChildProcess(const char* app, char** args, std::map& app_env, ariel_redirect_info_t redirect_info); + int forkPINChildMPI(char** args, std::map& app_env, ariel_redirect_info_t redirect_info); virtual void setForkArguments(); // set execute_args // Other functions diff --git a/src/sst/elements/ariel/mpi/.gitignore b/src/sst/elements/ariel/mpi/.gitignore deleted file mode 100644 index d1037c1166..0000000000 --- a/src/sst/elements/ariel/mpi/.gitignore +++ /dev/null @@ -1,4 +0,0 @@ -*.o -*.so -mpilauncher -fakepin diff --git a/src/sst/elements/ariel/mpi/Makefile.am b/src/sst/elements/ariel/mpi/Makefile.am deleted file mode 100644 index 3f61675a24..0000000000 --- a/src/sst/elements/ariel/mpi/Makefile.am +++ /dev/null @@ -1,12 +0,0 @@ - -bin_PROGRAMS = mpilauncher fakepin - -mpilauncher_SOURCES = mpilauncher.cc - -fakepin_SOURCES = fakepin.cc - -mpilauncher_CXX = $(ARIEL_MPICXX) -fakepin_CXX = $(ARIEL_MPICXX) - -mpilauncher_CXXFLAGS = $(AM_CFLAGS) -fakepin_CXXFLAGS = $(AM_CFLAGS) diff --git a/src/sst/elements/ariel/mpi/fakepin.cc b/src/sst/elements/ariel/mpi/fakepin.cc deleted file mode 100644 index 1d49080eab..0000000000 --- a/src/sst/elements/ariel/mpi/fakepin.cc +++ /dev/null @@ -1,47 +0,0 @@ -#include -#include -#include -#include - -int main(int argc, char *argv[], char *envp[]) -{ - if (argc < 3) { - printf("Usage: ./fakepin -- [program args...]\n"); - exit(1); - } - - int prog_idx = 1; - - while (strcmp("--", argv[prog_idx])) { - prog_idx++; - } - prog_idx++; - - printf("prog_name: %s\n", argv[prog_idx]); - - // Make a copy of envp so we can add FAKEPIN=1 - char **envp_copy; - - int envp_len = 0; - while(envp[envp_len]!=NULL) { - envp_len++; - } - - envp_copy = (char**) malloc(sizeof(char*) * (envp_len + 1)); - for (int i = 0; i < envp_len - 1; i++) { - envp_copy[i] = (char*) malloc(sizeof(char) * (strlen(envp[i])+1)); - strcpy(envp_copy[i], envp[i]); - } - envp_copy[envp_len-1] = (char*) malloc(sizeof(char) * 10); - strcpy(envp_copy[envp_len-1], "FAKEPIN=1"); - envp_copy[envp_len] = NULL; - - - // Launch the program - char* _argv[] = {NULL}; - printf("Fakepin launching [%s]\n", argv[prog_idx]); - - if (execve(argv[prog_idx], &argv[prog_idx], envp_copy) == -1) { - perror("Could not execve"); - } -} diff --git a/src/sst/elements/ariel/mpi/mpilauncher.cc b/src/sst/elements/ariel/mpi/mpilauncher.cc deleted file mode 100644 index 9d866a0b00..0000000000 --- a/src/sst/elements/ariel/mpi/mpilauncher.cc +++ /dev/null @@ -1,215 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/* - * SLURM-specific MPI launcher for Ariel simulations - * Ariel forks this process which initiates mpirun - * - * If one rank is found in the MPI allocation, all ranks - * will run there. If multiple ranks are found, a single rank - * will run on the node with SST, and the remaining - * ranks will be distributed on the other nodes. - */ - -int pid = 0; // global so we can use it in the signal handler - -// Catch SIGTERM so we can try and shut down the child process -void signalHandler(int signum) { - std::cout << "Caught signal " << signum << ", exiting gracefully." << std::endl; - if (pid != 0) { - kill(pid, signum); - } - exit(0); -} - -int main(int argc, char *argv[]) { - // PIN Example: mpilauncher 8 3 /path/to/pin -t fesimple -- ./myapp -i input1 --otherarg input2 - // EPA Example: mpilauncher 8 3 -- ./myapp.arielinst -i input1 --otherarg input2 - if (argc < 4 || std::string(argv[1]).compare("-H") == 0) { - std::cout << "Usage: " << argv[0] << " [ [pin args]] -- [program args]\n"; - exit(1); - } - - // Set up signal handler - signal(SIGTERM, signalHandler); - - // Get node that SST is running on. - // All processes will run on the same node. - std::array buffer; - gethostname(buffer.data(), 128); - std::string host = buffer.data(); - - // Check inputs - int procs = atoi(argv[1]); - int tracerank = atoi(argv[2]); - - if (procs < 1) { - printf("Error: %s: must be positive\n", argv[0]); - exit(1); - } - - if (tracerank < 0 || tracerank >= procs) { - printf("Error: %s: must be in [0,nprocs)\n", argv[0]); - exit(1); - } - - // To make it easier to build the final command, check if we are using - // pin or an EPA tool. - bool instrument_with_pin = true; - if (std::string(argv[3]).compare("--") == 0) - instrument_with_pin = false; - - // Build two commands -- the pin command (if it exists) which describes how - // to use pin, and the target command, which describes how to run the - // actual application. - // - // Parse the arguments. Build the pin command first (until we hit the --). - // Then build the target command. If there is no pin, the pin command will - // just be "-- " - std::string pin_cmd = ""; - std::string target_cmd = ""; - bool building_pin = true; // Keep track of which command we are building - std::string arg; - - for (int i = 3; i < argc; i++) { - arg = argv[i]; - if (building_pin) { - pin_cmd += arg; - pin_cmd += " "; - } else { - target_cmd += arg; - target_cmd += " "; - } - - if (arg == "--") - building_pin = false; - } - - // Build the final MPI Command - std::stringstream mpi_cmd; - mpi_cmd << "mpirun --oversubscribe"; - - if (instrument_with_pin) { - // For PIN instrumentation, we need to run the regular binary, but - // instrument the trace rank with the pintool (fesimple). Do this by - // starting the ranks before the tracerank, adding the tracerank, and - // adding the remaining ranks - // e.g. mpirun -np M ./myapp -i input1 --anotherarg input2 : - // -np 1 pin -folow_execv -t fesimple.so [fesimple args] -- - // ./myapp -i input1 --anotherarg input2 : - // -np N-M-1 ./myapp -i input1 --anotherarg input2 - - // In order to trace the appropriate rank, determine how many - // should launch before the traced rank, and how many should launch - // after - int ranks_before = tracerank; - int ranks_after = procs - tracerank - 1; - if (ranks_after < 0) { - ranks_after = 0; - } - - // Add processes before the traced rank - if (ranks_before > 0) { - mpi_cmd << " -H " << host - << " -np " << ranks_before - << " " << target_cmd - << " : "; - } - - // Add the traced process - mpi_cmd << " -H " << host - << " -np 1" - << " " << pin_cmd // Should include "--" - << " " << target_cmd; - - // Add processes after the traced rank - if (ranks_after > 0) { - mpi_cmd << " : -H " << host - << " -np " << ranks_after - << " " << target_cmd; - } - } else { - // For EPA-based instrumentation, then run the instrumented application - // just like you would run the original application - // e.g. mpirun -np N ./myapp.arielinst -i input1 --anotherarg input2 - mpi_cmd << " -H " << host - << " -np " << procs - << " " << target_cmd; - } - - - // Execute the command - // Use execve to make sure that the child processes exits when killed by SST - // I am lazily assuming that there are no spaces in any of the arguments. - printf("MPI Command: %s\n", mpi_cmd.str().c_str()); - - // Get a mutable copy - char* cmd_copy = new char[mpi_cmd.str().length() + 1]; - std::strcpy(cmd_copy, mpi_cmd.str().c_str()); - - // Calculate an upper bound for the number of arguments - const int MAX_ARGS = std::strlen(cmd_copy) / 2 + 2; - - // Allocate memory for the pointers - char** exec_argv = new char*[MAX_ARGS]; - for (int i = 0;i < MAX_ARGS; i++) { - exec_argv[i] = NULL; - } - - // Temporary variable to hold each word - char* word; - - // Counter for the number of words - int exec_argc = 0; - - // Use strtok to split the string by spaces - word = std::strtok(cmd_copy, " "); - while (word != nullptr) { - // Allocate memory for the word and copy it - exec_argv[exec_argc] = new char[std::strlen(word) + 1]; - std::strcpy(exec_argv[exec_argc], word); - - // Move to the next word - word = std::strtok(nullptr, " "); - exec_argc++; - } - - assert(exec_argv[exec_argc] == NULL); - - // Forking child process so we can use the parent to kill it if we need to - pid = fork(); - if (pid == -1) { - printf("mpilauncher.cc: fork error: %d, %s\n", errno, strerror(errno)); - exit(-1); - } else if (pid > 1) { // Parent - int status; - waitpid(pid, &status, 0); - if (!WIFEXITED(status)) { - printf("Warning: mpilauncher.cc: Forked process did not exit normally.\n"); - } if (WEXITSTATUS(status) != 0) { - printf("Warning: mpilauncher.cc: Forked process has non-zero exit code: %d\n", WEXITSTATUS(status)); - } - exit(0); - } else { // Child - int ret = execvp(exec_argv[0], exec_argv); - printf("Error: mpilauncher.cc: This should be unreachable. execvp error: %d, %s\n", errno, strerror(errno)); - exit(1); - } - - // TODO cleanup? - // Should not arrive here - return 1; -} diff --git a/src/sst/elements/ariel/tests/testMPI/Makefile b/src/sst/elements/ariel/tests/testMPI/Makefile index 276532273c..3e1820ed01 100644 --- a/src/sst/elements/ariel/tests/testMPI/Makefile +++ b/src/sst/elements/ariel/tests/testMPI/Makefile @@ -9,3 +9,6 @@ hello: mpicxx hello.cc -o hello -I$(API_DIR) -L$(API_DIR) -larielapi -fopenmp reduce: mpicxx reduce.cc -o reduce -I$(API_DIR) -L$(API_DIR) -larielapi -fopenmp +reduce-noapi: + mpicxx reduce-noapi.cc -o reduce-noapi -fopenmp + diff --git a/src/sst/elements/ariel/tests/testMPI/ariel-reduce.py b/src/sst/elements/ariel/tests/testMPI/ariel-reduce.py index 64b7441d35..836878a6c8 100644 --- a/src/sst/elements/ariel/tests/testMPI/ariel-reduce.py +++ b/src/sst/elements/ariel/tests/testMPI/ariel-reduce.py @@ -1,6 +1,7 @@ import sst import sys import os +from pathlib import Path # Detect if we will use MPI mode or not mpi_mode = True @@ -39,11 +40,13 @@ ## Set component parameters and fill subcomponent slots ######################################################################### # Core: 2.4GHz, 2 accesses/cycle, STREAM (triad) pattern generator with 1000 elements per array +exe = "./reduce" +full_exe = str(Path(exe).resolve()) core.addParams({ "clock" : "2.4GHz", "verbose" : 1, #"executable" : "./hello-nompi" - "executable" : "./reduce", + "executable" : full_exe, #"executable" : "/home/prlavin/projects/reference-paper-2024/apps/install/bin/amg", "arielmode" : 0, "corecount" : ncores, From 350450ed273f87afd7453f8cb6eb6951aca07759 Mon Sep 17 00:00:00 2001 From: Patrick Lavin Date: Tue, 3 Mar 2026 14:50:21 -0700 Subject: [PATCH 2/7] cleanup, small bugfix --- src/sst/elements/ariel/arielfrontendcommon.h | 7 +++--- .../elements/ariel/frontend/pin3/fesimple.cc | 25 ++----------------- .../ariel/frontend/pin3/pin3frontend.cc | 25 ++++++++++++------- .../elements/ariel/tests/testIO/runtestio.py | 1 + 4 files changed, 22 insertions(+), 36 deletions(-) diff --git a/src/sst/elements/ariel/arielfrontendcommon.h b/src/sst/elements/ariel/arielfrontendcommon.h index e16cb3a242..c2c733ff9b 100644 --- a/src/sst/elements/ariel/arielfrontendcommon.h +++ b/src/sst/elements/ariel/arielfrontendcommon.h @@ -57,10 +57,9 @@ class ArielFrontendCommon : public ArielFrontend { {"appstdoutappend", "If appstdout is set, set this to 1 to append the file intead of overwriting", "0"}, {"appstderr", "Specify a file to use for the program's stderr", ""}, {"appstderrappend", "If appstderr is set, set this to 1 to append the file intead of overwriting", "0"}, - {"mpimode", "Whether to use to to launch in order to trace MPI-enabled applications.", "0"}, - {"mpilauncher", "Specify a launcher to be used for MPI executables in conjuction with ", STRINGIZE(MPILAUNCHER_EXECUTABLE)}, - {"mpiranks", "Number of ranks to be launched by . Only will be traced by .", "1" }, - {"mpitracerank", "Rank to be traced by .", "0" }, + {"mpimode", "Whether to use MPI_Comm_spawn_multiple to to launch app. Use for MPI aplications. SST-Core must be compiled with MPI enabled.", "0"}, + {"mpiranks", "Number of ranks to be launched. Only will be traced by .", "1" }, + {"mpitracerank", "Rank to be traced", "0" }, {"appargcount", "Number of arguments to the traced executable", "0"}, {"apparg%(appargcount)d", "Arguments for the traced executable", ""}, {"envparamcount", "Number of environment parameters to supply to the Ariel executable, default=-1 (use SST environment)", "-1"}, diff --git a/src/sst/elements/ariel/frontend/pin3/fesimple.cc b/src/sst/elements/ariel/frontend/pin3/fesimple.cc index 4d6458494c..493bc8a779 100644 --- a/src/sst/elements/ariel/frontend/pin3/fesimple.cc +++ b/src/sst/elements/ariel/frontend/pin3/fesimple.cc @@ -36,19 +36,8 @@ #define HAVE_MPI_H #endif - -// TODO add check for PinCRT compatible libz and try to pick that up -/*#ifdef HAVE_PINCRT_LIBZ - -#include "zlib.h" -#define BT_PRINTF(fmt, args...) gzprintf(btfiles[thr], fmt, ##args); - -#else -*/ #define BT_PRINTF(fmt, args...) fprintf(btfiles[thr], fmt, ##args); -//#endif - //This must be defined before inclusion of intttypes.h #ifndef __STDC_FORMAT_MACROS #define __STDC_FORMAT_MACROS @@ -180,8 +169,6 @@ class StackRecord { std::vector > arielStack; // Per-thread stacks - - // Returns true iff "libmpi.so" or "libmpi_cray.so" is found // in the call stack, indicating that the calling thread is // currently inside an MPI library. We use this information @@ -212,7 +199,6 @@ bool is_mpi_thread(CONTEXT* ctxt) { return false; } - // Would be more efficient to implement as a TLS_KEY, // but it is rarely written so false sharing should not // be an issue @@ -1504,8 +1490,6 @@ int main(int argc, char *argv[]) { if (PIN_Init(argc, argv)) return Usage(); - - // Load the symbols ready for us to mangle functions. //PIN_InitSymbolsAlt(IFUNC_SYMBOLS); PIN_InitSymbols(); @@ -1541,21 +1525,16 @@ int main(int argc, char *argv[]) if(PerformWriteTrace.Value() > 0) { writeTrace = true; - } - - if( writeTrace ) { - if( SSTVerbosity.Value() > 0 ) { + if(SSTVerbosity.Value() > 0) { printf("SSTARIEL: Performing write tracing (this is an expensive operation.)\n"); } } core_count = MaxCoreCount.Value(); - init_remapping_data(core_count); - instrument_instructions = InstrumentInstructions.Value(); -// Pin version specific tunnel attach + // Pin version specific tunnel attach tunnelmgr = new SST::Core::Interprocess::MMAPChild_Pin3(SSTNamedPipe.Value()); tunnel = tunnelmgr->getTunnel(); lastMallocSize = (UINT64*) malloc(sizeof(UINT64) * core_count); diff --git a/src/sst/elements/ariel/frontend/pin3/pin3frontend.cc b/src/sst/elements/ariel/frontend/pin3/pin3frontend.cc index 500c690ac3..1323e7320c 100644 --- a/src/sst/elements/ariel/frontend/pin3/pin3frontend.cc +++ b/src/sst/elements/ariel/frontend/pin3/pin3frontend.cc @@ -64,6 +64,7 @@ Pin3Frontend::Pin3Frontend(ComponentId_t id, Params& params, uint32_t cores, setForkArguments(); // If mpi, use mpi launcher. Otherwise launch pin //app_name = (mpimode == 1) ? mpilauncher : applauncher; + app_name = applauncher; // Remember that the list of arguments must be NULL terminated for execution //execute_args[(pin_arg_count - 1) + appargcount] = NULL; @@ -317,20 +318,25 @@ int Pin3Frontend::forkChildProcess(const char* app, char** args, if(0 == app_env.size()) { #if defined(SST_COMPILE_MACOSX) - char *dyldpath = getenv("DYLD_LIBRARY_PATH"); - if(dyldpath) { - setenv("PIN_APP_DYLD_LIBRARY_PATH", dyldpath, 1); - setenv("PIN_DYLD_RESTORE_REQUIRED", "t", 1); - unsetenv("DYLD_LIBRARY_PATH"); - } + char *dyldpath = getenv("DYLD_LIBRARY_PATH"); + + if(dyldpath) { + setenv("PIN_APP_DYLD_LIBRARY_PATH", dyldpath, 1); + setenv("PIN_DYLD_RESTORE_REQUIRED", "t", 1); + unsetenv("DYLD_LIBRARY_PATH"); + } + #else #if defined(HAVE_SET_PTRACER) - prctl(PR_SET_PTRACER, getppid(), 0, 0 ,0); + + prctl(PR_SET_PTRACER, getppid(), 0, 0 ,0); + #endif // End of HAVE_SET_PTRACER #endif // End SST_COMPILE_MACOSX (else branch) + int ret_code = execvp(app, args); - perror("execve"); + perror("execvp"); output->verbose(CALL_INFO, 1, 0, "Call to execvp returned: %d\n", ret_code); @@ -338,6 +344,7 @@ int Pin3Frontend::forkChildProcess(const char* app, char** args, output->fatal(CALL_INFO, -1, "Error executing: %s under a PIN fork\n", app); + } else { char** execute_env_cp = (char**) malloc(sizeof(char*) * (app_env.size() + 1)); uint32_t next_env_cp_index = 0; @@ -359,7 +366,7 @@ int Pin3Frontend::forkChildProcess(const char* app, char** args, execute_env_cp[app_env.size()] = NULL; int ret_code = execve(app, args, execute_env_cp); - perror("execvep"); + perror("execve"); output->verbose(CALL_INFO, 1, 0, "Call to execvpe returned %d\n", ret_code); output->fatal(CALL_INFO, -1, "Error executing %s under a PIN fork\n", app); diff --git a/src/sst/elements/ariel/tests/testIO/runtestio.py b/src/sst/elements/ariel/tests/testIO/runtestio.py index 773774fb8f..ca27155aba 100644 --- a/src/sst/elements/ariel/tests/testIO/runtestio.py +++ b/src/sst/elements/ariel/tests/testIO/runtestio.py @@ -6,6 +6,7 @@ app = os.getenv("ARIEL_EXE") if app == None or not os.path.exists(app): + print("ERROR: Environment variable ARIEL_EXE not set") sys.exit(os.EX_CONFIG) frontend_type = os.getenv("ARIEL_TEST_FRONTEND") From d9ff8d589be803007d9ce86b3456e3e6759ffb70 Mon Sep 17 00:00:00 2001 From: Patrick Lavin Date: Wed, 4 Mar 2026 08:52:25 -0700 Subject: [PATCH 3/7] fixed pin non-mpi tests --- .../ariel/frontend/pin3/pin3frontend.cc | 2 +- .../ariel/tests/testsuite_mpi_Ariel.py | 58 ++++++++++++------- 2 files changed, 37 insertions(+), 23 deletions(-) diff --git a/src/sst/elements/ariel/frontend/pin3/pin3frontend.cc b/src/sst/elements/ariel/frontend/pin3/pin3frontend.cc index 1323e7320c..21e042b0cb 100644 --- a/src/sst/elements/ariel/frontend/pin3/pin3frontend.cc +++ b/src/sst/elements/ariel/frontend/pin3/pin3frontend.cc @@ -368,7 +368,7 @@ int Pin3Frontend::forkChildProcess(const char* app, char** args, int ret_code = execve(app, args, execute_env_cp); perror("execve"); - output->verbose(CALL_INFO, 1, 0, "Call to execvpe returned %d\n", ret_code); + output->verbose(CALL_INFO, 1, 0, "Call to execve returned %d\n", ret_code); output->fatal(CALL_INFO, -1, "Error executing %s under a PIN fork\n", app); } } diff --git a/src/sst/elements/ariel/tests/testsuite_mpi_Ariel.py b/src/sst/elements/ariel/tests/testsuite_mpi_Ariel.py index 9b3f74028a..00941aeece 100644 --- a/src/sst/elements/ariel/tests/testsuite_mpi_Ariel.py +++ b/src/sst/elements/ariel/tests/testsuite_mpi_Ariel.py @@ -18,7 +18,7 @@ def get_hello_string(rank, ranks, tracerank, threads, frontend): ################################################################################ ############################################## - # EPA Frontend Functions not yet in sst-core # + # Checks not not yet in sst-core # ############################################## def is_EPAX_loaded() -> bool: # Check if arm-based epa tool is available @@ -35,6 +35,20 @@ def is_PEBIL_loaded() -> bool: if pebil_path is not None: pebildir_found = os.path.isdir(pebil_path) return pebildir_found + +# Check that MPICC is set +def check_mpi_support(): + try: + # Query only the MPICC setting + output = subprocess.check_output( + ["sst-config", "--MPICC"], + stderr=subprocess.DEVNULL, + text=True + ).strip() + mpi_support = bool(output) + except subprocess.CalledProcessError: + mpi_support = False + return mpi_support ################################################################################ class testcase_Ariel(SSTTestCase): @@ -82,123 +96,124 @@ def assert_nonzero_stat(self, filename, stat): using_osx = host_os_get_distribution_type() == OS_DIST_OSX osx_error_msg = "Ariel: OpenMP is not supported on macOS" + mpi_support = check_mpi_support() + mpi_error_msg = f"Ariel: Core was compiled without MPI" + testsuite_dir = os.path.dirname(__file__) bindir = sstsimulator_conf_get_value('SST_ELEMENT_LIBRARY', 'SST_ELEMENT_LIBRARY_BINDIR', str) - mpilauncher_exists = os.path.isfile(bindir + '/mpilauncher') - mpi_error_msg = f"Ariel: The mpilauncher executable was not found" - @unittest.skipIf(not mpilauncher_exists, mpi_error_msg) + @unittest.skipIf(not mpi_support, mpi_error_msg) @unittest.skipIf(not pin_loaded, pin_error_msg) @unittest.skipIf(multi_rank, multi_rank_error_msg) def test_Ariel_mpi_hello_01_pin(self): self.ariel_Template(threads=1, ranks=1) - @unittest.skipIf(not mpilauncher_exists, mpi_error_msg) + @unittest.skipIf(not mpi_support, mpi_error_msg) @unittest.skipIf(not epa_loaded, epa_error_msg) @unittest.skipIf(multi_rank, multi_rank_error_msg) def test_Ariel_mpi_hello_01_epa(self): self.ariel_Template(threads=1, ranks=1, frontend="epa") - @unittest.skipIf(not mpilauncher_exists, mpi_error_msg) + @unittest.skipIf(not mpi_support, mpi_error_msg) @unittest.skipIf(not pin_loaded, pin_error_msg) @unittest.skipIf(multi_rank, multi_rank_error_msg) def test_Ariel_mpi_hello_02_pin(self): self.ariel_Template(threads=1, ranks=2) - @unittest.skipIf(not mpilauncher_exists, mpi_error_msg) + @unittest.skipIf(not mpi_support, mpi_error_msg) @unittest.skipIf(not epa_loaded, epa_error_msg) @unittest.skipIf(multi_rank, multi_rank_error_msg) def test_Ariel_mpi_hello_02_epa(self): self.ariel_Template(threads=1, ranks=2, frontend="epa") - @unittest.skipIf(not mpilauncher_exists, mpi_error_msg) + @unittest.skipIf(not mpi_support, mpi_error_msg) @unittest.skipIf(not pin_loaded, pin_error_msg) @unittest.skipIf(multi_rank, multi_rank_error_msg) @unittest.skipIf(using_osx, osx_error_msg) def test_Ariel_mpi_hello_03_pin(self): self.ariel_Template(threads=2, ranks=1) - @unittest.skipIf(not mpilauncher_exists, mpi_error_msg) + @unittest.skipIf(not mpi_support, mpi_error_msg) @unittest.skipIf(not epa_loaded, epa_error_msg) @unittest.skipIf(multi_rank, multi_rank_error_msg) @unittest.skipIf(using_osx, osx_error_msg) def test_Ariel_mpi_hello_03_epa(self): self.ariel_Template(threads=2, ranks=1, frontend="epa") - @unittest.skipIf(not mpilauncher_exists, mpi_error_msg) + @unittest.skipIf(not mpi_support, mpi_error_msg) @unittest.skipIf(not pin_loaded, pin_error_msg) @unittest.skipIf(multi_rank, multi_rank_error_msg) def test_Ariel_mpi_hello_04_pin(self): self.ariel_Template(threads=1, ranks=2, tracerank=1) - @unittest.skipIf(not mpilauncher_exists, mpi_error_msg) + @unittest.skipIf(not mpi_support, mpi_error_msg) @unittest.skipIf(not epa_loaded, epa_error_msg) @unittest.skipIf(multi_rank, multi_rank_error_msg) def test_Ariel_mpi_hello_04_epa(self): self.ariel_Template(threads=1, ranks=2, tracerank=1, frontend="epa") - @unittest.skipIf(not mpilauncher_exists, mpi_error_msg) + @unittest.skipIf(not mpi_support, mpi_error_msg) @unittest.skipIf(not pin_loaded, pin_error_msg) @unittest.skipIf(multi_rank, multi_rank_error_msg) @unittest.skipIf(using_osx, osx_error_msg) def test_Ariel_mpi_hello_05_pin(self): self.ariel_Template(threads=2, ranks=3, tracerank=1) - @unittest.skipIf(not mpilauncher_exists, mpi_error_msg) + @unittest.skipIf(not mpi_support, mpi_error_msg) @unittest.skipIf(not epa_loaded, epa_error_msg) @unittest.skipIf(multi_rank, multi_rank_error_msg) @unittest.skipIf(using_osx, osx_error_msg) def test_Ariel_mpi_hello_05_epa(self): self.ariel_Template(threads=2, ranks=3, tracerank=1, frontend="epa") - @unittest.skipIf(not mpilauncher_exists, mpi_error_msg) + @unittest.skipIf(not mpi_support, mpi_error_msg) @unittest.skipIf(not pin_loaded, pin_error_msg) @unittest.skipIf(multi_rank, multi_rank_error_msg) @unittest.skipIf(using_osx, osx_error_msg) def test_Ariel_mpi_hello_06_pin(self): self.ariel_Template(threads=2, ranks=2) - @unittest.skipIf(not mpilauncher_exists, mpi_error_msg) + @unittest.skipIf(not mpi_support, mpi_error_msg) @unittest.skipIf(not epa_loaded, epa_error_msg) @unittest.skipIf(multi_rank, multi_rank_error_msg) @unittest.skipIf(using_osx, osx_error_msg) def test_Ariel_mpi_hello_06_epa(self): self.ariel_Template(threads=2, ranks=2, frontend="epa") - @unittest.skipIf(not mpilauncher_exists, mpi_error_msg) + @unittest.skipIf(not mpi_support, mpi_error_msg) @unittest.skipIf(not pin_loaded, pin_error_msg) @unittest.skipIf(multi_rank, multi_rank_error_msg) def test_Ariel_mpi_reduce_01_pin(self): self.ariel_Template(threads=1, ranks=1, program="reduce") - @unittest.skipIf(not mpilauncher_exists, mpi_error_msg) + @unittest.skipIf(not mpi_support, mpi_error_msg) @unittest.skipIf(not epa_loaded, epa_error_msg) @unittest.skipIf(multi_rank, multi_rank_error_msg) def test_Ariel_mpi_reduce_01_epa(self): self.ariel_Template(threads=1, ranks=1, program="reduce", frontend="epa") - @unittest.skipIf(not mpilauncher_exists, mpi_error_msg) + @unittest.skipIf(not mpi_support, mpi_error_msg) @unittest.skipIf(not pin_loaded, pin_error_msg) @unittest.skipIf(multi_rank, multi_rank_error_msg) @unittest.skipIf(using_osx, osx_error_msg) def test_Ariel_mpi_reduce_02_pin(self): self.ariel_Template(threads=2, ranks=2, program="reduce") - @unittest.skipIf(not mpilauncher_exists, mpi_error_msg) + @unittest.skipIf(not mpi_support, mpi_error_msg) @unittest.skipIf(not epa_loaded, epa_error_msg) @unittest.skipIf(multi_rank, multi_rank_error_msg) @unittest.skipIf(using_osx, osx_error_msg) def test_Ariel_mpi_reduce_02_epa(self): self.ariel_Template(threads=2, ranks=2, program="reduce", frontend="epa") - @unittest.skipIf(not mpilauncher_exists, mpi_error_msg) + @unittest.skipIf(not mpi_support, mpi_error_msg) @unittest.skipIf(not pin_loaded, pin_error_msg) @unittest.skipIf(multi_rank, multi_rank_error_msg) @unittest.skipIf(using_osx, osx_error_msg) def test_Ariel_mpi_reduce_03_pin(self): self.ariel_Template(threads=2, ranks=4, program="reduce", tracerank=1) - @unittest.skipIf(not mpilauncher_exists, mpi_error_msg) + @unittest.skipIf(not mpi_support, mpi_error_msg) @unittest.skipIf(not epa_loaded, epa_error_msg) @unittest.skipIf(multi_rank, multi_rank_error_msg) @unittest.skipIf(using_osx, osx_error_msg) @@ -277,7 +292,6 @@ def _setup_ariel_test_files(self): test_path = self.get_testsuite_dir() outdir = self.get_test_output_run_dir() - # Set the paths to the various directories self.ArielElementDir = os.path.abspath("{0}/../".format(test_path)) self.ArielElementTestMPIDir = "{0}/tests/testMPI".format(self.ArielElementDir) From bace7c89b5c01ffb5076cf0f735e4dd3330cb4b0 Mon Sep 17 00:00:00 2001 From: Patrick Lavin Date: Wed, 4 Mar 2026 10:04:36 -0700 Subject: [PATCH 4/7] fix MPI tests --- src/sst/elements/ariel/tests/testMPI/demo1.py | 5 ++++- .../elements/ariel/tests/testMPI/test-mpi.py | 21 +++++++++++-------- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/src/sst/elements/ariel/tests/testMPI/demo1.py b/src/sst/elements/ariel/tests/testMPI/demo1.py index 1f01b2e26f..12cc502c0e 100644 --- a/src/sst/elements/ariel/tests/testMPI/demo1.py +++ b/src/sst/elements/ariel/tests/testMPI/demo1.py @@ -33,8 +33,11 @@ #"executable" : "./hello-nompi" "executable" : "./hello", #"executable" : "/home/prlavin/projects/reference-paper-2024/apps/install/bin/amg", - "arielmode" : 1, + "arielmode" : 0, "corecount" : ncores, + "envparamcount" : 1, + "envparamname0" : "OMP_NUM_THREADS", + "envparamval0": ncores, }) if mpi_mode: diff --git a/src/sst/elements/ariel/tests/testMPI/test-mpi.py b/src/sst/elements/ariel/tests/testMPI/test-mpi.py index 5f2487dad7..0cb208830e 100644 --- a/src/sst/elements/ariel/tests/testMPI/test-mpi.py +++ b/src/sst/elements/ariel/tests/testMPI/test-mpi.py @@ -55,15 +55,18 @@ # 2.4GHz cores. One for each omp thread core.addParams({ - "clock" : "2.4GHz", - "verbose" : 1, - "frontend" : frontend, - "executable" : exe, - "arielmode" : 0, # Disable tracing at start - "corecount" : ncores, - "mpimode" : 1, - "mpiranks" : mpiranks, - "mpitracerank" : tracerank, + "clock" : "2.4GHz", + "verbose" : 1, + "frontend" : frontend, + "executable" : exe, + "arielmode" : 0, # Disable tracing at start + "corecount" : ncores, + "mpimode" : 1, + "mpiranks" : mpiranks, + "mpitracerank" : tracerank, + "envparamcount" : 1, + "envparamname0" : "OMP_NUM_THREADS", + "envparamval0" : ncores, }) # This should be detected in Ariel but checking here allows us to fail From 99d72aaef433c34acda0628d636cbb028d9652ff Mon Sep 17 00:00:00 2001 From: Patrick Lavin Date: Wed, 4 Mar 2026 14:08:03 -0700 Subject: [PATCH 5/7] disable EPA+MPI in Ariel, remove some extra output --- .../ariel/frontend/epa/epafrontend.cc | 12 ++- .../elements/ariel/frontend/pin3/fesimple.cc | 4 +- .../ariel/tests/testsuite_mpi_Ariel.py | 102 +++++++++--------- 3 files changed, 61 insertions(+), 57 deletions(-) diff --git a/src/sst/elements/ariel/frontend/epa/epafrontend.cc b/src/sst/elements/ariel/frontend/epa/epafrontend.cc index f5ca12d907..9278c7d622 100644 --- a/src/sst/elements/ariel/frontend/epa/epafrontend.cc +++ b/src/sst/elements/ariel/frontend/epa/epafrontend.cc @@ -52,9 +52,14 @@ EPAFrontend::EPAFrontend(ComponentId_t id, Params& params, uint32_t cores, // Put together execute_args for fork setForkArguments(); // If mpi, use mpi launcher. Otherwise launch instrumented app - app_name = (mpimode == 1) ? mpilauncher : executable; + //app_name = (mpimode == 1) ? mpilauncher : executable; + app_name = executable; -// output->verbose(CALL_INFO, 1, 0, "Processing application arguments...\n"); + if (mpimode == 1) { + output->fatal(CALL_INFO, -1, "Error: Running the EPA frontend with MPI is currently not supported. The code needs to be adapted to use SST_MPI_Comm_spawn_multiple() to launch the app.\n"); + } + + //output->verbose(CALL_INFO, 1, 0, "Processing application arguments...\n"); // Remember that the list of arguments must be NULL terminated for execution @@ -75,7 +80,7 @@ int EPAFrontend::forkChildProcess(const char* app, char** args, std::mapverbose(CALL_INFO, 1, 0, "Executing command: %s\n", full_execute_line); free(full_execute_line); - // Fork this binary, then exec to get around waiting for // child to exit. pid_t the_child; diff --git a/src/sst/elements/ariel/frontend/pin3/fesimple.cc b/src/sst/elements/ariel/frontend/pin3/fesimple.cc index 493bc8a779..8b87f5498d 100644 --- a/src/sst/elements/ariel/frontend/pin3/fesimple.cc +++ b/src/sst/elements/ariel/frontend/pin3/fesimple.cc @@ -238,13 +238,13 @@ VOID SyscallEntry(THREADID tid, CONTEXT *ctxt, SYSCALL_STANDARD std, VOID *v) } PIN_ReleaseLock(&mainLock); -//#ifdef ARIEL_DEBUG +#ifdef ARIEL_DEBUG fprintf(stderr, "Caught clone syscall. Launching thread %d (MPI?: %d). Current thread map is:\n", next_thread, is_mpi); for (const auto& pair : remap_id) { std::cout << "[ " << pair.first << " -> " << pair.second << " ]\n"; } -//#endif +#endif } diff --git a/src/sst/elements/ariel/tests/testsuite_mpi_Ariel.py b/src/sst/elements/ariel/tests/testsuite_mpi_Ariel.py index 00941aeece..5d613b3ec3 100644 --- a/src/sst/elements/ariel/tests/testsuite_mpi_Ariel.py +++ b/src/sst/elements/ariel/tests/testsuite_mpi_Ariel.py @@ -108,11 +108,11 @@ def assert_nonzero_stat(self, filename, stat): def test_Ariel_mpi_hello_01_pin(self): self.ariel_Template(threads=1, ranks=1) - @unittest.skipIf(not mpi_support, mpi_error_msg) - @unittest.skipIf(not epa_loaded, epa_error_msg) - @unittest.skipIf(multi_rank, multi_rank_error_msg) - def test_Ariel_mpi_hello_01_epa(self): - self.ariel_Template(threads=1, ranks=1, frontend="epa") +# @unittest.skipIf(not mpi_support, mpi_error_msg) +# @unittest.skipIf(not epa_loaded, epa_error_msg) +# @unittest.skipIf(multi_rank, multi_rank_error_msg) +# def test_Ariel_mpi_hello_01_epa(self): +# self.ariel_Template(threads=1, ranks=1, frontend="epa") @unittest.skipIf(not mpi_support, mpi_error_msg) @unittest.skipIf(not pin_loaded, pin_error_msg) @@ -120,11 +120,11 @@ def test_Ariel_mpi_hello_01_epa(self): def test_Ariel_mpi_hello_02_pin(self): self.ariel_Template(threads=1, ranks=2) - @unittest.skipIf(not mpi_support, mpi_error_msg) - @unittest.skipIf(not epa_loaded, epa_error_msg) - @unittest.skipIf(multi_rank, multi_rank_error_msg) - def test_Ariel_mpi_hello_02_epa(self): - self.ariel_Template(threads=1, ranks=2, frontend="epa") +# @unittest.skipIf(not mpi_support, mpi_error_msg) +# @unittest.skipIf(not epa_loaded, epa_error_msg) +# @unittest.skipIf(multi_rank, multi_rank_error_msg) +# def test_Ariel_mpi_hello_02_epa(self): +# self.ariel_Template(threads=1, ranks=2, frontend="epa") @unittest.skipIf(not mpi_support, mpi_error_msg) @unittest.skipIf(not pin_loaded, pin_error_msg) @@ -133,12 +133,12 @@ def test_Ariel_mpi_hello_02_epa(self): def test_Ariel_mpi_hello_03_pin(self): self.ariel_Template(threads=2, ranks=1) - @unittest.skipIf(not mpi_support, mpi_error_msg) - @unittest.skipIf(not epa_loaded, epa_error_msg) - @unittest.skipIf(multi_rank, multi_rank_error_msg) - @unittest.skipIf(using_osx, osx_error_msg) - def test_Ariel_mpi_hello_03_epa(self): - self.ariel_Template(threads=2, ranks=1, frontend="epa") +# @unittest.skipIf(not mpi_support, mpi_error_msg) +# @unittest.skipIf(not epa_loaded, epa_error_msg) +# @unittest.skipIf(multi_rank, multi_rank_error_msg) +# @unittest.skipIf(using_osx, osx_error_msg) +# def test_Ariel_mpi_hello_03_epa(self): +# self.ariel_Template(threads=2, ranks=1, frontend="epa") @unittest.skipIf(not mpi_support, mpi_error_msg) @unittest.skipIf(not pin_loaded, pin_error_msg) @@ -146,11 +146,11 @@ def test_Ariel_mpi_hello_03_epa(self): def test_Ariel_mpi_hello_04_pin(self): self.ariel_Template(threads=1, ranks=2, tracerank=1) - @unittest.skipIf(not mpi_support, mpi_error_msg) - @unittest.skipIf(not epa_loaded, epa_error_msg) - @unittest.skipIf(multi_rank, multi_rank_error_msg) - def test_Ariel_mpi_hello_04_epa(self): - self.ariel_Template(threads=1, ranks=2, tracerank=1, frontend="epa") +# @unittest.skipIf(not mpi_support, mpi_error_msg) +# @unittest.skipIf(not epa_loaded, epa_error_msg) +# @unittest.skipIf(multi_rank, multi_rank_error_msg) +# def test_Ariel_mpi_hello_04_epa(self): +# self.ariel_Template(threads=1, ranks=2, tracerank=1, frontend="epa") @unittest.skipIf(not mpi_support, mpi_error_msg) @unittest.skipIf(not pin_loaded, pin_error_msg) @@ -159,12 +159,12 @@ def test_Ariel_mpi_hello_04_epa(self): def test_Ariel_mpi_hello_05_pin(self): self.ariel_Template(threads=2, ranks=3, tracerank=1) - @unittest.skipIf(not mpi_support, mpi_error_msg) - @unittest.skipIf(not epa_loaded, epa_error_msg) - @unittest.skipIf(multi_rank, multi_rank_error_msg) - @unittest.skipIf(using_osx, osx_error_msg) - def test_Ariel_mpi_hello_05_epa(self): - self.ariel_Template(threads=2, ranks=3, tracerank=1, frontend="epa") +# @unittest.skipIf(not mpi_support, mpi_error_msg) +# @unittest.skipIf(not epa_loaded, epa_error_msg) +# @unittest.skipIf(multi_rank, multi_rank_error_msg) +# @unittest.skipIf(using_osx, osx_error_msg) +# def test_Ariel_mpi_hello_05_epa(self): +# self.ariel_Template(threads=2, ranks=3, tracerank=1, frontend="epa") @unittest.skipIf(not mpi_support, mpi_error_msg) @unittest.skipIf(not pin_loaded, pin_error_msg) @@ -173,12 +173,12 @@ def test_Ariel_mpi_hello_05_epa(self): def test_Ariel_mpi_hello_06_pin(self): self.ariel_Template(threads=2, ranks=2) - @unittest.skipIf(not mpi_support, mpi_error_msg) - @unittest.skipIf(not epa_loaded, epa_error_msg) - @unittest.skipIf(multi_rank, multi_rank_error_msg) - @unittest.skipIf(using_osx, osx_error_msg) - def test_Ariel_mpi_hello_06_epa(self): - self.ariel_Template(threads=2, ranks=2, frontend="epa") +# @unittest.skipIf(not mpi_support, mpi_error_msg) +# @unittest.skipIf(not epa_loaded, epa_error_msg) +# @unittest.skipIf(multi_rank, multi_rank_error_msg) +# @unittest.skipIf(using_osx, osx_error_msg) +# def test_Ariel_mpi_hello_06_epa(self): +# self.ariel_Template(threads=2, ranks=2, frontend="epa") @unittest.skipIf(not mpi_support, mpi_error_msg) @unittest.skipIf(not pin_loaded, pin_error_msg) @@ -186,12 +186,12 @@ def test_Ariel_mpi_hello_06_epa(self): def test_Ariel_mpi_reduce_01_pin(self): self.ariel_Template(threads=1, ranks=1, program="reduce") - @unittest.skipIf(not mpi_support, mpi_error_msg) - @unittest.skipIf(not epa_loaded, epa_error_msg) - @unittest.skipIf(multi_rank, multi_rank_error_msg) - def test_Ariel_mpi_reduce_01_epa(self): - self.ariel_Template(threads=1, ranks=1, program="reduce", frontend="epa") - +# @unittest.skipIf(not mpi_support, mpi_error_msg) +# @unittest.skipIf(not epa_loaded, epa_error_msg) +# @unittest.skipIf(multi_rank, multi_rank_error_msg) +# def test_Ariel_mpi_reduce_01_epa(self): +# self.ariel_Template(threads=1, ranks=1, program="reduce", frontend="epa") +# @unittest.skipIf(not mpi_support, mpi_error_msg) @unittest.skipIf(not pin_loaded, pin_error_msg) @unittest.skipIf(multi_rank, multi_rank_error_msg) @@ -199,12 +199,12 @@ def test_Ariel_mpi_reduce_01_epa(self): def test_Ariel_mpi_reduce_02_pin(self): self.ariel_Template(threads=2, ranks=2, program="reduce") - @unittest.skipIf(not mpi_support, mpi_error_msg) - @unittest.skipIf(not epa_loaded, epa_error_msg) - @unittest.skipIf(multi_rank, multi_rank_error_msg) - @unittest.skipIf(using_osx, osx_error_msg) - def test_Ariel_mpi_reduce_02_epa(self): - self.ariel_Template(threads=2, ranks=2, program="reduce", frontend="epa") +# @unittest.skipIf(not mpi_support, mpi_error_msg) +# @unittest.skipIf(not epa_loaded, epa_error_msg) +# @unittest.skipIf(multi_rank, multi_rank_error_msg) +# @unittest.skipIf(using_osx, osx_error_msg) +# def test_Ariel_mpi_reduce_02_epa(self): +# self.ariel_Template(threads=2, ranks=2, program="reduce", frontend="epa") @unittest.skipIf(not mpi_support, mpi_error_msg) @unittest.skipIf(not pin_loaded, pin_error_msg) @@ -213,12 +213,12 @@ def test_Ariel_mpi_reduce_02_epa(self): def test_Ariel_mpi_reduce_03_pin(self): self.ariel_Template(threads=2, ranks=4, program="reduce", tracerank=1) - @unittest.skipIf(not mpi_support, mpi_error_msg) - @unittest.skipIf(not epa_loaded, epa_error_msg) - @unittest.skipIf(multi_rank, multi_rank_error_msg) - @unittest.skipIf(using_osx, osx_error_msg) - def test_Ariel_mpi_reduce_03_epa(self): - self.ariel_Template(threads=2, ranks=4, program="reduce", tracerank=1, frontend="epa") +# @unittest.skipIf(not mpi_support, mpi_error_msg) +# @unittest.skipIf(not epa_loaded, epa_error_msg) +# @unittest.skipIf(multi_rank, multi_rank_error_msg) +# @unittest.skipIf(using_osx, osx_error_msg) +# def test_Ariel_mpi_reduce_03_epa(self): +# self.ariel_Template(threads=2, ranks=4, program="reduce", tracerank=1, frontend="epa") def ariel_Template(self, threads, ranks, program="hello", tracerank=0, testtimeout=60, size=8000, frontend="pin"): # Set the paths to the various directories From 88ba37662248f86d613539df0a66f786bd1773f2 Mon Sep 17 00:00:00 2001 From: Patrick Lavin Date: Wed, 4 Mar 2026 15:16:24 -0700 Subject: [PATCH 6/7] set OMPI_MCA_rmaps_base_oversubscribe for MPI tests --- src/sst/elements/ariel/tests/testsuite_mpi_Ariel.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/sst/elements/ariel/tests/testsuite_mpi_Ariel.py b/src/sst/elements/ariel/tests/testsuite_mpi_Ariel.py index 5d613b3ec3..60302d94c4 100644 --- a/src/sst/elements/ariel/tests/testsuite_mpi_Ariel.py +++ b/src/sst/elements/ariel/tests/testsuite_mpi_Ariel.py @@ -255,6 +255,9 @@ def ariel_Template(self, threads, ranks, program="hello", tracerank=0, testtimeo log_debug("out file = {0}".format(outfile)) log_debug("err file = {0}".format(errfile)) + # Allow application ranks to share slots with SST ranks + os.environ['OMPI_MCA_rmaps_base_oversubscribe'] = '1' + # Run SST in the tests directory self.run_sst(sdlfile, outfile, errfile, set_cwd=ArielElementTestMPIDir, mpi_out_files=mpioutfiles, timeout_sec=testtimeout, other_args=other_args) From 84377cfda0b91f9d0a3443a1039bf2d9d4f6b847 Mon Sep 17 00:00:00 2001 From: Patrick Lavin Date: Mon, 20 Apr 2026 08:35:37 -0600 Subject: [PATCH 7/7] remove references to macOS in the pin3 frontend --- .../ariel/frontend/pin3/pin3frontend.cc | 20 ------------------- 1 file changed, 20 deletions(-) diff --git a/src/sst/elements/ariel/frontend/pin3/pin3frontend.cc b/src/sst/elements/ariel/frontend/pin3/pin3frontend.cc index 21e042b0cb..d4441caa91 100644 --- a/src/sst/elements/ariel/frontend/pin3/pin3frontend.cc +++ b/src/sst/elements/ariel/frontend/pin3/pin3frontend.cc @@ -17,9 +17,7 @@ #include #include "pin3frontend.h" #include -#if !defined(SST_COMPILE_MACOSX) #include -#endif #include #include #include @@ -317,23 +315,9 @@ int Pin3Frontend::forkChildProcess(const char* app, char** args, output->verbose(CALL_INFO, 1, 0, "Launching executable: %s...\n", app); if(0 == app_env.size()) { -#if defined(SST_COMPILE_MACOSX) - - char *dyldpath = getenv("DYLD_LIBRARY_PATH"); - - if(dyldpath) { - setenv("PIN_APP_DYLD_LIBRARY_PATH", dyldpath, 1); - setenv("PIN_DYLD_RESTORE_REQUIRED", "t", 1); - unsetenv("DYLD_LIBRARY_PATH"); - } - -#else #if defined(HAVE_SET_PTRACER) - prctl(PR_SET_PTRACER, getppid(), 0, 0 ,0); - #endif // End of HAVE_SET_PTRACER -#endif // End SST_COMPILE_MACOSX (else branch) int ret_code = execvp(app, args); perror("execvp"); @@ -543,11 +527,7 @@ void Pin3Frontend::parsePin3Params(Params& params) { // Parse pintool size_t tool_path_size = sizeof(char) * 1024; char* tool_path = (char*) malloc(tool_path_size); -#ifdef SST_COMPILE_MACOSX - snprintf(tool_path, tool_path_size, "%s/fesimple.dylib", ARIEL_STRINGIZE(ARIEL_TOOL_DIR)); -#else snprintf(tool_path, tool_path_size, "%s/fesimple.so", ARIEL_STRINGIZE(ARIEL_TOOL_DIR)); -#endif arieltool = params.find("arieltool", tool_path); if("" == arieltool) { output->fatal(CALL_INFO, -1, "The arieltool parameter specifying which PIN tool to run was not specified\n");