Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 25 additions & 12 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -310,7 +310,18 @@ endif()
# and generate documentation. Instead, we can simply include the list of .f90
# files that will eventually be used to compile <target>.

macro(HANDLE_SOURCES target useCommon)
macro(HANDLE_SOURCES target useCommon useOpenACC useOpenMP)

if (${useOpenACC} AND ${useOpenMP})
message(FATAL_ERROR "OpenACC and OpenMP at same time is unsupported.")
elseif (${useOpenACC})
set(MFC_GPU_MODE "OpenACC")
elseif (${useOpenMP})
set(MFC_GPU_MODE "OpenMP")
else()
set(MFC_GPU_MODE "")
endif()

set(${target}_DIR "${CMAKE_SOURCE_DIR}/src/${target}")
set(common_DIR "${CMAKE_SOURCE_DIR}/src/common")

Expand Down Expand Up @@ -372,6 +383,7 @@ macro(HANDLE_SOURCES target useCommon)
-D MFC_${${target}_UPPER}
-D MFC_COMPILER="${CMAKE_Fortran_COMPILER_ID}"
-D MFC_CASE_OPTIMIZATION=False
-D MFC_GPU_MODE="${MFC_GPU_MODE}"
-D chemistry=False
--line-numbering
--no-folding
Expand All @@ -388,10 +400,10 @@ macro(HANDLE_SOURCES target useCommon)
endmacro()


HANDLE_SOURCES(pre_process ON)
HANDLE_SOURCES(simulation ON)
HANDLE_SOURCES(post_process ON)
HANDLE_SOURCES(syscheck OFF)
# Invoke HANDLE_SOURCES once per target using the four-argument form
# (target useCommon useOpenACC useOpenMP). pre_process and post_process pass
# OFF for both GPU flags; simulation and syscheck forward the values of
# MFC_OpenACC and MFC_OpenMP. The macro aborts with FATAL_ERROR when both GPU
# flags are true for the same target.
HANDLE_SOURCES(pre_process ON OFF OFF)
HANDLE_SOURCES(simulation ON ${MFC_OpenACC} ${MFC_OpenMP})
HANDLE_SOURCES(post_process ON OFF OFF)
HANDLE_SOURCES(syscheck OFF ${MFC_OpenACC} ${MFC_OpenMP})


# MFC_SETUP_TARGET: Given a target (herein <target>), this macro creates a new
Expand Down Expand Up @@ -430,6 +442,7 @@ function(MFC_SETUP_TARGET)

foreach (a_target ${IPO_TARGETS})
set_target_properties(${a_target} PROPERTIES Fortran_PREPROCESS ON)
message(STATUS ${CMAKE_Fortran_COMPILER_ID})

target_include_directories(${a_target} PRIVATE
"${CMAKE_SOURCE_DIR}/src/common"
Expand All @@ -450,8 +463,8 @@ function(MFC_SETUP_TARGET)
if (MFC_MPI AND ARGS_MPI)
find_package(MPI COMPONENTS Fortran REQUIRED)

target_compile_definitions(${a_target} PRIVATE MFC_MPI)
target_link_libraries (${a_target} PRIVATE MPI::MPI_Fortran)
target_compile_definitions(${a_target} PRIVATE $ENV{CRAY_MPICH_INC})
target_link_libraries (${a_target} PRIVATE $ENV{CRAY_MPICH_LIB})
endif()

if (ARGS_SILO)
Expand All @@ -470,8 +483,8 @@ function(MFC_SETUP_TARGET)
find_package(CUDAToolkit REQUIRED)
target_link_libraries(${a_target} PRIVATE CUDA::cudart CUDA::cufft)
else()
find_package(hipfort COMPONENTS hipfft CONFIG REQUIRED)
target_link_libraries(${a_target} PRIVATE hipfort::hipfft)
# find_package(hipfort COMPONENTS hipfft CONFIG REQUIRED)
target_link_libraries(${a_target} PRIVATE $ENV{CRAY_HIPFORT_INC})
endif()
else()
find_package(FFTW REQUIRED)
Expand Down Expand Up @@ -517,8 +530,8 @@ function(MFC_SETUP_TARGET)
target_compile_options(${a_target} PRIVATE -fopenmp)
target_link_options(${a_target} PRIVATE -fopenmp)
elseif(CMAKE_Fortran_COMPILER_ID STREQUAL "LLVMFlang")
target_compile_options(${a_target} PRIVATE -fopenmp --offload-arch=gfx90a)
target_link_options(${a_target} PRIVATE -fopenmp --offload-arch=gfx90a)
target_compile_options(${a_target} PRIVATE -fopenmp --offload-arch=gfx90a $ENV{CRAY_MPICH_INC})
target_link_options(${a_target} PRIVATE -fopenmp --offload-arch=gfx90a $ENV{CRAY_MPICH_LIB})
endif()
endif()

Expand Down Expand Up @@ -584,7 +597,7 @@ function(MFC_SETUP_TARGET)
PRIVATE -DFRONTIER_UNIFIED)
endif()

find_package(hipfort COMPONENTS hip CONFIG REQUIRED)
find_package(hipfort COMPONENTS hip CONFIG REQUIRED)
target_link_libraries(${a_target} PRIVATE hipfort::hip hipfort::hipfort-amdgcn flang_rt.hostdevice)
endif()
elseif (CMAKE_Fortran_COMPILER_ID STREQUAL "Cray")
Expand Down
39 changes: 32 additions & 7 deletions load_amd.sh
Original file line number Diff line number Diff line change
@@ -1,7 +1,32 @@
module use /ccs/home/bcornille/afar-drops/modulefiles/Core/
module load rocm-afar-drop mpich cray-python
module load cmake
#export OMPX_APU_MAPS=0
#export HSA_XNACK=0
#export LIBOMPTARGET_INFO=0
#export AMD_LOG_LEVEL=1
# Environment setup for building with the amdflang compiler from an AFAR drop.
# NOTE(review): presumably meant to be sourced (not executed) so that the
# module loads and exports persist in the calling shell - confirm.

# Load the toolchain modules.
module load python cmake
module load cpe/25.09
module load PrgEnv-amd

# Locate the AFAR compiler drop.
# AFAR_UMS_LATEST holds the last directory listed under AFAR_UMS_BASEDIR, but
# it is not referenced again in this script; OLCF_AFAR_ROOT is pinned to one
# specific drop directory instead.
AFAR_UMS_BASEDIR="/sw/crusher/ums/compilers/afar"
AFAR_UMS_LATEST=$(ls -d --color=never ${AFAR_UMS_BASEDIR}/*/ | tail -n1)
export OLCF_AFAR_ROOT=${AFAR_UMS_BASEDIR}/"rocm-afar-8873-drop-22.2.0"

# Put the drop's LLVM binaries and libraries ahead of anything already set.
export PATH=${OLCF_AFAR_ROOT}/lib/llvm/bin:${PATH}
export LD_LIBRARY_PATH=${OLCF_AFAR_ROOT}/lib:${OLCF_AFAR_ROOT}/lib/llvm/lib:${LD_LIBRARY_PATH}

# Include (-I) and link (-L/-l) flag strings exported for the build; the CMake
# configuration reads CRAY_MPICH_INC, CRAY_MPICH_LIB and CRAY_HIPFORT_INC
# through $ENV{...}.
# CRAY_MPICH_PREFIX, CRAY_PMI_POST_LINK_OPTS and CRAY_LD_LIBRARY_PATH are not
# set in this script - assumed to come from the loaded modules (TODO confirm).
export CRAY_MPICH_INC="-I${OLCF_AFAR_ROOT}/include/mpich3.4a2"
export CRAY_HIPFORT_INC="-I${OLCF_AFAR_ROOT}/include/hipfort/amdgcn"
export CRAY_HIP_INC="-I${OLCF_AFAR_ROOT}/include/hip"
export CRAY_MPICH_LIB="-L${CRAY_MPICH_PREFIX}/lib \
${CRAY_PMI_POST_LINK_OPTS} \
-lmpifort_amd -lmpi_amd -lmpi -lpmi -lpmi2"
export LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:${CRAY_LD_LIBRARY_PATH}"
# Let CMake search the drop first, and select amdflang as the Fortran compiler.
export CMAKE_PREFIX_PATH="${OLCF_AFAR_ROOT}:${CMAKE_PREFIX_PATH}"
export FC="${OLCF_AFAR_ROOT}/bin/amdflang"

# Make sure MPICH_GPU_SUPPORT_ENABLED is not set in this environment.
unset MPICH_GPU_SUPPORT_ENABLED

# Alternative module-based setup, currently disabled.
# module use /ccs/home/bcornille/afar-drops/modulefiles/Core/
# module load rocm-afar-drop mpich cray-python
# module load cmake

# Runtime environment settings exported for later runs.
# NOTE(review): the exact meaning of each variable is not documented here -
# confirm against the OpenMP offload / ROCm runtime documentation.
export OMPX_APU_MAPS=0
export HSA_XNACK=0
export LIBOMPTARGET_INFO=0
export AMD_LOG_LEVEL=0
export OFFLOAD_TRACK_ALLOCATION_TRACES=false
66 changes: 15 additions & 51 deletions src/common/include/omp_macros.fpp
Original file line number Diff line number Diff line change
@@ -1,11 +1,5 @@
#:include 'shared_parallel_macros.fpp'

#! Compiler identifier strings that the macros in this file compare against
#! MFC_COMPILER to select compiler-specific OpenMP directives.
#:set NVIDIA_COMPILER_ID="NVHPC"
#:set PGI_COMPILER_ID="PGI"
#:set INTEL_COMPILER_ID="Intel"
#:set CCE_COMPILER_ID="Cray"
#:set AMD_COMPILER_ID="LLVMFlang"

#:def OMP_MAP_STR(map_type, var_list)
#:assert map_type is not None
#:assert isinstance(map_type, str)
Expand All @@ -23,11 +17,11 @@
#:assert isinstance(default, str)
#:assert (default == 'present' or default == 'none')
#:if default == 'present'
#:if MFC_COMPILER == NVIDIA_COMPILER_ID or MFC_COMPILER == PGI_COMPILER_ID
#:if USING_NVHPC
#:set default_val = 'defaultmap(tofrom:aggregate) defaultmap(tofrom:allocatable) defaultmap(tofrom:pointer) '
#:elif MFC_COMPILER == CCE_COMPILER_ID
#:elif USING_CCE
#:set default_val = 'defaultmap(tofrom:aggregate) defaultmap(present:allocatable) defaultmap(present:pointer) '
#:elif MFC_COMPILER == AMD_COMPILER_ID
#:elif USING_AMD
#:set default_val = ''
#:else
#:set default_val = 'defaultmap(tofrom:aggregate) defaultmap(tofrom:allocatable) defaultmap(tofrom:pointer) '
Expand Down Expand Up @@ -176,13 +170,13 @@
& deviceptr_val.strip('\n') + attach_val.strip('\n')
#! Hardcoding the parallelism for now

#:if MFC_COMPILER == NVIDIA_COMPILER_ID or MFC_COMPILER == PGI_COMPILER_ID
#:if USING_NVHPC
#:set omp_start_directive = '!$omp target teams loop defaultmap(firstprivate:scalar) bind(teams,parallel) '
#:set omp_end_directive = '!$omp end target teams loop'
#:elif MFC_COMPILER == CCE_COMPILER_ID
#:elif USING_CCE
#:set omp_start_directive = '!$omp target teams distribute parallel do simd defaultmap(firstprivate:scalar) '
#:set omp_end_directive = '!$omp end target teams distribute parallel do simd'
#:elif MFC_COMPILER == AMD_COMPILER_ID
#:elif USING_AMD
#:set omp_start_directive = '!$omp target teams distribute parallel do '
#:set omp_end_directive = '!$omp end target teams distribute parallel do'
#:else
Expand Down Expand Up @@ -223,11 +217,11 @@
& deviceptr_val.strip('\n') + attach_val.strip('\n')
#! Hardcoding the parallelism for now

#:if MFC_COMPILER == NVIDIA_COMPILER_ID or MFC_COMPILER == PGI_COMPILER_ID
#:if USING_NVHPC
#:set omp_start_directive = '!$omp target teams loop defaultmap(firstprivate:scalar) bind(teams,parallel) '
#:elif MFC_COMPILER == CCE_COMPILER_ID
#:elif USING_CCE
#:set omp_start_directive = '!$omp target teams distribute parallel do simd defaultmap(firstprivate:scalar) '
#:elif MFC_COMPILER == AMD_COMPILER_ID
#:elif USING_AMD
#:set omp_start_directive = '!$omp target teams distribute parallel do '
#:else
#:set omp_start_directive = '!$omp target teams loop defaultmap(firstprivate:scalar) bind(teams,parallel) '
Expand All @@ -239,11 +233,11 @@

#:def END_OMP_PARALLEL_LOOP()

#:if MFC_COMPILER == NVIDIA_COMPILER_ID or MFC_COMPILER == PGI_COMPILER_ID
#:if USING_NVHPC
#:set omp_end_directive = '!$omp end target teams loop'
#:elif MFC_COMPILER == CCE_COMPILER_ID
#:elif USING_CCE
#:set omp_end_directive = '!$omp end target teams distribute parallel do simd'
#:elif MFC_COMPILER == AMD_COMPILER_ID
#:elif USING_AMD
#:set omp_end_directive = '!$omp end target teams distribute parallel do'
#:else
#:set omp_end_directive = '!$omp end target teams loop'
Expand All @@ -266,7 +260,7 @@
#:set function_name_val = ''
#:endif

#:if MFC_COMPILER == AMD_COMPILER_ID
#:if USING_AMD
#:set clause_val = ''
#:else
#:set clause_val = nohost_val.strip('\n')
Expand All @@ -290,9 +284,9 @@

#! Not fully implemented yet (ignores most args right now)
#:def OMP_LOOP(collapse=None, parallelism=None, data_dependency=None, reduction=None, reductionOp=None, private=None, extraOmpArgs=None)
#:if MFC_COMPILER == NVIDIA_COMPILER_ID or MFC_COMPILER == PGI_COMPILER_ID
#:if USING_NVHPC
#:set omp_directive = '!$omp loop bind(thread)'
#:elif MFC_COMPILER == CCE_COMPILER_ID or MFC_COMPILER == AMD_COMPILER_ID
#:elif USING_CCE or USING_AMD
#:set omp_directive = ''
#:else
#:set omp_directive = ''
Expand Down Expand Up @@ -390,34 +384,4 @@
#:set omp_directive = '!$omp barrier ' + clause_val + extraOmpArgs_val.strip('\n')
$:omp_directive
#:enddef

#! UNDEF_AMD(code): emit `code` only when MFC_COMPILER is not AMD_COMPILER_ID;
#! otherwise the macro expands to nothing.
#:def UNDEF_AMD(code)
#:if MFC_COMPILER != AMD_COMPILER_ID
$:code
#:endif
#:enddef

#! DEF_AMD(code): emit `code` only when MFC_COMPILER equals AMD_COMPILER_ID;
#! otherwise the macro expands to nothing.
#:def DEF_AMD(code)
#:if MFC_COMPILER == AMD_COMPILER_ID
$:code
#:endif
#:enddef

#! UNDEF_CCE(code): emit `code` only when MFC_COMPILER is not CCE_COMPILER_ID;
#! otherwise the macro expands to nothing.
#:def UNDEF_CCE(code)
#:if MFC_COMPILER != CCE_COMPILER_ID
$:code
#:endif
#:enddef

#! DEF_CCE(code): emit `code` only when MFC_COMPILER equals CCE_COMPILER_ID;
#! otherwise the macro expands to nothing.
#:def DEF_CCE(code)
#:if MFC_COMPILER == CCE_COMPILER_ID
$:code
#:endif
#:enddef

#! UNDEF_NVIDIA(code): emit `code` only when MFC_COMPILER is neither
#! NVIDIA_COMPILER_ID nor PGI_COMPILER_ID; otherwise the macro expands to
#! nothing.
#:def UNDEF_NVIDIA(code)
#:if MFC_COMPILER != NVIDIA_COMPILER_ID and MFC_COMPILER != PGI_COMPILER_ID
$:code
#:endif
#:enddef
! New line at end of file is required for FYPP
Loading
Loading