diff --git a/.gitignore b/.gitignore
index dfec331..74334c5 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,8 @@
 *.o
 *.a
+*.so
 *~
 *.ptx
 *.ptxh
+*.cle
+*.clh
diff --git a/gpl-3.0.txt b/LICENSE
similarity index 100%
rename from gpl-3.0.txt
rename to LICENSE
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..fd28e3b
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,226 @@
+# Build system for the Sapporo2 library.
+#
+# User-tunable variables (environment or command line):
+#   PREFIX   installation prefix (default: /usr/local)
+#   BACKEND  CUDA or OpenCL; auto-detected when unset, preferring CUDA
+#   CUDA_TK  root of the CUDA toolkit, used when nvcc is not on the PATH
+#   OPENCL   OpenCL installation prefix, for non-standard locations
+
+CXX ?= g++
+CC ?= gcc
+PREFIX ?= /usr/local
+
+# Delete a half-written target (e.g. a truncated .ptxh) if its recipe fails.
+.DELETE_ON_ERROR:
+
+
+.PHONY: all
+all: libsapporo.a libsapporo.so emulated_interfaces
+
+
+# Detect CUDA: honour CUDA_TK if set, else locate nvcc with the POSIX 'command -v'.
+ifndef CUDA_TK
+  NVCC := $(shell command -v nvcc 2>/dev/null || echo NOTFOUND)
+  ifeq ($(NVCC), NOTFOUND)
+    $(info The nvcc command is not available in your shell.)
+    $(info To compile with CUDA, please install it, set up your environment)
+    $(info according to the CUDA installation instructions, and try again.)
+    $(info )
+  else
+    CUDA_TK := $(dir $(NVCC))..
+    CUDA_AVAILABLE := 1
+  endif
+else
+  NVCC ?= $(CUDA_TK)/bin/nvcc
+  CUDA_AVAILABLE := 1
+endif
+
+
+# Detect OpenCL by trying to link an empty program against libOpenCL.
+OPENCL_LDFLAGS := -lOpenCL
+ifdef OPENCL
+  OPENCL_LDFLAGS := -L$(OPENCL)/lib -lOpenCL
+endif
+
+OPENCL_STATUS := $(shell echo 'int main() {}' | $(CXX) -x c++ $(OPENCL_LDFLAGS) - && rm a.out || echo NOTFOUND)
+
+ifeq ($(OPENCL_STATUS), NOTFOUND)
+  $(info OpenCL support was not detected on the system.)
+  $(info If it is installed in a non-standard location, then set OPENCL to)
+  $(info the installation prefix and try again.)
+  $(info )
+else
+  OPENCL_AVAILABLE := 1
+endif
+
+
+# Select backend (skipped for 'make clean', which needs no backend)
+ifeq ($(filter clean,$(MAKECMDGOALS)),)
+ifndef BACKEND
+  ifdef CUDA_AVAILABLE
+    $(info BACKEND not set and CUDA was detected, using CUDA)
+    BACKEND := CUDA
+  else
+    ifdef OPENCL_AVAILABLE
+      $(info BACKEND not set and OpenCL was detected, using OpenCL)
+      BACKEND := OpenCL
+    else
+      $(error BACKEND not set and neither CUDA nor OpenCL was detected.)
+    endif
+  endif
+else
+  ifeq ($(BACKEND), CUDA)
+    ifndef CUDA_AVAILABLE
+      $(error BACKEND set to CUDA but it was not found.)
+    endif
+    $(info Using selected backend CUDA)
+  else
+    ifeq ($(BACKEND), OpenCL)
+      ifndef OPENCL_AVAILABLE
+        $(error BACKEND set to OpenCL but it was not found.)
+      endif
+    else
+      $(error BACKEND set to unknown value "$(BACKEND)", please use CUDA or OpenCL)
+    endif
+    $(info Using selected backend OpenCL)
+  endif
+endif
+endif
+$(info )
+
+# Testing/optimisation support
+ifdef NTHREADS
+  CXXFLAGS += -DNTHREADS=$(NTHREADS) -DTIMING_STATS=1
+endif
+
+ifdef NBLOCKS_PER_MULTI
+  CXXFLAGS += -DNBLOCKS_PER_MULTI=$(NBLOCKS_PER_MULTI) -DTIMING_STATS=1
+endif
+
+
+# CUDA kernels
+ifeq ($(BACKEND), CUDA)
+
+# The CUDA headers live in the toolkit's include directory, not in its root.
+INCLUDES = -I$(CUDA_TK)/include
+CXXFLAGS += -D__INCLUDE_KERNELS__
+LDFLAGS += -lcuda -fopenmp
+
+CUDA_SRC = $(wildcard src/CUDA/*.cu)
+PTX = $(CUDA_SRC:src/CUDA/%.cu=src/CUDA/%.ptx)
+PTXH = $(CUDA_SRC:src/CUDA/%.cu=src/CUDA/%.ptxh)
+NVCCFLAGS += -Isrc
+
+KERNELS = $(PTX) $(PTXH)
+
+%.ptx: %.cu
+	$(NVCC) --forward-unknown-to-host-compiler $(CXXFLAGS) $(NVCCFLAGS) -ptx $< -o $@
+
+src/CUDA/%.ptxh: src/CUDA/%.ptx
+	xxd -i $< $@
+
+endif
+
+
+# OpenCL kernels
+ifeq ($(BACKEND), OpenCL)
+
+ifdef OPENCL
+  CXXFLAGS += -I$(OPENCL)/include
+  LDFLAGS += -L$(OPENCL)/lib
+endif
+
+INCLUDES =
+CXXFLAGS += -D_OCL_ -D__INCLUDE_KERNELS__
+LDFLAGS += -lOpenCL -fopenmp
+
+OPENCL_SRC = $(wildcard src/OpenCL/*.cl)
+CLE = $(OPENCL_SRC:src/OpenCL/%.cl=src/OpenCL/%.cle)
+CLH = $(OPENCL_SRC:src/OpenCL/%.cl=src/OpenCL/%.clh)
+
+KERNELS = $(CLE) $(CLH)
+
+%.cle: %.cl
+	$(CC) -E -Isrc -o $@ - <$<
+
+# xxd names the variable after the file name argument, and we expect
+# the variable to not have a src_ prefix, so we have to remove it.
+src/OpenCL/%.clh: src/OpenCL/%.cle
+	cd src && xxd -i $(<:src/%=%) $(@:src/%=%)
+
+endif
+
+
+# Main implementation
+CXX_SRC := $(wildcard src/*.cpp src/SSE_AVX/*.cpp)
+OBJS := $(CXX_SRC:%.cpp=%.o)
+INCLUDES += -Isrc
+CXXFLAGS += $(INCLUDES) -fPIC -g -O3 -Wall -Wextra -Wstrict-aliasing=2 -fopenmp
+
+src/sapporohostclass.o: $(KERNELS)
+
+%.o: %.cpp
+	$(CXX) $(CXXFLAGS) -c $< -o $@
+
+# 'r' replaces members on a rebuild; 'q' would append stale duplicates.
+libsapporo.a: $(OBJS)
+	$(AR) rcsv $@ $^
+
+libsapporo.so: $(OBJS)
+	$(CXX) -o $@ -shared $^ $(LDFLAGS)
+
+
+# API compatibility libraries
+EMU_SRC := $(wildcard src/interfaces/*lib.cpp)
+EMU_STATIC_LIBS := $(EMU_SRC:src/interfaces/%lib.cpp=lib%.a)
+EMU_SHARED_LIBS := $(EMU_SRC:src/interfaces/%lib.cpp=lib%.so)
+
+.PHONY: emulated_interfaces
+emulated_interfaces: $(EMU_STATIC_LIBS) $(EMU_SHARED_LIBS)
+
+$(EMU_STATIC_LIBS): libsapporo.a
+
+$(EMU_SHARED_LIBS): libsapporo.so
+
+
+# $^ also contains libsapporo.a/.so from the rules above; only archive and
+# link the object file, the main library is picked up through -lsapporo.
+lib%.a: src/interfaces/%lib.o
+	$(AR) rcsv $@ $(filter %.o,$^)
+
+lib%.so: src/interfaces/%lib.o
+	$(CXX) -o $@ -shared $(filter %.o,$^) -L. -lsapporo $(LDFLAGS)
+
+
+# Installation
+INSTALLED_LIBS := $(PREFIX)/lib/libsapporo.a $(PREFIX)/lib/libsapporo.so
+INSTALLED_LIBS += $(EMU_STATIC_LIBS:%.a=$(PREFIX)/lib/%.a)
+INSTALLED_LIBS += $(EMU_SHARED_LIBS:%.so=$(PREFIX)/lib/%.so)
+
+HEADERS := $(wildcard include/*)
+INSTALLED_HEADERS := $(HEADERS:include/%=$(PREFIX)/include/%)
+
+# The directories are order-only prerequisites: they must exist, but their
+# timestamps (which change on every install) must not trigger re-installs.
+$(PREFIX)/include:
+	mkdir -p $@
+
+$(PREFIX)/include/%: include/% | $(PREFIX)/include
+	install -m 644 $< $@
+
+$(PREFIX)/lib:
+	mkdir -p $@
+
+$(PREFIX)/lib/%: % | $(PREFIX)/lib
+	install -m 644 $< $@
+
+.PHONY: install
+install: $(INSTALLED_LIBS) $(INSTALLED_HEADERS)
+
+
+# Clean-up
+.PHONY: clean
+clean:
+	rm -f *.a *.so src/*.o src/interfaces/*.o src/SSE_AVX/*.o src/SSE_AVX/SSE/*.o src/SSE_AVX/AVX/*.o
+	rm -f src/CUDA/*.ptx src/CUDA/*.ptxh src/OpenCL/*.cle src/OpenCL/*.clh
+
diff --git a/README.extended b/README.extended
index c604e72..d81cb59 100644
--- a/README.extended
+++ b/README.extended
@@ -86,15 +86,33 @@
With some luck a simple 'make' in the lib folder is sufficient to build the library, if not then here are some pointers: CUDA -To build the CUDA library; Set the 'CUDA_TK' path to the location -where the CUDA toolkit is installed e.g.. CUDA_TK = /usr/local/cuda and -type: 'make' . + +If CUDA is installed via the nVidia installer, Conda, or HPC modules, +then it should be detected automatically by the build system. If it +somehow isn't, then you can try to set 'CUDA_TK' to the location where +it is installed, e.g. + +CUDA_TK=/usr/local/cuda make + OpenCL -To build the CUDA library; Set the 'CUDA_TK' path to the location -where the cuda or AMD OpenCL toolkit is installed eg. CUDA_TK = -/usr/local/cuda or CUDA_TK = /opt/AMDAPP/ and type: 'make -f -Makefile_ocl' . + +If OpenCL is installed in a standard location (e.g. via apt or yum), +then it should be detected automatically by the build system. If it +isn't, then you can set the 'OPENCL' variable to the location where +it is installed, e.g. + +OPENCL=/opt/opencl make + +If both CUDA and OpenCL are detected, then CUDA is used by default. 
+To select OpenCL, set BACKEND to OpenCL: + +BACKEND=OpenCL make + +You can combine these options as well: + +BACKEND=OpenCL OPENCL=/home/user/.local make + Interfaces: The library has built-in support for a couple of default interfaces to diff --git a/conda/conda_build_config.yaml b/conda/conda_build_config.yaml new file mode 100644 index 0000000..99df767 --- /dev/null +++ b/conda/conda_build_config.yaml @@ -0,0 +1,4 @@ +gpu_backend: + - cuda + - opencl + diff --git a/conda/meta.yaml b/conda/meta.yaml new file mode 100644 index 0000000..aa16b56 --- /dev/null +++ b/conda/meta.yaml @@ -0,0 +1,52 @@ +{% set name = "sapporo2" %} +{% set version = "0.0.1" %} + +package: + name: {{ name|lower }} + version: {{ version }} + +source: + path: ../ + # git_rev: work + # git_url: https://github.com/LourensVeen/sapporo2.git + +build: + number: 0 + string: {{ gpu_backend }}_{{ PKG_BUILDNUM }} + + script_env: + - BACKEND=CUDA # [gpu_backend == 'cuda'] + - BACKEND=OpenCL # [gpu_backend == 'opencl'] + script: make install + + missing_dso_whitelist: + - "*/libcuda.so*" + +requirements: + build: + - {{ compiler('cxx') }} + - git + - git-lfs + - make + - cuda-compiler # [linux and gpu_backend == 'cuda'] + - pocl # [linux and gpu_backend == 'opencl'] + - vim + # - conda-verify + + host: + - ocl-icd # [linux and gpu_backend == 'opencl'] + + run: + - cuda-runtime # [linux and gpu_backend == 'cuda'] + - libgcc-ng + - libstdcxx-ng + - _openmp_mutex + +test: + +about: + home: https://github.com/treecode/sapporo2 + summary: Library for emulating GRAPE6 n-body calculations + license: GPL-3.0-only + license_family: GPL + license_file: LICENSE diff --git a/include/sapporo6thlib.h b/include/sapporo6thlib.h new file mode 100644 index 0000000..87e245d --- /dev/null +++ b/include/sapporo6thlib.h @@ -0,0 +1,79 @@ +/* 6th order integrator as implemented by Sapporo2 +*/ + +void initialize_special(int ndev, int *list); + +void initialize(); + +/* + +add = address +pos, vel, acc, jrk, snp, crk, +mass, 
+time = current particle time
+id = unique particle id
+eps2 = softening of j-particle
+
+*/
+void set_j_particle(int add, double pos[3], double vel[3], double acc[3],
+        double jrk[3], double snp[3], double crk[3], double mass,
+        double time, int id, double eps2);
+
+/*
+
+Set time of the prediction
+
+time = time to which particles are predicted
+nj = amount of particles that are predicted
+
+*/
+void predict_all(double time, int nj);
+
+/*
+
+Do not execute prediction, but only copy the particles
+into the predicted buffers.
+
+*/
+void no_predict_all(double time, int nj);
+
+/*
+
+Return the predicted values for a particle at an address
+
+addr = address of the particle
+
+id = the particle id
+mass = the mass of the particle
+eps2 = the softening value of the particle
+pos = buffer to store predicted position
+vel = buffer to store predicted velocity
+acc = buffer to store predicted acceleration
+
+*/
+void pick_up_predictor_2(int addr, int &id, double &mass, double &eps2,
+        double pos[3], double vel[3], double acc[3]);
+
+/*
+
+Calculate the gravity on the i-particles
+
+//Input
+ni = number of particles to be integrated
+nj = number of sources
+pos, vel, acc, mass, eps2
+
+//Output
+acc, jrk, snp, potential (phi)
+nnb = nearest neighbour ID
+nnb_r2 = distance to the nearest neighbour. (Squared distance + softening)
+nnb_r2 = double r2 = EPS2 + dx*dx + dy*dy + dz*dz;
+
+*/
+
+void calc_force_on_predictors(int ni, int nj, int ids[], double pos[][3],
+        double vel[][3],double acc[][3], double mass[],
+        double eps2[], double accNew[][3], double jrkNew[][3],
+        double snpNew[][3], double crkNew[][3], double phi[],
+        int nnb[], double nnb_r2[]);
+
diff --git a/include/sapporoG6lib.h b/include/sapporoG6lib.h
new file mode 100644
index 0000000..804fe99
--- /dev/null
+++ b/include/sapporoG6lib.h
@@ -0,0 +1,190 @@
+/* GRAPE6 API as implemented by Sapporo2
+ *
+ * See https://www.cfca.nao.ac.jp/files/grape6user.pdf for the documentation.
+ *
+ * Note that some functions appear to be missing, e.g. g6_set_nip() and
+ * g6_set_i_particle_scales_from_real_value().
+*/
+
+#pragma once
+
+// Fortran ABI
+int g6_open_(int *id);
+
+int g6_close_(int *id);
+
+int g6_npipes_();
+
+int g6_set_tunit_(double*);
+
+int g6_set_xunit_(double*);
+
+int g6_set_ti_(int *id, double *ti);
+
+int g6_set_j_particle_(int *cluster_id,
+        int *address,
+        int *index,
+        double *tj, double *dtj,
+        double *mass,
+        double k18[3], double j6[3],
+        double a2[3], double v[3], double x[3]);
+
+void g6calc_firsthalf_(int *cluster_id,
+        int *nj, int *ni,
+        int index[],
+        double xi[][3], double vi[][3],
+        double aold[][3], double j6old[][3],
+        double phiold[3],
+        double *eps2, double h2[]);
+
+int g6calc_lasthalf_(int *cluster_id,
+        int *nj, int *ni,
+        int index[],
+        double xi[][3], double vi[][3],
+        double *eps2, double h2[],
+        double acc[][3], double jerk[][3], double pot[]);
+
+int g6calc_lasthalf2_(int *cluster_id,
+        int *nj, int *ni,
+        int index[],
+        double xi[][3], double vi[][3],
+        double *eps2, double h2[],
+        double acc[][3], double jerk[][3], double pot[],
+        int *inn);
+
+// No-op stubs that always return 0; 'inline' lets several translation units include this header.
+inline int g6_initialize_jp_buffer_(int* cluster_id, int* buf_size) { (void)cluster_id; (void)buf_size; return 0; }
+inline int g6_flush_jp_buffer_(int* cluster_id) { (void)cluster_id; return 0; }
+inline int g6_reset_(int* cluster_id) { (void)cluster_id; return 0; }
+inline int g6_reset_fofpga_(int* cluster_id) { (void)cluster_id; return 0; }
+
+int g6_read_neighbour_list_(int* cluster_id);
+
+int g6_get_neighbour_list_(int *cluster_id,
+        int *ipipe,
+        int *maxlength,
+        int *n_neighbours,
+        int neighbour_list[]);
+
+// This is not part of the GRAPE6 API, but is useful for debugging.
+void get_j_part_data(int addr, int nj,
+        double *pos,
+        double *vel,
+        double *acc,
+        double *jrk,
+        double *ppos,
+        double *pvel,
+        double &mass,
+        double &eps2,
+        int &id);
+
+
+// C ABI
+// These forward to the Fortran versions above, which are actually implemented by
+// Sapporo2.
+
+extern "C" {
+// Each wrapper takes the GRAPE6 C arguments by value and passes their addresses on.
+inline int g6_open(int id) {
+    return g6_open_(&id);
+}
+
+inline int g6_close(int id) {
+    return g6_close_(&id);
+}
+
+inline int g6_npipes() {
+    return g6_npipes_();
+}
+
+inline int g6_set_tunit(double tu) {
+    return g6_set_tunit_(&tu);
+}
+
+inline int g6_set_xunit(double xu) {
+    return g6_set_xunit_(&xu);
+}
+
+inline int g6_set_ti(int id, double ti) {
+    return g6_set_ti_(&id, &ti);
+}
+
+inline int g6_set_j_particle(int cluster_id,
+        int address,
+        int index,
+        double tj, double dtj,
+        double mass,
+        double k18[3], double j6[3],
+        double a2[3], double v[3], double x[3])
+{
+    return g6_set_j_particle_(
+            &cluster_id, &address, &index, &tj, &dtj, &mass, k18, j6, a2, v, x);
+}
+
+inline void g6calc_firsthalf(int cluster_id,
+        int nj, int ni,
+        int index[],
+        double xi[][3], double vi[][3],
+        double aold[][3], double j6old[][3],
+        double phiold[3],
+        double eps2, double h2[])
+{
+    g6calc_firsthalf_(
+            &cluster_id, &nj, &ni, index, xi, vi, aold, j6old, phiold, &eps2, h2);
+}
+
+inline int g6calc_lasthalf(int cluster_id,
+        int nj, int ni,
+        int index[],
+        double xi[][3], double vi[][3],
+        double eps2, double h2[],
+        double acc[][3], double jerk[][3], double pot[])
+{
+    return g6calc_lasthalf_(
+            &cluster_id, &nj, &ni, index, xi, vi, &eps2, h2, acc, jerk, pot);
+}
+
+inline int g6calc_lasthalf2(int cluster_id,
+        int nj, int ni,
+        int index[],
+        double xi[][3], double vi[][3],
+        double eps2, double h2[],
+        double acc[][3], double jerk[][3], double pot[],
+        int *inn)
+{
+    return g6calc_lasthalf2_(
+            &cluster_id, &nj, &ni, index, xi, vi, &eps2, h2, acc, jerk, pot, inn);
+}
+
+inline int g6_initialize_jp_buffer(int cluster_id, int buf_size) {
+    return g6_initialize_jp_buffer_(&cluster_id, &buf_size);
+}
+
+inline int g6_flush_jp_buffer(int cluster_id) {
+    return g6_flush_jp_buffer_(&cluster_id);
+}
+
+inline int g6_reset(int cluster_id) {
+    return g6_reset_(&cluster_id);
+}
+
+inline int g6_reset_fofpga(int cluster_id) {
+    return g6_reset_fofpga_(&cluster_id);
+}
+
+inline int g6_read_neighbour_list(int cluster_id) {
+    return g6_read_neighbour_list_(&cluster_id);
+}
+
+inline int g6_get_neighbour_list(int cluster_id,
+        int ipipe,
+        int maxlength,
+        int *n_neighbours,  // pointer so the callee's result reaches the caller
+        int neighbour_list[])
+{
+    return g6_get_neighbour_list_(
+            &cluster_id, &ipipe, &maxlength, n_neighbours, neighbour_list);
+}
+
+}
+
diff --git a/lib/Makefile b/lib/Makefile deleted file mode 100644 index f70fd40..0000000 --- a/lib/Makefile +++ /dev/null @@ -1,114 +0,0 @@ -CXX = g++ -CC = gcc -LD = g++ -F90 = ifort - -.SUFFIXES: .o .cpp .ptx .cu - -CUDA_TK ?= /usr/local/cuda - - -testRunFlags1= -testRunFlags2= -testRunFlags3= - -#Check for the defines - -ifdef NTHREADS - testRunFlags1="-D NTHREADS=$(NTHREADS)" - testRunFlags3="-D TIMING_STATS=1" -endif - -ifdef NBLOCKS_PER_MULTI - testRunFlags2="-D NBLOCKS_PER_MULTI=$(NBLOCKS_PER_MULTI)" - testRunFlags3="-D TIMING_STATS=1" -endif - -OFLAGS = -g -O3 -Wall -Wextra -Wstrict-aliasing=2 -fopenmp - -#Use below if compiling with CPU_SUPPORT (SSE) -#CXXFLAGS += ${testRunFlags} -fPIC $(OFLAGS) -I$(CUDA_TK)/include -msse4 -CXXFLAGS += ${testRunFlags} -fPIC $(OFLAGS) -I$(CUDA_TK)/include - -testRunFlags= $(testRunFlags1) $(testRunFlags2) $(testRunFlags3) -$(info $(testRunFlags)) - -NVCC = $(CUDA_TK)/bin/nvcc - - -# Support older CUDA versions out of the box -NVCCVERSION=$(shell "${NVCC}" --version | grep ^Cuda | sed 's/^.* //g') -ifeq "${NVCCVERSION}" "V5.5.22" - NVCCFLAGS ?= -arch sm_20 -else - NVCCFLAGS ?= -arch sm_30 -endif - -#NVCCFLAGS = -arch sm_35 -#NVCCFLAGS ?= -arch sm_30 -#NVCCFLAGS = -arch sm_20 -NVCCFLAGS += ${testRunFlags} - -# Use with Mac OS X -# NVCCFLAGS = -arch sm_12 -Xcompiler="-Duint=unsigned\ int" - -LDFLAGS = -lcuda -fopenmp - - -INCLUDEPATH = ./include -CXXFLAGS += -I$(INCLUDEPATH) -I./ -NVCCFLAGS += -I$(INCLUDEPATH) -I./ - -INTERFACEPATH =./interfaces - -CUDAKERNELSPATH = ./CUDAKernels -CUDAKERNELS = kernels.cu - -CUDAPTX = $(CUDAKERNELS:%.cu=$(CUDAKERNELSPATH)/%.ptx) - -SRCPATH = src -SRC = 
sapporohostclass.cpp sapporoG6lib.cpp sapporoYeblib.cpp sapporoG5lib.cpp sapporo6thlib.cpp -OBJ = $(SRC:%.cpp=%.o) - -LIBOBJ = sapporohostclass.o $(INTERFACEPATH)/sapporoG6lib.o $(INTERFACEPATH)/sapporoYeblib.o -LIBOBJ += $(INTERFACEPATH)/sapporoG5lib.o -TARGET = libsapporo.a - - -all: $(OBJ) $(CUDAPTX) $(TARGET) -kernels: $(CUDAPTX) - - -$(TARGET): $(LIBOBJ) - ar qv $@ $^ - -%.o: $(SRCPATH)/%.cpp - $(CXX) $(CXXFLAGS) -c $< -o $@ - -$(CUDAKERNELSPATH)/%.ptx: $(CUDAKERNELSPATH)/%.cu - $(NVCC) $(NVCCFLAGS) -ptx $< -o $@ - -$(INCLUDEPATH)/%.ptxh: $(CUDAKERNELSPATH)/%.ptx - xxd -i $< $@ - -clean: - /bin/rm -rf *.o *.ptx *.a - cd $(INTERFACEPATH); /bin/rm -rf *.o; cd .. - cd $(CUDAKERNELSPATH); /bin/rm -rf *.ptx; cd .. - rm -f *.ptxh $(INCLUDEPATH)/*.ptxh - -$(OBJ): $(INCLUDEPATH)/*.h - - -sapporohostclass.o : $(INCLUDEPATH)/kernels.ptxh $(INCLUDEPATH)/sapporohostclass.h $(INCLUDEPATH)/sapdevclass.h $(INCLUDEPATH)/defines.h -$(CUDAKERNELSPATH)/kernels.ptx : $(INCLUDEPATH)/defines.h - -libsapporo.a : sapporohostclass.o - - - - - - - - diff --git a/lib/Makefile_ocl b/lib/Makefile_ocl deleted file mode 100644 index 3e26eab..0000000 --- a/lib/Makefile_ocl +++ /dev/null @@ -1,105 +0,0 @@ -CXX = g++ -CC = gcc -LD = g++ -F90 = ifort - -.SUFFIXES: .o .cpp .ptx .cu - -CUDA_TK = /usr/local/cuda -#CUDA_TK = /opt/AMDAPP/ - - -testRunFlags1= -testRunFlags2= -testRunFlags3= - -#Check for the defines - -ifdef NTHREADS - testRunFlags1="-D NTHREADS=$(NTHREADS)" - testRunFlags3="-D TIMING_STATS=1" -endif - -ifdef NBLOCKS_PER_MULTI - testRunFlags2="-D NBLOCKS_PER_MULTI=$(NBLOCKS_PER_MULTI)" - testRunFlags3="-D TIMING_STATS=1" -endif - -OFLAGS = -g -Wall -Wextra -Wstrict-aliasing=2 -fopenmp -D_OCL_ -D__INCLUDE_KERNELS__ - -CXXFLAGS = ${testRunFlags} -fPIC $(OFLAGS) -I$(CUDA_TK)/include -msse4 - -testRunFlags= $(testRunFlags1) $(testRunFlags2) $(testRunFlags3) -$(info $(testRunFlags)) - -NVCC = $(CUDA_TK)/bin/nvcc -NVCCFLAGS = -arch sm_20 ${testRunFlags} - -# Use with Mac OS X -# NVCCFLAGS 
= -arch sm_12 -Xcompiler="-Duint=unsigned\ int" - -LDFLAGS = -lOpenCL -fopenmp - - -INCLUDEPATH = ./include -CXXFLAGS += -I$(INCLUDEPATH) -I./ -NVCCFLAGS += -I$(INCLUDEPATH) -I./ - -INTERFACEPATH =./interfaces - -SRCPATH = src -SRC = sapporohostclass.cpp sapporoG6lib.cpp sapporoYeblib.cpp sapporoG5lib.cpp sapporo6thlib.cpp -OBJ = $(SRC:%.cpp=%_ocl.o) - -LIBOBJ = sapporohostclass_ocl.o $(INTERFACEPATH)/sapporoG6lib_ocl.o $(INTERFACEPATH)/sapporoYeblib_ocl.o $(INTERFACEPATH)/sapporoG5lib_ocl.o -TARGET = libsapporo_ocl.a - -OCLKERNELSPATH = ./OpenCLKernels -OCLKERNELS=kernels4th.cl kernels4thDP.cl kernels6th.cl kernelsG5DS.cl kernelsG5SP.cl sharedKernels.cl -OPENCL_CLH = $(OCLKERNELS:%.cl=$(INCLUDEPATH)/%.clh) - -all: $(OBJ) $(TARGET) - echo $(OPENCL_CLH) - -$(TARGET): $(LIBOBJ) - ar qv $@ $^ - -%_ocl.o: $(SRCPATH)/%.cpp - $(CXX) $(CXXFLAGS) -c $< -o $@ - -$(INTERFACEPATH)/%_ocl.o: $(INTERFACEPATH)/%.cpp - $(CXX) $(CXXFLAGS) -c $< -o $@ - - -$(CUDAKERNELSPATH)/%.ptx: $(CUDAKERNELSPATH)/%.cu - $(NVCC) $(NVCCFLAGS) -ptx $< -o $@ - - -$(OCLKERNELSPATH)/%.cle: $(OCLKERNELSPATH)/%.cl - rm -f OpenCL - ln -s $(OCLKERNELSPATH) OpenCL - $(CC) -E -I. -c - -o $@ < $< - rm OpenCL - -$(INCLUDEPATH)/%.clh: $(OCLKERNELSPATH)/%.cle - xxd -i $< $@ - -clean: - /bin/rm -rf *.o *.ptx *.a - cd $(INTERFACEPATH); /bin/rm -rf *.o; cd .. - cd $(CUDAKERNELSPATH); /bin/rm -rf *.ptx; cd .. 
- rm -f *.clh $(INCLUDEPATH)/*.clh - -$(OBJ): $(INCLUDEPATH)/*.h - - -sapporohostclass_ocl.o : $(OPENCL_CLH) $(INCLUDEPATH)/sapporohostclass.h $(INCLUDEPATH)/sapdevclass.h $(INCLUDEPATH)/defines.h - -libsapporo_ocl.a : sapporohostclass_ocl.o - - - - - - - diff --git a/lib/CUDAKernels/kernels.cu b/src/CUDA/kernels.cu similarity index 99% rename from lib/CUDAKernels/kernels.cu rename to src/CUDA/kernels.cu index ebb271c..b0ca6ad 100644 --- a/lib/CUDAKernels/kernels.cu +++ b/src/CUDA/kernels.cu @@ -15,7 +15,7 @@ Sixt order hermite, in double precision, including neighbour lists #include -#include "include/defines.h" +#include "defines.h" #define inout #define __out @@ -206,7 +206,7 @@ __device__ __forceinline__ double RSQRT(double val) { return rsqrt(val); } // template<> __device__ __forceinline__ double RSQRT(double val) { return 1.0/sqrt(val); } - +#if __CUDA_ARCH__ < 600 __device__ double atomicAdd(double* address, double val) { unsigned long long int* address_as_ull = @@ -220,6 +220,7 @@ __device__ double atomicAdd(double* address, double val) } while (assumed != old); return __longlong_as_double(old); } +#endif __device__ __forceinline__ double atomicMin(double *address, double val) diff --git a/lib/OpenCLKernels/kernels4th.cl b/src/OpenCL/kernels4th.cl similarity index 100% rename from lib/OpenCLKernels/kernels4th.cl rename to src/OpenCL/kernels4th.cl diff --git a/lib/OpenCLKernels/kernels4thDP.cl b/src/OpenCL/kernels4thDP.cl similarity index 100% rename from lib/OpenCLKernels/kernels4thDP.cl rename to src/OpenCL/kernels4thDP.cl diff --git a/lib/OpenCLKernels/kernels6th.cl b/src/OpenCL/kernels6th.cl similarity index 100% rename from lib/OpenCLKernels/kernels6th.cl rename to src/OpenCL/kernels6th.cl diff --git a/lib/OpenCLKernels/kernelsG5DS.cl b/src/OpenCL/kernelsG5DS.cl similarity index 100% rename from lib/OpenCLKernels/kernelsG5DS.cl rename to src/OpenCL/kernelsG5DS.cl diff --git a/lib/OpenCLKernels/kernelsG5SP.cl b/src/OpenCL/kernelsG5SP.cl similarity index 
100% rename from lib/OpenCLKernels/kernelsG5SP.cl rename to src/OpenCL/kernelsG5SP.cl diff --git a/lib/OpenCLKernels/sharedKernels.cl b/src/OpenCL/sharedKernels.cl similarity index 100% rename from lib/OpenCLKernels/sharedKernels.cl rename to src/OpenCL/sharedKernels.cl diff --git a/lib/include/SSE_AVX/AVX/avx.h b/src/SSE_AVX/avx.h similarity index 94% rename from lib/include/SSE_AVX/AVX/avx.h rename to src/SSE_AVX/avx.h index 3e6570f..50352b6 100644 --- a/lib/include/SSE_AVX/AVX/avx.h +++ b/src/SSE_AVX/avx.h @@ -31,8 +31,8 @@ namespace SIMD namespace SIMD { -#include "avx_fp32.h" -#include "avx_fp64.h" +#include "SSE_AVX/avx_fp32.h" +#include "SSE_AVX/avx_fp64.h" template T broadcast(const T x) { return T::template broadcast(x); } diff --git a/lib/include/SSE_AVX/AVX/avx_fp32.h b/src/SSE_AVX/avx_fp32.h similarity index 100% rename from lib/include/SSE_AVX/AVX/avx_fp32.h rename to src/SSE_AVX/avx_fp32.h diff --git a/lib/include/SSE_AVX/AVX/avx_fp64.h b/src/SSE_AVX/avx_fp64.h similarity index 100% rename from lib/include/SSE_AVX/AVX/avx_fp64.h rename to src/SSE_AVX/avx_fp64.h diff --git a/lib/include/SSE_AVX/SSE/sse.h b/src/SSE_AVX/sse.h similarity index 94% rename from lib/include/SSE_AVX/SSE/sse.h rename to src/SSE_AVX/sse.h index 2b276fc..0b4bcf4 100644 --- a/lib/include/SSE_AVX/SSE/sse.h +++ b/src/SSE_AVX/sse.h @@ -31,8 +31,8 @@ namespace SIMD namespace SIMD { -#include "sse_fp32.h" -#include "sse_fp64.h" +#include "SSE_AVX/sse_fp32.h" +#include "SSE_AVX/sse_fp64.h" template T broadcast(const T x) { return T::template broadcast(x); } diff --git a/lib/include/SSE_AVX/SSE/sse_fp32.h b/src/SSE_AVX/sse_fp32.h similarity index 100% rename from lib/include/SSE_AVX/SSE/sse_fp32.h rename to src/SSE_AVX/sse_fp32.h diff --git a/lib/include/SSE_AVX/SSE/sse_fp64.h b/src/SSE_AVX/sse_fp64.h similarity index 100% rename from lib/include/SSE_AVX/SSE/sse_fp64.h rename to src/SSE_AVX/sse_fp64.h diff --git a/lib/include/cudadev.h b/src/cudadev.h similarity index 99% rename from 
lib/include/cudadev.h rename to src/cudadev.h index b2af740..042c13c 100644 --- a/lib/include/cudadev.h +++ b/src/cudadev.h @@ -710,12 +710,14 @@ namespace dev { // jitOptionCount++; // } - + +#if CUDA_VERSION < 6000 if(computeMode < CU_TARGET_COMPUTE_20) { fprintf(stderr,"Sapporo2 requires at least a Fermi or newer NVIDIA architecture.\n"); exit(-1); } +#endif //Set the architecture // { diff --git a/lib/include/defines.h b/src/defines.h similarity index 89% rename from lib/include/defines.h rename to src/defines.h index 16e01f7..726c836 100644 --- a/lib/include/defines.h +++ b/src/defines.h @@ -55,8 +55,15 @@ inline const char* get_kernelName(const int integrator, case SIXTH: if(precision == DOUBLESINGLE) { +#ifdef _OCL_ + fprintf(stderr, "ERROR: Sixth order integrator with double single precision"); + fprintf(stderr, "ERROR: is not implemented in OpenCL, only in CUDA. Please"); + fprintf(stderr, "ERROR: file an issue on GitHub if you need this combination."); + exit(1); +#else perThreadSM = sizeof(float4)*2 + sizeof(float4) + sizeof(float3); - return "dev_evaluate_gravity_sixth_DS"; +#endif + return "dev_evaluate_gravity_sixth_DS"; } else if(precision == DOUBLE){ #ifdef _OCL_ @@ -72,7 +79,7 @@ inline const char* get_kernelName(const int integrator, //Here we come if all switch/case/if combo's failed fprintf(stderr,"ERROR: Unknown combination of integrator type ( %d ) and precision ( %d ) \n", integrator, precision); - fprintf(stderr,"ERROR: See 'include/defines.h' for the possible combinations \n"); + fprintf(stderr,"ERROR: See 'defines.h' for the possible combinations \n"); exit(0); return ""; } diff --git a/lib/include/hostFunc.h b/src/hostFunc.h similarity index 100% rename from lib/include/hostFunc.h rename to src/hostFunc.h diff --git a/lib/interfaces/sapporo6thlib.cpp b/src/interfaces/sapporo6thlib.cpp similarity index 100% rename from lib/interfaces/sapporo6thlib.cpp rename to src/interfaces/sapporo6thlib.cpp diff --git a/lib/interfaces/sapporoG5lib.cpp 
b/src/interfaces/sapporoG5lib.cpp similarity index 100% rename from lib/interfaces/sapporoG5lib.cpp rename to src/interfaces/sapporoG5lib.cpp diff --git a/lib/interfaces/sapporoG6lib.cpp b/src/interfaces/sapporoG6lib.cpp similarity index 100% rename from lib/interfaces/sapporoG6lib.cpp rename to src/interfaces/sapporoG6lib.cpp diff --git a/lib/interfaces/sapporoYeblib.cpp b/src/interfaces/sapporoYeblib.cpp similarity index 100% rename from lib/interfaces/sapporoYeblib.cpp rename to src/interfaces/sapporoYeblib.cpp diff --git a/lib/include/ocldev.h b/src/ocldev.h similarity index 97% rename from lib/include/ocldev.h rename to src/ocldev.h index 453e67b..68ea286 100644 --- a/lib/include/ocldev.h +++ b/src/ocldev.h @@ -22,18 +22,18 @@ #endif #ifdef __INCLUDE_KERNELS__ -extern unsigned char OpenCLKernels_kernels4th_cle[]; -extern unsigned int OpenCLKernels_kernels4th_cle_len; -extern unsigned char OpenCLKernels_kernels4thDP_cle[]; -extern unsigned int OpenCLKernels_kernels4thDP_cle_len; -extern unsigned char OpenCLKernels_kernels6th_cle[]; -extern unsigned int OpenCLKernels_kernels6th_cle_len; -extern unsigned char OpenCLKernels_kernelsG5DS_cle[]; -extern unsigned int OpenCLKernels_kernelsG5DS_cle_len; -extern unsigned char OpenCLKernels_kernelsG5SP_cle[]; -extern unsigned int OpenCLKernels_kernelsG5SP_cle_len; -extern unsigned char OpenCLKernels_sharedKernels_cle[]; -extern unsigned int OpenCLKernels_sharedKernels_cle_len; +extern unsigned char OpenCL_kernels4th_cle[]; +extern unsigned int OpenCL_kernels4th_cle_len; +extern unsigned char OpenCL_kernels4thDP_cle[]; +extern unsigned int OpenCL_kernels4thDP_cle_len; +extern unsigned char OpenCL_kernels6th_cle[]; +extern unsigned int OpenCL_kernels6th_cle_len; +extern unsigned char OpenCL_kernelsG5DS_cle[]; +extern unsigned int OpenCL_kernelsG5DS_cle_len; +extern unsigned char OpenCL_kernelsG5SP_cle[]; +extern unsigned int OpenCL_kernelsG5SP_cle_len; +extern unsigned char OpenCL_sharedKernels_cle[]; +extern unsigned int 
OpenCL_sharedKernels_cle_len; #endif namespace dev { @@ -70,8 +70,8 @@ namespace dev { int data_len = 0; if(temp.rfind("kernels4th.cl") != string::npos) { - data = (char *) OpenCLKernels_kernels4th_cle; - data_len = OpenCLKernels_kernels4th_cle_len; + data = (char *) OpenCL_kernels4th_cle; + data_len = OpenCL_kernels4th_cle_len; fprintf(stderr, "Found compiled in version of file: %s\n", cFilename); } else { fprintf(stderr, "Could not find kernel file: %s\n", cFilename); @@ -574,8 +574,8 @@ namespace dev { void copy(const memory &src, const cl_bool OCL_BLOCKING = CL_TRUE) { assert(ContextFlag); if (n != src.n) { - ocl_free(); - cmalloc(src.n, DeviceMemFlags); + ocl_free(); + allocate(src.n, DeviceMemFlags); } oclSafeCall(clEnqueueCopyBuffer(CommandQueue, src.DeviceMem, diff --git a/lib/include/ocldev.h-default b/src/ocldev.h-default similarity index 100% rename from lib/include/ocldev.h-default rename to src/ocldev.h-default diff --git a/lib/include/sapdevclass.h b/src/sapdevclass.h similarity index 99% rename from lib/include/sapdevclass.h rename to src/sapdevclass.h index 01b996d..c7c175b 100644 --- a/lib/include/sapdevclass.h +++ b/src/sapdevclass.h @@ -12,7 +12,7 @@ to allocate, load, start functions, etc. */ #ifdef _OCL_ - #include "include/ocldev.h" + #include "ocldev.h" typedef cl_float2 float2; typedef cl_float4 float4; @@ -28,7 +28,7 @@ to allocate, load, start functions, etc. typedef cl_int4 int4; #else - #include "include/cudadev.h" + #include "cudadev.h" #endif #include @@ -37,7 +37,7 @@ to allocate, load, start functions, etc. 
#include -#include "include/defines.h" +#include "defines.h" namespace sapporo2 { diff --git a/lib/src/sapporohostclass.cpp b/src/sapporohostclass.cpp similarity index 99% rename from lib/src/sapporohostclass.cpp rename to src/sapporohostclass.cpp index 6cac1d4..b610a13 100644 --- a/lib/src/sapporohostclass.cpp +++ b/src/sapporohostclass.cpp @@ -18,14 +18,14 @@ vel_j.w = eps2 #ifdef __INCLUDE_KERNELS__ #ifdef _OCL_ -#include "kernels4th.clh" -#include "kernels4thDP.clh" -#include "kernels6th.clh" -#include "kernelsG5DS.clh" -#include "kernelsG5SP.clh" -#include "sharedKernels.clh" +#include "OpenCL/kernels4th.clh" +#include "OpenCL/kernels4thDP.clh" +#include "OpenCL/kernels6th.clh" +#include "OpenCL/kernelsG5DS.clh" +#include "OpenCL/kernelsG5SP.clh" +#include "OpenCL/sharedKernels.clh" #else -#include "kernels.ptxh" +#include "CUDA/kernels.ptxh" #endif #endif diff --git a/lib/include/sapporohostclass.h b/src/sapporohostclass.h similarity index 100% rename from lib/include/sapporohostclass.h rename to src/sapporohostclass.h diff --git a/lib/include/vec.h b/src/vec.h similarity index 100% rename from lib/include/vec.h rename to src/vec.h diff --git a/tests/.Makefile.swp b/tests/.Makefile.swp new file mode 100644 index 0000000..6f7c95f Binary files /dev/null and b/tests/.Makefile.swp differ diff --git a/tests/.gitignore b/tests/.gitignore new file mode 100644 index 0000000..583b07d --- /dev/null +++ b/tests/.gitignore @@ -0,0 +1,2 @@ +/test_* +CUDA diff --git a/testCodes/Makefile b/tests/Makefile similarity index 53% rename from testCodes/Makefile rename to tests/Makefile index 3d9324f..f453db9 100644 --- a/testCodes/Makefile +++ b/tests/Makefile @@ -1,24 +1,25 @@ -CXX = g++ -CC = gcc -LD = g++ -F90 = ifort +CXX ?= g++ +CC ?= gcc +LD ?= g++ +F90 ?= ifort .SUFFIXES: .o .cpp .ptx .cu -SAPPOROPATH=../lib/ +SAPPOROPATH=.. 
SAPLIB2 = sapporo SAPLIB = lib$(SAPLIB2).a +SAPLIBG6 = sapporoG6 -CUDA_TK = /usr/local/cuda +CUDA_TK ?= /usr/local/cuda -OFLAGS = -g -O3 -Wall -fopenmp -Wextra -Wstrict-aliasing=2 -fopenmp -CXXFLAGS = -fPIC $(OFLAGS) -I$(CUDA_TK)/include +OFLAGS = -g -O3 -Wall -Wextra -Wstrict-aliasing=2 -fopenmp +CXXFLAGS += -fPIC -fopenmp $(OFLAGS) -I$(CUDA_TK)/include -LDFLAGS = -lcuda -fopenmp -L$(CUDA_TK)/lib64 +LDFLAGS += -lcuda -L$(CUDA_TK)/lib64 INCLUDEPATH = $(SAPPOROPATH)/include -CXXFLAGS += -I$(INCLUDEPATH) -I./ -I $(SAPPOROPATH) +CXXFLAGS += -I$(INCLUDEPATH) -I./ -I $(SAPPOROPATH)/src SRCPATH = src @@ -31,34 +32,34 @@ all: $(OBJ) $(PROG) kernels kernels: - ln -s $(SAPPOROPATH)/CUDAKernels/ CUDA/ + rm -f CUDA && ln -s $(SAPPOROPATH)/CUDA CUDA #$(PROG): $(OBJ) -# $(LD) $(LDFLAGS) $^ -o $@ -L $(SAPPOROPATH) -lsapporo +# $(CXX) $(LDFLAGS) $^ -o $@ -L $(SAPPOROPATH) -lsapporo test_gravity_block_cuda : test_gravity_block.o - $(LD) $(LDFLAGS) $^ -o $@ -L $(SAPPOROPATH) -l$(SAPLIB2) $(LDFLAGS) + $(CXX) $(LDFLAGS) $^ -o $@ -L $(SAPPOROPATH) -l$(SAPLIB2) $(LDFLAGS) test_gravity_block_g5_cuda: test_gravity_block_g5.o - $(LD) $(LDFLAGS) $^ -o $@ -L $(SAPPOROPATH) -l$(SAPLIB2) $(LDFLAGS) + $(CXX) $(LDFLAGS) $^ -o $@ -L $(SAPPOROPATH) -l$(SAPLIB2) $(LDFLAGS) test_gravity_block_6th_cuda : test_gravity_block_6th.o - $(LD) $(LDFLAGS) $^ -o $@ -L $(SAPPOROPATH) -l$(SAPLIB2) $(LDFLAGS) + $(CXX) $(LDFLAGS) $^ -o $@ -L $(SAPPOROPATH) -l$(SAPLIB2) $(LDFLAGS) test_performance_rangeN_cuda : test_performance_rangeN.o - $(LD) $(LDFLAGS) $^ -o $@ -L $(SAPPOROPATH) -l$(SAPLIB2) $(LDFLAGS) + $(CXX) $(LDFLAGS) $^ -o $@ -L $(SAPPOROPATH) -l$(SAPLIB2) $(LDFLAGS) test_performance_blockStep_cuda : test_performance_blockStep.o - $(LD) $(LDFLAGS) $^ -o $@ -L $(SAPPOROPATH) -l$(SAPLIB2) $(LDFLAGS) + $(CXX) $(LDFLAGS) $^ -o $@ -L $(SAPPOROPATH) -l$(SAPLIB2) $(LDFLAGS) test_performance_rangeN_6th_cuda : test_performance_rangeN_6th.o - $(LD) $(LDFLAGS) $^ -o $@ -L $(SAPPOROPATH) -l$(SAPLIB2) $(LDFLAGS) + 
$(CXX) $(LDFLAGS) $^ -o $@ -L $(SAPPOROPATH) -l$(SAPLIB2) $(LDFLAGS) test_performance_rangeN_g5_cuda : test_performance_rangeN_g5.o - $(LD) $(LDFLAGS) $^ -o $@ -L $(SAPPOROPATH) -l$(SAPLIB2) $(LDFLAGS) + $(CXX) $(LDFLAGS) $^ -o $@ -L $(SAPPOROPATH) -l$(SAPLIB2) $(LDFLAGS) test_integrator_cuda : test_integrator.o - $(LD) $(LDFLAGS) $^ -o $@ -L $(SAPPOROPATH) -l$(SAPLIB2) $(LDFLAGS) + $(CXX) $(LDFLAGS) $^ -o $@ -L $(SAPPOROPATH) -l$(SAPLIBG6) $(LDFLAGS) %.o: $(SRCPATH)/%.cpp @@ -66,15 +67,4 @@ test_integrator_cuda : test_integrator.o clean: - /bin/rm -rf *.o *.ptx *.a $(PROG) - - -$(OBJ): $(SAPPOROPATH)/$(SAPLIB) - - - - - - - - + /bin/rm -rf *.o *.ptx *.a $(PROG) CUDA diff --git a/testCodes/Makefile_ocl b/tests/Makefile_ocl similarity index 53% rename from testCodes/Makefile_ocl rename to tests/Makefile_ocl index 9a50894..11ea0c1 100644 --- a/testCodes/Makefile_ocl +++ b/tests/Makefile_ocl @@ -1,24 +1,22 @@ -CXX = g++ -CC = gcc -LD = g++ -F90 = ifort +CXX ?= g++ .SUFFIXES: .o .cpp .ptx .cu -SAPPOROPATH=../lib/ -SAPLIB2 = sapporo_ocl +SAPPOROPATH=.. 
+SAPLIB2 = sapporo SAPLIB = lib$(SAPLIB2).a +SAPLIBG6 = sapporoG6 -CUDA_TK = /usr/local/cuda +CUDA_TK ?= /usr/local/cuda #CUDA_TK = /opt/AMDAPP/ -OFLAGS = -g -O3 -Wall -Wextra -Wstrict-aliasing=2 -fopenmp -CXXFLAGS = -fPIC $(OFLAGS) -D_OCL_ +OFLAGS = -g -O3 -Wall -Wextra -Wstrict-aliasing=2 -fopenmp +CXXFLAGS += -fPIC -fopenmp $(OFLAGS) -D_OCL_ -LDFLAGS = -lOpenCL -fopenmp +LDFLAGS += -lOpenCL -fopenmp -INCLUDEPATH = $(SAPPOROPATH)/include -CXXFLAGS += -I$(INCLUDEPATH) -I./ -I $(SAPPOROPATH) -I$(CUDA_TK)/include +INCLUDEPATH = $(SAPPOROPATH)/include +CXXFLAGS += -I$(INCLUDEPATH) -I./ -I $(SAPPOROPATH)/src -I$(CUDA_TK)/include SRCPATH = src @@ -30,46 +28,35 @@ PROG = test_gravity_block_ocl test_gravity_block_6th_ocl test_performance_rangeN all: $(OBJ) $(PROG) kernels kernels: - ln -s $(SAPPOROPATH)/OpenCLKernels OpenCL + rm -f OpenCL && ln -s $(SAPPOROPATH)/OpenCL OpenCL test_gravity_block_ocl : test_gravity_block_ocl.o - $(LD) $(LDFLAGS) $^ -o $@ -L $(SAPPOROPATH) -l$(SAPLIB2) $(LDFLAGS) + $(CXX) $(LDFLAGS) $^ -o $@ -L $(SAPPOROPATH) -l$(SAPLIB2) $(LDFLAGS) test_gravity_block_g5_ocl: test_gravity_block_g5_ocl.o - $(LD) $(LDFLAGS) $^ -o $@ -L $(SAPPOROPATH) -l$(SAPLIB2) $(LDFLAGS) + $(CXX) $(LDFLAGS) $^ -o $@ -L $(SAPPOROPATH) -l$(SAPLIB2) $(LDFLAGS) test_gravity_block_6th_ocl : test_gravity_block_6th_ocl.o - $(LD) $(LDFLAGS) $^ -o $@ -L $(SAPPOROPATH) -l$(SAPLIB2) $(LDFLAGS) + $(CXX) $(LDFLAGS) $^ -o $@ -L $(SAPPOROPATH) -l$(SAPLIB2) $(LDFLAGS) test_performance_rangeN_ocl : test_performance_rangeN_ocl.o - $(LD) $(LDFLAGS) $^ -o $@ -L $(SAPPOROPATH) -l$(SAPLIB2) $(LDFLAGS) + $(CXX) $(LDFLAGS) $^ -o $@ -L $(SAPPOROPATH) -l$(SAPLIB2) $(LDFLAGS) test_performance_blockStep_ocl : test_performance_blockStep_ocl.o - $(LD) $(LDFLAGS) $^ -o $@ -L $(SAPPOROPATH) -l$(SAPLIB2) $(LDFLAGS) + $(CXX) $(LDFLAGS) $^ -o $@ -L $(SAPPOROPATH) -l$(SAPLIB2) $(LDFLAGS) test_performance_rangeN_6th_ocl : test_performance_rangeN_6th_ocl.o - $(LD) $(LDFLAGS) $^ -o $@ -L $(SAPPOROPATH) 
-l$(SAPLIB2) $(LDFLAGS) + $(CXX) $(LDFLAGS) $^ -o $@ -L $(SAPPOROPATH) -l$(SAPLIB2) $(LDFLAGS) test_performance_rangeN_g5_ocl : test_performance_rangeN_g5_ocl.o - $(LD) $(LDFLAGS) $^ -o $@ -L $(SAPPOROPATH) -l$(SAPLIB2) $(LDFLAGS) + $(CXX) $(LDFLAGS) $^ -o $@ -L $(SAPPOROPATH) -l$(SAPLIB2) $(LDFLAGS) test_integrator_ocl : test_integrator_ocl.o - $(LD) $(LDFLAGS) $^ -o $@ -L $(SAPPOROPATH) -l$(SAPLIB2) $(LDFLAGS) + $(CXX) $(LDFLAGS) $^ -o $@ -L $(SAPPOROPATH) -l$(SAPLIB2) -l$(SAPLIBG6) $(LDFLAGS) %_ocl.o: $(SRCPATH)/%.cpp $(CXX) $(CXXFLAGS) -c $< -o $@ clean: - /bin/rm -rf *.o *.ptx *.a $(PROG) - - -$(OBJ): $(SAPPOROPATH)/$(SAPLIB) - - - - - - - - + /bin/rm -rf *.o *.ptx *.a $(PROG) OpenCL diff --git a/testCodes/README b/tests/README similarity index 87% rename from testCodes/README rename to tests/README index 4e667a4..3d1fff0 100644 --- a/testCodes/README +++ b/tests/README @@ -1,7 +1,7 @@ Sapporo2, test and example programs This folder contains a set of test and example programs that can -be used with Sapporo2. +be used with Sapporo2. =============================================================================== @@ -13,12 +13,18 @@ Makefile and Makefile_ocl (the CUDA_TK variable) To build the CUDA versions: 'make' -Tol build the OpenCL versions: -'make -f Makefile_ocl' +To build the OpenCL versions: +'make -f Makefile_ocl' -In both cases make sure you build the matching sapporo library in the 'lib' folder. +In both cases make sure you build the matching sapporo library in the 'lib' folder, and +use -Make sure that the GPU kernel sources and ptx files are in a subfolder 'CUDA' can be +'export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:../lib' + +to ensure the linker can find the sapporo2 libraries, or better, use the absolute path +to the 'lib' directory. + +Make sure that the GPU kernel sources and ptx files are in a subfolder 'CUDA' can be symlinked from the lib folder. This is done by default in the Makefiles. 
=============================================================================== @@ -38,12 +44,12 @@ USAGE / Examples How to start a 4th order test using different precision: CUDA, 4th order default -./test_gravity_block_cuda +./test_gravity_block_cuda ./test_gravity_block_cuda 16364 -> number of particles is 16384 using CUDA ./test_gravity_block_ocl 16364 -> number of particles is 16384 using OpenCL ./test_gravity_block_cuda 16364 CUDA/kernels4thDP.ptx 1 1 -> - number of particles is 16384 using CUDA, using a full double precision kernel, with + number of particles is 16384 using CUDA, using a full double precision kernel, with integration order 1 (=FOURTH order) with precision 1 (=DOUBLE precision) ./test_gravity_block_ocl 16364 OpenCL/kernels4thDP.cl 1 1 -> same as above but now using OpenCL. diff --git a/testCodes/runScripts/compile_timingTest.sh b/tests/runScripts/compile_timingTest.sh similarity index 100% rename from testCodes/runScripts/compile_timingTest.sh rename to tests/runScripts/compile_timingTest.sh diff --git a/testCodes/runScripts/profiler.conf b/tests/runScripts/profiler.conf similarity index 100% rename from testCodes/runScripts/profiler.conf rename to tests/runScripts/profiler.conf diff --git a/testCodes/runScripts/run_activeParticleSizeTest.sh b/tests/runScripts/run_activeParticleSizeTest.sh similarity index 100% rename from testCodes/runScripts/run_activeParticleSizeTest.sh rename to tests/runScripts/run_activeParticleSizeTest.sh diff --git a/testCodes/runScripts/run_rangeN.sh b/tests/runScripts/run_rangeN.sh similarity index 100% rename from testCodes/runScripts/run_rangeN.sh rename to tests/runScripts/run_rangeN.sh diff --git a/testCodes/sapporo2.config b/tests/sapporo2.config similarity index 100% rename from testCodes/sapporo2.config rename to tests/sapporo2.config diff --git a/testCodes/sapporo2.config.README b/tests/sapporo2.config.README similarity index 100% rename from testCodes/sapporo2.config.README rename to 
tests/sapporo2.config.README diff --git a/testCodes/src/g6lib.h b/tests/src/g6lib.h similarity index 100% rename from testCodes/src/g6lib.h rename to tests/src/g6lib.h diff --git a/testCodes/src/test_gravity_block.cpp b/tests/src/test_gravity_block.cpp similarity index 100% rename from testCodes/src/test_gravity_block.cpp rename to tests/src/test_gravity_block.cpp diff --git a/testCodes/src/test_gravity_block_6th.cpp b/tests/src/test_gravity_block_6th.cpp similarity index 100% rename from testCodes/src/test_gravity_block_6th.cpp rename to tests/src/test_gravity_block_6th.cpp diff --git a/testCodes/src/test_gravity_block_g5.cpp b/tests/src/test_gravity_block_g5.cpp similarity index 100% rename from testCodes/src/test_gravity_block_g5.cpp rename to tests/src/test_gravity_block_g5.cpp diff --git a/testCodes/src/test_integrator.cpp b/tests/src/test_integrator.cpp similarity index 100% rename from testCodes/src/test_integrator.cpp rename to tests/src/test_integrator.cpp diff --git a/testCodes/src/test_performance_blockStep.cpp b/tests/src/test_performance_blockStep.cpp similarity index 100% rename from testCodes/src/test_performance_blockStep.cpp rename to tests/src/test_performance_blockStep.cpp diff --git a/testCodes/src/test_performance_rangeN.cpp b/tests/src/test_performance_rangeN.cpp similarity index 100% rename from testCodes/src/test_performance_rangeN.cpp rename to tests/src/test_performance_rangeN.cpp diff --git a/testCodes/src/test_performance_rangeN_6th.cpp b/tests/src/test_performance_rangeN_6th.cpp similarity index 100% rename from testCodes/src/test_performance_rangeN_6th.cpp rename to tests/src/test_performance_rangeN_6th.cpp diff --git a/testCodes/src/test_performance_rangeN_g5.cpp b/tests/src/test_performance_rangeN_g5.cpp similarity index 100% rename from testCodes/src/test_performance_rangeN_g5.cpp rename to tests/src/test_performance_rangeN_g5.cpp