diff --git a/src/cuda/memory.c b/src/cuda/memory.c index 00857f30..f98d2a92 100755 --- a/src/cuda/memory.c +++ b/src/cuda/memory.c @@ -3,6 +3,7 @@ #include "allocator/allocator.h" #include "include/libcuda_hook.h" +#include "include/libvgpu.h" #include "include/memory_limit.h" extern int pidfound; @@ -555,6 +556,7 @@ CUresult cuMipmappedArrayDestroy(CUmipmappedArray hMipmappedArray) { CUresult cuLaunchKernel ( CUfunction f, unsigned int gridDimX, unsigned int gridDimY, unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ, unsigned int sharedMemBytes, CUstream hStream, void** kernelParams, void** extra ){ ENSURE_RUNNING(); + ensure_post_init(); pre_launch_kernel(); if (pidfound==1){ rate_limiter(gridDimX * gridDimY * gridDimZ, @@ -566,6 +568,7 @@ CUresult cuLaunchKernel ( CUfunction f, unsigned int gridDimX, unsigned int gr CUresult cuLaunchKernelEx(const CUlaunchConfig *config, CUfunction f, void **kernelParams, void **extra) { ENSURE_RUNNING(); + ensure_post_init(); pre_launch_kernel(); if (pidfound==1){ rate_limiter(config->gridDimX * config->gridDimY * config->gridDimZ, @@ -577,6 +580,7 @@ CUresult cuLaunchKernelEx(const CUlaunchConfig *config, CUfunction f, void **ker CUresult cuLaunchCooperativeKernel ( CUfunction f, unsigned int gridDimX, unsigned int gridDimY, unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ, unsigned int sharedMemBytes, CUstream hStream, void** kernelParams ){ ENSURE_RUNNING(); + ensure_post_init(); pre_launch_kernel(); CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuLaunchCooperativeKernel,f,gridDimX,gridDimY,gridDimZ,blockDimX,blockDimY,blockDimZ,sharedMemBytes,hStream,kernelParams); return res; diff --git a/src/include/libvgpu.h b/src/include/libvgpu.h index e596497e..1092a7b0 100755 --- a/src/include/libvgpu.h +++ b/src/include/libvgpu.h @@ -67,5 +67,6 @@ typedef void* (*fp_dlsym)(void*, const char*); nvmlReturn_t set_task_pid(); int map_cuda_visible_devices(); +void ensure_post_init(); -#endif // __LIBVGPU_GLIBC_H__ \ No newline at end of file +#endif // SRC_INCLUDE_LIBVGPU_H_ diff --git a/src/libvgpu.c b/src/libvgpu.c index befc16c3..a5011bbd 100644 --- a/src/libvgpu.c +++ b/src/libvgpu.c @@ -19,6 +19,7 @@ extern void initial_virtual_map(void); extern int set_host_pid(int hostpid); extern void allocator_init(void); void preInit(); +void childReinitPostInit(); char *(*real_realpath)(const char *path, char *resolved_path); void *vgpulib; @@ -884,6 +885,7 @@ void preInit(){ load_cuda_libraries(); //nvmlInit(); ENSURE_INITIALIZED(); + pthread_atfork(NULL, NULL, childReinitPostInit); } void postInit(){ @@ -919,6 +921,16 @@ void postInit(){ init_utilization_watcher(); } +void childReinitPostInit() { + LOG_DEBUG("Reset postInit state after fork"); + post_cuinit_flag = PTHREAD_ONCE_INIT; + pidfound = 0; +} + +void ensure_post_init() { + pthread_once(&post_cuinit_flag, (void(*) (void))postInit); +} + CUresult cuInit(unsigned int Flags){ LOG_INFO("Into cuInit"); pthread_once(&pre_cuinit_flag,(void(*)(void))preInit); @@ -928,6 +940,6 @@ CUresult cuInit(unsigned int Flags){ LOG_ERROR("cuInit failed:%d",res); return res; } - pthread_once(&post_cuinit_flag, (void(*) (void))postInit); + ensure_post_init(); return CUDA_SUCCESS; } diff --git a/src/multiprocess/multiprocess_memory_limit.c b/src/multiprocess/multiprocess_memory_limit.c index fce713ba..6e458c01 100755 --- a/src/multiprocess/multiprocess_memory_limit.c +++ b/src/multiprocess/multiprocess_memory_limit.c @@ -411,7 +411,6 @@ int init_gpu_device_utilization(){ 0, memory_order_relaxed); atomic_store_explicit(®ion_info.shared_region->procs[i].monitorused[dev], 0, memory_order_relaxed); - break; } } return 1;