File tree Expand file tree Collapse file tree 1 file changed +6
-1
lines changed
installation/docker-amd64-cuda/entrypoints Expand file tree Collapse file tree 1 file changed +6
-1
lines changed Original file line number Diff line number Diff line change 66# In the end all variables exported should be present and the command given by the user should run with PID 1.
77
88# In distributed jobs the number of times the entrypoint is run should match the number of containers created.
9- # On Slurm, if the entrypoint is called multiple times in the same container we can skip it with the following variables:
9+ # On Slurm with Pyxis, for example, a single container is created per node.
10+ # If the entrypoint is then called manually after srun, it runs multiple times in that same container (ntasks-per-node times),
11+ # so the redundant runs can be skipped with the following variables:
12+
13+ # If nodes share the same container:
1014if [ -n " ${SLURM_ONE_ENTRYPOINT_SCRIPT_PER_JOB} " ] && [ " ${SLURM_PROCID} " -gt 0 ]; then
1115 echo " [TEMPLATE INFO] Running the entrypoing only once for the job."
1216 echo " [TEMPLATE INFO] Skipping entrypoints on SLURM_PROCID ${SLURM_PROCID} ."
1317 echo " [TEMPLATE INFO] Executing the command" " $@ "
1418 exec " $@ "
1519fi
20+ # If tasks on the same node share the same container:
1621if [ -n " ${SLURM_ONE_ENTRYPOINT_SCRIPT_PER_NODE} " ] && [ " ${SLURM_LOCALID} " -gt 0 ]; then
1722 echo " [TEMPLATE INFO] Running the entrypoint once per node."
1823 echo " [TEMPLATE INFO] Skipping entrypoints on SLURM_PROCID ${SLURM_PROCID} ."
You can’t perform that action at this time.
0 commit comments