Skip to content

Commit 2d8f5d4

Browse files
authored
feat: export resources information to the environment variables (#478)
1 parent a64a81a commit 2d8f5d4

File tree

8 files changed

+68
-7
lines changed

8 files changed

+68
-7
lines changed

doc/conf.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121

2222
project = "DPDispatcher"
2323
copyright = "2020-%d, Deep Modeling" % date.today().year
24-
author = "Deep Modeling"
24+
author = "DeepModeling"
2525

2626

2727
# -- General configuration ---------------------------------------------------
@@ -97,3 +97,7 @@ def setup(app):
9797
"python": ("https://docs.python.org/", None),
9898
"dargs": ("https://docs.deepmodeling.com/projects/dargs/en/latest/", None),
9999
}
100+
101+
myst_enable_extensions = [
102+
"colon_fence",
103+
]

doc/context.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ One needs to set {dargs:argument}`context_type <machine/context_type>` to one of
99

1010
`LazyLocal` runs jobs directly on the local server, in the local directory.
1111

12-
Since [`bash -l`](https://www.gnu.org/software/bash/manual/bash.html#Invoking-Bash) is used in the shebang line of the submission scripts, the [login shell startup files](https://www.gnu.org/software/bash/manual/bash.html#Invoking-Bash) will be executed, potentially overriding the current environmental variables. Therefore, it's advisable to explicitly set the environmental variables using {dargs:argument}`envs <resources/envs>` or {dargs:argument}`source_list <resources/source_list>`.
12+
Since [`bash -l`](https://www.gnu.org/software/bash/manual/bash.html#Invoking-Bash) is used in the shebang line of the submission scripts, the [login shell startup files](https://www.gnu.org/software/bash/manual/bash.html#Invoking-Bash) will be executed, potentially overriding the current environment variables. Therefore, it's advisable to explicitly set the environment variables using {dargs:argument}`envs <resources/envs>` or {dargs:argument}`source_list <resources/source_list>`.
1313

1414
## Local
1515

@@ -18,7 +18,7 @@ Since [`bash -l`](https://www.gnu.org/software/bash/manual/bash.html#Invoking-Ba
1818
`Local` runs jobs on the local server, but in a different directory.
1919
Files will be copied to the remote directory before jobs start and copied back after jobs finish.
2020

21-
Since [`bash -l`](https://www.gnu.org/software/bash/manual/bash.html#Invoking-Bash) is used in the shebang line of the submission scripts, the [login shell startup files](https://www.gnu.org/software/bash/manual/bash.html#Invoking-Bash) will be executed, potentially overriding the current environmental variables. Therefore, it's advisable to explicitly set the environmental variables using {dargs:argument}`envs <resources/envs>` or {dargs:argument}`source_list <resources/source_list>`.
21+
Since [`bash -l`](https://www.gnu.org/software/bash/manual/bash.html#Invoking-Bash) is used in the shebang line of the submission scripts, the [login shell startup files](https://www.gnu.org/software/bash/manual/bash.html#Invoking-Bash) will be executed, potentially overriding the current environment variables. Therefore, it's advisable to explicitly set the environment variables using {dargs:argument}`envs <resources/envs>` or {dargs:argument}`source_list <resources/source_list>`.
2222

2323
## SSH
2424

doc/env.md

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
# Environment variables
2+
3+
When launching a job, dpdispatcher sets the following environment variables according to the resources, in addition to user-defined environment variables:
4+
5+
:::{envvar} DPDISPATCHER_NUMBER_NODE
6+
7+
The number of nodes required for each job.
8+
9+
:::
10+
11+
:::{envvar} DPDISPATCHER_CPU_PER_NODE
12+
13+
The number of CPUs on each node assigned to each job.
14+
15+
:::
16+
17+
:::{envvar} DPDISPATCHER_GPU_PER_NODE
18+
19+
The number of GPUs on each node assigned to each job.
20+
21+
:::
22+
23+
:::{envvar} DPDISPATCHER_QUEUE_NAME
24+
25+
The queue name of the batch job scheduler system.
26+
27+
:::
28+
29+
:::{envvar} DPDISPATCHER_GROUP_SIZE
30+
31+
The number of tasks in a job. 0 means infinity.
32+
33+
:::
34+
35+
These environment variables can be used in the {dargs:argument}`command <task/command>`, for example, `mpirun -n ${DPDISPATCHER_CPU_PER_NODE} xx.run`.

doc/index.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ DPDispatcher will monitor (poke) until these jobs finish and download the result
2222
machine
2323
resources
2424
task
25+
env
2526
run
2627
cli
2728
api/api

dpdispatcher/machine.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -265,6 +265,15 @@ def gen_script_env(self, job):
265265

266266
export_envs_part = ""
267267
envs = job.resources.envs
268+
envs = {
269+
# export resources information to the environment variables
270+
"DPDISPATCHER_NUMBER_NODE": job.resources.number_node,
271+
"DPDISPATCHER_CPU_PER_NODE": job.resources.cpu_per_node,
272+
"DPDISPATCHER_GPU_PER_NODE": job.resources.gpu_per_node,
273+
"DPDISPATCHER_QUEUE_NAME": job.resources.queue_name,
274+
"DPDISPATCHER_GROUP_SIZE": job.resources.group_size,
275+
**envs,
276+
}
268277
for k, v in envs.items():
269278
if isinstance(v, list):
270279
for each_value in v:

dpdispatcher/submission.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1122,9 +1122,9 @@ def load_from_dict(cls, resources_dict):
11221122

11231123
@staticmethod
11241124
def arginfo(detail_kwargs=True):
1125-
doc_number_node = "The number of node need for each `job`"
1126-
doc_cpu_per_node = "cpu numbers of each node assigned to each job."
1127-
doc_gpu_per_node = "gpu numbers of each node assigned to each job."
1125+
doc_number_node = "The number of nodes required for each `job`."
1126+
doc_cpu_per_node = "CPU numbers of each node assigned to each job."
1127+
doc_gpu_per_node = "GPU numbers of each node assigned to each job."
11281128
doc_queue_name = "The queue name of batch job scheduler system."
11291129
doc_group_size = "The number of `tasks` in a `job`. 0 means infinity."
11301130
doc_custom_flags = "The extra lines pass to job submitting script header"

tests/test_lsf_script_generation.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,11 @@ def test_shell_trival(self):
111111
source /data/home/ypliu/scripts/avail_gpu.sh
112112
source /data/home/ypliu/dprun/tf_envs.sh
113113
114+
export DPDISPATCHER_NUMBER_NODE=1
115+
export DPDISPATCHER_CPU_PER_NODE=4
116+
export DPDISPATCHER_GPU_PER_NODE=1
117+
export DPDISPATCHER_QUEUE_NAME=gpu
118+
export DPDISPATCHER_GROUP_SIZE=4
114119
export DP_DISPATCHER_EXPORT=test_foo_bar_baz
115120
116121
echo 'The summer you were there.'

tests/test_run_submission.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,14 @@ def test_run_submission(self):
6565
task_list = []
6666
for ii in range(4):
6767
task = Task(
68-
command=f"echo dpdispatcher_unittest_{ii}",
68+
command=(
69+
f"[[ $DPDISPATCHER_NUMBER_NODE -eq {self.resources_dict['number_node']} ]] &&"
70+
f"[[ $DPDISPATCHER_CPU_PER_NODE -eq {self.resources_dict['cpu_per_node']} ]] &&"
71+
f"[[ $DPDISPATCHER_GPU_PER_NODE -eq {self.resources_dict['gpu_per_node']} ]] &&"
72+
f"[[ $DPDISPATCHER_QUEUE_NAME == {self.resources_dict['queue_name']} ]] &&"
73+
f"[[ $DPDISPATCHER_GROUP_SIZE -eq {self.resources_dict['group_size']} ]] &&"
74+
f"echo dpdispatcher_unittest_{ii}"
75+
),
6976
task_work_path="./",
7077
forward_files=[],
7178
backward_files=[f"out{ii}.txt"],

0 commit comments

Comments
 (0)