Skip to content

Commit b7b907d

Browse files
improve SGE system (#446)
- add new key `sge_pe_name` into `resources`
- allow `customized_script_header_template` in SGE system
- ...

<!-- This is an auto-generated comment: release notes by coderabbit.ai -->

## Summary by CodeRabbit

- **New Features**
  - Enhanced PBS script header generation with updated job name, node selection, and custom script headers.
- **Bug Fixes**
  - Corrected shell script sourcing commands in test scripts by removing unnecessary double curly braces.
- **Improvements**
  - Simplified source file handling in job submission scripts.
  - Added support for SGE parallel environment names in resource configurations.

<!-- end of auto-generated comment: release notes by coderabbit.ai -->

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent 20a872d commit b7b907d

File tree

3 files changed: +62 −11 lines changed

3 files changed

+62
-11
lines changed

dpdispatcher/machine.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -261,8 +261,7 @@ def gen_script_env(self, job):
261261

262262
source_list = job.resources.source_list
263263
for ii in source_list:
264-
line = f"{{ source {ii}; }} \n"
265-
source_files_part += line
264+
source_files_part += f"source {ii}\n"
266265

267266
export_envs_part = ""
268267
envs = job.resources.envs

dpdispatcher/machines/pbs.py

Lines changed: 59 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
11
import shlex
2+
from typing import List
3+
4+
from dargs import Argument
25

36
from dpdispatcher.dlog import dlog
47
from dpdispatcher.machine import Machine
@@ -181,10 +184,10 @@ def gen_script_header(self, job):
181184

182185
sge_script_header_template = """
183186
#!/bin/bash
184-
#$ -N dpdispatcher_submit
185-
{select_node_line}
187+
#$ -S /bin/bash
186188
#$ -cwd
187-
189+
#$ -N DPjob
190+
{select_node_line}
188191
"""
189192

190193

@@ -209,21 +212,38 @@ def __init__(
209212
)
210213

211214
def gen_script_header(self, job):
215+
### Ref:https://softpanorama.org/HPC/PBS_and_derivatives/Reference/pbs_command_vs_sge_commands.shtml
216+
# resources.number_node is not used in SGE
212217
resources = job.resources
218+
sge_pe_name = resources.kwargs.get("sge_pe_name", "mpi")
213219
sge_script_header_dict = {}
214-
# resources.number_node is not used
215220
sge_script_header_dict["select_node_line"] = (
216-
f"#$ -pe mpi {resources.cpu_per_node} "
221+
f"#$ -pe {sge_pe_name} {resources.cpu_per_node}\n"
217222
)
218-
# resources.queue_name is not necessary
219-
sge_script_header = sge_script_header_template.format(**sge_script_header_dict)
223+
if resources.queue_name != "":
224+
sge_script_header_dict["select_node_line"] += (
225+
f"#$ -q {resources.queue_name}"
226+
)
227+
if (
228+
resources["strategy"].get("customized_script_header_template_file")
229+
is not None
230+
):
231+
file_name = resources["strategy"]["customized_script_header_template_file"]
232+
sge_script_header = customized_script_header_template(file_name, resources)
233+
else:
234+
sge_script_header = sge_script_header_template.format(
235+
**sge_script_header_dict
236+
)
220237
return sge_script_header
221238

222239
def do_submit(self, job):
223240
script_file_name = job.script_file_name
224241
script_str = self.gen_script(job)
225242
job_id_name = job.job_hash + "_job_id"
226243
self.context.write_file(fname=script_file_name, write_str=script_str)
244+
script_run_str = self.gen_script_command(job)
245+
script_run_file_name = f"{job.script_file_name}.run"
246+
self.context.write_file(fname=script_run_file_name, write_str=script_run_str)
227247
script_file_dir = self.context.remote_root
228248
stdin, stdout, stderr = self.context.block_checkcall(
229249
"cd {} && {} {}".format(script_file_dir, "qsub", script_file_name)
@@ -281,3 +301,35 @@ def check_status(self, job):
281301
def check_finish_tag(self, job):
282302
job_tag_finished = job.job_hash + "_job_tag_finished"
283303
return self.context.check_file_exists(job_tag_finished)
304+
305+
@classmethod
306+
def resources_subfields(cls) -> List[Argument]:
307+
"""Generate the resources subfields.
308+
309+
sge_pe_name : str
310+
The parallel environment name of SGE.
311+
312+
Returns
313+
-------
314+
list[Argument]
315+
resources subfields
316+
"""
317+
doc_sge_pe_name = "The parallel environment name of SGE."
318+
319+
return [
320+
Argument(
321+
"kwargs",
322+
dict,
323+
[
324+
Argument(
325+
"sge_pe_name",
326+
str,
327+
optional=True,
328+
default="mpi",
329+
doc=doc_sge_pe_name,
330+
),
331+
],
332+
optional=False,
333+
doc="Extra arguments.",
334+
)
335+
]

tests/test_lsf_script_generation.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -108,8 +108,8 @@ def test_shell_trival(self):
108108
module load use.own
109109
module load deepmd/1.3
110110
111-
{{ source /data/home/ypliu/scripts/avail_gpu.sh; }}
112-
{{ source /data/home/ypliu/dprun/tf_envs.sh; }}
111+
source /data/home/ypliu/scripts/avail_gpu.sh
112+
source /data/home/ypliu/dprun/tf_envs.sh
113113
114114
export DP_DISPATCHER_EXPORT=test_foo_bar_baz
115115

0 commit comments

Comments
 (0)