Skip to content

Commit 4f90686

Browse files
authored
slurm job_array: only resubmit failed tasks (#171)
1 parent b80d4cf commit 4f90686

File tree

1 file changed

+8
-0
lines changed

1 file changed

+8
-0
lines changed

dpdispatcher/slurm.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,14 @@ def resources_subfields(cls) -> List[Argument]:
139139
class SlurmJobArray(Slurm):
140140
"""Slurm with job array enabled for multiple tasks in a job"""
141141
def gen_script_header(self, job):
    """Return the parent script header with an ``#SBATCH --array`` line appended.

    When ``job.fail_count`` is not greater than zero, the array directive
    covers every index ``0 .. len(job.job_task_list) - 1``. Otherwise the
    job is being resubmitted, and only the indices of tasks whose
    ``<task_hash>_task_tag_finished`` marker is not reported by
    ``self.context.check_file_exists`` are listed, comma-separated.

    Parameters
    ----------
    job
        Object providing ``fail_count`` and ``job_task_list``; every task
        in the list provides ``task_work_path`` and ``task_hash``.

    Returns
    -------
    The value of ``super().gen_script_header(job)`` concatenated with the
    ``#SBATCH --array=...`` directive on a new line.
    """
    if job.fail_count > 0:
        # Resubmission: collect the positions of tasks that still lack
        # their "finished" marker so only those are run again.
        # NOTE(review): if every task already has its marker, the list is
        # empty and the directive becomes "#SBATCH --array=" -- confirm
        # callers never resubmit in that state.
        unfinished = [
            str(index)
            for index, task in enumerate(job.job_task_list)
            if not self.context.check_file_exists(
                (
                    pathlib.PurePath(task.task_work_path)
                    / (task.task_hash + '_task_tag_finished')
                ).as_posix()
            )
        ]
        array_spec = ",".join(unfinished)
        return super().gen_script_header(job) + "\n#SBATCH --array=%s" % array_spec

    # First submission: run the full contiguous range of task indices.
    header = super().gen_script_header(job)
    last_index = len(job.job_task_list) - 1
    return header + "\n#SBATCH --array=0-%d" % last_index
143151

144152
def gen_script_command(self, job):

0 commit comments

Comments
 (0)