Skip to content

Commit f6072fe

Browse files
authored
feat: allow copying files to remote with LocalContext (#487)
Fix #486. <!-- This is an auto-generated comment: release notes by coderabbit.ai --> ## Summary by CodeRabbit - **New Features** - Updated documentation to clarify file handling with the `Local` context type, emphasizing the use of symlinks for improved efficiency. - Introduced a new class attribute for configurable symlink options during file operations. - Added a new key for symlink configuration in the remote profile JSON. - **Bug Fixes** - Enhanced error handling for file copying processes to ensure smoother operation. - **Tests** - Added a new test class for local context submissions, improving test coverage for file handling scenarios. <!-- end of auto-generated comment: release notes by coderabbit.ai --> --------- Signed-off-by: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
1 parent 4b98dcd commit f6072fe

File tree

5 files changed

+84
-18
lines changed

5 files changed

+84
-18
lines changed

doc/context.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,8 @@ Since [`bash -l`](https://www.gnu.org/software/bash/manual/bash.html#Invoking-Ba
1616
{dargs:argument}`context_type <machine/context_type>`: `Local`
1717

1818
`Local` runs jobs in the local server, but in a different directory.
19-
Files will be copied to the remote directory before jobs start and copied back after jobs finish.
19+
Files will be symlinked to the remote directory before jobs start and copied back after jobs finish.
20+
If the local directory is not accessible with the [batch system](./batch.md), turn off {dargs:argument}`symlink <machine[SSHContext]/remote_profile/symlink>`, and then files on the local directory will be copied to the remote directory.
2021

2122
Since [`bash -l`](https://www.gnu.org/software/bash/manual/bash.html#Invoking-Bash) is used in the shebang line of the submission scripts, the [login shell startup files](https://www.gnu.org/software/bash/manual/bash.html#Invoking-Bash) will be executed, potentially overriding the current environment variables. Therefore, it's advisable to explicitly set the environment variables using {dargs:argument}`envs <resources/envs>` or {dargs:argument}`source_list <resources/source_list>`.
2223

dpdispatcher/contexts/local_context.py

Lines changed: 57 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,9 @@
33
import subprocess as sp
44
from glob import glob
55
from subprocess import TimeoutExpired
6+
from typing import List
7+
8+
from dargs import Argument
69

710
from dpdispatcher.base_context import BaseContext
811
from dpdispatcher.dlog import dlog
@@ -60,6 +63,7 @@ def __init__(
6063
self.temp_local_root = os.path.abspath(local_root)
6164
self.temp_remote_root = os.path.abspath(remote_root)
6265
self.remote_profile = remote_profile
66+
self.symlink = remote_profile.get("symlink", True)
6367

6468
@classmethod
6569
def load_from_dict(cls, context_dict):
@@ -83,6 +87,25 @@ def bind_submission(self, submission):
8387
self.temp_remote_root, submission.submission_hash
8488
)
8589

90+
def _copy_from_local_to_remote(self, local_path, remote_path):
91+
if not os.path.exists(local_path):
92+
raise FileNotFoundError(
93+
f"cannot find uploaded file {os.path.join(local_path)}"
94+
)
95+
if os.path.exists(remote_path):
96+
os.remove(remote_path)
97+
_check_file_path(remote_path)
98+
99+
if self.symlink:
100+
# ensure the file exist
101+
os.symlink(local_path, remote_path)
102+
elif os.path.isfile(local_path):
103+
shutil.copyfile(local_path, remote_path)
104+
elif os.path.isdir(local_path):
105+
shutil.copytree(local_path, remote_path)
106+
else:
107+
raise ValueError(f"Unknown file type: {local_path}")
108+
86109
def upload(self, submission):
87110
os.makedirs(self.remote_root, exist_ok=True)
88111
for ii in submission.belonging_tasks:
@@ -103,14 +126,9 @@ def upload(self, submission):
103126
file_list.extend(rel_file_list)
104127

105128
for jj in file_list:
106-
if not os.path.exists(os.path.join(local_job, jj)):
107-
raise FileNotFoundError(
108-
"cannot find upload file " + os.path.join(local_job, jj)
109-
)
110-
if os.path.exists(os.path.join(remote_job, jj)):
111-
os.remove(os.path.join(remote_job, jj))
112-
_check_file_path(os.path.join(remote_job, jj))
113-
os.symlink(os.path.join(local_job, jj), os.path.join(remote_job, jj))
129+
self._copy_from_local_to_remote(
130+
os.path.join(local_job, jj), os.path.join(remote_job, jj)
131+
)
114132

115133
local_job = self.local_root
116134
remote_job = self.remote_root
@@ -128,14 +146,9 @@ def upload(self, submission):
128146
file_list.extend(rel_file_list)
129147

130148
for jj in file_list:
131-
if not os.path.exists(os.path.join(local_job, jj)):
132-
raise FileNotFoundError(
133-
"cannot find upload file " + os.path.join(local_job, jj)
134-
)
135-
if os.path.exists(os.path.join(remote_job, jj)):
136-
os.remove(os.path.join(remote_job, jj))
137-
_check_file_path(os.path.join(remote_job, jj))
138-
os.symlink(os.path.join(local_job, jj), os.path.join(remote_job, jj))
149+
self._copy_from_local_to_remote(
150+
os.path.join(local_job, jj), os.path.join(remote_job, jj)
151+
)
139152

140153
def download(
141154
self, submission, check_exists=False, mark_failure=True, back_error=False
@@ -336,3 +349,31 @@ def get_return(self, proc):
336349
stdout = None
337350
stderr = None
338351
return ret, stdout, stderr
352+
353+
@classmethod
354+
def machine_subfields(cls) -> List[Argument]:
355+
"""Generate the machine subfields.
356+
357+
Returns
358+
-------
359+
list[Argument]
360+
machine subfields
361+
"""
362+
doc_remote_profile = "The information used to maintain the local machine."
363+
return [
364+
Argument(
365+
"remote_profile",
366+
dict,
367+
optional=True,
368+
doc=doc_remote_profile,
369+
sub_fields=[
370+
Argument(
371+
"symlink",
372+
bool,
373+
optional=True,
374+
default=True,
375+
doc="Whether to use symbolic links to replace copy. This option should be turned off if the local directory is not accessible on the Batch system.",
376+
),
377+
],
378+
)
379+
]

dpdispatcher/machine.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,9 @@ def serialize(self, if_empty_remote_profile=False):
161161
machine_dict["remote_profile"] = self.context.remote_profile
162162
else:
163163
machine_dict["remote_profile"] = {}
164+
# normalize the dict
165+
base = self.arginfo()
166+
machine_dict = base.normalize_value(machine_dict, trim_pattern="_*")
164167
return machine_dict
165168

166169
def __eq__(self, other):

tests/test_argcheck.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,10 @@ def test_machine_argcheck(self):
2323
"context_type": "LocalContext",
2424
"local_root": "./",
2525
"remote_root": "/some/path",
26-
"remote_profile": {},
26+
"remote_profile": {
27+
"symlink": True,
28+
},
29+
"clean_asynchronously": False,
2730
}
2831
self.assertDictEqual(norm_dict, expected_dict)
2932

tests/test_run_submission.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -291,6 +291,24 @@ def test_async_run_submission(self):
291291
return super().test_async_run_submission()
292292

293293

294+
@unittest.skipIf(sys.platform == "win32", "Shell is not supported on Windows")
295+
class TestLocalContextCopy(RunSubmission, unittest.TestCase):
296+
def setUp(self):
297+
super().setUp()
298+
self.temp_dir = tempfile.TemporaryDirectory()
299+
self.machine_dict["context_type"] = "LocalContext"
300+
self.machine_dict["remote_root"] = self.temp_dir.name
301+
self.machine_dict["remote_profile"]["symlink"] = False
302+
303+
def tearDown(self):
304+
super().tearDown()
305+
self.temp_dir.cleanup()
306+
307+
@unittest.skip("It seems the remote file may be deleted")
308+
def test_async_run_submission(self):
309+
return super().test_async_run_submission()
310+
311+
294312
@unittest.skipIf(sys.platform == "win32", "Shell is not supported on Windows")
295313
class TestLazyLocalContext(RunSubmission, unittest.TestCase):
296314
def setUp(self):

0 commit comments

Comments
 (0)