Skip to content

Commit 38adf1e

Browse files
authored
Merge pull request #14 from dwhswenson/restructure_file_functions
Restructure file functions
2 parents 8a830a6 + 222bd2f commit 38adf1e

File tree

7 files changed

+239
-82
lines changed

7 files changed

+239
-82
lines changed

README.md

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,6 @@ miscellaneous operations on OPS output files.
2828
**Miscellaneous Commands:**
2929

3030
* `contents`: List named objects from an OPS .nc file
31-
* `strip-snapshots`: Remove coordinates/velocities from an OPS storage
3231
* `append`: add objects from INPUT_FILE to another file
3332

3433
Full documentation is at https://openpathsampling-cli.readthedocs.io/; a brief

docs/for_core/cli.rst

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -79,10 +79,13 @@ foregoing the CLI tools to run simulations, some of the "miscellaneous"
7979
commands are likely to be quite useful. Here are some that are available in
8080
the CLI:
8181

82-
* ``nclist``: list all the named objects in an OPS storage, organized by
82+
* ``contents``: list all the named objects in an OPS storage, organized by
8383
store (type); this is extremely useful to get the name of an object to use
8484
as command-line input to one of the simulation scripts
85-
* ``strip-snapshots``: create a copy of the input storage file with the
85+
.. * ``strip-snapshots``: create a copy of the input storage file with the
8686
details (coordinates/velocities) of all snapshots removed; this allows you
8787
to make a much smaller copy (with results of CVs) to copy back to a local
8888
computer for analysis
89+
* ``append``: add an object from one OPS storage into another one; this is
90+
useful for getting everything into a single file before running a
91+
simulation

paths_cli/cli.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -128,10 +128,9 @@ def format_commands(self, ctx, formatter):
128128
OpenPathSampling is a Python library for path sampling simulations. This
129129
command line tool facilitates common tasks when working with
130130
OpenPathSampling. To use it, use one of the subcommands below. For example,
131-
you can get more information about the strip-snapshots (filesize reduction)
132-
tool with:
131+
you can get more information about the pathsampling tool with:
133132
134-
openpathsampling strip-snapshots --help
133+
openpathsampling pathsampling --help
135134
"""
136135

137136
@click.command(cls=OpenPathSamplingCLI, name="openpathsampling",

paths_cli/commands/strip_snapshots.py

Lines changed: 0 additions & 75 deletions
This file was deleted.

paths_cli/file_copying.py

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
"""Tools to facilitate copying files.
2+
3+
This is mainly aimed at cases where a file is being copied with some sort of
4+
modification, or where CVs need to be disk-cached.
5+
"""
6+
7+
import click
8+
from tqdm.auto import tqdm
9+
from paths_cli.parameters import (
10+
Option, Argument, HELP_MULTIPLE, StorageLoader, OPSStorageLoadNames
11+
)
12+
13+
# Storage parameter for the ``append_file`` argument: the path must be both
# readable and writable, and the loader is configured with mode 'a'.
INPUT_APPEND_FILE = StorageLoader(
    param=Argument('append_file',
                   type=click.Path(writable=True, readable=True)),
    mode='a'
)
18+
19+
class PrecomputeLoadNames(OPSStorageLoadNames):
    """Name-based loader with two special cases for precomputing.

    An empty selection means "everything in the store", and the single
    name ``--`` means "nothing". Any other selection is resolved by the
    parent class.
    """

    def get(self, storage, name):
        """Return the objects selected by ``name`` from ``storage``.

        Parameters
        ----------
        storage :
            storage object; its attribute named by ``self.store`` is
            listed when no names are given
        name : Sequence[str]
            names requested by the user (the CLI gives a sequence)

        Returns
        -------
        list :
            all objects in the store if ``name`` is empty, an empty list
            if ``name`` is exactly ``['--']``; otherwise whatever the
            parent class's ``get`` returns
        """
        n_names = len(name)
        if n_names == 1 and name[0] == '--':
            # lone '--' is the explicit "select nothing" marker
            return []
        if n_names == 0:
            return list(getattr(storage, self.store))

        return super(PrecomputeLoadNames, self).get(storage, name)
27+
28+
# ``--cv`` option (may be given multiple times) that selects objects from
# the 'cvs' store. The "no value" and ``--`` special cases are handled by
# PrecomputeLoadNames.get.
PRECOMPUTE_CVS = PrecomputeLoadNames(
    param=Option('--cv', type=str, multiple=True,
                 help=('name of CV to precompute; if not specified all will'
                       + ' be used' + HELP_MULTIPLE
                       + ' (use `--cv --` to disable precomputing)')),
    store='cvs'
)
35+
36+
37+
def make_blocks(listlike, blocksize):
    """Split a sliceable sequence into consecutive blocks.

    Parameters
    ----------
    listlike : Sequence
        must support ``len`` and slicing
    blocksize : int
        maximum number of objects per block; must be at least 1

    Returns
    -------
    List[Sequence] :
        slices of ``listlike``, in order. Every block holds ``blocksize``
        objects except possibly the last, which holds the remainder. An
        empty input gives an empty list.

    Raises
    ------
    ValueError
        if ``blocksize`` is less than 1
    """
    if blocksize < 1:
        # a negative blocksize would otherwise silently produce no blocks
        # (dropping all the data), and zero would fail on a division
        raise ValueError(
            "blocksize must be a positive integer, got {!r}".format(blocksize)
        )

    n_objs = len(listlike)
    # clamp the upper bound explicitly so that list-likes whose slicing
    # does not clip out-of-range indices still work
    return [listlike[start:min(start + blocksize, n_objs)]
            for start in range(0, n_objs, blocksize)]
60+
61+
62+
def precompute_cvs(cvs, block):
    """Calculate each CV for a given block of inputs.

    Disk caching is enabled on each CV before it is evaluated; the
    computed values themselves are discarded, since the call is made only
    for its caching side effect.

    Parameters
    ----------
    cvs : List[:class:`openpathsampling.CollectiveVariable`]
        CVs to precompute
    block : List[Any]
        block of inputs (e.g., snapshots) passed to each CV in a single
        call
    """
    for cv in cvs:
        cv.enable_diskcache()
        cv(block)  # return value intentionally unused
75+
76+
77+
def precompute_cvs_func_and_inputs(input_storage, cvs, blocksize):
    """Build the function and the inputs for a CV-precomputing stage.

    Parameters
    ----------
    input_storage : :class:`openpathsampling.Storage`
        storage file to read from
    cvs : List[:class:`openpathsampling.CollectiveVariable`]
        list of CVs to precompute; if None, use all CVs in ``input_storage``
    blocksize : int
        number of snapshots per block to precompute

    Returns
    -------
    Tuple[Callable, List] :
        a function that takes one block of snapshots and precomputes the
        selected CVs for it, and the snapshots of ``input_storage`` (as
        proxies) split into blocks of ``blocksize``
    """
    selected_cvs = list(input_storage.cvs) if cvs is None else cvs

    def precompute_func(inps):
        return precompute_cvs(selected_cvs, inps)

    proxies = input_storage.snapshots.all().as_proxies()
    return precompute_func, make_blocks(proxies, blocksize)
95+
96+
97+
def rewrite_file(stage_names, stage_mapping):
    """Run every stage in order, showing progress bars.

    Parameters
    ----------
    stage_names : List[str]
        names of the stages, in the order they should be run
    stage_mapping : Dict[str, Tuple[Callable, Iterable]]
        maps each stage name to ``(store_func, inputs)``; ``store_func``
        is called once for each item in ``inputs``
    """
    for stage_name in tqdm(stage_names, desc="All stages"):
        func, stage_inputs = stage_mapping[stage_name]
        label = "This stage: {}".format(stage_name)
        # inner bar is transient so that only the overall bar remains
        for item in tqdm(stage_inputs, desc=label, leave=False):
            func(item)

paths_cli/tests/commands/test_append.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,6 @@ def test_append_remove_tag(tps_network_and_traj):
8888
result = runner.invoke(append,
8989
[in_file, '-a', "output.nc",
9090
"--tag", 'template', '--save-tag', ''])
91-
print(result.output)
9291
assert result.exception is None
9392
assert result.exit_code == 0
9493

Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,129 @@
1+
import collections
2+
import functools
3+
import os
4+
import tempfile
5+
from unittest.mock import MagicMock, patch
6+
import pytest
7+
8+
import openpathsampling as paths
9+
from openpathsampling.tests.test_helpers import make_1d_traj
10+
11+
from paths_cli.file_copying import *
12+
13+
class Test_PRECOMPUTE_CVS(object):
    """Tests for how ``PRECOMPUTE_CVS`` resolves ``--cv`` selections."""

    def setup_method(self):
        # pytest-native hook name: nose-style ``setup``/``teardown`` are no
        # longer run by recent pytest versions
        self.tmpdir = tempfile.mkdtemp()
        self.storage_filename = os.path.join(self.tmpdir, "test.nc")
        self.storage = paths.Storage(self.storage_filename, mode='w')
        snap = make_1d_traj([1])[0]
        self.storage.save(snap)
        self.cv_x = paths.CoordinateFunctionCV("x", lambda s: s.xyz[0][0])
        self.cv_y = paths.CoordinateFunctionCV("y", lambda s: s.xyz[0][1])
        self.storage.save([self.cv_x, self.cv_y])

    def teardown_method(self):
        # close first so the file can be removed
        self.storage.close()

        for filename in os.listdir(self.tmpdir):
            os.remove(os.path.join(self.tmpdir, filename))
        os.rmdir(self.tmpdir)

    @pytest.mark.parametrize('getter', ['x', None, '--'])
    def test_get(self, getter):
        """One name, no names (all CVs), and ``--`` (no CVs)."""
        expected = {'x': [self.cv_x],
                    None: [self.cv_x, self.cv_y],
                    '--': []}[getter]
        getter = [] if getter is None else [getter]  # CLI gives a list
        cvs = PRECOMPUTE_CVS.get(self.storage, getter)
        assert len(cvs) == len(expected)
        assert set(cvs) == set(expected)
40+
41+
42+
@pytest.mark.parametrize('blocksize', [2, 3, 5, 10, 12])
def test_make_blocks(blocksize):
    """Blocks have the expected sizes and concatenate back to the input."""
    items = list(range(10))
    lengths_by_blocksize = {
        2: [2, 2, 2, 2, 2],
        3: [3, 3, 3, 1],
        5: [5, 5],
        10: [10],
        12: [10],
    }
    blocks = make_blocks(items, blocksize)
    observed_lengths = [len(block) for block in blocks]
    assert observed_lengths == lengths_by_blocksize[blocksize]
    # no item lost, duplicated, or reordered
    assert sum(blocks, []) == items
53+
54+
55+
class TestPrecompute(object):
    """Tests for ``precompute_cvs`` and ``precompute_cvs_func_and_inputs``."""

    def setup_method(self):
        # pytest-native hook name: nose-style ``setup`` is no longer run by
        # recent pytest versions
        class RunOnceFunction(object):
            """Callable that fails if evaluated twice for one snapshot."""

            def __init__(self):
                self.previously_seen = set()

            def __call__(self, snap):
                if snap in self.previously_seen:
                    raise AssertionError("Second CV eval for " + str(snap))
                self.previously_seen.add(snap)
                return snap.xyz[0][0]

        self.cv = paths.FunctionCV("test", RunOnceFunction())
        traj = make_1d_traj([2, 1])
        self.snap = traj[0]
        self.other_snap = traj[1]

    def test_precompute_cvs(self):
        precompute_cvs([self.cv], [self.snap])
        assert self.cv.f.previously_seen == {self.snap}
        recalced = self.cv(self.snap)  # AssertionError if func called
        assert recalced == 2
        assert self.cv.diskcache_enabled is True

    @pytest.mark.parametrize('cvs', [['test'], None])
    def test_precompute_cvs_and_inputs(self, cvs):
        with tempfile.TemporaryDirectory() as tmpdir:
            storage = paths.Storage(os.path.join(tmpdir, "test.nc"),
                                    mode='w')
            try:
                traj = make_1d_traj(list(range(10)))
                cv = paths.FunctionCV("test", lambda s: s.xyz[0][0])
                storage.save(traj)
                storage.save(cv)

                if cvs is not None:
                    cvs = [storage.cvs[cv_name] for cv_name in cvs]

                precompute_func, blocks = precompute_cvs_func_and_inputs(
                    input_storage=storage,
                    cvs=cvs,
                    blocksize=2
                )
                assert len(blocks) == 5
                for block in blocks:
                    assert len(block) == 2

                # smoke test: only effect should be caching results
                precompute_func(blocks[0])
            finally:
                # release the file before the temporary directory is
                # removed
                storage.close()
103+
104+
105+
def test_rewrite_file():
    """Each stage's function receives exactly that stage's inputs, in order."""
    # a stand-in for storage, rather than actually testing integration
    class FakeStore(object):
        def __init__(self):
            self._stores = collections.defaultdict(list)

        def store(self, obj, store_name):
            self._stores[store_name].append(obj)

    def silent_tqdm(x, desc=None, leave=True):
        # pass the iterable straight through: no progress-bar output
        return x

    storage = FakeStore()
    stage_names = ['foo', 'bar']
    stage_inputs = {'foo': [0, 1, 2], 'bar': [[3], [4], [5]]}
    stage_mapping = {
        name: (functools.partial(storage.store, store_name=name),
               stage_inputs[name])
        for name in stage_names
    }

    with patch('paths_cli.file_copying.tqdm', silent_tqdm):
        rewrite_file(stage_names, stage_mapping)

    assert storage._stores['foo'] == [0, 1, 2]
    assert storage._stores['bar'] == [[3], [4], [5]]

0 commit comments

Comments
 (0)