diff --git a/schedview/collect/scheduler_pickle.py b/schedview/collect/scheduler_pickle.py index 981c5acb..44ed55f6 100644 --- a/schedview/collect/scheduler_pickle.py +++ b/schedview/collect/scheduler_pickle.py @@ -7,11 +7,14 @@ import os import pickle from collections.abc import Sequence +from pathlib import Path from lsst.resources import ResourcePath from rubin_scheduler.scheduler.model_observatory import ModelObservatory from rubin_scheduler.scheduler.schedulers.core_scheduler import CoreScheduler +from schedview.testing.sample_data import SAMPLE_DATA_DIR_ENV_VAR + try: PICKLE_FNAME = os.environ["SCHED_PICKLE"] except KeyError: @@ -124,6 +127,12 @@ def sample_pickle(base_fname="sample_scheduler.pickle.xz"): fname : `str` File name of the sample pickle. """ + sample_data_dir = os.environ.get(SAMPLE_DATA_DIR_ENV_VAR) + if sample_data_dir is not None: + sample_path = Path(sample_data_dir).joinpath(base_fname) + if sample_path.exists(): + return str(sample_path) + root_package = __package__.split(".")[0] try: diff --git a/schedview/testing/__init__.py b/schedview/testing/__init__.py new file mode 100644 index 00000000..26822004 --- /dev/null +++ b/schedview/testing/__init__.py @@ -0,0 +1,7 @@ +from .sample_data import ensure_cached_sample_data, get_sample_data_path, write_sample_data + +__all__ = [ + "ensure_cached_sample_data", + "get_sample_data_path", + "write_sample_data", +] diff --git a/schedview/testing/sample_data.py b/schedview/testing/sample_data.py new file mode 100644 index 00000000..0dd97bf5 --- /dev/null +++ b/schedview/testing/sample_data.py @@ -0,0 +1,331 @@ +from __future__ import annotations + +import hashlib +import importlib.resources +import json +import lzma +import os +import pickle +import shutil +import sys +import warnings +from importlib.metadata import PackageNotFoundError, version +from pathlib import Path + +import numpy as np +from astropy.time import Time +from rubin_scheduler.scheduler import sim_runner +from rubin_scheduler.scheduler.example import example_scheduler +from rubin_scheduler.scheduler.model_observatory import ModelObservatory +from rubin_scheduler.scheduler.utils import SchemaConverter + +SAMPLE_DATA_DIR_ENV_VAR = "SCHEDVIEW_SAMPLE_DATA_DIR" +SAMPLE_PICKLE_ENV_VAR = "SCHED_PICKLE" +_CACHE_SCHEMA_VERSION = 1 +_SAMPLE_OPSIM_DB = "sample_opsim.db" +_SAMPLE_REWARDS_H5 = "sample_rewards.h5" +_SAMPLE_SCHEDULER_PICKLE = "sample_scheduler.pickle.xz" +_MANIFEST_JSON = "manifest.json" +_SAMPLE_DATA_FILE_NAMES = ( + _SAMPLE_OPSIM_DB, + _SAMPLE_REWARDS_H5, + _SAMPLE_SCHEDULER_PICKLE, +) + +__all__ = [ + "SAMPLE_DATA_DIR_ENV_VAR", + "SAMPLE_PICKLE_ENV_VAR", + "ensure_cached_sample_data", + "get_sample_data_path", + "write_sample_data", +] + + +def _get_sample_data_dir() -> Path: + """Return the directory holding sample test data. + + Returns + ------- + directory : `pathlib.Path` + Directory containing the sample data artifacts. + """ + override_dir = os.environ.get(SAMPLE_DATA_DIR_ENV_VAR) + if override_dir: + return Path(override_dir) + + root_package = __package__.split(".")[0] + return Path(str(importlib.resources.files(root_package).joinpath("data"))) + + +def get_sample_data_path(file_name: str) -> Path: + """Return the path to a sample test data artifact. + + Parameters + ---------- + file_name : `str` + Basename of the sample data artifact. + + Returns + ------- + path : `pathlib.Path` + Path to the requested sample data artifact. + """ + return _get_sample_data_dir().joinpath(file_name) + + +def _default_sample_date() -> str: + """Return the default date used for sample-data generation. + + Returns + ------- + date : `str` + ISO date string for the scheduler survey start night. + """ + from astropy.time import Time + from rubin_scheduler.utils import SURVEY_START_MJD + + return Time(SURVEY_START_MJD, format="mjd").iso[:10] + + +def _configure_generation_warnings() -> None: + """Configure warning filters for sample-data generation. + + The filters suppress known noisy warnings emitted by dependencies during + scheduler simulation and artifact generation. + """ + warnings.filterwarnings( + "ignore", + module="astropy.time", + message="Numerical value without unit or explicit format passed to TimeDelta, assuming days", + ) + warnings.filterwarnings( + "ignore", + module="healpy", + message="divide by zero encountered in divide", + ) + warnings.filterwarnings( + "ignore", + module="healpy", + message="invalid value encountered in multiply", + ) + warnings.filterwarnings( + "ignore", + module="holoviews", + message="Discarding nonzero nanoseconds in conversion.", + ) + warnings.filterwarnings( + "ignore", + module="rubin_scheduler", + message="invalid value encountered in arcsin", + ) + warnings.filterwarnings( + "ignore", + module="rubin_scheduler", + message="All-NaN slice encountered", + ) + + +def _manifest(date: str | None = None, duration: int | None = None) -> dict[str, object]: + """Build the cache manifest for a sample-data generation request. + + Parameters + ---------- + date : `str`, optional + Date of the simulated night in ISO format. If `None`, use the + scheduler survey start night. + duration : `int`, optional + Number of hours to simulate. If `None`, simulate the full night. + + Returns + ------- + manifest : `dict` [`str`, `object`] + Manifest describing the generation inputs and expected artifact set. + """ + resolved_date = _default_sample_date() if date is None else date + source_hash = hashlib.sha256(Path(__file__).read_bytes()).hexdigest() + + try: + rubin_scheduler_version = version("rubin-scheduler") + except PackageNotFoundError: + rubin_scheduler_version = "unknown" + + return { + "cache_schema_version": _CACHE_SCHEMA_VERSION, + "python": ".".join(str(part) for part in sys.version_info[:2]), + "rubin_scheduler": rubin_scheduler_version, + "date": resolved_date, + "duration_hours": duration, + "generator_source_hash": source_hash, + "file_names": list(_SAMPLE_DATA_FILE_NAMES), + } + + +def write_sample_data( + opsim_output_path: str | Path, + scheduler_output_path: str | Path, + rewards_output_path: str | Path, + *, + date: str | None = None, + duration: int | None = None, +) -> dict[str, Path]: + """Generate and write the sample test data artifacts. + + Parameters + ---------- + opsim_output_path : `str` or `pathlib.Path` + Output path for the generated opsim database. + scheduler_output_path : `str` or `pathlib.Path` + Output path for the generated scheduler pickle. + rewards_output_path : `str` or `pathlib.Path` + Output path for the generated rewards file. + date : `str`, optional + Date of the simulated night in ISO format. If `None`, use the + scheduler survey start night. + duration : `int`, optional + Number of hours to simulate. If `None`, simulate the full night. + + Returns + ------- + output_paths : `dict` [`str`, `pathlib.Path`] + Mapping from sample artifact filename to the generated output path. + """ + _configure_generation_warnings() + + resolved_date = _default_sample_date() if date is None else date + opsim_output_path = Path(opsim_output_path) + scheduler_output_path = Path(scheduler_output_path) + rewards_output_path = Path(rewards_output_path) + + for output_path in (opsim_output_path, scheduler_output_path, rewards_output_path): + output_path.parent.mkdir(parents=True, exist_ok=True) + + observatory = ModelObservatory() + evening_mjd = Time(resolved_date).mjd + this_night = ( + np.floor(observatory.almanac.sunsets["sunset"] + observatory.site.longitude / 360) == evening_mjd + ) + sim_start_mjd = observatory.almanac.sunsets[this_night]["sun_n12_setting"][0] + sim_end_mjd = observatory.almanac.sunsets[this_night]["sunrise"][0] + sim_duration = duration / 24.0 if duration is not None else sim_end_mjd - sim_start_mjd + + observatory = ModelObservatory(mjd_start=sim_start_mjd) + scheduler = example_scheduler(mjd_start=sim_start_mjd) + scheduler.keep_rewards = True + + observatory, scheduler, observations, reward_df, obs_rewards = sim_runner( + observatory, + scheduler, + sim_start_mjd=sim_start_mjd, + sim_duration=sim_duration, + record_rewards=True, + ) + + SchemaConverter().obs2opsim(observations, filename=str(opsim_output_path)) + + with lzma.open(scheduler_output_path, "wb", format=lzma.FORMAT_XZ) as pickle_io: + pickle.dump((scheduler, scheduler.conditions), pickle_io) + + reward_df.to_hdf(str(rewards_output_path), key="reward_df") + obs_rewards.to_hdf(str(rewards_output_path), key="obs_rewards") + + return { + _SAMPLE_OPSIM_DB: opsim_output_path, + _SAMPLE_SCHEDULER_PICKLE: scheduler_output_path, + _SAMPLE_REWARDS_H5: rewards_output_path, + } + + +def _generate_sample_data_dir( + output_dir: str | Path, + *, + date: str | None = None, + duration: int | None = None, +) -> Path: + """Generate a complete sample test data directory. + + Parameters + ---------- + output_dir : `str` or `pathlib.Path` + Directory in which the sample artifacts will be written. + date : `str`, optional + Date of the simulated night in ISO format. If `None`, use the + scheduler survey start night. + duration : `int`, optional + Number of hours to simulate. If `None`, simulate the full night. + + Returns + ------- + output_dir : `pathlib.Path` + Directory containing the generated sample data artifacts. + """ + output_dir = Path(output_dir) + output_dir.mkdir(parents=True, exist_ok=True) + write_sample_data( + output_dir.joinpath(_SAMPLE_OPSIM_DB), + output_dir.joinpath(_SAMPLE_SCHEDULER_PICKLE), + output_dir.joinpath(_SAMPLE_REWARDS_H5), + date=date, + duration=duration, + ) + return output_dir + + +def ensure_cached_sample_data( + cache_root: str | Path, + *, + date: str | None = None, + duration: int | None = None, +) -> Path: + """Return a cache directory containing generated sample test data. + + The cache directory name is derived from a hash of a manifest that + records the generation inputs and expected artifact set. + + Parameters + ---------- + cache_root : `str` or `pathlib.Path` + Directory under which hashed cache entries are created. + date : `str`, optional + Date of the simulated night in ISO format. If `None`, use the + scheduler survey start night. + duration : `int`, optional + Number of hours to simulate. If `None`, simulate the full night. + + Returns + ------- + cache_dir : `pathlib.Path` + Directory containing the generated sample data artifacts for the + requested inputs. + """ + cache_root = Path(cache_root) + cache_root.mkdir(parents=True, exist_ok=True) + + manifest = _manifest(date=date, duration=duration) + manifest_json = json.dumps(manifest, sort_keys=True) + digest = hashlib.sha256(manifest_json.encode("utf-8")).hexdigest()[:16] + cache_dir = cache_root.joinpath(digest) + manifest_path = cache_dir.joinpath(_MANIFEST_JSON) + required_paths = [cache_dir.joinpath(file_name) for file_name in _SAMPLE_DATA_FILE_NAMES] + + if manifest_path.exists() and all(path.exists() for path in required_paths): + cached_manifest = json.loads(manifest_path.read_text()) + if cached_manifest == manifest: + return cache_dir + regeneration_reason = "cache inputs changed" + else: + regeneration_reason = "cache entry missing or incomplete" + + print( + f"Regenerating sample test data in {cache_dir} ({regeneration_reason}).", + flush=True, + ) + if cache_dir.exists(): + shutil.rmtree(cache_dir) + + _generate_sample_data_dir( + cache_dir, + date=manifest["date"], + duration=duration, + ) + manifest_path.write_text(json.dumps(manifest, indent=2, sort_keys=True) + "\n") + return cache_dir diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 00000000..55c0413c --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,133 @@ +from __future__ import annotations + +import os +from pathlib import Path + +import pytest + +from schedview.testing.sample_data import ( + SAMPLE_DATA_DIR_ENV_VAR, + SAMPLE_PICKLE_ENV_VAR, + ensure_cached_sample_data, +) + +SAMPLE_SCHEDULER_PICKLE = "sample_scheduler.pickle.xz" + + +def _resolve_sample_data_dir(root_path: Path) -> Path: + """Resolve and validate the sample-data directory for the test session. + + Parameters + ---------- + root_path : `pathlib.Path` + Root directory of the pytest run. + + Returns + ------- + sample_data_dir : `pathlib.Path` + Directory containing the sample test data artifacts for this test + session. + + Raises + ------ + pytest.UsageError + Raised if the resolved sample data directory or scheduler pickle does + not exist. + """ + override_dir = os.environ.get(SAMPLE_DATA_DIR_ENV_VAR) + if override_dir: + sample_data_dir = Path(override_dir) + else: + cache_root = root_path.joinpath(".pytest_cache", "schedview-sample-data") + sample_data_dir = ensure_cached_sample_data(cache_root) + + if not sample_data_dir.exists(): + raise pytest.UsageError(f"Sample data directory does not exist: {sample_data_dir}") + + sample_pickle = sample_data_dir.joinpath(SAMPLE_SCHEDULER_PICKLE) + if not sample_pickle.exists(): + raise pytest.UsageError(f"Sample scheduler pickle does not exist: {sample_pickle}") + + os.environ[SAMPLE_DATA_DIR_ENV_VAR] = str(sample_data_dir) + os.environ[SAMPLE_PICKLE_ENV_VAR] = str(sample_pickle) + return sample_data_dir + + +def pytest_configure(config: pytest.Config) -> None: + """Initialize sample-data environment variables before test collection. + + Parameters + ---------- + config : `pytest.Config` + Pytest configuration object for the current test run. + """ + _resolve_sample_data_dir(Path(config.rootpath)) + + +@pytest.fixture(scope="session") +def sample_data_dir(pytestconfig: pytest.Config) -> Path: + """Return the sample-data directory for the test session. + + Parameters + ---------- + pytestconfig : `pytest.Config` + Pytest configuration object for the current test run. + + Returns + ------- + sample_data_dir : `pathlib.Path` + Directory containing the sample test data artifacts for this test + session. + """ + return _resolve_sample_data_dir(Path(pytestconfig.rootpath)) + + +@pytest.fixture(scope="session") +def sample_opsim_path(sample_data_dir: Path) -> Path: + """Return the generated opsim sample-data path. + + Parameters + ---------- + sample_data_dir : `pathlib.Path` + Directory containing the sample test data artifacts. + + Returns + ------- + opsim_path : `pathlib.Path` + Path to the generated opsim database. + """ + return sample_data_dir.joinpath("sample_opsim.db") + + +@pytest.fixture(scope="session") +def sample_rewards_path(sample_data_dir: Path) -> Path: + """Return the generated rewards sample-data path. + + Parameters + ---------- + sample_data_dir : `pathlib.Path` + Directory containing the sample test data artifacts. + + Returns + ------- + rewards_path : `pathlib.Path` + Path to the generated rewards file. + """ + return sample_data_dir.joinpath("sample_rewards.h5") + + +@pytest.fixture(scope="session") +def sample_scheduler_pickle_path(sample_data_dir: Path) -> Path: + """Return the generated scheduler-pickle sample-data path. + + Parameters + ---------- + sample_data_dir : `pathlib.Path` + Directory containing the sample test data artifacts. + + Returns + ------- + scheduler_pickle_path : `pathlib.Path` + Path to the generated scheduler pickle. + """ + return sample_data_dir.joinpath(SAMPLE_SCHEDULER_PICKLE) diff --git a/tests/test_examples.py b/tests/test_examples.py index 7255d8ba..e938bd19 100644 --- a/tests/test_examples.py +++ b/tests/test_examples.py @@ -1,4 +1,3 @@ -import importlib.resources import os import unittest from pathlib import Path @@ -6,6 +5,7 @@ import astropy.utils.exceptions import astropy.utils.iers +import bokeh.io from rubin_scheduler.utils import SURVEY_START_MJD from schedview.dayobs import DayObs @@ -27,6 +27,7 @@ from schedview.examples.visitmap import make_visit_map from schedview.examples.visitparam import make_visit_param_vs_time_plot from schedview.examples.visittable import make_visit_table +from schedview.testing.sample_data import get_sample_data_path astropy.utils.iers.conf.iers_degraded_accuracy = "ignore" @@ -36,6 +37,9 @@ class TestExamples(unittest.TestCase): + def setUp(self): + bokeh.io.reset_output() + def test_nightevents(self): with TemporaryDirectory() as dir: report = str(Path(dir).joinpath("nightevents.txt")) @@ -80,18 +84,14 @@ def test_visitparam(self): def test_surveyrewards(self): with TemporaryDirectory() as dir: report = str(Path(dir).joinpath("surveyrewards.html")) - rewards_uri: str = str( - importlib.resources.files("schedview").joinpath("data").joinpath("sample_rewards.h5") - ) + rewards_uri: str = str(get_sample_data_path("sample_rewards.h5")) make_survey_reward_plot(TEST_ISO_DATE, rewards_uri, report=report) assert os.path.exists(report) def test_bfrewards(self): with TemporaryDirectory() as dir: report = str(Path(dir).joinpath("surveyrewards.html")) - rewards_uri: str = str( - importlib.resources.files("schedview").joinpath("data").joinpath("sample_rewards.h5") - ) + rewards_uri: str = str(get_sample_data_path("sample_rewards.h5")) make_basis_function_reward_plot(TEST_ISO_DATE, rewards_uri, report=report) assert os.path.exists(report) diff --git a/tests/test_nightly.py b/tests/test_nightly.py index 7d3d012b..a562de28 100644 --- a/tests/test_nightly.py +++ b/tests/test_nightly.py @@ -1,4 +1,3 @@ -import importlib.resources import unittest from pathlib import Path from tempfile import TemporaryDirectory @@ -11,10 +10,11 @@ import schedview import schedview.plot.nightly +from schedview.testing.sample_data import get_sample_data_path def _load_sample_visits(): - visits_path = importlib.resources.files(schedview).joinpath("data").joinpath("sample_opsim.db") + visits_path = str(get_sample_data_path("sample_opsim.db")) visits = pd.DataFrame(SchemaConverter().opsim2obs(visits_path)) if "observationStartMJD" not in visits.columns and "mjd" in visits.columns: visits["observationStartMJD"] = visits["mjd"] @@ -40,11 +40,13 @@ def test_plot_airmass_vs_time(self): fig = schedview.plot.nightly.plot_airmass_vs_time(visits, almanac_events) + bokeh.io.reset_output() with TemporaryDirectory() as dir: out_path = Path(dir) saved_html_fname = out_path.joinpath("test_page.html") bokeh.plotting.output_file(filename=saved_html_fname, title="Test Page") bokeh.plotting.save(fig) + bokeh.io.reset_output() if __name__ == "__main__": diff --git a/tests/test_plot_rewards.py b/tests/test_plot_rewards.py index 116c3bae..01e8328e 100644 --- a/tests/test_plot_rewards.py +++ b/tests/test_plot_rewards.py @@ -1,4 +1,3 @@ -import importlib.resources import os import time import unittest @@ -7,9 +6,9 @@ import bokeh -import schedview import schedview.collect.rewards import schedview.plot.rewards +from schedview.testing.sample_data import get_sample_data_path WRITE_TIMEOUT_SECONDS = 20 @@ -35,8 +34,9 @@ def verify_can_plot(self, plot): time.sleep(1) def setUp(self): - rewards_rp = importlib.resources.files("schedview").joinpath("data").joinpath("sample_rewards.h5") - self.rewards_df, self.obs_reward = schedview.collect.read_rewards(rewards_rp) + self.rewards_df, self.obs_reward = schedview.collect.read_rewards( + str(get_sample_data_path("sample_rewards.h5")) + ) self.tier = 3 self.day_obs_mjd = int(self.rewards_df["queue_start_mjd"].min() - 0.5) diff --git a/tests/test_plot_survey_skyproj.py b/tests/test_plot_survey_skyproj.py index ac564f5a..bd0f8bbc 100644 --- a/tests/test_plot_survey_skyproj.py +++ b/tests/test_plot_survey_skyproj.py @@ -6,7 +6,6 @@ import numpy as np from astropy.coordinates import SkyCoord from astropy.time import Time -from lsst.resources import ResourcePath from rubin_scheduler.scheduler.model_observatory import ModelObservatory import schedview @@ -18,6 +17,7 @@ map_healpix, map_visits_over_healpix, ) +from schedview.testing.sample_data import get_sample_data_path RANDOM_SEED = 6563 @@ -45,8 +45,7 @@ def test_compute_circle_points(): def test_map_visits_over_healpix(): hp_map = np.random.default_rng(RANDOM_SEED).uniform(0, 1, hp.nside2npix(4)) - visits_path = ResourcePath("resource://schedview/data/sample_opsim.db") - visits = schedview.collect.read_opsim(visits_path) + visits = schedview.collect.read_opsim(str(get_sample_data_path("sample_opsim.db"))) visits_mjd = visits["observationStartMJD"].median() time_datetime = Time(visits_mjd - 0.5, format="mjd").datetime @@ -64,8 +63,7 @@ def test_create_hpix_visit_map_grid(): for band in "ugrizy": hpix_maps[band] = np.random.default_rng(RANDOM_SEED).uniform(0, 1, hp.nside2npix(4)) - visits_path = ResourcePath("resource://schedview/data/sample_opsim.db") - visits = schedview.collect.read_opsim(visits_path) + visits = schedview.collect.read_opsim(str(get_sample_data_path("sample_opsim.db"))) visits_mjd = visits["observationStartMJD"].median() time_datetime = Time(visits_mjd - 0.5, format="mjd").datetime diff --git a/tests/test_resources.py b/tests/test_resources.py index 65073852..c6840802 100644 --- a/tests/test_resources.py +++ b/tests/test_resources.py @@ -8,6 +8,7 @@ from rubin_sim.data import get_baseline from schedview.collect import find_file_resources, read_ddf_visits, read_opsim, read_rewards +from schedview.testing.sample_data import get_sample_data_path class TestResources(unittest.TestCase): @@ -32,8 +33,7 @@ def test_find_file_resources(self): class TestCollectOpsim(unittest.TestCase): def test_read_opsim(self): - resource_path = ResourcePath("resource://schedview/data/") - visits = read_opsim(resource_path) + visits = read_opsim(str(get_sample_data_path("sample_opsim.db"))) self.assertTrue("airmass" in visits.columns) self.assertGreater(len(visits), 0) @@ -48,8 +48,7 @@ def test_read_ddf(self): class TestCollectRewards(unittest.TestCase): def test_read_opsim(self): - resource_path = ResourcePath("resource://schedview/data/") - rewards_df, obs_rewards = read_rewards(resource_path) + rewards_df, obs_rewards = read_rewards(str(get_sample_data_path("sample_rewards.h5"))) self.assertGreater(len(rewards_df), 0) self.assertGreater(len(obs_rewards), 0) self.assertTrue("survey_reward" in rewards_df.columns) diff --git a/tests/test_scheduler_dashboard.py b/tests/test_scheduler_dashboard.py index 8286e78d..496123a8 100644 --- a/tests/test_scheduler_dashboard.py +++ b/tests/test_scheduler_dashboard.py @@ -1,5 +1,4 @@ import functools -import importlib.resources import os import re import subprocess @@ -29,7 +28,6 @@ # Objects to test instances against. from rubin_scheduler.scheduler.schedulers.core_scheduler import CoreScheduler -import schedview from schedview.app.scheduler_dashboard.scheduler_dashboard_app import ( SchedulerSnapshotDashboard, scheduler_app, @@ -39,8 +37,9 @@ # Schedview methods. from schedview.compute.scheduler import make_scheduler_summary_df from schedview.compute.survey import compute_maps +from schedview.testing.sample_data import get_sample_data_path -TEST_PICKLE = str(importlib.resources.files(schedview).joinpath("data", "sample_scheduler.pickle.xz")) +TEST_PICKLE = str(get_sample_data_path("sample_scheduler.pickle.xz")) MJD_START = get_sky_brightness_date_bounds()[0] TEST_DATE = Time(MJD_START + 0.2, format="mjd").datetime DEFAULT_TIMEZONE = "America/Santiago" diff --git a/util/sample_data/README.md b/util/sample_data/README.md index 915c0d5e..f0422e92 100644 --- a/util/sample_data/README.md +++ b/util/sample_data/README.md @@ -11,10 +11,18 @@ python make_sample_test_data.py There is a `--help` option to describe optional parameters. -The primary use for it is to generated update sample data for use in +The primary use for it is to generate or update sample data for use in `${SCHEDVIEW_DIR}/schedview/data`. -To copy it into place: +Pytest now uses the same generation logic through +`tests/conftest.py`, which creates sample data in a local cache under +`.pytest_cache/` and points tests at that directory with +`SCHEDVIEW_SAMPLE_DATA_DIR` and `SCHED_PICKLE`. + +The script remains useful when you want to refresh the checked-in sample +artifacts manually. + +To copy the generated files into place: ``` cp sample_* ../../schedview/data diff --git a/util/sample_data/make_sample_test_data.py b/util/sample_data/make_sample_test_data.py index 5054ad5c..cfcbea6b 100644 --- a/util/sample_data/make_sample_test_data.py +++ b/util/sample_data/make_sample_test_data.py @@ -1,51 +1,6 @@ import argparse -import lzma -import pickle -import warnings -import numpy as np -from astropy.time import Time -from rubin_scheduler.scheduler import sim_runner -from rubin_scheduler.scheduler.example import example_scheduler -from rubin_scheduler.scheduler.model_observatory import ModelObservatory -from rubin_scheduler.scheduler.utils import SchemaConverter -from rubin_scheduler.utils import SURVEY_START_MJD - -DEFAULT_DATE = Time(SURVEY_START_MJD, format="mjd").iso[:10] - -# Several dependencies throw prodigious instances of (benign) warnings. -# Suppress them to avoid poluting the executed notebook. - -warnings.filterwarnings( - "ignore", - module="astropy.time", - message="Numerical value without unit or explicit format passed to TimeDelta, assuming days", -) -warnings.filterwarnings( - "ignore", - module="healpy", - message="divide by zero encountered in divide", -) -warnings.filterwarnings( - "ignore", - module="healpy", - message="invalid value encountered in multiply", -) -warnings.filterwarnings( - "ignore", - module="holoviews", - message="Discarding nonzero nanoseconds in conversion.", -) -warnings.filterwarnings( - "ignore", - module="rubin_scheduler", - message="invalid value encountered in arcsin", -) -warnings.filterwarnings( - "ignore", - module="rubin_scheduler", - message="All-NaN slice encountered", -) +from schedview.testing.sample_data import write_sample_data def make_sample_test_data(): @@ -71,8 +26,8 @@ def make_sample_test_data(): parser.add_argument( "--date", type=str, - default=DEFAULT_DATE, - help="Date of the night to simulate (YYYY-MM-DD).", + default=None, + help="Date of the night to simulate (YYYY-MM-DD). Defaults to the scheduler survey start night.", ) parser.add_argument( "--duration", @@ -82,61 +37,14 @@ def make_sample_test_data(): ) args = parser.parse_args() - opsim_output_fname = args.opsim_output_fname - scheduler_fname = args.scheduler_fname - rewards_fname = args.rewards_fname - evening_iso8601 = args.date - - # Set the start date, scheduler, and observatory for the night: - - observatory = ModelObservatory() - - # Set `evening_mjd` to the integer calendar MJD of the local calendar day - # on which sunset falls on the night of interest. - evening_mjd = Time(evening_iso8601).mjd - - # If we just use this day as the start and make the simulation duration 1 - # day, the begin and end of the simulation will probably begin in the - # middle on one night and end in the middle of the next. - # Instead, find the sunset and sunrise of the night we want using the - # almanac, and use these to determine our start time and duration. - - # If the date represents the local calendar date at sunset, we need to - # shift by the longitude in units of days - this_night = ( - np.floor(observatory.almanac.sunsets["sunset"] + observatory.site.longitude / 360) == evening_mjd + write_sample_data( + args.opsim_output_fname, + args.scheduler_fname, + args.rewards_fname, + date=args.date, + duration=args.duration, ) - sim_start_mjd = observatory.almanac.sunsets[this_night]["sun_n12_setting"][0] - sim_end_mjd = observatory.almanac.sunsets[this_night]["sunrise"][0] - - if args.duration is not None: - duration = args.duration / 24.0 - else: - duration = sim_end_mjd - sim_start_mjd - - observatory = ModelObservatory(mjd_start=sim_start_mjd) - - scheduler = example_scheduler(mjd_start=sim_start_mjd) - scheduler.keep_rewards = True - - observatory, scheduler, observations, reward_df, obs_rewards = sim_runner( - observatory, - scheduler, - sim_start_mjd=sim_start_mjd, - sim_duration=duration, - record_rewards=True, - ) - - SchemaConverter().obs2opsim(observations, filename=opsim_output_fname) - - with lzma.open(scheduler_fname, "wb", format=lzma.FORMAT_XZ) as pio: - sched_cond_tuple = (scheduler, scheduler.conditions) - pickle.dump(sched_cond_tuple, pio) - - reward_df.to_hdf(rewards_fname, "reward_df") - obs_rewards.to_hdf(rewards_fname, "obs_rewards") - if __name__ == "__main__": make_sample_test_data()