From ec2e38ff4dec626d3940f3720d3a2c4aa2f0f3d6 Mon Sep 17 00:00:00 2001 From: Eric Neilsen Date: Tue, 12 May 2026 10:13:33 -0500 Subject: [PATCH 01/10] Initial shot with LLM --- schedview/collect/scheduler_pickle.py | 9 ++ tests/test_examples.py | 14 +-- tests/test_nightly.py | 6 +- tests/test_plot_rewards.py | 8 +- tests/test_plot_survey_skyproj.py | 8 +- tests/test_resources.py | 7 +- tests/test_scheduler_dashboard.py | 4 +- util/sample_data/README.md | 13 ++- util/sample_data/make_sample_test_data.py | 106 ++-------------------- 9 files changed, 51 insertions(+), 124 deletions(-) diff --git a/schedview/collect/scheduler_pickle.py b/schedview/collect/scheduler_pickle.py index 981c5acb..44ed55f6 100644 --- a/schedview/collect/scheduler_pickle.py +++ b/schedview/collect/scheduler_pickle.py @@ -7,11 +7,14 @@ import os import pickle from collections.abc import Sequence +from pathlib import Path from lsst.resources import ResourcePath from rubin_scheduler.scheduler.model_observatory import ModelObservatory from rubin_scheduler.scheduler.schedulers.core_scheduler import CoreScheduler +from schedview.testing.sample_data import SAMPLE_DATA_DIR_ENV_VAR + try: PICKLE_FNAME = os.environ["SCHED_PICKLE"] except KeyError: @@ -124,6 +127,12 @@ def sample_pickle(base_fname="sample_scheduler.pickle.xz"): fname : `str` File name of the sample pickle. """ + sample_data_dir = os.environ.get(SAMPLE_DATA_DIR_ENV_VAR) + if sample_data_dir is not None: + sample_path = Path(sample_data_dir).joinpath(base_fname) + if sample_path.exists(): + return str(sample_path) + root_package = __package__.split(".")[0] try: diff --git a/tests/test_examples.py b/tests/test_examples.py index 7255d8ba..e938bd19 100644 --- a/tests/test_examples.py +++ b/tests/test_examples.py @@ -1,4 +1,3 @@ -import importlib.resources import os import unittest from pathlib import Path @@ -6,6 +5,7 @@ import astropy.utils.exceptions import astropy.utils.iers +import bokeh.io from rubin_scheduler.utils import SURVEY_START_MJD from schedview.dayobs import DayObs @@ -27,6 +27,7 @@ from schedview.examples.visitmap import make_visit_map from schedview.examples.visitparam import make_visit_param_vs_time_plot from schedview.examples.visittable import make_visit_table +from schedview.testing.sample_data import get_sample_data_path astropy.utils.iers.conf.iers_degraded_accuracy = "ignore" @@ -36,6 +37,9 @@ class TestExamples(unittest.TestCase): + def setUp(self): + bokeh.io.reset_output() + def test_nightevents(self): with TemporaryDirectory() as dir: report = str(Path(dir).joinpath("nightevents.txt")) @@ -80,18 +84,14 @@ def test_visitparam(self): def test_surveyrewards(self): with TemporaryDirectory() as dir: report = str(Path(dir).joinpath("surveyrewards.html")) - rewards_uri: str = str( - importlib.resources.files("schedview").joinpath("data").joinpath("sample_rewards.h5") - ) + rewards_uri: str = str(get_sample_data_path("sample_rewards.h5")) make_survey_reward_plot(TEST_ISO_DATE, rewards_uri, report=report) assert os.path.exists(report) def test_bfrewards(self): with TemporaryDirectory() as dir: report = str(Path(dir).joinpath("surveyrewards.html")) - rewards_uri: str = str( - importlib.resources.files("schedview").joinpath("data").joinpath("sample_rewards.h5") - ) + rewards_uri: str = str(get_sample_data_path("sample_rewards.h5")) make_basis_function_reward_plot(TEST_ISO_DATE, rewards_uri, report=report) assert os.path.exists(report) diff --git a/tests/test_nightly.py b/tests/test_nightly.py index 7d3d012b..a562de28 100644 --- a/tests/test_nightly.py +++ b/tests/test_nightly.py @@ -1,4 +1,3 @@ -import importlib.resources import unittest from pathlib import Path from tempfile import TemporaryDirectory @@ -11,10 +10,11 @@ import schedview import schedview.plot.nightly +from schedview.testing.sample_data import get_sample_data_path def _load_sample_visits(): - visits_path = importlib.resources.files(schedview).joinpath("data").joinpath("sample_opsim.db") + visits_path = str(get_sample_data_path("sample_opsim.db")) visits = pd.DataFrame(SchemaConverter().opsim2obs(visits_path)) if "observationStartMJD" not in visits.columns and "mjd" in visits.columns: visits["observationStartMJD"] = visits["mjd"] @@ -40,11 +40,13 @@ def test_plot_airmass_vs_time(self): fig = schedview.plot.nightly.plot_airmass_vs_time(visits, almanac_events) + bokeh.io.reset_output() with TemporaryDirectory() as dir: out_path = Path(dir) saved_html_fname = out_path.joinpath("test_page.html") bokeh.plotting.output_file(filename=saved_html_fname, title="Test Page") bokeh.plotting.save(fig) + bokeh.io.reset_output() if __name__ == "__main__": diff --git a/tests/test_plot_rewards.py b/tests/test_plot_rewards.py index 116c3bae..01e8328e 100644 --- a/tests/test_plot_rewards.py +++ b/tests/test_plot_rewards.py @@ -1,4 +1,3 @@ -import importlib.resources import os import time import unittest @@ -7,9 +6,9 @@ import bokeh -import schedview import schedview.collect.rewards import schedview.plot.rewards +from schedview.testing.sample_data import get_sample_data_path WRITE_TIMEOUT_SECONDS = 20 @@ -35,8 +34,9 @@ def verify_can_plot(self, plot): time.sleep(1) def setUp(self): - rewards_rp = importlib.resources.files("schedview").joinpath("data").joinpath("sample_rewards.h5") - self.rewards_df, self.obs_reward = schedview.collect.read_rewards(rewards_rp) + self.rewards_df, self.obs_reward = schedview.collect.read_rewards( + str(get_sample_data_path("sample_rewards.h5")) + ) self.tier = 3 self.day_obs_mjd = int(self.rewards_df["queue_start_mjd"].min() - 0.5) diff --git a/tests/test_plot_survey_skyproj.py b/tests/test_plot_survey_skyproj.py index ac564f5a..bd0f8bbc 100644 --- a/tests/test_plot_survey_skyproj.py +++ b/tests/test_plot_survey_skyproj.py @@ -6,7 +6,6 @@ import numpy as np from astropy.coordinates import SkyCoord from astropy.time import Time -from lsst.resources import ResourcePath from rubin_scheduler.scheduler.model_observatory import ModelObservatory import schedview @@ -18,6 +17,7 @@ map_healpix, map_visits_over_healpix, ) +from schedview.testing.sample_data import get_sample_data_path RANDOM_SEED = 6563 @@ -45,8 +45,7 @@ def test_compute_circle_points(): def test_map_visits_over_healpix(): hp_map = np.random.default_rng(RANDOM_SEED).uniform(0, 1, hp.nside2npix(4)) - visits_path = ResourcePath("resource://schedview/data/sample_opsim.db") - visits = schedview.collect.read_opsim(visits_path) + visits = schedview.collect.read_opsim(str(get_sample_data_path("sample_opsim.db"))) visits_mjd = visits["observationStartMJD"].median() time_datetime = Time(visits_mjd - 0.5, format="mjd").datetime @@ -64,8 +63,7 @@ def test_create_hpix_visit_map_grid(): for band in "ugrizy": hpix_maps[band] = np.random.default_rng(RANDOM_SEED).uniform(0, 1, hp.nside2npix(4)) - visits_path = ResourcePath("resource://schedview/data/sample_opsim.db") - visits = schedview.collect.read_opsim(visits_path) + visits = schedview.collect.read_opsim(str(get_sample_data_path("sample_opsim.db"))) visits_mjd = visits["observationStartMJD"].median() time_datetime = Time(visits_mjd - 0.5, format="mjd").datetime diff --git a/tests/test_resources.py b/tests/test_resources.py index 65073852..d614874b 100644 --- a/tests/test_resources.py +++ b/tests/test_resources.py @@ -8,6 +8,7 @@ from rubin_sim.data import get_baseline from schedview.collect import find_file_resources, read_ddf_visits, read_opsim, read_rewards +from schedview.testing.sample_data import get_sample_data_dir class TestResources(unittest.TestCase): @@ -32,8 +33,7 @@ def test_find_file_resources(self): class TestCollectOpsim(unittest.TestCase): def test_read_opsim(self): - resource_path = ResourcePath("resource://schedview/data/") - visits = read_opsim(resource_path) + visits = read_opsim(str(get_sample_data_dir())) self.assertTrue("airmass" in visits.columns) self.assertGreater(len(visits), 0) @@ -48,8 +48,7 @@ def test_read_ddf(self): class TestCollectRewards(unittest.TestCase): def test_read_opsim(self): - resource_path = ResourcePath("resource://schedview/data/") - rewards_df, obs_rewards = read_rewards(resource_path) + rewards_df, obs_rewards = read_rewards(str(get_sample_data_dir())) self.assertGreater(len(rewards_df), 0) self.assertGreater(len(obs_rewards), 0) self.assertTrue("survey_reward" in rewards_df.columns) diff --git a/tests/test_scheduler_dashboard.py b/tests/test_scheduler_dashboard.py index 8286e78d..c51c59ab 100644 --- a/tests/test_scheduler_dashboard.py +++ b/tests/test_scheduler_dashboard.py @@ -1,5 +1,4 @@ import functools -import importlib.resources import os import re import subprocess @@ -35,12 +34,13 @@ scheduler_app, ) from schedview.app.scheduler_dashboard.utils import get_sky_brightness_date_bounds +from schedview.testing.sample_data import get_sample_data_path # Schedview methods. from schedview.compute.scheduler import make_scheduler_summary_df from schedview.compute.survey import compute_maps -TEST_PICKLE = str(importlib.resources.files(schedview).joinpath("data", "sample_scheduler.pickle.xz")) +TEST_PICKLE = str(get_sample_data_path("sample_scheduler.pickle.xz")) MJD_START = get_sky_brightness_date_bounds()[0] TEST_DATE = Time(MJD_START + 0.2, format="mjd").datetime DEFAULT_TIMEZONE = "America/Santiago" diff --git a/util/sample_data/README.md b/util/sample_data/README.md index 915c0d5e..d66448e5 100644 --- a/util/sample_data/README.md +++ b/util/sample_data/README.md @@ -11,11 +11,20 @@ python make_sample_test_data.py There is a `--help` option to describe optional parameters. -The primary use for it is to generated update sample data for use in +The primary use for it is to generate or update sample data for use in `${SCHEDVIEW_DIR}/schedview/data`. -To copy it into place: +Pytest now uses the same generation logic through +`tests/conftest.py`, which creates sample data in a local cache under +`.pytest_cache/` and points tests at that directory with +`SCHEDVIEW_SAMPLE_DATA_DIR` and `SCHED_PICKLE`. + +The script remains useful when you want to refresh the checked-in sample +artifacts manually. + +To copy the generated files into place: ``` cp sample_* ../../schedview/data +cp sim_metadata.yaml ../../schedview/data ``` diff --git a/util/sample_data/make_sample_test_data.py b/util/sample_data/make_sample_test_data.py index 5054ad5c..bf2cbc81 100644 --- a/util/sample_data/make_sample_test_data.py +++ b/util/sample_data/make_sample_test_data.py @@ -1,51 +1,8 @@ import argparse -import lzma -import pickle -import warnings -import numpy as np -from astropy.time import Time -from rubin_scheduler.scheduler import sim_runner -from rubin_scheduler.scheduler.example import example_scheduler -from rubin_scheduler.scheduler.model_observatory import ModelObservatory -from rubin_scheduler.scheduler.utils import SchemaConverter -from rubin_scheduler.utils import SURVEY_START_MJD +from schedview.testing.sample_data import _default_sample_date, write_sample_data -DEFAULT_DATE = Time(SURVEY_START_MJD, format="mjd").iso[:10] - -# Several dependencies throw prodigious instances of (benign) warnings. -# Suppress them to avoid poluting the executed notebook. - -warnings.filterwarnings( - "ignore", - module="astropy.time", - message="Numerical value without unit or explicit format passed to TimeDelta, assuming days", -) -warnings.filterwarnings( - "ignore", - module="healpy", - message="divide by zero encountered in divide", -) -warnings.filterwarnings( - "ignore", - module="healpy", - message="invalid value encountered in multiply", -) -warnings.filterwarnings( - "ignore", - module="holoviews", - message="Discarding nonzero nanoseconds in conversion.", -) -warnings.filterwarnings( - "ignore", - module="rubin_scheduler", - message="invalid value encountered in arcsin", -) -warnings.filterwarnings( - "ignore", - module="rubin_scheduler", - message="All-NaN slice encountered", -) +DEFAULT_DATE = _default_sample_date() def make_sample_test_data(): @@ -82,61 +39,14 @@ def make_sample_test_data(): ) args = parser.parse_args() - opsim_output_fname = args.opsim_output_fname - scheduler_fname = args.scheduler_fname - rewards_fname = args.rewards_fname - evening_iso8601 = args.date - - # Set the start date, scheduler, and observatory for the night: - - observatory = ModelObservatory() - - # Set `evening_mjd` to the integer calendar MJD of the local calendar day - # on which sunset falls on the night of interest. - evening_mjd = Time(evening_iso8601).mjd - - # If we just use this day as the start and make the simulation duration 1 - # day, the begin and end of the simulation will probably begin in the - # middle on one night and end in the middle of the next. - # Instead, find the sunset and sunrise of the night we want using the - # almanac, and use these to determine our start time and duration. - - # If the date represents the local calendar date at sunset, we need to - # shift by the longitude in units of days - this_night = ( - np.floor(observatory.almanac.sunsets["sunset"] + observatory.site.longitude / 360) == evening_mjd - ) - - sim_start_mjd = observatory.almanac.sunsets[this_night]["sun_n12_setting"][0] - sim_end_mjd = observatory.almanac.sunsets[this_night]["sunrise"][0] - - if args.duration is not None: - duration = args.duration / 24.0 - else: - duration = sim_end_mjd - sim_start_mjd - - observatory = ModelObservatory(mjd_start=sim_start_mjd) - - scheduler = example_scheduler(mjd_start=sim_start_mjd) - scheduler.keep_rewards = True - - observatory, scheduler, observations, reward_df, obs_rewards = sim_runner( - observatory, - scheduler, - sim_start_mjd=sim_start_mjd, - sim_duration=duration, - record_rewards=True, + write_sample_data( + args.opsim_output_fname, + args.scheduler_fname, + args.rewards_fname, + date=args.date, + duration=args.duration, ) - SchemaConverter().obs2opsim(observations, filename=opsim_output_fname) - - with lzma.open(scheduler_fname, "wb", format=lzma.FORMAT_XZ) as pio: - sched_cond_tuple = (scheduler, scheduler.conditions) - pickle.dump(sched_cond_tuple, pio) - - reward_df.to_hdf(rewards_fname, "reward_df") - obs_rewards.to_hdf(rewards_fname, "obs_rewards") - if __name__ == "__main__": make_sample_test_data() From 5b4ab36c172adf9634295635b6954b7bcee1b627 Mon Sep 17 00:00:00 2001 From: Eric Neilsen Date: Tue, 12 May 2026 10:27:27 -0500 Subject: [PATCH 02/10] Remove use of sim_metadata.yaml in sample data --- tests/test_resources.py | 6 +++--- util/sample_data/README.md | 1 - 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/tests/test_resources.py b/tests/test_resources.py index d614874b..c6840802 100644 --- a/tests/test_resources.py +++ b/tests/test_resources.py @@ -8,7 +8,7 @@ from rubin_sim.data import get_baseline from schedview.collect import find_file_resources, read_ddf_visits, read_opsim, read_rewards -from schedview.testing.sample_data import get_sample_data_dir +from schedview.testing.sample_data import get_sample_data_path class TestResources(unittest.TestCase): @@ -33,7 +33,7 @@ def test_find_file_resources(self): class TestCollectOpsim(unittest.TestCase): def test_read_opsim(self): - visits = read_opsim(str(get_sample_data_dir())) + visits = read_opsim(str(get_sample_data_path("sample_opsim.db"))) self.assertTrue("airmass" in visits.columns) self.assertGreater(len(visits), 0) @@ -48,7 +48,7 @@ def test_read_ddf(self): class TestCollectRewards(unittest.TestCase): def test_read_opsim(self): - rewards_df, obs_rewards = read_rewards(str(get_sample_data_dir())) + rewards_df, obs_rewards = read_rewards(str(get_sample_data_path("sample_rewards.h5"))) self.assertGreater(len(rewards_df), 0) self.assertGreater(len(obs_rewards), 0) self.assertTrue("survey_reward" in rewards_df.columns) diff --git a/util/sample_data/README.md b/util/sample_data/README.md index d66448e5..f0422e92 100644 --- a/util/sample_data/README.md +++ b/util/sample_data/README.md @@ -26,5 +26,4 @@ To copy the generated files into place: ``` cp sample_* ../../schedview/data -cp sim_metadata.yaml ../../schedview/data ``` From 7bcf83a033c23951e6651dcd7db7739dd7c49c04 Mon Sep 17 00:00:00 2001 From: Eric Neilsen Date: Tue, 12 May 2026 10:31:32 -0500 Subject: [PATCH 03/10] New files needed --- schedview/testing/__init__.py | 21 +++ schedview/testing/sample_data.py | 225 +++++++++++++++++++++++++++++++ tests/conftest.py | 63 +++++++++ 3 files changed, 309 insertions(+) create mode 100644 schedview/testing/__init__.py create mode 100644 schedview/testing/sample_data.py create mode 100644 tests/conftest.py diff --git a/schedview/testing/__init__.py b/schedview/testing/__init__.py new file mode 100644 index 00000000..6380bb35 --- /dev/null +++ b/schedview/testing/__init__.py @@ -0,0 +1,21 @@ +from .sample_data import ( + CACHE_SCHEMA_VERSION, + SAMPLE_DATA_DIR_ENV_VAR, + SAMPLE_PICKLE_ENV_VAR, + ensure_cached_sample_data, + generate_sample_data_dir, + get_sample_data_dir, + get_sample_data_path, + write_sample_data, +) + +__all__ = [ + "CACHE_SCHEMA_VERSION", + "SAMPLE_DATA_DIR_ENV_VAR", + "SAMPLE_PICKLE_ENV_VAR", + "ensure_cached_sample_data", + "generate_sample_data_dir", + "get_sample_data_dir", + "get_sample_data_path", + "write_sample_data", +] diff --git a/schedview/testing/sample_data.py b/schedview/testing/sample_data.py new file mode 100644 index 00000000..914c6627 --- /dev/null +++ b/schedview/testing/sample_data.py @@ -0,0 +1,225 @@ +from __future__ import annotations + +import hashlib +import importlib.resources +import json +import lzma +import os +import pickle +import shutil +import sys +import warnings +from importlib.metadata import PackageNotFoundError, version +from pathlib import Path + +import numpy as np + +SAMPLE_DATA_DIR_ENV_VAR = "SCHEDVIEW_SAMPLE_DATA_DIR" +SAMPLE_PICKLE_ENV_VAR = "SCHED_PICKLE" +CACHE_SCHEMA_VERSION = 1 +SAMPLE_OPSIM_DB = "sample_opsim.db" +SAMPLE_REWARDS_H5 = "sample_rewards.h5" +SAMPLE_SCHEDULER_PICKLE = "sample_scheduler.pickle.xz" +MANIFEST_JSON = "manifest.json" +SAMPLE_DATA_FILE_NAMES = ( + SAMPLE_OPSIM_DB, + SAMPLE_REWARDS_H5, + SAMPLE_SCHEDULER_PICKLE, +) + + +def get_sample_data_dir() -> Path: + """Return the directory holding sample test data""" + override_dir = os.environ.get(SAMPLE_DATA_DIR_ENV_VAR) + if override_dir: + return Path(override_dir) + + root_package = __package__.split(".")[0] + return Path(str(importlib.resources.files(root_package).joinpath("data"))) + + + +def get_sample_data_path(file_name: str) -> Path: + """Return the path to a sample test data artifact""" + return get_sample_data_dir().joinpath(file_name) + + + +def _get_package_version(package_name: str) -> str: + try: + return version(package_name) + except PackageNotFoundError: + return "unknown" + + + +def _default_sample_date() -> str: + from astropy.time import Time + from rubin_scheduler.utils import SURVEY_START_MJD + + return Time(SURVEY_START_MJD, format="mjd").iso[:10] + + + +def _configure_generation_warnings() -> None: + warnings.filterwarnings( + "ignore", + module="astropy.time", + message="Numerical value without unit or explicit format passed to TimeDelta, assuming days", + ) + warnings.filterwarnings( + "ignore", + module="healpy", + message="divide by zero encountered in divide", + ) + warnings.filterwarnings( + "ignore", + module="healpy", + message="invalid value encountered in multiply", + ) + warnings.filterwarnings( + "ignore", + module="holoviews", + message="Discarding nonzero nanoseconds in conversion.", + ) + warnings.filterwarnings( + "ignore", + module="rubin_scheduler", + message="invalid value encountered in arcsin", + ) + warnings.filterwarnings( + "ignore", + module="rubin_scheduler", + message="All-NaN slice encountered", + ) + + + +def _manifest(date: str | None = None, duration: int | None = None) -> dict[str, object]: + resolved_date = _default_sample_date() if date is None else date + source_hash = hashlib.sha256(Path(__file__).read_bytes()).hexdigest() + return { + "cache_schema_version": CACHE_SCHEMA_VERSION, + "python": ".".join(str(part) for part in sys.version_info[:2]), + "rubin_scheduler": _get_package_version("rubin-scheduler"), + "rubin_sim": _get_package_version("rubin-sim"), + "date": resolved_date, + "duration_hours": duration, + "generator_source_hash": source_hash, + "file_names": list(SAMPLE_DATA_FILE_NAMES), + } + + + +def write_sample_data( + opsim_output_path: str | Path, + scheduler_output_path: str | Path, + rewards_output_path: str | Path, + *, + date: str | None = None, + duration: int | None = None, +) -> dict[str, Path]: + """Write sample test data artifacts""" + from astropy.time import Time + from rubin_scheduler.scheduler import sim_runner + from rubin_scheduler.scheduler.example import example_scheduler + from rubin_scheduler.scheduler.model_observatory import ModelObservatory + from rubin_scheduler.scheduler.utils import SchemaConverter + + _configure_generation_warnings() + + resolved_date = _default_sample_date() if date is None else date + opsim_output_path = Path(opsim_output_path) + scheduler_output_path = Path(scheduler_output_path) + rewards_output_path = Path(rewards_output_path) + + for output_path in (opsim_output_path, scheduler_output_path, rewards_output_path): + output_path.parent.mkdir(parents=True, exist_ok=True) + + observatory = ModelObservatory() + evening_mjd = Time(resolved_date).mjd + this_night = np.floor(observatory.almanac.sunsets["sunset"] + observatory.site.longitude / 360) == evening_mjd + sim_start_mjd = observatory.almanac.sunsets[this_night]["sun_n12_setting"][0] + sim_end_mjd = observatory.almanac.sunsets[this_night]["sunrise"][0] + sim_duration = duration / 24.0 if duration is not None else sim_end_mjd - sim_start_mjd + + observatory = ModelObservatory(mjd_start=sim_start_mjd) + scheduler = example_scheduler(mjd_start=sim_start_mjd) + scheduler.keep_rewards = True + + observatory, scheduler, observations, reward_df, obs_rewards = sim_runner( + observatory, + scheduler, + sim_start_mjd=sim_start_mjd, + sim_duration=sim_duration, + record_rewards=True, + ) + + SchemaConverter().obs2opsim(observations, filename=str(opsim_output_path)) + + with lzma.open(scheduler_output_path, "wb", format=lzma.FORMAT_XZ) as pickle_io: + pickle.dump((scheduler, scheduler.conditions), pickle_io) + + reward_df.to_hdf(str(rewards_output_path), key="reward_df") + obs_rewards.to_hdf(str(rewards_output_path), key="obs_rewards") + + return { + SAMPLE_OPSIM_DB: opsim_output_path, + SAMPLE_SCHEDULER_PICKLE: scheduler_output_path, + SAMPLE_REWARDS_H5: rewards_output_path, + } + + + +def generate_sample_data_dir( + output_dir: str | Path, + *, + date: str | None = None, + duration: int | None = None, +) -> Path: + """Generate a complete sample test data directory""" + output_dir = Path(output_dir) + output_dir.mkdir(parents=True, exist_ok=True) + write_sample_data( + output_dir.joinpath(SAMPLE_OPSIM_DB), + output_dir.joinpath(SAMPLE_SCHEDULER_PICKLE), + output_dir.joinpath(SAMPLE_REWARDS_H5), + date=date, + duration=duration, + ) + return output_dir + + + +def ensure_cached_sample_data( + cache_root: str | Path, + *, + date: str | None = None, + duration: int | None = None, +) -> Path: + """Return a cached directory of generated sample test data""" + cache_root = Path(cache_root) + cache_root.mkdir(parents=True, exist_ok=True) + + manifest = _manifest(date=date, duration=duration) + manifest_json = json.dumps(manifest, sort_keys=True) + digest = hashlib.sha256(manifest_json.encode("utf-8")).hexdigest()[:16] + cache_dir = cache_root.joinpath(digest) + manifest_path = cache_dir.joinpath(MANIFEST_JSON) + required_paths = [cache_dir.joinpath(file_name) for file_name in SAMPLE_DATA_FILE_NAMES] + + if manifest_path.exists() and all(path.exists() for path in required_paths): + cached_manifest = json.loads(manifest_path.read_text()) + if cached_manifest == manifest: + return cache_dir + + if cache_dir.exists(): + shutil.rmtree(cache_dir) + + generate_sample_data_dir( + cache_dir, + date=manifest["date"], + duration=duration, + ) + manifest_path.write_text(json.dumps(manifest, indent=2, sort_keys=True) + "\n") + return cache_dir diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 00000000..d7e033ec --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,63 @@ +from __future__ import annotations + +import os +from pathlib import Path + +import pytest + +from schedview.testing.sample_data import ( + SAMPLE_DATA_DIR_ENV_VAR, + SAMPLE_PICKLE_ENV_VAR, + SAMPLE_SCHEDULER_PICKLE, + ensure_cached_sample_data, +) + + + +def _resolve_sample_data_dir(root_path: Path) -> Path: + override_dir = os.environ.get(SAMPLE_DATA_DIR_ENV_VAR) + if override_dir: + sample_data_dir = Path(override_dir) + else: + cache_root = root_path.joinpath(".pytest_cache", "schedview-sample-data") + sample_data_dir = ensure_cached_sample_data(cache_root) + + if not sample_data_dir.exists(): + raise pytest.UsageError(f"Sample data directory does not exist: {sample_data_dir}") + + sample_pickle = sample_data_dir.joinpath(SAMPLE_SCHEDULER_PICKLE) + if not sample_pickle.exists(): + raise pytest.UsageError(f"Sample scheduler pickle does not exist: {sample_pickle}") + + os.environ[SAMPLE_DATA_DIR_ENV_VAR] = str(sample_data_dir) + os.environ[SAMPLE_PICKLE_ENV_VAR] = str(sample_pickle) + return sample_data_dir + + + +def pytest_configure(config: pytest.Config) -> None: + _resolve_sample_data_dir(Path(config.rootpath)) + + + +@pytest.fixture(scope="session") +def sample_data_dir(pytestconfig: pytest.Config) -> Path: + return _resolve_sample_data_dir(Path(pytestconfig.rootpath)) + + + +@pytest.fixture(scope="session") +def sample_opsim_path(sample_data_dir: Path) -> Path: + return sample_data_dir.joinpath("sample_opsim.db") + + + +@pytest.fixture(scope="session") +def sample_rewards_path(sample_data_dir: Path) -> Path: + return sample_data_dir.joinpath("sample_rewards.h5") + + + +@pytest.fixture(scope="session") +def sample_scheduler_pickle_path(sample_data_dir: Path) -> Path: + return sample_data_dir.joinpath(SAMPLE_SCHEDULER_PICKLE) From b5264b98b610a35d7e188b4138d3d590ba1239fb Mon Sep 17 00:00:00 2001 From: Eric Neilsen Date: Tue, 12 May 2026 11:05:10 -0500 Subject: [PATCH 04/10] API cleanup --- schedview/testing/__init__.py | 16 +---- schedview/testing/sample_data.py | 77 +++++++++++++---------- tests/conftest.py | 8 +-- util/sample_data/make_sample_test_data.py | 8 +-- 4 files changed, 50 insertions(+), 59 deletions(-) diff --git a/schedview/testing/__init__.py b/schedview/testing/__init__.py index 6380bb35..26822004 100644 --- a/schedview/testing/__init__.py +++ b/schedview/testing/__init__.py @@ -1,21 +1,7 @@ -from .sample_data import ( - CACHE_SCHEMA_VERSION, - SAMPLE_DATA_DIR_ENV_VAR, - SAMPLE_PICKLE_ENV_VAR, - ensure_cached_sample_data, - generate_sample_data_dir, - get_sample_data_dir, - get_sample_data_path, - write_sample_data, -) +from .sample_data import ensure_cached_sample_data, get_sample_data_path, write_sample_data __all__ = [ - "CACHE_SCHEMA_VERSION", - "SAMPLE_DATA_DIR_ENV_VAR", - "SAMPLE_PICKLE_ENV_VAR", "ensure_cached_sample_data", - "generate_sample_data_dir", - "get_sample_data_dir", "get_sample_data_path", "write_sample_data", ] diff --git a/schedview/testing/sample_data.py b/schedview/testing/sample_data.py index 914c6627..b6143806 100644 --- a/schedview/testing/sample_data.py +++ b/schedview/testing/sample_data.py @@ -16,19 +16,27 @@ SAMPLE_DATA_DIR_ENV_VAR = "SCHEDVIEW_SAMPLE_DATA_DIR" SAMPLE_PICKLE_ENV_VAR = "SCHED_PICKLE" -CACHE_SCHEMA_VERSION = 1 -SAMPLE_OPSIM_DB = "sample_opsim.db" -SAMPLE_REWARDS_H5 = "sample_rewards.h5" -SAMPLE_SCHEDULER_PICKLE = "sample_scheduler.pickle.xz" -MANIFEST_JSON = "manifest.json" -SAMPLE_DATA_FILE_NAMES = ( - SAMPLE_OPSIM_DB, - SAMPLE_REWARDS_H5, - SAMPLE_SCHEDULER_PICKLE, +_CACHE_SCHEMA_VERSION = 1 +_SAMPLE_OPSIM_DB = "sample_opsim.db" +_SAMPLE_REWARDS_H5 = "sample_rewards.h5" +_SAMPLE_SCHEDULER_PICKLE = "sample_scheduler.pickle.xz" +_MANIFEST_JSON = "manifest.json" +_SAMPLE_DATA_FILE_NAMES = ( + _SAMPLE_OPSIM_DB, + _SAMPLE_REWARDS_H5, + _SAMPLE_SCHEDULER_PICKLE, ) +__all__ = [ + "SAMPLE_DATA_DIR_ENV_VAR", + "SAMPLE_PICKLE_ENV_VAR", + "ensure_cached_sample_data", + "get_sample_data_path", + "write_sample_data", +] -def get_sample_data_dir() -> Path: + +def _get_sample_data_dir() -> Path: """Return the directory holding sample test data""" override_dir = os.environ.get(SAMPLE_DATA_DIR_ENV_VAR) if override_dir: @@ -41,15 +49,7 @@ def get_sample_data_dir() -> Path: def get_sample_data_path(file_name: str) -> Path: """Return the path to a sample test data artifact""" - return get_sample_data_dir().joinpath(file_name) - - - -def _get_package_version(package_name: str) -> str: - try: - return version(package_name) - except PackageNotFoundError: - return "unknown" + return _get_sample_data_dir().joinpath(file_name) @@ -98,15 +98,26 @@ def _configure_generation_warnings() -> None: def _manifest(date: str | None = None, duration: int | None = None) -> dict[str, object]: resolved_date = _default_sample_date() if date is None else date source_hash = hashlib.sha256(Path(__file__).read_bytes()).hexdigest() + + try: + rubin_scheduler_version = version("rubin-scheduler") + except PackageNotFoundError: + rubin_scheduler_version = "unknown" + + try: + rubin_sim_version = version("rubin-sim") + except PackageNotFoundError: + rubin_sim_version = "unknown" + return { - "cache_schema_version": CACHE_SCHEMA_VERSION, + "cache_schema_version": _CACHE_SCHEMA_VERSION, "python": ".".join(str(part) for part in sys.version_info[:2]), - "rubin_scheduler": _get_package_version("rubin-scheduler"), - "rubin_sim": _get_package_version("rubin-sim"), + "rubin_scheduler": rubin_scheduler_version, + "rubin_sim": rubin_sim_version, "date": resolved_date, "duration_hours": duration, "generator_source_hash": source_hash, - "file_names": list(SAMPLE_DATA_FILE_NAMES), + "file_names": list(_SAMPLE_DATA_FILE_NAMES), } @@ -164,14 +175,14 @@ def write_sample_data( obs_rewards.to_hdf(str(rewards_output_path), key="obs_rewards") return { - SAMPLE_OPSIM_DB: opsim_output_path, - SAMPLE_SCHEDULER_PICKLE: scheduler_output_path, - SAMPLE_REWARDS_H5: rewards_output_path, + _SAMPLE_OPSIM_DB: opsim_output_path, + _SAMPLE_SCHEDULER_PICKLE: scheduler_output_path, + _SAMPLE_REWARDS_H5: rewards_output_path, } -def generate_sample_data_dir( +def _generate_sample_data_dir( output_dir: str | Path, *, date: str | None = None, @@ -181,9 +192,9 @@ def generate_sample_data_dir( output_dir = Path(output_dir) output_dir.mkdir(parents=True, exist_ok=True) write_sample_data( - output_dir.joinpath(SAMPLE_OPSIM_DB), - output_dir.joinpath(SAMPLE_SCHEDULER_PICKLE), - output_dir.joinpath(SAMPLE_REWARDS_H5), + output_dir.joinpath(_SAMPLE_OPSIM_DB), + output_dir.joinpath(_SAMPLE_SCHEDULER_PICKLE), + output_dir.joinpath(_SAMPLE_REWARDS_H5), date=date, duration=duration, ) @@ -205,8 +216,8 @@ def ensure_cached_sample_data( manifest_json = json.dumps(manifest, sort_keys=True) digest = hashlib.sha256(manifest_json.encode("utf-8")).hexdigest()[:16] cache_dir = cache_root.joinpath(digest) - manifest_path = cache_dir.joinpath(MANIFEST_JSON) - required_paths = [cache_dir.joinpath(file_name) for file_name in SAMPLE_DATA_FILE_NAMES] + manifest_path = cache_dir.joinpath(_MANIFEST_JSON) + required_paths = [cache_dir.joinpath(file_name) for file_name in _SAMPLE_DATA_FILE_NAMES] if manifest_path.exists() and all(path.exists() for path in required_paths): cached_manifest = json.loads(manifest_path.read_text()) @@ -216,7 +227,7 @@ def ensure_cached_sample_data( if cache_dir.exists(): shutil.rmtree(cache_dir) - generate_sample_data_dir( + _generate_sample_data_dir( cache_dir, date=manifest["date"], duration=duration, diff --git a/tests/conftest.py b/tests/conftest.py index d7e033ec..13dd7ced 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -5,13 +5,9 @@ import pytest -from schedview.testing.sample_data import ( - SAMPLE_DATA_DIR_ENV_VAR, - SAMPLE_PICKLE_ENV_VAR, - SAMPLE_SCHEDULER_PICKLE, - ensure_cached_sample_data, -) +from schedview.testing.sample_data import SAMPLE_DATA_DIR_ENV_VAR, SAMPLE_PICKLE_ENV_VAR, ensure_cached_sample_data +SAMPLE_SCHEDULER_PICKLE = "sample_scheduler.pickle.xz" def _resolve_sample_data_dir(root_path: Path) -> Path: diff --git a/util/sample_data/make_sample_test_data.py b/util/sample_data/make_sample_test_data.py index bf2cbc81..cfcbea6b 100644 --- a/util/sample_data/make_sample_test_data.py +++ b/util/sample_data/make_sample_test_data.py @@ -1,8 +1,6 @@ import argparse -from schedview.testing.sample_data import _default_sample_date, write_sample_data - -DEFAULT_DATE = _default_sample_date() +from schedview.testing.sample_data import write_sample_data def make_sample_test_data(): @@ -28,8 +26,8 @@ def make_sample_test_data(): parser.add_argument( "--date", type=str, - default=DEFAULT_DATE, - help="Date of the night to simulate (YYYY-MM-DD).", + default=None, + help="Date of the night to simulate (YYYY-MM-DD). Defaults to the scheduler survey start night.", ) parser.add_argument( "--duration", From 701cb6f7907a789ae027ce8d427c2ddb7156306e Mon Sep 17 00:00:00 2001 From: Eric Neilsen Date: Tue, 12 May 2026 11:07:19 -0500 Subject: [PATCH 05/10] Remove rubin_sim dependency in sample data manifest --- schedview/testing/sample_data.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/schedview/testing/sample_data.py b/schedview/testing/sample_data.py index b6143806..836577d2 100644 --- a/schedview/testing/sample_data.py +++ b/schedview/testing/sample_data.py @@ -104,16 +104,10 @@ def _manifest(date: str | None = None, duration: int | None = None) -> dict[str, except PackageNotFoundError: rubin_scheduler_version = "unknown" - try: - rubin_sim_version = version("rubin-sim") - except PackageNotFoundError: - rubin_sim_version = "unknown" - return { "cache_schema_version": _CACHE_SCHEMA_VERSION, "python": ".".join(str(part) for part in sys.version_info[:2]), "rubin_scheduler": rubin_scheduler_version, - "rubin_sim": rubin_sim_version, "date": resolved_date, "duration_hours": duration, "generator_source_hash": source_hash, From 4b9cb660aeb8c8f3dc48c4ec942ec4d5155468a7 Mon Sep 17 00:00:00 2001 From: Eric Neilsen Date: Tue, 12 May 2026 11:13:39 -0500 Subject: [PATCH 06/10] black --- schedview/testing/sample_data.py | 11 +++-------- tests/conftest.py | 11 +++++------ 2 files changed, 8 insertions(+), 14 deletions(-) diff --git a/schedview/testing/sample_data.py b/schedview/testing/sample_data.py index 836577d2..c41fd01d 100644 --- a/schedview/testing/sample_data.py +++ b/schedview/testing/sample_data.py @@ -46,13 +46,11 @@ def _get_sample_data_dir() -> Path: return Path(str(importlib.resources.files(root_package).joinpath("data"))) - def get_sample_data_path(file_name: str) -> Path: """Return the path to a sample test data artifact""" return _get_sample_data_dir().joinpath(file_name) - def _default_sample_date() -> str: from astropy.time import Time from rubin_scheduler.utils import SURVEY_START_MJD @@ -60,7 +58,6 @@ def _default_sample_date() -> str: return Time(SURVEY_START_MJD, format="mjd").iso[:10] - def _configure_generation_warnings() -> None: warnings.filterwarnings( "ignore", @@ -94,7 +91,6 @@ def _configure_generation_warnings() -> None: ) - def _manifest(date: str | None = None, duration: int | None = None) -> dict[str, object]: resolved_date = _default_sample_date() if date is None else date source_hash = hashlib.sha256(Path(__file__).read_bytes()).hexdigest() @@ -115,7 +111,6 @@ def _manifest(date: str | None = None, duration: int | None = None) -> dict[str, } - def write_sample_data( opsim_output_path: str | Path, scheduler_output_path: str | Path, @@ -143,7 +138,9 @@ def write_sample_data( observatory = ModelObservatory() evening_mjd = Time(resolved_date).mjd - this_night = np.floor(observatory.almanac.sunsets["sunset"] + observatory.site.longitude / 360) == evening_mjd + this_night = ( + np.floor(observatory.almanac.sunsets["sunset"] + observatory.site.longitude / 360) == evening_mjd + ) sim_start_mjd = observatory.almanac.sunsets[this_night]["sun_n12_setting"][0] sim_end_mjd = observatory.almanac.sunsets[this_night]["sunrise"][0] sim_duration = duration / 24.0 if duration is not None else sim_end_mjd - sim_start_mjd @@ -175,7 +172,6 @@ def write_sample_data( } - def _generate_sample_data_dir( output_dir: str | Path, *, @@ -195,7 +191,6 @@ def _generate_sample_data_dir( return output_dir - def ensure_cached_sample_data( cache_root: str | Path, *, diff --git a/tests/conftest.py b/tests/conftest.py index 13dd7ced..48bf3327 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -5,7 +5,11 @@ import pytest -from schedview.testing.sample_data import SAMPLE_DATA_DIR_ENV_VAR, SAMPLE_PICKLE_ENV_VAR, ensure_cached_sample_data +from schedview.testing.sample_data import ( + SAMPLE_DATA_DIR_ENV_VAR, + SAMPLE_PICKLE_ENV_VAR, + ensure_cached_sample_data, +) SAMPLE_SCHEDULER_PICKLE = "sample_scheduler.pickle.xz" @@ -30,30 +34,25 @@ def _resolve_sample_data_dir(root_path: Path) -> Path: return sample_data_dir - def pytest_configure(config: pytest.Config) -> None: _resolve_sample_data_dir(Path(config.rootpath)) - @pytest.fixture(scope="session") def sample_data_dir(pytestconfig: pytest.Config) -> Path: return _resolve_sample_data_dir(Path(pytestconfig.rootpath)) - @pytest.fixture(scope="session") def sample_opsim_path(sample_data_dir: Path) -> Path: return sample_data_dir.joinpath("sample_opsim.db") - @pytest.fixture(scope="session") def sample_rewards_path(sample_data_dir: Path) -> Path: return sample_data_dir.joinpath("sample_rewards.h5") - @pytest.fixture(scope="session") def sample_scheduler_pickle_path(sample_data_dir: Path) -> Path: return sample_data_dir.joinpath(SAMPLE_SCHEDULER_PICKLE) From 6c02386a9c76c65c7d74c724d3911c4dc2b9fb3a Mon Sep 17 00:00:00 2001 From: Eric Neilsen Date: Tue, 12 May 2026 11:16:01 -0500 Subject: [PATCH 07/10] ruff and isort --- tests/test_scheduler_dashboard.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/test_scheduler_dashboard.py b/tests/test_scheduler_dashboard.py index c51c59ab..496123a8 100644 --- a/tests/test_scheduler_dashboard.py +++ b/tests/test_scheduler_dashboard.py @@ -28,17 +28,16 @@ # Objects to test instances against. from rubin_scheduler.scheduler.schedulers.core_scheduler import CoreScheduler -import schedview from schedview.app.scheduler_dashboard.scheduler_dashboard_app import ( SchedulerSnapshotDashboard, scheduler_app, ) from schedview.app.scheduler_dashboard.utils import get_sky_brightness_date_bounds -from schedview.testing.sample_data import get_sample_data_path # Schedview methods. from schedview.compute.scheduler import make_scheduler_summary_df from schedview.compute.survey import compute_maps +from schedview.testing.sample_data import get_sample_data_path TEST_PICKLE = str(get_sample_data_path("sample_scheduler.pickle.xz")) MJD_START = get_sky_brightness_date_bounds()[0] From 9cbd044ef1d2912c62b415112fecb2f2e63e3f31 Mon Sep 17 00:00:00 2001 From: Eric Neilsen Date: Tue, 12 May 2026 12:41:01 -0500 Subject: [PATCH 08/10] docstrings! --- schedview/testing/sample_data.py | 121 ++++++++++++++++++++++++++++--- 1 file changed, 110 insertions(+), 11 deletions(-) diff --git a/schedview/testing/sample_data.py b/schedview/testing/sample_data.py index c41fd01d..e2fa2aa9 100644 --- a/schedview/testing/sample_data.py +++ b/schedview/testing/sample_data.py @@ -13,6 +13,11 @@ from pathlib import Path import numpy as np +from astropy.time import Time +from rubin_scheduler.scheduler import sim_runner +from rubin_scheduler.scheduler.example import example_scheduler +from rubin_scheduler.scheduler.model_observatory import ModelObservatory +from rubin_scheduler.scheduler.utils import SchemaConverter SAMPLE_DATA_DIR_ENV_VAR = "SCHEDVIEW_SAMPLE_DATA_DIR" SAMPLE_PICKLE_ENV_VAR = "SCHED_PICKLE" @@ -37,7 +42,13 @@ def _get_sample_data_dir() -> Path: - """Return the directory holding sample test data""" + """Return the directory holding sample test data. + + Returns + ------- + directory : `pathlib.Path` + Directory containing the sample data artifacts. + """ override_dir = os.environ.get(SAMPLE_DATA_DIR_ENV_VAR) if override_dir: return Path(override_dir) @@ -47,11 +58,29 @@ def _get_sample_data_dir() -> Path: def get_sample_data_path(file_name: str) -> Path: - """Return the path to a sample test data artifact""" + """Return the path to a sample test data artifact. + + Parameters + ---------- + file_name : `str` + Basename of the sample data artifact. + + Returns + ------- + path : `pathlib.Path` + Path to the requested sample data artifact. + """ return _get_sample_data_dir().joinpath(file_name) def _default_sample_date() -> str: + """Return the default date used for sample-data generation. + + Returns + ------- + date : `str` + ISO date string for the scheduler survey start night. + """ from astropy.time import Time from rubin_scheduler.utils import SURVEY_START_MJD @@ -59,6 +88,11 @@ def _default_sample_date() -> str: def _configure_generation_warnings() -> None: + """Configure warning filters for sample-data generation. + + The filters suppress known noisy warnings emitted by dependencies during + scheduler simulation and artifact generation. + """ warnings.filterwarnings( "ignore", module="astropy.time", @@ -92,6 +126,21 @@ def _configure_generation_warnings() -> None: def _manifest(date: str | None = None, duration: int | None = None) -> dict[str, object]: + """Build the cache manifest for a sample-data generation request. + + Parameters + ---------- + date : `str`, optional + Date of the simulated night in ISO format. If `None`, use the + scheduler survey start night. + duration : `int`, optional + Number of hours to simulate. If `None`, simulate the full night. + + Returns + ------- + manifest : `dict` [`str`, `object`] + Manifest describing the generation inputs and expected artifact set. + """ resolved_date = _default_sample_date() if date is None else date source_hash = hashlib.sha256(Path(__file__).read_bytes()).hexdigest() @@ -119,13 +168,27 @@ def write_sample_data( date: str | None = None, duration: int | None = None, ) -> dict[str, Path]: - """Write sample test data artifacts""" - from astropy.time import Time - from rubin_scheduler.scheduler import sim_runner - from rubin_scheduler.scheduler.example import example_scheduler - from rubin_scheduler.scheduler.model_observatory import ModelObservatory - from rubin_scheduler.scheduler.utils import SchemaConverter - + """Generate and write the sample test data artifacts. + + Parameters + ---------- + opsim_output_path : `str` or `pathlib.Path` + Output path for the generated opsim database. + scheduler_output_path : `str` or `pathlib.Path` + Output path for the generated scheduler pickle. + rewards_output_path : `str` or `pathlib.Path` + Output path for the generated rewards file. + date : `str`, optional + Date of the simulated night in ISO format. If `None`, use the + scheduler survey start night. + duration : `int`, optional + Number of hours to simulate. If `None`, simulate the full night. + + Returns + ------- + output_paths : `dict` [`str`, `pathlib.Path`] + Mapping from sample artifact filename to the generated output path. + """ _configure_generation_warnings() resolved_date = _default_sample_date() if date is None else date @@ -178,7 +241,23 @@ def _generate_sample_data_dir( date: str | None = None, duration: int | None = None, ) -> Path: - """Generate a complete sample test data directory""" + """Generate a complete sample test data directory. + + Parameters + ---------- + output_dir : `str` or `pathlib.Path` + Directory in which the sample artifacts will be written. + date : `str`, optional + Date of the simulated night in ISO format. If `None`, use the + scheduler survey start night. + duration : `int`, optional + Number of hours to simulate. If `None`, simulate the full night. + + Returns + ------- + output_dir : `pathlib.Path` + Directory containing the generated sample data artifacts. + """ output_dir = Path(output_dir) output_dir.mkdir(parents=True, exist_ok=True) write_sample_data( @@ -197,7 +276,27 @@ def ensure_cached_sample_data( date: str | None = None, duration: int | None = None, ) -> Path: - """Return a cached directory of generated sample test data""" + """Return a cache directory containing generated sample test data. + + The cache directory name is derived from a hash of a manifest that + records the generation inputs and expected artifact set. + + Parameters + ---------- + cache_root : `str` or `pathlib.Path` + Directory under which hashed cache entries are created. + date : `str`, optional + Date of the simulated night in ISO format. If `None`, use the + scheduler survey start night. + duration : `int`, optional + Number of hours to simulate. If `None`, simulate the full night. + + Returns + ------- + cache_dir : `pathlib.Path` + Directory containing the generated sample data artifacts for the + requested inputs. + """ cache_root = Path(cache_root) cache_root.mkdir(parents=True, exist_ok=True) From ddd0dfa78668e482a515bdc1d6d9d808649e392f Mon Sep 17 00:00:00 2001 From: Eric Neilsen Date: Tue, 12 May 2026 12:53:53 -0500 Subject: [PATCH 09/10] more docstrings --- tests/conftest.py | 75 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) diff --git a/tests/conftest.py b/tests/conftest.py index 48bf3327..55c0413c 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -15,6 +15,25 @@ def _resolve_sample_data_dir(root_path: Path) -> Path: + """Resolve and validate the sample-data directory for the test session. + + Parameters + ---------- + root_path : `pathlib.Path` + Root directory of the pytest run. + + Returns + ------- + sample_data_dir : `pathlib.Path` + Directory containing the sample test data artifacts for this test + session. + + Raises + ------ + pytest.UsageError + Raised if the resolved sample data directory or scheduler pickle does + not exist. + """ override_dir = os.environ.get(SAMPLE_DATA_DIR_ENV_VAR) if override_dir: sample_data_dir = Path(override_dir) @@ -35,24 +54,80 @@ def _resolve_sample_data_dir(root_path: Path) -> Path: def pytest_configure(config: pytest.Config) -> None: + """Initialize sample-data environment variables before test collection. + + Parameters + ---------- + config : `pytest.Config` + Pytest configuration object for the current test run. + """ _resolve_sample_data_dir(Path(config.rootpath)) @pytest.fixture(scope="session") def sample_data_dir(pytestconfig: pytest.Config) -> Path: + """Return the sample-data directory for the test session. + + Parameters + ---------- + pytestconfig : `pytest.Config` + Pytest configuration object for the current test run. + + Returns + ------- + sample_data_dir : `pathlib.Path` + Directory containing the sample test data artifacts for this test + session. + """ return _resolve_sample_data_dir(Path(pytestconfig.rootpath)) @pytest.fixture(scope="session") def sample_opsim_path(sample_data_dir: Path) -> Path: + """Return the generated opsim sample-data path. + + Parameters + ---------- + sample_data_dir : `pathlib.Path` + Directory containing the sample test data artifacts. + + Returns + ------- + opsim_path : `pathlib.Path` + Path to the generated opsim database. + """ return sample_data_dir.joinpath("sample_opsim.db") @pytest.fixture(scope="session") def sample_rewards_path(sample_data_dir: Path) -> Path: + """Return the generated rewards sample-data path. + + Parameters + ---------- + sample_data_dir : `pathlib.Path` + Directory containing the sample test data artifacts. + + Returns + ------- + rewards_path : `pathlib.Path` + Path to the generated rewards file. + """ return sample_data_dir.joinpath("sample_rewards.h5") @pytest.fixture(scope="session") def sample_scheduler_pickle_path(sample_data_dir: Path) -> Path: + """Return the generated scheduler-pickle sample-data path. + + Parameters + ---------- + sample_data_dir : `pathlib.Path` + Directory containing the sample test data artifacts. + + Returns + ------- + scheduler_pickle_path : `pathlib.Path` + Path to the generated scheduler pickle. + """ return sample_data_dir.joinpath(SAMPLE_SCHEDULER_PICKLE) From 30e62dec4418546c747e006090e2ff15feddd827 Mon Sep 17 00:00:00 2001 From: Eric Neilsen Date: Tue, 12 May 2026 13:29:17 -0500 Subject: [PATCH 10/10] Log when data is being updated --- schedview/testing/sample_data.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/schedview/testing/sample_data.py b/schedview/testing/sample_data.py index e2fa2aa9..0dd97bf5 100644 --- a/schedview/testing/sample_data.py +++ b/schedview/testing/sample_data.py @@ -311,7 +311,14 @@ def ensure_cached_sample_data( cached_manifest = json.loads(manifest_path.read_text()) if cached_manifest == manifest: return cache_dir + regeneration_reason = "cache inputs changed" + else: + regeneration_reason = "cache entry missing or incomplete" + print( + f"Regenerating sample test data in {cache_dir} ({regeneration_reason}).", + flush=True, + ) if cache_dir.exists(): shutil.rmtree(cache_dir)