61 changes: 58 additions & 3 deletions CLAUDE.md
@@ -31,6 +31,10 @@ def template_network_transmission_paths(iasr_tables, scenario):
`_map_*`, `_merge_*`.
- **Single responsibility** — each helper does one thing. If a helper needs an internal
comment explaining a second step, it should probably be two helpers.
- **No hidden preconditions** — if a helper's correctness depends on a column
value or invariant set by the caller, set it explicitly inside the helper
instead. Preconditions that don't appear in the signature or name are easy
to break during refactoring.
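
A minimal sketch of the distinction (the helper and column names are hypothetical,
not from this codebase):

```python
import pandas as pd


# Fragile: correctness silently depends on the caller having sorted by year.
def _take_latest_fragile(limits: pd.DataFrame) -> pd.DataFrame:
    return limits.groupby("path_id").tail(1)


# Robust: the helper establishes the invariant it relies on, so a refactored
# caller can't break it.
def _take_latest(limits: pd.DataFrame) -> pd.DataFrame:
    return limits.sort_values("year").groupby("path_id").tail(1)
```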

### Clarity over cleverness

@@ -58,6 +62,38 @@ through testing. Let the code fail clearly when preconditions aren't met.

No backwards compatibility unless explicitly requested — update all call sites directly.

### Docstrings: I/O Example

Every non-trivial function should include an `I/O Example:` section in its docstring
showing a concrete input → output mapping. The goal is that a reviewer can understand
the function's behaviour from the docstring alone, without reading the body.

Conventions:

- Use a plain CSV-like table format for DataFrame inputs and outputs — no need to
wrap in runnable `csv_str_to_df` calls, since this is illustrative, not a doctest.
- Abbreviate long column names when they would otherwise overflow the line; point at
the relevant constants for the real names.
- Cover representative edge cases in the same example, with trailing `# comment`
notes on the rows that demonstrate each case.
- For trivial utility functions, one-line input → output cases are enough.

```python
def _duplicate_for_both_directions(limits: pd.DataFrame) -> pd.DataFrame:
"""Mirrors each row into a forward and a reverse entry.

I/O Example:
limits:
path_id timeslice capacity
Q1-NQ peak_demand 750

returns:
path_id direction timeslice capacity
Q1-NQ forward peak_demand 750
Q1-NQ reverse peak_demand 750
"""
```

## Logging

Logging surfaces things a user or operator wants to know during a template/translation
@@ -89,9 +125,12 @@ are `raise`d, not logged.
### What not to log

- The successful happy path inside a helper.
- Individual row contents — aggregate into a `sorted(...)` list and log once. The
fuzzy-match log in `helpers.py` is an exception: it logs each non-exact match
individually so the user can audit name-matching decisions one by one.
- Repeated firings of the same event — when one logical decision (a drop, a fallback, a
fuzzy match) would fire many times because of redundant rows (e.g. once per year per
option), aggregate into a `sorted(...)` list and log once. When each firing is a
*distinct* decision the user may want to audit (one log line per dropped option, one
per fuzzy match), per-row logging is fine. The fuzzy-match log in `helpers.py` is the
canonical example of the per-row case; see the sketch after this list.
- Anything readily inspected from the returned DataFrame.
- The same condition at multiple call sites — log once at the source where the cause
is visible.
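
A sketch of both patterns (the function names, variables, and messages are
illustrative, not the project's actual log calls):

```python
import logging

import pandas as pd

logger = logging.getLogger(__name__)


def _log_dropped_paths(limits: pd.DataFrame, unmodelled_mask: pd.Series) -> None:
    # Aggregated: one logical decision that would otherwise fire once per
    # year per option; collect, dedupe, sort, and log once.
    dropped = sorted(set(limits.loc[unmodelled_mask, "path_id"]))
    if dropped:
        logger.info("Dropped flow paths not in the modelled network: %s", dropped)


def _log_fuzzy_matches(non_exact_matches: list[tuple[str, str]]) -> None:
    # Per-row: each fuzzy match is a distinct decision the user may audit.
    for raw_name, matched_name in non_exact_matches:
        logger.info("Fuzzy matched '%s' to '%s'", raw_name, matched_name)
```
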
@@ -207,6 +246,19 @@ same readable form as every other expected DataFrame in the test).
Use this for collapsed/missing-data rows instead of `iloc` + `pd.isna` probes.
- Use `check_dtype=False` when type precision isn't critical (e.g. NaN columns)
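
For instance (illustrative frames; the point is the keyword, not the data):

```python
import pandas as pd
from pandas.testing import assert_frame_equal

expected = pd.DataFrame({"path_id": ["Q1-NQ"], "capacity": [750]})  # int64
result = pd.DataFrame({"path_id": ["Q1-NQ"], "capacity": [750.0]})  # float64

# Values match; only the dtype differs, which check_dtype=False tolerates.
assert_frame_equal(result, expected, check_dtype=False)
```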

### Integration tests

When a public orchestrator calls into per-module helpers that have their own
thorough tests, the orchestrator's integration test should verify wiring only —
not duplicate content checks. Assert:

- the expected output keys are present
- each output has the expected column set
- each output has the expected row count

Skip `assert_frame_equal`. Add a one-line comment stating that the detailed
content is covered by the per-module tests.
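
A sketch of the shape such a test takes (the orchestrator name, output keys,
columns, and row count below are hypothetical placeholders):

```python
def test_template_orchestrator_wiring(iasr_tables):
    # Wiring only: detailed content is covered by the per-module tests.
    outputs = create_templates(iasr_tables, scenario="Step Change")  # hypothetical API

    assert set(outputs) == {"flow_paths", "flow_path_expansion_costs"}
    assert list(outputs["flow_paths"].columns) == ["path_id", "direction", "capacity"]
    assert len(outputs["flow_paths"]) == 12
```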

## Development Environment

Use `uv` for package management. Key commands:
@@ -222,3 +274,6 @@ uv run pre-commit run --all-files # Run linters

- Only commit when explicitly requested
- Commit messages should focus on the "why" rather than the "what"
- Before staging, list the files you intend to commit and confirm with the user.
Local-only edits (dev configs, feature flag flips, experiment artefacts) often
sit alongside the real change and should not be swept into the commit.
1 change: 1 addition & 0 deletions pyproject.toml
@@ -88,6 +88,7 @@ select = [
[tool.hatch.build]
include = [
"src/ispypsa/**/*.py",
"src/ispypsa/**/*.yaml",
"src/ispypsa/data_fetch/manifests/**/*.txt",
"src/ispypsa/templater/manually_extracted_template_tables/**/*.csv",
]
33 changes: 33 additions & 0 deletions scripts/build_75_test_cache.py
@@ -0,0 +1,33 @@
"""One-off: build tests/test_workbook_table_cache/7.5 from the local 7.5 workbook.

Run with the new-format flag set:

ISPYPSA_USE_NEW_TABLE_FORMAT=true uv run python scripts/build_75_test_cache.py
"""

from pathlib import Path

from ispypsa.iasr_table_caching import build_local_cache


def main() -> None:
repo_root = Path(__file__).resolve().parent.parent
workbook_path = (
repo_root
/ "data"
/ "workbooks"
/ "7.5"
/ "Draft 2026 ISP Inputs and Assumptions workbook.xlsx"
)
cache_path = repo_root / "tests" / "test_workbook_table_cache" / "7.5"

if not workbook_path.exists():
raise FileNotFoundError(f"Workbook not found at: {workbook_path}")

cache_path.mkdir(parents=True, exist_ok=True)
build_local_cache(cache_path, workbook_path, "7.5")
print(f"Wrote cache CSVs to {cache_path}")


if __name__ == "__main__":
main()
6 changes: 4 additions & 2 deletions src/ispypsa/cli/dodo.py
@@ -208,7 +208,7 @@ def get_operational_pypsa_file():
@return_empty_list_if_no_config
def get_local_cache_files():
"""Get list of local cache files."""
return list_cache_files(get_parsed_workbook_cache())
return list_cache_files(get_parsed_workbook_cache(), config.iasr_workbook_version)


@return_empty_list_if_no_config
@@ -308,11 +308,13 @@ def build_parsed_workbook_cache() -> None:
if os.environ.get("ISPYPSA_TEST_MOCK_CACHE", "").lower() == "true":
# In test mode, just ensure cache directory exists and copy pre-existing files
parsed_workbook_cache.mkdir(parents=True, exist_ok=True)
# Copy any existing test cache files if they don't already exist
# Copy any existing test cache files if they don't already exist. The
# cache is partitioned by workbook version on disk.
test_cache_dir = (
Path(__file__).parent.parent.parent.parent
/ "tests"
/ "test_workbook_table_cache"
/ version
)
if test_cache_dir.exists():
for csv_file in test_cache_dir.glob("*.csv"):
13 changes: 13 additions & 0 deletions src/ispypsa/feature_flags.py
@@ -1,3 +1,4 @@
import os
from pathlib import Path

import yaml
@@ -6,3 +7,15 @@

with open(_flags_path) as f:
FEATURE_FLAGS = yaml.safe_load(f)

# Env-var overrides win over the YAML defaults. Used by tests that need to flip
# a flag for a subprocess CLI run, where monkeypatching the imported dict isn't
# an option.
_ENV_OVERRIDES = {
# FEATURE_FLAG_CLEANUP[use_new_table_format]: remove this entry.
"use_new_table_format": "ISPYPSA_USE_NEW_TABLE_FORMAT",
}
for _flag, _env_var in _ENV_OVERRIDES.items():
_value = os.environ.get(_env_var)
if _value is not None:
FEATURE_FLAGS[_flag] = _value.lower() == "true"
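
A usage sketch of the override from a test, mirroring the comment above (the
subprocess command reuses the invocation shown in `scripts/build_75_test_cache.py`;
everything else here is illustrative):

```python
import os
import subprocess

# Flip the flag for a subprocess CLI run, where monkeypatching the imported
# FEATURE_FLAGS dict would not reach the child process.
env = {**os.environ, "ISPYPSA_USE_NEW_TABLE_FORMAT": "true"}
subprocess.run(
    ["uv", "run", "python", "scripts/build_75_test_cache.py"],
    env=env,
    check=True,
)
```
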
1 change: 1 addition & 0 deletions src/ispypsa/feature_flags.yaml
@@ -1 +1,2 @@
# FEATURE_FLAG_CLEANUP[use_new_table_format]: remove this flag entry.
use_new_table_format: false