feat(csvable): add generic CSV reader/writer alongside dictable/fitsable

Jammy2211 · Jammy2211 · commit b6d24cb6cf4a · 2026-04-19T21:54:17.000+01:00
Adds `autoconf.csvable` as the third file-format I/O surface, matching
the shape of `autoconf.dictable` (JSON) and `autoconf.fitsable` (FITS).
`output_to_csv(rows, file_path, headers=None)` accepts list-of-dicts
(primary) or list-of-lists; with dict rows and `headers=None` the
header row is the union of keys across all rows so optional columns
are only included when populated. `list_from_csv(file_path)` is a thin
`csv.DictReader` wrapper returning an ordered list of dicts. Stdlib
`csv` only — no pandas dependency.

Extracted to enable `autolens.point.dataset` to drop its inline CSV
logic and delegate the file-level read/write here.
diff --git a/autoconf/__init__.py b/autoconf/__init__.py
@@ -1,3 +1,12 @@
+"""
+autoconf — configuration, serialization, and I/O helpers for the PyAuto ecosystem.
+
+File-format I/O surfaces:
+
+- :mod:`autoconf.dictable`  — JSON (``output_to_json`` / ``from_json``)
+- :mod:`autoconf.fitsable`  — FITS (``output_to_fits`` / ``ndarray_via_fits_from``)
+- :mod:`autoconf.csvable`   — CSV  (``output_to_csv`` / ``list_from_csv``)
+"""
 import sys
 
 if sys.version_info < (3, 12):
diff --git a/autoconf/csvable.py b/autoconf/csvable.py
@@ -0,0 +1,105 @@
+"""
+Generic CSV reader/writer helpers for the PyAuto ecosystem.
+
+Sits alongside :mod:`autoconf.dictable` (JSON) and :mod:`autoconf.fitsable`
+(FITS) as the third file-format I/O surface. The functions here are
+schema-agnostic — callers layer their own column conventions on top (see e.g.
+``autolens.point.dataset`` for the PointDataset schema layer).
+
+Only the standard-library ``csv`` module is used; there is no pandas
+dependency.
+"""
+from pathlib import Path
+from typing import Iterable, List, Optional, Sequence, Union
+
+import csv
+
+
+Row = Union[dict, Sequence]
+
+
+def output_to_csv(
+    rows: Iterable[Row],
+    file_path: Union[str, Path],
+    headers: Optional[List[str]] = None,
+):
+    """
+    Write ``rows`` to ``file_path`` as a CSV.
+
+    Parameters
+    ----------
+    rows
+        An iterable of dicts (``{column: value}``) or of sequences. The first
+        row decides which; with no rows only the header (if given) is written.
+    file_path
+        Destination path. Parent directories are created if missing.
+    headers
+        Optional explicit column list.
+
+        - For dict rows with ``headers=None``: the header row is the union of
+          keys across all rows in first-appearance order — a column is written
+          if *any* row populates it, and rows that omit the key get a blank cell.
+        - For dict rows with explicit ``headers``: the given columns are
+          used verbatim; extra keys in any row are dropped silently; missing
+          keys produce blanks.
+        - For sequence rows: ``headers`` is required (``ValueError`` if omitted).
+    """
+    rows = list(rows)
+
+    file_path = Path(file_path)
+    file_path.parent.mkdir(parents=True, exist_ok=True)
+
+    is_dict_rows = bool(rows) and isinstance(rows[0], dict)
+
+    if not rows:
+        with open(file_path, "w", newline="") as f:
+            if headers:
+                csv.writer(f).writerow(headers)
+        return
+
+    if is_dict_rows:
+        if headers is None:
+            headers = []
+            seen = set()
+            for row in rows:
+                for key in row:
+                    if key not in seen:
+                        seen.add(key)
+                        headers.append(key)
+
+        with open(file_path, "w", newline="") as f:
+            writer = csv.DictWriter(
+                f, fieldnames=headers, extrasaction="ignore"
+            )
+            writer.writeheader()
+            writer.writerows(rows)
+        return
+
+    if headers is None:
+        raise ValueError(
+            "output_to_csv: headers must be provided when rows are sequences "
+            "(not dicts); sequence rows carry no column names of their own."
+        )
+
+    with open(file_path, "w", newline="") as f:
+        writer = csv.writer(f)
+        writer.writerow(headers)
+        writer.writerows(rows)
+
+
+def list_from_csv(file_path: Union[str, Path]) -> List[dict]:
+    """
+    Read a CSV and return its rows as an ordered list of dicts.
+
+    Row order is preserved.  Within each row, keys are ordered to match the
+    header line (Python dicts are insertion-ordered and :class:`csv.DictReader`
+    inserts fields in ``fieldnames`` order), so callers that need the header
+    list can recover it with ``list(rows[0].keys())`` when at least one row
+    is present.
+
+    An empty CSV (no header line) and a header-only CSV (header line but no
+    data rows) both return an empty list.
+    """
+    with open(file_path, newline="") as f:
+        reader = csv.DictReader(f)
+        return list(reader)
diff --git a/test_autoconf/test_csvable.py b/test_autoconf/test_csvable.py
@@ -0,0 +1,120 @@
+import pytest
+
+from autoconf.csvable import list_from_csv, output_to_csv
+
+
+def test_round_trip__uniform_dict_rows(tmp_path):
+    rows = [
+        {"a": "1", "b": "x"},
+        {"a": "2", "b": "y"},
+        {"a": "3", "b": "z"},
+    ]
+    path = tmp_path / "uniform.csv"
+
+    output_to_csv(rows, path)
+    loaded = list_from_csv(path)
+
+    assert loaded == rows
+    assert list(loaded[0].keys()) == ["a", "b"]
+
+
+def test_round_trip__list_of_lists_with_explicit_headers(tmp_path):
+    headers = ["a", "b", "c"]
+    rows = [["1", "2", "3"], ["4", "5", "6"]]
+    path = tmp_path / "seq.csv"
+
+    output_to_csv(rows, path, headers=headers)
+    loaded = list_from_csv(path)
+
+    assert loaded == [
+        {"a": "1", "b": "2", "c": "3"},
+        {"a": "4", "b": "5", "c": "6"},
+    ]
+    assert list(loaded[0].keys()) == headers
+
+
+def test_flexible_headers__union_in_first_appearance_order(tmp_path):
+    rows = [
+        {"name": "s1", "y": "0.1", "x": "0.2"},
+        {"name": "s1", "y": "0.3", "x": "0.4", "flux": "1.0"},
+        {"name": "s2", "y": "0.5", "x": "0.6"},
+    ]
+    path = tmp_path / "flex.csv"
+
+    output_to_csv(rows, path)
+    loaded = list_from_csv(path)
+
+    assert list(loaded[0].keys()) == ["name", "y", "x", "flux"]
+    assert loaded[0]["flux"] == ""
+    assert loaded[1]["flux"] == "1.0"
+    assert loaded[2]["flux"] == ""
+
+
+def test_explicit_headers__drops_extra_keys(tmp_path):
+    rows = [
+        {"a": "1", "b": "x", "ignored": "skip"},
+        {"a": "2", "b": "y"},
+    ]
+    path = tmp_path / "drop.csv"
+
+    output_to_csv(rows, path, headers=["a", "b"])
+    loaded = list_from_csv(path)
+
+    assert loaded == [{"a": "1", "b": "x"}, {"a": "2", "b": "y"}]
+
+
+def test_explicit_headers__missing_key_is_blank(tmp_path):
+    rows = [{"a": "1", "b": "x"}, {"a": "2"}]
+    path = tmp_path / "missing.csv"
+
+    output_to_csv(rows, path, headers=["a", "b"])
+    loaded = list_from_csv(path)
+
+    assert loaded == [{"a": "1", "b": "x"}, {"a": "2", "b": ""}]
+
+
+def test_empty_rows_with_headers__header_only_round_trips_to_empty(tmp_path):
+    path = tmp_path / "empty.csv"
+
+    output_to_csv([], path, headers=["a", "b"])
+    loaded = list_from_csv(path)
+
+    assert loaded == []
+    with open(path) as f:
+        assert f.read().splitlines() == ["a,b"]
+
+
+def test_empty_rows_no_headers__writes_empty_file(tmp_path):
+    path = tmp_path / "nothing.csv"
+
+    output_to_csv([], path)
+    loaded = list_from_csv(path)
+
+    assert loaded == []
+    assert path.read_text() == ""
+
+
+def test_parent_directory_auto_created(tmp_path):
+    path = tmp_path / "new_dir" / "nested" / "out.csv"
+
+    output_to_csv([{"a": "1"}], path)
+
+    assert path.exists()
+    assert list_from_csv(path) == [{"a": "1"}]
+
+
+def test_row_order_and_header_order_preserved(tmp_path):
+    rows = [{"b": str(i), "a": str(i * 10)} for i in range(10)]
+    path = tmp_path / "order.csv"
+
+    output_to_csv(rows, path)
+    loaded = list_from_csv(path)
+
+    assert list(loaded[0].keys()) == ["b", "a"]
+    assert [r["b"] for r in loaded] == [str(i) for i in range(10)]
+    assert [r["a"] for r in loaded] == [str(i * 10) for i in range(10)]
+
+
+def test_list_of_lists_without_headers_raises(tmp_path):
+    with pytest.raises(ValueError, match="headers must be provided"):
+        output_to_csv([[1, 2, 3]], tmp_path / "bad.csv")