Skip to content

Commit b6d24cb

Browse files
Jammy2211Jammy2211
authored and committed
feat(csvable): add generic CSV reader/writer alongside dictable/fitsable
Adds `autoconf.csvable` as the third text-format I/O surface, matching the shape of `autoconf.dictable` (JSON) and `autoconf.fitsable` (FITS). `output_to_csv(rows, file_path, headers=None)` accepts list-of-dicts (primary) or list-of-lists; with dict rows and `headers=None` the header row is the union of keys across all rows so optional columns are only included when populated. `list_from_csv(file_path)` is a thin `csv.DictReader` wrapper returning an ordered list of dicts. Stdlib `csv` only — no pandas dependency. Extracted to enable `autolens.point.dataset` to drop its inline CSV logic and delegate the file-level read/write here.
1 parent e759045 commit b6d24cb

3 files changed

Lines changed: 234 additions & 0 deletions

File tree

autoconf/__init__.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,12 @@
1+
"""
2+
autoconf — configuration, serialization, and I/O helpers for the PyAuto ecosystem.
3+
4+
Text-format I/O surfaces:
5+
6+
- :mod:`autoconf.dictable` — JSON (``output_to_json`` / ``from_json``)
7+
- :mod:`autoconf.fitsable` — FITS (``output_to_fits`` / ``ndarray_via_fits_from``)
8+
- :mod:`autoconf.csvable` — CSV (``output_to_csv`` / ``list_from_csv``)
9+
"""
110
import sys
211

312
if sys.version_info < (3, 12):

autoconf/csvable.py

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
"""
2+
Generic CSV reader/writer helpers for the PyAuto ecosystem.
3+
4+
Sits alongside :mod:`autoconf.dictable` (JSON) and :mod:`autoconf.fitsable`
5+
(FITS) as the third text-format I/O surface. The functions here are schema
6+
agnostic — callers layer their own column conventions on top (see e.g.
7+
``autolens.point.dataset`` for the PointDataset schema layer).
8+
9+
Only the standard-library ``csv`` module is used; there is no pandas
10+
dependency.
11+
"""
12+
from pathlib import Path
13+
from typing import Iterable, List, Optional, Sequence, Union
14+
15+
import csv
16+
17+
18+
# A single CSV row: either a ``{column: value}`` dict or a plain sequence of
# cell values.
Row = Union[dict, Sequence]


def output_to_csv(
    rows: Iterable[Row],
    file_path: Union[str, Path],
    headers: Optional[List[str]] = None,
):
    """
    Write ``rows`` to ``file_path`` as a CSV.

    All input validation happens before the filesystem is touched, so a call
    that raises leaves no newly created directory and no truncated or
    partially written file behind.

    Parameters
    ----------
    rows
        Either a list of dicts (``{column: value}``) or a list of sequences.
        Any iterable is accepted; it is materialised into a list once up
        front. Dict rows and sequence rows must not be mixed.
    file_path
        Destination path. Parent directories are created if missing.
    headers
        Optional explicit column list.

        - For dict rows with ``headers=None``: the header row is the union
          of keys across all rows in first-appearance order — a column is
          written if *any* row populates it, and rows that omit the key
          get a blank cell.
        - For dict rows with explicit ``headers``: the given columns are
          used verbatim; extra keys in any row are dropped silently; missing
          keys produce blanks.
        - For sequence rows: ``headers`` is required.
        - For empty ``rows``: a header-only file is written when ``headers``
          is non-empty, otherwise an empty file.

    Raises
    ------
    ValueError
        If ``rows`` are sequences and ``headers`` is ``None``.
    TypeError
        If ``rows`` mixes dict rows with non-dict rows.
    """
    rows = list(rows)
    file_path = Path(file_path)

    # The first row decides which writer is used; every other row must agree.
    is_dict_rows = bool(rows) and isinstance(rows[0], dict)

    if rows and not is_dict_rows and headers is None:
        raise ValueError(
            "output_to_csv: headers must be provided when rows are sequences "
            "(not dicts); sequence rows carry no column names of their own."
        )

    # Mixed input would otherwise either crash halfway through writing or
    # silently emit a dict's keys as if they were cell values.
    if any(isinstance(row, dict) != is_dict_rows for row in rows):
        raise TypeError(
            "output_to_csv: rows must be all dicts or all sequences, "
            "not a mixture of both."
        )

    if is_dict_rows and headers is None:
        # dict.fromkeys de-duplicates while keeping first-appearance order.
        headers = list(dict.fromkeys(key for row in rows for key in row))

    file_path.parent.mkdir(parents=True, exist_ok=True)

    with open(file_path, "w", newline="") as f:
        if not rows:
            if headers:
                csv.writer(f).writerow(headers)
            return

        if is_dict_rows:
            writer = csv.DictWriter(
                f, fieldnames=headers, extrasaction="ignore"
            )
            writer.writeheader()
        else:
            writer = csv.writer(f)
            writer.writerow(headers)

        writer.writerows(rows)
88+
89+
90+
def list_from_csv(file_path: Union[str, Path]) -> List[dict]:
    """
    Load every data row of a CSV file as a dict keyed by the header line.

    The first line of the file supplies the column names. Rows are returned
    in file order, and each row's keys follow the order of the header line
    (:class:`csv.DictReader` fills each row in ``fieldnames`` order and
    Python dicts keep insertion order). With at least one row present, the
    header list is therefore ``list(rows[0].keys())``.

    A completely empty file and a file holding only a header line both
    produce an empty list.
    """
    with open(file_path, newline="") as handle:
        records = [*csv.DictReader(handle)]
    return records

test_autoconf/test_csvable.py

Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
import pytest
2+
3+
from autoconf.csvable import list_from_csv, output_to_csv
4+
5+
6+
def test_round_trip__uniform_dict_rows(tmp_path):
    """Dict rows that all share the same keys come back unchanged after a write/read."""
    csv_file = tmp_path / "uniform.csv"
    expected = [
        {"a": number, "b": letter} for number, letter in zip("123", "xyz")
    ]

    output_to_csv(expected, csv_file)
    result = list_from_csv(csv_file)

    assert result == expected
    # Column order follows the key order of the written dicts.
    assert list(result[0]) == ["a", "b"]
19+
20+
21+
def test_round_trip__list_of_lists_with_explicit_headers(tmp_path):
    """Sequence rows written with explicit headers are read back as dicts keyed by them."""
    columns = ["a", "b", "c"]
    values = [["1", "2", "3"], ["4", "5", "6"]]
    csv_file = tmp_path / "seq.csv"

    output_to_csv(values, csv_file, headers=columns)
    result = list_from_csv(csv_file)

    assert result == [dict(zip(columns, value_row)) for value_row in values]
    assert list(result[0]) == columns
34+
35+
36+
def test_flexible_headers__union_in_first_appearance_order(tmp_path):
    """A key present in only one row still becomes a column; other rows get a blank cell."""
    csv_file = tmp_path / "flex.csv"
    sparse_rows = [
        {"name": "s1", "y": "0.1", "x": "0.2"},
        {"name": "s1", "y": "0.3", "x": "0.4", "flux": "1.0"},
        {"name": "s2", "y": "0.5", "x": "0.6"},
    ]

    output_to_csv(sparse_rows, csv_file)
    result = list_from_csv(csv_file)

    # "flux" first appears in the second row, so it is appended last.
    assert list(result[0]) == ["name", "y", "x", "flux"]
    for index, expected_flux in enumerate(["", "1.0", ""]):
        assert result[index]["flux"] == expected_flux
51+
52+
53+
def test_explicit_headers__drops_extra_keys(tmp_path):
    """Keys not listed in the explicit headers are left out of the written file."""
    csv_file = tmp_path / "drop.csv"
    with_extra_key = {"a": "1", "b": "x", "ignored": "skip"}
    without_extra_key = {"a": "2", "b": "y"}

    output_to_csv([with_extra_key, without_extra_key], csv_file, headers=["a", "b"])
    result = list_from_csv(csv_file)

    assert result == [{"a": "1", "b": "x"}, {"a": "2", "b": "y"}]
64+
65+
66+
def test_explicit_headers__missing_key_is_blank(tmp_path):
    """A row lacking one of the explicit header keys is read back with an empty string there."""
    csv_file = tmp_path / "missing.csv"
    complete_row = {"a": "1", "b": "x"}
    partial_row = {"a": "2"}

    output_to_csv([complete_row, partial_row], csv_file, headers=["a", "b"])
    result = list_from_csv(csv_file)

    assert result == [{"a": "1", "b": "x"}, {"a": "2", "b": ""}]
74+
75+
76+
def test_empty_rows_with_headers__header_only_round_trips_to_empty(tmp_path):
    """No rows plus explicit headers writes just the header line, which reads back as no rows."""
    csv_file = tmp_path / "empty.csv"

    output_to_csv([], csv_file, headers=["a", "b"])

    assert list_from_csv(csv_file) == []
    # The file itself contains exactly one line: the header.
    written_lines = csv_file.read_text().splitlines()
    assert written_lines == ["a,b"]
85+
86+
87+
def test_empty_rows_no_headers__writes_empty_file(tmp_path):
    """No rows and no headers yields a zero-length file that reads back as no rows."""
    csv_file = tmp_path / "nothing.csv"

    output_to_csv([], csv_file)
    result = list_from_csv(csv_file)

    assert result == []
    assert csv_file.read_text() == ""
95+
96+
97+
def test_parent_directory_auto_created(tmp_path):
    """Writing to a path whose parent directories do not exist yet creates them."""
    csv_file = tmp_path / "new_dir" / "nested" / "out.csv"
    single_row = [{"a": "1"}]

    output_to_csv(single_row, csv_file)

    assert csv_file.exists()
    assert list_from_csv(csv_file) == [{"a": "1"}]
104+
105+
106+
def test_row_order_and_header_order_preserved(tmp_path):
    """Rows come back in written order and columns keep the dicts' key order."""
    indices = range(10)
    csv_file = tmp_path / "order.csv"
    # Keys are deliberately given as "b" then "a" to show the order is not sorted.
    records = [{"b": str(i), "a": str(i * 10)} for i in indices]

    output_to_csv(records, csv_file)
    result = list_from_csv(csv_file)

    assert list(result[0]) == ["b", "a"]
    assert [record["b"] for record in result] == [str(i) for i in indices]
    assert [record["a"] for record in result] == [str(i * 10) for i in indices]
116+
117+
118+
def test_list_of_lists_without_headers_raises(tmp_path):
    """Sequence rows without explicit headers are rejected with a ValueError."""
    target = tmp_path / "bad.csv"
    sequence_rows = [[1, 2, 3]]

    with pytest.raises(ValueError, match="headers must be provided"):
        output_to_csv(sequence_rows, target)

0 commit comments

Comments
 (0)