def compute_files_size(prefix: str) -> int:
    """Compute total size in bytes of all files for a given session prefix.

    Every path matching ``prefix*`` is considered. The prefix is escaped
    first, so glob metacharacters it may contain (``*``, ``?``, ``[``) are
    matched literally. Only regular files (or symlinks resolving to regular
    files) contribute to the total; directories and other entries are ignored.

    Files that cannot be inspected (e.g. due to permissions, or because they
    disappeared after being listed) are skipped and a warning is logged.

    Args:
        prefix: Path prefix shared by the session's output files.

    Returns:
        Sum of the sizes of all matching regular files, or 0 if none match.
    """
    total = 0
    for path_str in glob.glob(glob.escape(prefix) + "*"):
        path = Path(path_str)
        try:
            # Keep the is_file() check inside the try block as well: depending
            # on the Python version it can itself raise OSError (for example
            # EACCES on a listable-but-not-searchable directory), and that
            # must not abort the whole listing.
            if path.is_file():
                total += path.stat().st_size
        except OSError as e:
            lgr.warning("Could not get size of %s: %s", path_str, e)
    return total
directory and test files.""" @@ -218,7 +234,7 @@ def _run_ls( args = argparse.Namespace( paths=[os.path.join(self.temp_dir.name, path) for path in paths], colors=False, - fields=["prefix", "schema_version"], + fields=["prefix", "schema_version", "files_size"], eval_filter=None, format=fmt, func=ls, @@ -243,6 +259,8 @@ def test_ls_sanity(self) -> None: ] assert len(prefixes) == 1 assert any("file1" in p for p in prefixes) + assert "Files Size:" in result + assert any(unit in result for unit in ["Byte", "kB", "MB", "GB"]) def test_ls_with_filter(self) -> None: """Basic sanity test to ensure ls() runs without crashing.""" @@ -341,6 +359,7 @@ def test_ls_json_output(self) -> None: parsed = json.loads(result) assert len(parsed) == 1 assert "prefix" in parsed[0] + assert "files_size" in parsed[0] def test_ls_json_pp_output(self) -> None: """Test pretty-printed JSON output format.""" diff --git a/test/test_schema.py b/test/test_schema.py index 158abb91..bb34a5bd 100644 --- a/test/test_schema.py +++ b/test/test_schema.py @@ -3,7 +3,7 @@ from pathlib import Path from utils import run_duct_command from con_duct._constants import SUFFIXES -from con_duct.ls import LS_FIELD_CHOICES, _flatten_dict +from con_duct.ls import COMPUTED_FIELDS, LS_FIELD_CHOICES, _flatten_dict def test_info_fields(temp_output_dir: str) -> None: @@ -29,4 +29,5 @@ def test_info_fields(temp_output_dir: str) -> None: actual_info_schema = _flatten_dict(json.loads(info_file.read_text())).keys() os.remove(info_file) - assert set(actual_info_schema) == set(LS_FIELD_CHOICES) + schema_field_choices = [f for f in LS_FIELD_CHOICES if f not in COMPUTED_FIELDS] + assert set(actual_info_schema) == set(schema_field_choices)