From 6cb3bfbd87e7c383a4db3be006561969d678149c Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 8 Apr 2026 11:46:09 +0000 Subject: [PATCH 1/5] Initial plan From 57ddd5574707ea65ba58350ab77ee825bd3da699 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 8 Apr 2026 11:56:48 +0000 Subject: [PATCH 2/5] Add files_size field to ls -F output - Add `files_size` to `VALUE_TRANSFORMATION_MAP` with `{value!S}` (humanized size) - Add `COMPUTED_FIELDS` list to distinguish computed fields from JSON schema fields - Add `compute_files_size()` function that sums sizes of all session files via glob - Compute `files_size` in `load_duct_runs` after loading each run - Add `files_size` to default `--fields` in `_create_ls_parser` - Update `test_schema.py` to exclude computed fields from schema equality check - Add tests for `compute_files_size`, `load_duct_runs`, and `ls` output Agent-Logs-Url: https://github.com/con/duct/sessions/829c6010-40e6-416a-99be-6f9361c06057 Co-authored-by: yarikoptic <39889+yarikoptic@users.noreply.github.com> --- src/con_duct/cli.py | 1 + src/con_duct/ls.py | 17 ++++++++++ test/test_ls.py | 76 +++++++++++++++++++++++++++++++++++++++++++++ test/test_schema.py | 5 +-- 4 files changed, 97 insertions(+), 2 deletions(-) diff --git a/src/con_duct/cli.py b/src/con_duct/cli.py index 258d00aa..4ca8600a 100644 --- a/src/con_duct/cli.py +++ b/src/con_duct/cli.py @@ -428,6 +428,7 @@ def _create_ls_parser() -> argparse.ArgumentParser: "exit_code", "wall_clock_time", "peak_rss", + "files_size", ], ) parser.add_argument( diff --git a/src/con_duct/ls.py b/src/con_duct/ls.py index 403eed6a..f91b83d8 100644 --- a/src/con_duct/ls.py +++ b/src/con_duct/ls.py @@ -3,6 +3,7 @@ import glob import json import logging +from pathlib import Path import re from types import ModuleType from typing import Any, Dict, List, Optional @@ -32,6 +33,7 @@ "average_vsz": "{value!S}", "end_time": "{value:.2f!N}", "exit_code": "{value!E}", + "files_size": "{value!S}", "memory_total": "{value!S}", "peak_pcpu": "{value:.2f!N}%", "peak_pmem": "{value:.2f!N}%", @@ -65,9 +67,23 @@ LS_FIELD_CHOICES: List[str] = ( list(VALUE_TRANSFORMATION_MAP.keys()) + NON_TRANSFORMED_FIELDS ) +COMPUTED_FIELDS: List[str] = ["files_size"] MINIMUM_SCHEMA_VERSION: str = "0.2.0" +def compute_files_size(prefix: str) -> int: + """Compute total size in bytes of all files for a given session prefix.""" + total = 0 + for path_str in glob.glob(glob.escape(prefix) + "*"): + path = Path(path_str) + if path.is_file(): + try: + total += path.stat().st_size + except OSError: + pass + return total + + def load_duct_runs( info_files: List[str], eval_filter: Optional[str] = None ) -> List[Dict[str, Any]]: @@ -87,6 +103,7 @@ def load_duct_runs( ) continue ensure_compliant_schema(this) + this["files_size"] = compute_files_size(this["prefix"]) if eval_filter is not None and not ( eval_results := eval(eval_filter, _flatten_dict(this), dict(re=re)) ): diff --git a/test/test_ls.py b/test/test_ls.py index 7d7dd232..6816043a 100644 --- a/test/test_ls.py +++ b/test/test_ls.py @@ -15,6 +15,7 @@ MINIMUM_SCHEMA_VERSION, _flatten_dict, _restrict_row, + compute_files_size, ensure_compliant_schema, load_duct_runs, ls, @@ -164,6 +165,49 @@ def side_effect(filename: str) -> Any: assert "Skipping empty file" in caplog.text +def test_compute_files_size_sums_all_files() -> None: + """Test that compute_files_size sums sizes of all files with the given prefix.""" + with tempfile.TemporaryDirectory() as tmpdir: + prefix = os.path.join(tmpdir, "run_") + # Create files with known sizes + for suffix, content in [("stdout", b"hello"), ("stderr", b"world!"), ("info.json", b"{}...")]: + with open(f"{prefix}{suffix}", "wb") as f: + f.write(content) + expected = sum(len(c) for c in [b"hello", b"world!", b"{}..."]) + assert compute_files_size(prefix) == expected + + +def test_compute_files_size_empty_prefix() -> None: + """Test that compute_files_size returns 0 when no files match the prefix.""" + with tempfile.TemporaryDirectory() as tmpdir: + prefix = os.path.join(tmpdir, "nonexistent_") + assert compute_files_size(prefix) == 0 + + +def test_load_duct_runs_includes_files_size() -> None: + """Test that load_duct_runs populates files_size for each run.""" + with tempfile.TemporaryDirectory() as tmpdir: + prefix = os.path.join(tmpdir, "run_") + info_path = f"{prefix}info.json" + with open(info_path, "w") as f: + json.dump( + { + "schema_version": MINIMUM_SCHEMA_VERSION, + "prefix": prefix, + "execution_summary": {}, + "message": "", + }, + f, + ) + # Create a sibling file to count toward files_size + with open(f"{prefix}stdout", "w") as f: + f.write("some output") + result = load_duct_runs([info_path]) + assert len(result) == 1 + assert "files_size" in result[0] + assert result[0]["files_size"] > 0 + + class TestLS(unittest.TestCase): def setUp(self) -> None: """Create a temporary directory and test files.""" @@ -389,3 +433,35 @@ def test_ls_reverse(self) -> None: prefixes_reversed = [row["prefix"] for row in parsed_reversed] assert prefixes_reversed == list(reversed(prefixes_normal)) + + def test_ls_files_size_in_output(self) -> None: + """Test that files_size field appears in ls output and is humanized.""" + args = argparse.Namespace( + paths=[os.path.join(self.temp_dir.name, "file1_info.json")], + colors=False, + fields=["files_size"], + eval_filter=None, + format="json", + func=ls, + reverse=False, + ) + result = self._run_ls(["file1_info.json"], "json", args) + parsed = json.loads(result) + assert len(parsed) == 1 + assert "files_size" in parsed[0] + + def test_ls_files_size_humanized_in_summaries(self) -> None: + """Test that files_size is humanized in summaries output.""" + args = argparse.Namespace( + paths=[os.path.join(self.temp_dir.name, "file1_info.json")], + colors=False, + fields=["files_size"], + eval_filter=None, + format="summaries", + func=ls, + reverse=False, + ) + result = self._run_ls(["file1_info.json"], "summaries", args) + assert "Files Size:" in result + # Should show humanized size (Bytes or kB etc.) not raw integer + assert any(unit in result for unit in ["Byte", "kB", "MB", "GB"]) diff --git a/test/test_schema.py b/test/test_schema.py index 158abb91..bb34a5bd 100644 --- a/test/test_schema.py +++ b/test/test_schema.py @@ -3,7 +3,7 @@ from pathlib import Path from utils import run_duct_command from con_duct._constants import SUFFIXES -from con_duct.ls import LS_FIELD_CHOICES, _flatten_dict +from con_duct.ls import COMPUTED_FIELDS, LS_FIELD_CHOICES, _flatten_dict def test_info_fields(temp_output_dir: str) -> None: @@ -29,4 +29,5 @@ def test_info_fields(temp_output_dir: str) -> None: actual_info_schema = _flatten_dict(json.loads(info_file.read_text())).keys() os.remove(info_file) - assert set(actual_info_schema) == set(LS_FIELD_CHOICES) + schema_field_choices = [f for f in LS_FIELD_CHOICES if f not in COMPUTED_FIELDS] + assert set(actual_info_schema) == set(schema_field_choices) From 19ecc38abdab70dbef1cdd48e34889863410def4 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 8 Apr 2026 11:59:03 +0000 Subject: [PATCH 3/5] Address code review feedback: improve docstring and test clarity Agent-Logs-Url: https://github.com/con/duct/sessions/829c6010-40e6-416a-99be-6f9361c06057 Co-authored-by: yarikoptic <39889+yarikoptic@users.noreply.github.com> --- src/con_duct/ls.py | 5 ++++- test/test_ls.py | 4 ++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/con_duct/ls.py b/src/con_duct/ls.py index f91b83d8..cc2c73be 100644 --- a/src/con_duct/ls.py +++ b/src/con_duct/ls.py @@ -72,7 +72,10 @@ def compute_files_size(prefix: str) -> int: - """Compute total size in bytes of all files for a given session prefix.""" + """Compute total size in bytes of all files for a given session prefix. + + Files that cannot be accessed (e.g. due to permissions) are silently skipped. + """ total = 0 for path_str in glob.glob(glob.escape(prefix) + "*"): path = Path(path_str) diff --git a/test/test_ls.py b/test/test_ls.py index 6816043a..b1e44ced 100644 --- a/test/test_ls.py +++ b/test/test_ls.py @@ -169,8 +169,8 @@ def test_compute_files_size_sums_all_files() -> None: """Test that compute_files_size sums sizes of all files with the given prefix.""" with tempfile.TemporaryDirectory() as tmpdir: prefix = os.path.join(tmpdir, "run_") - # Create files with known sizes - for suffix, content in [("stdout", b"hello"), ("stderr", b"world!"), ("info.json", b"{}...")]: + # Use arbitrary suffixes; compute_files_size globs all files with the prefix + for suffix, content in [("a", b"hello"), ("b", b"world!"), ("c", b"{}...")]: with open(f"{prefix}{suffix}", "wb") as f: f.write(content) expected = sum(len(c) for c in [b"hello", b"world!", b"{}..."]) From ca997baf30a89c529b26f367fcd834e6014ad820 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 8 Apr 2026 12:16:52 +0000 Subject: [PATCH 4/5] Address review: debug log on OSError, consolidate files_size tests into existing tests Agent-Logs-Url: https://github.com/con/duct/sessions/74a1b4cf-2d72-4835-af90-44d1e11414f3 Co-authored-by: yarikoptic <39889+yarikoptic@users.noreply.github.com> --- src/con_duct/ls.py | 4 +-- test/test_ls.py | 71 +++++----------------------------------------- 2 files changed, 9 insertions(+), 66 deletions(-) diff --git a/src/con_duct/ls.py b/src/con_duct/ls.py index cc2c73be..19fb32f7 100644 --- a/src/con_duct/ls.py +++ b/src/con_duct/ls.py @@ -82,8 +82,8 @@ def compute_files_size(prefix: str) -> int: if path.is_file(): try: total += path.stat().st_size - except OSError: - pass + except OSError as e: + lgr.debug("Could not get size of %s: %s", path_str, e) return total diff --git a/test/test_ls.py b/test/test_ls.py index b1e44ced..392e645d 100644 --- a/test/test_ls.py +++ b/test/test_ls.py @@ -31,6 +31,7 @@ def test_load_duct_runs_sanity() -> None: result = load_duct_runs(["/test/path_info.json"]) assert len(result) == 1 assert result[0]["prefix"] == "/test/path_" + assert "files_size" in result[0] def test_load_duct_runs_skips_unsupported_schema() -> None: @@ -175,37 +176,8 @@ def test_compute_files_size_sums_all_files() -> None: f.write(content) expected = sum(len(c) for c in [b"hello", b"world!", b"{}..."]) assert compute_files_size(prefix) == expected - - -def test_compute_files_size_empty_prefix() -> None: - """Test that compute_files_size returns 0 when no files match the prefix.""" - with tempfile.TemporaryDirectory() as tmpdir: - prefix = os.path.join(tmpdir, "nonexistent_") - assert compute_files_size(prefix) == 0 - - -def test_load_duct_runs_includes_files_size() -> None: - """Test that load_duct_runs populates files_size for each run.""" - with tempfile.TemporaryDirectory() as tmpdir: - prefix = os.path.join(tmpdir, "run_") - info_path = f"{prefix}info.json" - with open(info_path, "w") as f: - json.dump( - { - "schema_version": MINIMUM_SCHEMA_VERSION, - "prefix": prefix, - "execution_summary": {}, - "message": "", - }, - f, - ) - # Create a sibling file to count toward files_size - with open(f"{prefix}stdout", "w") as f: - f.write("some output") - result = load_duct_runs([info_path]) - assert len(result) == 1 - assert "files_size" in result[0] - assert result[0]["files_size"] > 0 + # Non-existent prefix returns 0 + assert compute_files_size(os.path.join(tmpdir, "nonexistent_")) == 0 class TestLS(unittest.TestCase): @@ -262,7 +234,7 @@ def _run_ls( args = argparse.Namespace( paths=[os.path.join(self.temp_dir.name, path) for path in paths], colors=False, - fields=["prefix", "schema_version"], + fields=["prefix", "schema_version", "files_size"], eval_filter=None, format=fmt, func=ls, @@ -287,6 +259,8 @@ def test_ls_sanity(self) -> None: ] assert len(prefixes) == 1 assert any("file1" in p for p in prefixes) + assert "Files Size:" in result + assert any(unit in result for unit in ["Byte", "kB", "MB", "GB"]) def test_ls_with_filter(self) -> None: """Basic sanity test to ensure ls() runs without crashing.""" @@ -385,6 +359,7 @@ def test_ls_json_output(self) -> None: parsed = json.loads(result) assert len(parsed) == 1 assert "prefix" in parsed[0] + assert "files_size" in parsed[0] def test_ls_json_pp_output(self) -> None: """Test pretty-printed JSON output format.""" @@ -433,35 +408,3 @@ def test_ls_reverse(self) -> None: prefixes_reversed = [row["prefix"] for row in parsed_reversed] assert prefixes_reversed == list(reversed(prefixes_normal)) - - def test_ls_files_size_in_output(self) -> None: - """Test that files_size field appears in ls output and is humanized.""" - args = argparse.Namespace( - paths=[os.path.join(self.temp_dir.name, "file1_info.json")], - colors=False, - fields=["files_size"], - eval_filter=None, - format="json", - func=ls, - reverse=False, - ) - result = self._run_ls(["file1_info.json"], "json", args) - parsed = json.loads(result) - assert len(parsed) == 1 - assert "files_size" in parsed[0] - - def test_ls_files_size_humanized_in_summaries(self) -> None: - """Test that files_size is humanized in summaries output.""" - args = argparse.Namespace( - paths=[os.path.join(self.temp_dir.name, "file1_info.json")], - colors=False, - fields=["files_size"], - eval_filter=None, - format="summaries", - func=ls, - reverse=False, - ) - result = self._run_ls(["file1_info.json"], "summaries", args) - assert "Files Size:" in result - # Should show humanized size (Bytes or kB etc.) not raw integer - assert any(unit in result for unit in ["Byte", "kB", "MB", "GB"]) From c19eaf94a25144771264e82e3dda230649f9b21b Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 8 Apr 2026 12:20:15 +0000 Subject: [PATCH 5/5] Upgrade OSError log level to WARNING in compute_files_size Agent-Logs-Url: https://github.com/con/duct/sessions/3a36c202-bdc1-4d54-8469-0e8c2c988d2f Co-authored-by: yarikoptic <39889+yarikoptic@users.noreply.github.com> --- src/con_duct/ls.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/con_duct/ls.py b/src/con_duct/ls.py index 19fb32f7..fd9fbba1 100644 --- a/src/con_duct/ls.py +++ b/src/con_duct/ls.py @@ -83,7 +83,7 @@ def compute_files_size(prefix: str) -> int: try: total += path.stat().st_size except OSError as e: - lgr.debug("Could not get size of %s: %s", path_str, e) + lgr.warning("Could not get size of %s: %s", path_str, e) return total