Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/con_duct/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -428,6 +428,7 @@ def _create_ls_parser() -> argparse.ArgumentParser:
"exit_code",
"wall_clock_time",
"peak_rss",
"files_size",
],
)
parser.add_argument(
Expand Down
20 changes: 20 additions & 0 deletions src/con_duct/ls.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import glob
import json
import logging
from pathlib import Path
import re
from types import ModuleType
from typing import Any, Dict, List, Optional
Expand Down Expand Up @@ -32,6 +33,7 @@
"average_vsz": "{value!S}",
"end_time": "{value:.2f!N}",
"exit_code": "{value!E}",
"files_size": "{value!S}",
"memory_total": "{value!S}",
"peak_pcpu": "{value:.2f!N}%",
"peak_pmem": "{value:.2f!N}%",
Expand Down Expand Up @@ -65,9 +67,26 @@
# Every field name that can be selected for display: the keys that have a
# formatting rule in VALUE_TRANSFORMATION_MAP plus the fields shown as-is.
LS_FIELD_CHOICES: List[str] = (
list(VALUE_TRANSFORMATION_MAP.keys()) + NON_TRANSFORMED_FIELDS
)
# Fields that are derived when runs are loaded rather than read from the
# info file itself ("files_size" is filled in by load_duct_runs via
# compute_files_size).
COMPUTED_FIELDS: List[str] = ["files_size"]
# NOTE(review): presumably the oldest info-file schema version that is still
# accepted when loading runs -- confirm against the loader's version check.
MINIMUM_SCHEMA_VERSION: str = "0.2.0"


def compute_files_size(prefix: str) -> int:
    """Compute total size in bytes of all regular files for a session prefix.

    Every path matching ``<prefix>*`` is considered; glob metacharacters in
    *prefix* are escaped so they are matched literally.  Symlinks are
    followed, and only regular files contribute to the total (directories
    and other non-regular entries are ignored).

    Paths that no longer exist when inspected (e.g. a dangling symlink or a
    file removed concurrently) are skipped.  Any other ``OSError`` (e.g. a
    permission problem) is logged as a warning and the path is skipped, so
    this function never raises because of a single unreadable entry.

    Args:
        prefix: Session output prefix, as recorded in the info file.

    Returns:
        Sum of ``st_size`` over all matching regular files; 0 if none match.
    """
    from stat import S_ISREG

    total = 0
    for path_str in glob.glob(glob.escape(prefix) + "*"):
        try:
            # One stat() call provides both the file type and the size.
            # Keeping it inside the try matters: Path.is_file() re-raises
            # errors such as EACCES on Python < 3.14, which would otherwise
            # abort the whole listing.
            st = Path(path_str).stat()
        except FileNotFoundError:
            # Dangling symlink or file removed after globbing: nothing to add.
            continue
        except OSError as e:
            lgr.warning("Could not get size of %s: %s", path_str, e)
            continue
        if S_ISREG(st.st_mode):
            total += st.st_size
    return total


def load_duct_runs(
info_files: List[str], eval_filter: Optional[str] = None
) -> List[Dict[str, Any]]:
Expand All @@ -87,6 +106,7 @@ def load_duct_runs(
)
continue
ensure_compliant_schema(this)
this["files_size"] = compute_files_size(this["prefix"])
if eval_filter is not None and not (
eval_results := eval(eval_filter, _flatten_dict(this), dict(re=re))
):
Expand Down
21 changes: 20 additions & 1 deletion test/test_ls.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
MINIMUM_SCHEMA_VERSION,
_flatten_dict,
_restrict_row,
compute_files_size,
ensure_compliant_schema,
load_duct_runs,
ls,
Expand All @@ -30,6 +31,7 @@ def test_load_duct_runs_sanity() -> None:
result = load_duct_runs(["/test/path_info.json"])
assert len(result) == 1
assert result[0]["prefix"] == "/test/path_"
assert "files_size" in result[0]


def test_load_duct_runs_skips_unsupported_schema() -> None:
Expand Down Expand Up @@ -164,6 +166,20 @@ def side_effect(filename: str) -> Any:
assert "Skipping empty file" in caplog.text


def test_compute_files_size_sums_all_files() -> None:
    """compute_files_size adds up every file sharing the prefix; no match gives 0."""
    payloads = [b"hello", b"world!", b"{}..."]
    with tempfile.TemporaryDirectory() as tmpdir:
        prefix = os.path.join(tmpdir, "run_")
        # Suffixes are arbitrary: anything starting with the prefix is counted.
        for suffix, payload in zip("abc", payloads):
            with open(f"{prefix}{suffix}", "wb") as fh:
                fh.write(payload)
        assert compute_files_size(prefix) == sum(map(len, payloads))
        # A prefix that matches nothing yields a total of zero.
        missing_prefix = os.path.join(tmpdir, "nonexistent_")
        assert compute_files_size(missing_prefix) == 0


class TestLS(unittest.TestCase):
def setUp(self) -> None:
"""Create a temporary directory and test files."""
Expand Down Expand Up @@ -218,7 +234,7 @@ def _run_ls(
args = argparse.Namespace(
paths=[os.path.join(self.temp_dir.name, path) for path in paths],
colors=False,
fields=["prefix", "schema_version"],
fields=["prefix", "schema_version", "files_size"],
eval_filter=None,
format=fmt,
func=ls,
Expand All @@ -243,6 +259,8 @@ def test_ls_sanity(self) -> None:
]
assert len(prefixes) == 1
assert any("file1" in p for p in prefixes)
assert "Files Size:" in result
assert any(unit in result for unit in ["Byte", "kB", "MB", "GB"])

def test_ls_with_filter(self) -> None:
"""Basic sanity test to ensure ls() runs without crashing."""
Expand Down Expand Up @@ -341,6 +359,7 @@ def test_ls_json_output(self) -> None:
parsed = json.loads(result)
assert len(parsed) == 1
assert "prefix" in parsed[0]
assert "files_size" in parsed[0]

def test_ls_json_pp_output(self) -> None:
"""Test pretty-printed JSON output format."""
Expand Down
5 changes: 3 additions & 2 deletions test/test_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from pathlib import Path
from utils import run_duct_command
from con_duct._constants import SUFFIXES
from con_duct.ls import LS_FIELD_CHOICES, _flatten_dict
from con_duct.ls import COMPUTED_FIELDS, LS_FIELD_CHOICES, _flatten_dict


def test_info_fields(temp_output_dir: str) -> None:
Expand All @@ -29,4 +29,5 @@ def test_info_fields(temp_output_dir: str) -> None:
actual_info_schema = _flatten_dict(json.loads(info_file.read_text())).keys()
os.remove(info_file)

assert set(actual_info_schema) == set(LS_FIELD_CHOICES)
schema_field_choices = [f for f in LS_FIELD_CHOICES if f not in COMPUTED_FIELDS]
assert set(actual_info_schema) == set(schema_field_choices)