improve testing harness

Teagan42 · Teagan42 · commit 0e329c0a6f46 · 2025-10-23T07:49:13.000-06:00
diff --git a/benches/harness.py b/benches/harness.py
@@ -113,8 +113,18 @@ async def run_bench(config_path: str):
         )
         for i in range(cfg.load.get('concurrency', 1))
     ]
-    await asyncio.gather(*workers)
-    mon.stop()
+    try:
+        await asyncio.gather(*workers)
+    except KeyboardInterrupt:
+        # ensure monitor and workers are stopped on Ctrl-C
+        for w in workers:
+            w.cancel()
+        raise
+    finally:
+        try:
+            mon.stop()
+        except Exception:
+            pass
 
     # write per-sample
     out_prefix = cfg.io['output_prefix']
diff --git a/benches/plot.py b/benches/plot.py
@@ -0,0 +1,50 @@
+"""Simple plotting helpers for benchmark outputs."""
+from __future__ import annotations
+import pathlib
+from typing import Optional
+
+def plot_resources(csv_path: str, out_png: Optional[str] = None) -> str:
+    """Plot CPU and RSS usage over time from a resources CSV.
+
+    Args:
+        csv_path: Path to a CSV with ``t_s``, ``cpu_pct`` and ``rss_mb`` columns.
+        out_png: Destination image path; defaults to ``csv_path`` with a
+            ``.png`` suffix.
+
+    Returns:
+        The path the figure was written to.
+
+    Raises:
+        RuntimeError: If pandas/matplotlib cannot be imported, or a
+            required column is missing from the CSV.
+    """
+    try:
+        import pandas as pd
+        import matplotlib.pyplot as plt
+    except ImportError as err:
+        raise RuntimeError('pandas and matplotlib are required for plotting') from err
+
+    p = pathlib.Path(csv_path)
+    df = pd.read_csv(p)
+    # Check every column that is plotted, and do it before a figure exists,
+    # so a malformed CSV fails with one clear error and nothing to clean up.
+    if any(col not in df.columns for col in ('t_s', 'cpu_pct', 'rss_mb')):
+        raise RuntimeError('unexpected resources csv format')
+    if out_png is None:
+        out_png = str(p.with_suffix('.png'))
+
+    fig, ax = plt.subplots(2, 1, figsize=(8, 6), sharex=True)
+    try:
+        ax[0].plot(df['t_s'], df['cpu_pct'], label='cpu_pct')
+        ax[0].set_ylabel('CPU %')
+        ax[0].legend()
+        ax[1].plot(df['t_s'], df['rss_mb'], label='rss_mb')
+        ax[1].set_ylabel('RSS MB')
+        ax[1].legend()
+        fig.tight_layout()
+        fig.savefig(out_png)
+    finally:
+        # pyplot keeps figures registered until closed; release it so
+        # repeated calls do not accumulate open figures.
+        plt.close(fig)
+    return out_png
diff --git a/requirements-dev.txt b/requirements-dev.txt
@@ -1,2 +1,3 @@
 pytest
 pytest-cov
+matplotlib
diff --git a/tests/test_integration_harness.py b/tests/test_integration_harness.py
@@ -0,0 +1,38 @@
+import asyncio
+import os
+import pathlib
+import shutil
+import tempfile
+
+
+def test_harness_writes_artifacts():
+    """Run the harness once with provider kind 'mock' and check its output files."""
+    root = pathlib.Path(__file__).resolve().parents[1]
+    # Use the context manager so the scratch directory is removed even when
+    # an assertion below fails.
+    with tempfile.TemporaryDirectory() as td:
+        tmp_dir = pathlib.Path(td)
+        out_prefix = tmp_dir / "reports" / "it_demo"
+        # minimal config that reads the repository's datasets/qa_tiny.jsonl in place
+        config = {
+            'run_name': 'it_demo',
+            'provider': {'kind': 'mock'},
+            'io': {'dataset_path': str(root / 'datasets' / 'qa_tiny.jsonl'), 'output_prefix': str(out_prefix)},
+            'prompt': {'system': 'You are concise.', 'template': 'Q: {input}\nA:'},
+            'limits': {'max_samples': 2, 'timeout_s': 10},
+            'load': {'batch_size': 1, 'concurrency': 1},
+            'metrics': {'bleu': False, 'rougeL': False, 'normalization': 'lower_strip'},
+        }
+        import yaml
+        cfg_path = tmp_dir / 'cfg.yaml'
+        cfg_path.write_text(yaml.safe_dump(config), encoding='utf8')
+
+        # run harness
+        from benches.harness import run_bench
+        asyncio.run(run_bench(str(cfg_path)))
+
+        # assert artifacts exist
+        assert (out_prefix.with_suffix('.jsonl')).exists()
+        assert (out_prefix.with_name(out_prefix.name + '_summary.csv')).exists()
+        assert (out_prefix.with_name(out_prefix.name + '_resources.csv')).exists()
+        assert (out_prefix.with_name(out_prefix.name + '_report.md')).exists()

Original file line number	Diff line number	Diff line change
`@@ -1,2 +1,3 @@`
`1`	`1`	`pytest`
`2`	`2`	`pytest-cov`
	`3`	`+matplotlib`