feat: add custom output directory support to vf-eval

ericlau98 · ericlau98 · commit 0e0e8729682a · 2025-12-24T13:21:47.000-05:00
Allow users to specify a custom output path via the -s/--save-results
flag. When a path is provided, it is used as the base directory for
saved results instead of the default ./outputs or the environment-local
outputs directory. Passing -s without a path keeps the default behavior.

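Usage:
  vf-eval gsm8k -s              # default location
  vf-eval gsm8k -s /custom/path # custom location

Note: because the path argument is optional, a bare -s must not be
directly followed by a positional argument; `vf-eval -s gsm8k` would
treat "gsm8k" as the output path rather than the environment id.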
diff --git a/verifiers/scripts/eval.py b/verifiers/scripts/eval.py
@@ -192,9 +192,11 @@ def main():
     parser.add_argument(
         "--save-results",
         "-s",
+        nargs="?",
+        const=True,
         default=False,
-        action="store_true",
-        help="Save results to disk",
+        metavar="PATH",
+        help="Save results to disk. Optionally specify custom output path.",
     )
     # save every n rollouts
     parser.add_argument(
@@ -317,8 +319,9 @@ def main():
         print_results=True,
         verbose=args.verbose,
         # saving
+        output_dir=args.save_results if isinstance(args.save_results, str) else None,
         state_columns=args.state_columns,
-        save_results=args.save_results,
+        save_results=bool(args.save_results),
         save_every=args.save_every,
         save_to_hf_hub=args.save_to_hf_hub,
         hf_hub_dataset_name=args.hf_hub_dataset_name,
diff --git a/verifiers/types.py b/verifiers/types.py
@@ -237,6 +237,7 @@ class EvalConfig(BaseModel):
     print_results: bool = False
     verbose: bool = False
     # saving
+    output_dir: str | None = None
     state_columns: list[str] | None = None
     save_results: bool = False
     save_every: int = -1
diff --git a/verifiers/utils/path_utils.py b/verifiers/utils/path_utils.py
@@ -15,13 +15,17 @@ def get_results_path(
 
 
 def get_eval_results_path(config: EvalConfig) -> Path:
+    # Use custom output_dir if provided
+    if config.output_dir is not None:
+        base_path = Path(config.output_dir)
+        return get_results_path(config.env_id, config.model, base_path)
+
+    # Fall back to default behavior
     module_name = config.env_id.replace("-", "_")
     local_env_dir = Path(config.env_dir_path) / module_name
 
     if local_env_dir.exists():
         base_path = local_env_dir / "outputs"
-        results_path = get_results_path(config.env_id, config.model, base_path)
     else:
         base_path = Path("./outputs")
-        results_path = get_results_path(config.env_id, config.model, base_path)
-    return results_path
+    return get_results_path(config.env_id, config.model, base_path)