diff --git a/configs/vf-rl/math-python.toml b/configs/vf-rl/math-python.toml
index 47f2e48c9..633c40050 100644
--- a/configs/vf-rl/math-python.toml
+++ b/configs/vf-rl/math-python.toml
@@ -1,7 +1,7 @@
 model = "Qwen/Qwen3-4B-Instruct-2507"
 
 [env]
-id = "primeintellect/wiki-search"
+id = "primeintellect/math-python"
 
 [inference]
 gpus = 6
@@ -15,7 +15,7 @@ tool_call_parser = "hermes"
 gpus = 2
 
 [trainer.args]
-run_name = "wiki-search"
+run_name = "math-python"
 micro_batch_size = 8
 rollouts_per_example = 16
 batch_size = 512
diff --git a/verifiers/scripts/eval.py b/verifiers/scripts/eval.py
index bd5b91dd5..21bddf359 100644
--- a/verifiers/scripts/eval.py
+++ b/verifiers/scripts/eval.py
@@ -192,9 +192,11 @@ def main():
     parser.add_argument(
         "--save-results",
         "-s",
+        nargs="?",
+        const=True,
         default=False,
-        action="store_true",
-        help="Save results to disk",
+        metavar="PATH",
+        help="Save results to disk. Optionally specify custom output path.",
     )
     # save every n rollouts
     parser.add_argument(
@@ -317,8 +319,9 @@ def main():
         print_results=True,
         verbose=args.verbose,
         # saving
+        output_dir=args.save_results if isinstance(args.save_results, str) else None,
         state_columns=args.state_columns,
-        save_results=args.save_results,
+        save_results=bool(args.save_results),
         save_every=args.save_every,
         save_to_hf_hub=args.save_to_hf_hub,
         hf_hub_dataset_name=args.hf_hub_dataset_name,
diff --git a/verifiers/types.py b/verifiers/types.py
index f9b986aea..f3f8827ba 100644
--- a/verifiers/types.py
+++ b/verifiers/types.py
@@ -237,6 +237,7 @@ class EvalConfig(BaseModel):
     print_results: bool = False
     verbose: bool = False
     # saving
+    output_dir: str | None = None
     state_columns: list[str] | None = None
     save_results: bool = False
     save_every: int = -1
diff --git a/verifiers/utils/path_utils.py b/verifiers/utils/path_utils.py
index 6ab89923b..fcc4f2b45 100644
--- a/verifiers/utils/path_utils.py
+++ b/verifiers/utils/path_utils.py
@@ -15,13 +15,17 @@ def get_results_path(
 
 
 def get_eval_results_path(config: EvalConfig) -> Path:
+    # An explicit config.output_dir overrides the default location and is used as the base path for get_results_path
+    if config.output_dir is not None:
+        base_path = Path(config.output_dir)
+        return get_results_path(config.env_id, config.model, base_path)
+
+    # No output_dir given: use the local env directory's outputs/ subfolder when that env directory exists, else ./outputs
     module_name = config.env_id.replace("-", "_")
     local_env_dir = Path(config.env_dir_path) / module_name
 
     if local_env_dir.exists():
         base_path = local_env_dir / "outputs"
-        results_path = get_results_path(config.env_id, config.model, base_path)
     else:
         base_path = Path("./outputs")
-        results_path = get_results_path(config.env_id, config.model, base_path)
-    return results_path
+    return get_results_path(config.env_id, config.model, base_path)