diff --git a/configs/vf-rl/math-python.toml b/configs/vf-rl/math-python.toml index 47f2e48c9..633c40050 100644 --- a/configs/vf-rl/math-python.toml +++ b/configs/vf-rl/math-python.toml @@ -1,7 +1,7 @@ model = "Qwen/Qwen3-4B-Instruct-2507" [env] -id = "primeintellect/wiki-search" +id = "primeintellect/math-python" [inference] gpus = 6 @@ -15,7 +15,7 @@ tool_call_parser = "hermes" gpus = 2 [trainer.args] -run_name = "wiki-search" +run_name = "math-python" micro_batch_size = 8 rollouts_per_example = 16 batch_size = 512 diff --git a/verifiers/scripts/eval.py b/verifiers/scripts/eval.py index bd5b91dd5..21bddf359 100644 --- a/verifiers/scripts/eval.py +++ b/verifiers/scripts/eval.py @@ -192,9 +192,11 @@ def main(): parser.add_argument( "--save-results", "-s", + nargs="?", + const=True, default=False, - action="store_true", - help="Save results to disk", + metavar="PATH", + help="Save results to disk. Optionally specify custom output path.", ) # save every n rollouts parser.add_argument( @@ -317,8 +319,9 @@ def main(): print_results=True, verbose=args.verbose, # saving + output_dir=args.save_results if isinstance(args.save_results, str) else None, state_columns=args.state_columns, - save_results=args.save_results, + save_results=bool(args.save_results), save_every=args.save_every, save_to_hf_hub=args.save_to_hf_hub, hf_hub_dataset_name=args.hf_hub_dataset_name, diff --git a/verifiers/types.py b/verifiers/types.py index f9b986aea..f3f8827ba 100644 --- a/verifiers/types.py +++ b/verifiers/types.py @@ -237,6 +237,7 @@ class EvalConfig(BaseModel): print_results: bool = False verbose: bool = False # saving + output_dir: str | None = None state_columns: list[str] | None = None save_results: bool = False save_every: int = -1 diff --git a/verifiers/utils/path_utils.py b/verifiers/utils/path_utils.py index 6ab89923b..fcc4f2b45 100644 --- a/verifiers/utils/path_utils.py +++ b/verifiers/utils/path_utils.py @@ -15,13 +15,17 @@ def get_results_path( def get_eval_results_path(config: EvalConfig) -> Path: + # Use custom output_dir if provided + if config.output_dir is not None: + base_path = Path(config.output_dir) + return get_results_path(config.env_id, config.model, base_path) + + # Fall back to default behavior module_name = config.env_id.replace("-", "_") local_env_dir = Path(config.env_dir_path) / module_name if local_env_dir.exists(): base_path = local_env_dir / "outputs" - results_path = get_results_path(config.env_id, config.model, base_path) else: base_path = Path("./outputs") - results_path = get_results_path(config.env_id, config.model, base_path) - return results_path + return get_results_path(config.env_id, config.model, base_path)