
Commit 6950dba

Fix RFT training (#630)
* fix RFT
* added v1 model

Co-authored-by: Abhishek Govindarasu <abhishekgovindarasu@gmail.com>
1 parent 7bab34e commit 6950dba

File tree

9 files changed (+551, -36 lines)


src/judgeval/trainer/config.py

Lines changed: 1 addition & 6 deletions
@@ -1,12 +1,9 @@
 from __future__ import annotations
 
 from dataclasses import dataclass
-from typing import Optional, Dict, Any, TYPE_CHECKING
+from typing import Optional, Dict, Any
 import json
 
-if TYPE_CHECKING:
-    from fireworks.llm.llm_reinforcement_step import ReinforcementAcceleratorTypeLiteral  # type: ignore[import-not-found]
-
 
 @dataclass
 class TrainerConfig:
@@ -23,8 +20,6 @@ class TrainerConfig:
     concurrency: int = 100
     epochs: int = 1
     learning_rate: float = 1e-5
-    accelerator_count: int = 1
-    accelerator_type: ReinforcementAcceleratorTypeLiteral = "NVIDIA_A100_80GB"
     temperature: float = 1.5
     max_tokens: int = 50
     enable_addons: bool = True
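With the accelerator fields removed, a TrainerConfig is built from the remaining fields only. A minimal sketch, assuming the required fields mirror the v1 copy of this class added later in this commit (all values below are placeholders):

from judgeval.trainer.config import TrainerConfig

# Placeholder identifiers; accelerator_count / accelerator_type are no longer
# valid keyword arguments after this change.
config = TrainerConfig(
    deployment_id="my-deployment",
    user_id="example-user",
    model_id="example-model",
    num_steps=5,
    learning_rate=1e-5,
)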

src/judgeval/trainer/fireworks_trainer.py

Lines changed: 0 additions & 4 deletions
@@ -292,8 +292,6 @@ async def run_reinforcement_learning(
             "num_generations_per_prompt": self.config.num_generations_per_prompt,
             "epochs": self.config.epochs,
             "learning_rate": self.config.learning_rate,
-            "accelerator_count": self.config.accelerator_count,
-            "accelerator_type": self.config.accelerator_type,
             "temperature": self.config.temperature,
             "max_tokens": self.config.max_tokens,
         }
@@ -357,8 +355,6 @@ async def run_reinforcement_learning(
             self.config.num_steps,
         )
 
-        dataset.delete()
-
         _print_progress("All training steps completed!")
 
         with _spinner_progress("Deploying final trained model"):

src/judgeval/trainer/trainable_model.py

Lines changed: 24 additions & 15 deletions
@@ -1,3 +1,4 @@
+import time
 from fireworks import LLM  # type: ignore[import-not-found]
 from .config import TrainerConfig, ModelConfig
 from typing import Optional, Dict, Any, Callable
@@ -159,31 +160,39 @@ def advance_to_next_step(self, step: int):
                 f"Failed to advance to training step {step}: {str(e)}"
             ) from e
 
-    def perform_reinforcement_step(self, dataset, step: int):
+    def perform_reinforcement_step(
+        self, dataset, step: int, max_retries: int = 3, initial_backoff: float = 1.0
+    ):
         """
         Perform a reinforcement learning step using the current model.
 
         Args:
             dataset: Training dataset for the reinforcement step
             step: Current step number for output model naming
+            max_retries: Maximum number of retry attempts (default: 3)
+            initial_backoff: Initial backoff time in seconds for exponential backoff (default: 1.0)
 
         Returns:
             Training job object
         """
-        try:
-            model_name = f"{self.config.model_id}-v{step + 1}"
-            return self._current_model.reinforcement_step(
-                dataset=dataset,
-                output_model=model_name,
-                epochs=self.config.epochs,
-                learning_rate=self.config.learning_rate,
-                accelerator_count=self.config.accelerator_count,
-                accelerator_type=self.config.accelerator_type,
-            )
-        except Exception as e:
-            raise JudgmentRuntimeError(
-                f"Failed to start reinforcement learning step {step + 1}: {str(e)}"
-            ) from e
+        model_name = f"{self.config.model_id}-v{step + 1}"
+
+        for attempt in range(max_retries):
+            try:
+                return self._current_model.reinforcement_step(
+                    dataset=dataset,
+                    output_model=model_name,
+                    epochs=self.config.epochs,
+                    learning_rate=self.config.learning_rate,
+                )
+            except Exception as e:
+                if attempt < max_retries - 1:
+                    backoff_time = initial_backoff * (2**attempt)
+                    time.sleep(backoff_time)
+                else:
+                    raise JudgmentRuntimeError(
+                        f"Failed to start reinforcement learning step {step + 1} after {max_retries} attempts: {str(e)}"
+                    ) from e
 
     def get_model_config(
         self, training_params: Optional[Dict[str, Any]] = None
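perform_reinforcement_step now retries the Fireworks call with exponential backoff instead of failing on the first error. The same pattern in isolation, as a rough sketch (the helper name and the bare callable are illustrative, not part of judgeval):

import time


def call_with_backoff(fn, max_retries: int = 3, initial_backoff: float = 1.0):
    """Call fn(), retrying on any exception with exponential backoff.

    Waits initial_backoff, then 2x, 4x, ... seconds between attempts and
    re-raises the last exception once max_retries is exhausted.
    """
    for attempt in range(max_retries):
        try:
            return fn()
        except Exception:
            if attempt == max_retries - 1:
                raise
            time.sleep(initial_backoff * (2**attempt))


# With the defaults this makes up to 3 attempts, sleeping 1s then 2s between them.
result = call_with_backoff(lambda: "ok")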

src/judgeval/v1/trainers/base_trainer.py

Lines changed: 2 additions & 2 deletions
@@ -5,8 +5,8 @@
 
 if TYPE_CHECKING:
     from judgeval.v1.tracer.tracer import Tracer
-    from judgeval.trainer.trainable_model import TrainableModel
-    from judgeval.trainer.config import TrainerConfig, ModelConfig
+    from judgeval.v1.trainers.trainable_model import TrainableModel
+    from judgeval.v1.trainers.config import TrainerConfig, ModelConfig
     from judgeval.v1.scorers.base_scorer import BaseScorer
 
 
src/judgeval/v1/trainers/config.py

Lines changed: 123 additions & 0 deletions
@@ -0,0 +1,123 @@
+from __future__ import annotations
+
+from dataclasses import dataclass
+from typing import Optional, Dict, Any
+import json
+
+
+@dataclass
+class TrainerConfig:
+    """Configuration class for JudgmentTrainer parameters."""
+
+    deployment_id: str
+    user_id: str
+    model_id: str
+    base_model_name: str = "qwen2p5-7b-instruct"
+    rft_provider: str = "fireworks"  # Supported: "fireworks", "verifiers" (future)
+    num_steps: int = 5
+    num_generations_per_prompt: int = 4
+    num_prompts_per_step: int = 4
+    concurrency: int = 100
+    epochs: int = 1
+    learning_rate: float = 1e-5
+    temperature: float = 1.5
+    max_tokens: int = 50
+    enable_addons: bool = True
+
+
+@dataclass
+class ModelConfig:
+    """
+    Configuration class for storing and loading trained model state.
+
+    This class enables persistence of trained models so they can be loaded
+    and used later without retraining.
+
+    Example usage:
+        trainer = JudgmentTrainer(config)
+        model_config = trainer.train(agent_function, scorers, prompts)
+
+        # Save the trained model configuration
+        model_config.save_to_file("my_trained_model.json")
+
+        # Later, load and use the trained model
+        loaded_config = ModelConfig.load_from_file("my_trained_model.json")
+        trained_model = TrainableModel.from_model_config(loaded_config)
+
+        # Use the trained model for inference
+        response = trained_model.chat.completions.create(
+            model="current",  # Uses the loaded trained model
+            messages=[{"role": "user", "content": "Hello!"}]
+        )
+    """
+
+    # Base model configuration
+    base_model_name: str
+    deployment_id: str
+    user_id: str
+    model_id: str
+    enable_addons: bool
+
+    # Training state
+    current_step: int
+    total_steps: int
+
+    # Current model information
+    current_model_name: Optional[str] = None
+    is_trained: bool = False
+
+    # Training parameters used (for reference)
+    training_params: Optional[Dict[str, Any]] = None
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Convert ModelConfig to dictionary for serialization."""
+        return {
+            "base_model_name": self.base_model_name,
+            "deployment_id": self.deployment_id,
+            "user_id": self.user_id,
+            "model_id": self.model_id,
+            "enable_addons": self.enable_addons,
+            "current_step": self.current_step,
+            "total_steps": self.total_steps,
+            "current_model_name": self.current_model_name,
+            "is_trained": self.is_trained,
+            "training_params": self.training_params,
+        }
+
+    @classmethod
+    def from_dict(cls, data: Dict[str, Any]) -> ModelConfig:
+        """Create ModelConfig from dictionary."""
+        return cls(
+            base_model_name=data.get("base_model_name", "qwen2p5-7b-instruct"),
+            deployment_id=data.get("deployment_id", "my-base-deployment"),
+            user_id=data.get("user_id", ""),
+            model_id=data.get("model_id", ""),
+            enable_addons=data.get("enable_addons", True),
+            current_step=data.get("current_step", 0),
+            total_steps=data.get("total_steps", 0),
+            current_model_name=data.get("current_model_name"),
+            is_trained=data.get("is_trained", False),
+            training_params=data.get("training_params"),
+        )
+
+    def to_json(self) -> str:
+        """Convert ModelConfig to JSON string."""
+        return json.dumps(self.to_dict(), indent=2)
+
+    @classmethod
+    def from_json(cls, json_str: str) -> ModelConfig:
+        """Create ModelConfig from JSON string."""
+        data = json.loads(json_str)
+        return cls.from_dict(data)
+
+    def save_to_file(self, filepath: str):
+        """Save ModelConfig to a JSON file."""
+        with open(filepath, "w") as f:
+            f.write(self.to_json())
+
+    @classmethod
+    def load_from_file(cls, filepath: str) -> ModelConfig:
+        """Load ModelConfig from a JSON file."""
+        with open(filepath, "r") as f:
+            json_str = f.read()
+        return cls.from_json(json_str)
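ModelConfig is new in this commit; a minimal save/load round trip using only the methods defined above (field values are placeholders):

from judgeval.v1.trainers.config import ModelConfig

# Placeholder values for illustration.
config = ModelConfig(
    base_model_name="qwen2p5-7b-instruct",
    deployment_id="my-base-deployment",
    user_id="example-user",
    model_id="example-model",
    enable_addons=True,
    current_step=2,
    total_steps=5,
    current_model_name="example-model-v2",
    is_trained=True,
)

config.save_to_file("my_trained_model.json")
restored = ModelConfig.load_from_file("my_trained_model.json")
assert restored == config  # dataclass equality compares every serialized field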
Lines changed: 144 additions & 0 deletions
@@ -0,0 +1,144 @@
+from contextlib import contextmanager
+from typing import Optional
+import sys
+import os
+from judgeval.utils.decorators.use_once import use_once
+
+
+@use_once
+def _is_jupyter_environment():
+    """Check if we're running in a Jupyter notebook or similar environment."""
+    try:
+        # Check for IPython kernel
+        if "ipykernel" in sys.modules or "IPython" in sys.modules:
+            return True
+        # Check for Jupyter environment variables
+        if "JPY_PARENT_PID" in os.environ:
+            return True
+        # Check if we're in Google Colab
+        if "google.colab" in sys.modules:
+            return True
+        return False
+    except Exception:
+        return False
+
+
+IS_JUPYTER = _is_jupyter_environment()
+
+if not IS_JUPYTER:
+    try:
+        from rich.console import Console
+        from rich.spinner import Spinner
+        from rich.live import Live
+        from rich.text import Text
+
+        shared_console = Console()
+        RICH_AVAILABLE = True
+    except ImportError:
+        RICH_AVAILABLE = False
+else:
+    RICH_AVAILABLE = False
+
+
+class SimpleSpinner:
+    def __init__(self, name, text):
+        self.text = text
+
+
+class SimpleLive:
+    def __init__(self, spinner, console=None, refresh_per_second=None):
+        self.spinner = spinner
+
+    def __enter__(self):
+        print(f"🔄 {self.spinner.text}")
+        return self
+
+    def __exit__(self, *args):
+        pass
+
+    def update(self, spinner):
+        print(f"🔄 {spinner.text}")
+
+
+def safe_print(message, style=None):
+    """Safe print function that works in all environments."""
+    if RICH_AVAILABLE and not IS_JUPYTER:
+        shared_console.print(message, style=style)
+    else:
+        if style == "green":
+            print(f"✅ {message}")
+        elif style == "yellow":
+            print(f"⚠️ {message}")
+        elif style == "blue":
+            print(f"🔵 {message}")
+        elif style == "cyan":
+            print(f"🔷 {message}")
+        else:
+            print(message)
+
+
+@contextmanager
+def _spinner_progress(
+    message: str, step: Optional[int] = None, total_steps: Optional[int] = None
+):
+    """Context manager for spinner-based progress display."""
+    if step is not None and total_steps is not None:
+        full_message = f"[Step {step}/{total_steps}] {message}"
+    else:
+        full_message = f"[Training] {message}"
+
+    if RICH_AVAILABLE and not IS_JUPYTER:
+        spinner = Spinner("dots", text=Text(full_message, style="cyan"))
+        with Live(spinner, console=shared_console, refresh_per_second=10):
+            yield
+    else:
+        print(f"🔄 {full_message}")
+        try:
+            yield
+        finally:
+            print(f"✅ {full_message} - Complete")
+
+
+@contextmanager
+def _model_spinner_progress(message: str):
+    """Context manager for model operation spinner-based progress display."""
+    if RICH_AVAILABLE and not IS_JUPYTER:
+        spinner = Spinner("dots", text=Text(f"[Model] {message}", style="blue"))
+        with Live(spinner, console=shared_console, refresh_per_second=10) as live:
+
+            def update_progress(progress_message: str):
+                """Update the spinner with a new progress message."""
+                new_text = f"[Model] {message}\n └─ {progress_message}"
+                spinner.text = Text(new_text, style="blue")
+                live.update(spinner)
+
+            yield update_progress
+    else:
+        print(f"🔵 [Model] {message}")
+
+        def update_progress(progress_message: str):
+            print(f" └─ {progress_message}")
+
+        yield update_progress
+
+
+def _print_progress(
+    message: str, step: Optional[int] = None, total_steps: Optional[int] = None
+):
+    """Print progress message with consistent formatting."""
+    if step is not None and total_steps is not None:
+        safe_print(f"[Step {step}/{total_steps}] {message}", style="green")
+    else:
+        safe_print(f"[Training] {message}", style="green")
+
+
+def _print_progress_update(
+    message: str, step: Optional[int] = None, total_steps: Optional[int] = None
+):
+    """Print progress update message (for status changes during long operations)."""
+    safe_print(f" └─ {message}", style="yellow")
+
+
+def _print_model_progress(message: str):
+    """Print model progress message with consistent formatting."""
+    safe_print(f"[Model] {message}", style="blue")
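These new console helpers use rich spinners on a regular terminal and fall back to plain print() output in Jupyter/Colab or when rich is not installed. A short usage sketch, assuming the helpers above are imported from wherever this new module lives (its path is not shown in this view):

import time

# Step-scoped spinner: rich spinner on a TTY, otherwise a start line plus a
# "- Complete" line when the block exits.
with _spinner_progress("Generating rollouts", step=1, total_steps=5):
    time.sleep(0.1)  # stand-in for real work

# Model-scoped spinner that yields a callback for sub-step updates.
with _model_spinner_progress("Deploying trained model") as update:
    update("waiting for deployment to become ready")
    time.sleep(0.1)

_print_progress("All training steps completed!")
safe_print("Done", style="green")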

0 commit comments
