diff --git a/README.md b/README.md
index f0f568a..5fc01eb 100644
--- a/README.md
+++ b/README.md
@@ -5,9 +5,6 @@ A package for running OELLM CLI workflows across multiple HPC clusters using SLU
 ## Currently supported workflows
 - Schedule evaluations on multiple models and tasks on all clusters ✅ `oellm schedule-eval ...`
 - Restart failed evaluations (e.g., due to node failures) ✅ `oellm collect-results ... --reschedule true`
-- Interactive eval job/csv builder ✅ `oellm build-csv`
-  - Recursively resolve local paths: pass a directory containing models and their nested intermediate checkpoints, will eval all checkpoints
-  - Support default task groups (cf `oellm/resources/task-groups.yaml`)
 
 ## Planned workflows
 - Sync and download evaluation results from all clusters via a shared data layer
@@ -38,23 +35,7 @@ This will automatically:
 
 In case you meet HuggingFace quotas issues, make sure you are logged in by setting your `HF_TOKEN` and that you are part of [OpenEuroLLM](https://huggingface.co/OpenEuroLLM) organization.
 
-## Interactive CSV Builder
-
-```bash
-oellm interactive-csv
-```
-
-This will launch an interactive workflow where you can:
-- Add models (HuggingFace Hub or local paths)
-- Select evaluation tasks
-- Configure n-shot settings
-- Preview and save your evaluation configuration
-
-The resulting CSV includes an additional `eval_suite` column that records which
-evaluation framework (e.g., `lm_eval` or `lighteval`) should be used for each
-task.
-
-Otherwise you can also directly schedule using a CSV file:
+You can also directly schedule using a CSV file:
 ```bash
 oellm schedule-eval --eval_csv_path custom_evals.csv
 ```
diff --git a/oellm/interactive_csv_builder.py b/oellm/interactive_csv_builder.py
deleted file mode 100644
index 61c99f1..0000000
--- a/oellm/interactive_csv_builder.py
+++ /dev/null
@@ -1,400 +0,0 @@
-import signal
-import sys
-from importlib.resources import files
-from pathlib import Path
-
-import pandas as pd
-import questionary
-import yaml
-from questionary import Style
-from rich import box
-from rich.console import Console
-from rich.panel import Panel
-from rich.progress import Progress, SpinnerColumn, TextColumn
-from rich.table import Table
-
-
-def build_csv_interactive(output_path: str = "eval_config.csv") -> None:
-    """
-    Enhanced interactive CSV builder with arrow key navigation.
-
-    Args:
-        output_path: Path where the CSV file will be saved.
-    """
-    console = Console()
-
-    # Set up signal handler for graceful exit
-    def signal_handler(sig, frame):
-        console.print("\n\n[yellow]Interrupted by user. Exiting...[/yellow]")
-        sys.exit(0)
-
-    signal.signal(signal.SIGINT, signal_handler)
-
-    # Custom style for questionary
-    custom_style = Style(
-        [
-            ("qmark", "fg:#673ab7 bold"),
-            ("question", "bold"),
-            ("answer", "fg:#f44336 bold"),
-            ("pointer", "fg:#673ab7 bold"),
-            ("highlighted", "fg:#673ab7 bold"),
-            ("selected", "fg:#cc5454"),
-            ("separator", "fg:#cc5454"),
-            ("instruction", "fg:#abb2bf"),
-            ("text", ""),
-            ("disabled", "fg:#858585 italic"),
-        ]
-    )
-
-    # Clear screen and show header
-    console.clear()
-    console.print(
-        Panel.fit(
-            "[bold cyan]OpenEuroLLM Evaluation Configuration Builder[/bold cyan]\n"
-            "[dim]Use arrow keys to navigate, Enter to select, Ctrl+C to exit[/dim]",
-            border_style="cyan",
-        )
-    )
-
-    # Step 1: Get models with enhanced input
-    console.print("\n[bold cyan]📦 Step 1: Add Models[/bold cyan]")
-
-    models = []
-    add_more = True
-
-    while add_more:
-        try:
-            action = questionary.select(
-                "What would you like to do?",
-                choices=[
-                    "➕ Add a model",
-                    "📋 View current models"
-                    if models
-                    else questionary.Choice(
-                        "📋 View current models", disabled="No models added yet"
-                    ),
-                    "✅ Continue to tasks"
-                    if models
-                    else questionary.Choice(
-                        "✅ Continue to tasks", disabled="Add at least one model first"
-                    ),
-                ],
-                style=custom_style,
-            ).ask()
-
-            if action is None:  # User pressed Ctrl+C
-                console.print("\n[yellow]Cancelled by user.[/yellow]")
-                return
-        except (KeyboardInterrupt, EOFError):
-            console.print("\n[yellow]Cancelled by user.[/yellow]")
-            return
-
-        if action == "➕ Add a model":
-            model = questionary.text(
-                "Enter model (HuggingFace ID or local path):",
-                instruction="(e.g., meta-llama/Llama-2-7b-hf or /path/to/model)",
-                style=custom_style,
-            ).ask()
-
-            if model is None:  # User pressed Ctrl+C
-                console.print("\n[yellow]Cancelled by user.[/yellow]")
-                return
-
-            if model:
-                models.append(model)
-                console.print(f"[green]✓ Added: {model}[/green]")
-
-        elif action == "📋 View current models":
-            console.print("\n[bold]Current models:[/bold]")
-            for i, model in enumerate(models, 1):
-                console.print(f" {i}. [cyan]{model}[/cyan]")
-            console.print()
-
-        elif action == "✅ Continue to tasks":
-            add_more = False
-
-    # Step 2: Configure tasks
-    console.print("\n[bold cyan]📝 Step 2: Configure Tasks[/bold cyan]")
-
-    task_configs: list[tuple[str, list[int], str]] = []
-    add_more = True
-
-    # Load task groups from packaged resources
-    task_groups = {}
-    try:
-        data = yaml.safe_load((files("oellm.resources") / "task-groups.yaml").read_text())
-        task_groups = data.get("task_groups", {})
-    except Exception as e:
-        console.print(f"[yellow]Warning: Could not load task groups: {e}[/yellow]")
-
-    while add_more:
-        choices = [
-            "➕ Add a single task",
-        ]
-
-        # Add task group options if available
-        if task_groups:
-            choices.insert(0, "📦 Use a default task group")
-
-        choices.extend(
-            [
-                "📋 View current tasks"
-                if task_configs
-                else questionary.Choice(
-                    "📋 View current tasks", disabled="No tasks added yet"
-                ),
-                "✅ Continue to preview"
-                if task_configs
-                else questionary.Choice(
-                    "✅ Continue to preview", disabled="Add at least one task first"
-                ),
-            ]
-        )
-
-        action = questionary.select(
-            "What would you like to do?",
-            choices=choices,
-            style=custom_style,
-        ).ask()
-
-        if action is None:
-            console.print("\n[yellow]Cancelled by user.[/yellow]")
-            return
-
-        if action == "📦 Use a default task group":
-            # Show available task groups
-            group_choices = []
-            for group_name, group_data in task_groups.items():
-                description = group_data.get("description", "")
-                group_choices.append(f"{group_name} - {description}")
-
-            selected_groups = questionary.checkbox(
-                "Select task groups (↑↓ to navigate, SPACE to check/uncheck, ENTER when done):",
-                choices=group_choices,
-                style=custom_style,
-                instruction="Use SPACEBAR to select groups, not typing text",
-            ).ask()
-
-            if selected_groups is None:
-                console.print("\n[yellow]Cancelled by user.[/yellow]")
-                return
-
-            # Only process if groups were actually selected
-            if selected_groups:
-                # Add tasks from selected groups
-                for selection in selected_groups:
-                    group_name = selection.split(" - ")[0]
-                    group_data = task_groups[group_name]
-
-                    console.print(f"\n[cyan]Adding tasks from '{group_name}':[/cyan]")
-                    for task_item in group_data.get("tasks", []):
-                        task_name = task_item["task"]
-                        n_shots = task_item.get("n_shots", [0])
-                        suite = task_item.get("suite", "lm_eval")
-                        task_configs.append((task_name, n_shots, suite))
-                        console.print(
-                            f" [green]✓ Added: {task_name} (suite={suite}) with n_shot={n_shots}[/green]"
-                        )
-
-                # After adding task groups, ask if user wants to add more or proceed
-                proceed_choice = questionary.select(
-                    "\nTask groups added. What would you like to do?",
-                    choices=[
-                        "✅ Continue to preview",
-                        "➕ Add more tasks",
-                    ],
-                    style=custom_style,
-                ).ask()
-
-                if proceed_choice is None:
-                    console.print("\n[yellow]Cancelled by user.[/yellow]")
-                    return
-
-                if proceed_choice == "✅ Continue to preview":
-                    add_more = False
-                # If user chooses "Add more tasks", the loop continues
-            else:
-                console.print("\n[yellow]No task groups selected.[/yellow]")
-
-        elif action == "➕ Add a single task":
-            # Direct task input
-            task = questionary.text("Enter task name:", style=custom_style).ask()
-            if task is None:
-                console.print("\n[yellow]Cancelled by user.[/yellow]")
-                return
-
-            if task:
-                # Get n_shot values
-                n_shot_choice = questionary.select(
-                    f"Select n_shot configuration for '{task}':",
-                    choices=[
-                        "0 (zero-shot)",
-                        "5 (few-shot)",
-                        "0,5 (both)",
-                        "0,5,10,25 (multiple)",
-                        "📝 Custom values",
-                    ],
-                    style=custom_style,
-                ).ask()
-
-                if n_shot_choice is None:
-                    console.print("\n[yellow]Cancelled by user.[/yellow]")
-                    return
-
-                if n_shot_choice == "📝 Custom values":
-                    n_shots_str = questionary.text(
-                        "Enter n_shot values (comma-separated):",
-                        instruction="(e.g., 0,5,10)",
-                        style=custom_style,
-                    ).ask()
-                    if n_shots_str is None:
-                        console.print("\n[yellow]Cancelled by user.[/yellow]")
-                        return
-                else:
-                    # Extract numbers from the choice
-                    import re
-
-                    n_shots_str = ",".join(re.findall(r"\d+", n_shot_choice))
-
-                try:
-                    n_shots = [int(x.strip()) for x in n_shots_str.split(",")]
-                    suite_choice = questionary.select(
-                        f"Select evaluation suite for '{task}':",
-                        choices=[
-                            questionary.Choice(
-                                "lm_eval (lm-eval-harness)", value="lm_eval"
-                            ),
-                            questionary.Choice(
-                                "lighteval (Hugging Face LightEval)",
-                                value="lighteval",
-                            ),
-                            "📝 Custom suite",
-                        ],
-                        style=custom_style,
-                    ).ask()
-
-                    if suite_choice is None:
-                        console.print("\n[yellow]Cancelled by user.[/yellow]")
-                        return
-
-                    if suite_choice == "📝 Custom suite":
-                        suite = questionary.text(
-                            "Enter suite identifier:",
-                            instruction="(e.g., custom-eval-suite)",
-                            style=custom_style,
-                        ).ask()
-                        if suite is None:
-                            console.print("\n[yellow]Cancelled by user.[/yellow]")
-                            return
-                        suite = suite.strip()
-                        if not suite:
-                            suite = "lm_eval"
-                    else:
-                        suite = suite_choice
-
-                    task_configs.append((task, n_shots, suite))
-                    console.print(
-                        f"[green]✓ Added: {task} (suite={suite}) with n_shot={n_shots}[/green]"
-                    )
-                except ValueError:
-                    console.print("[red]Invalid n_shot values. Skipping.[/red]")
-
-        elif action == "📋 View current tasks":
-            console.print("\n[bold]Current tasks:[/bold]")
-            for i, (task, n_shots, suite) in enumerate(task_configs, 1):
-                console.print(
-                    f" {i}. [green]{task}[/green] → n_shot={n_shots} (suite={suite})"
-                )
-            console.print()
-
-        elif action == "✅ Continue to preview":
-            add_more = False
-
-    # Build the dataframe
-    with Progress(
-        SpinnerColumn(),
-        TextColumn("[progress.description]{task.description}"),
-        transient=True,
-    ) as progress:
-        task = progress.add_task("Building configuration matrix...", total=None)
-
-        rows = []
-        for model in models:
-            for task_name, n_shots, suite in task_configs:
-                for n_shot in n_shots:
-                    rows.append(
-                        {
-                            "model_path": model,
-                            "task_path": task_name,
-                            "n_shot": n_shot,
-                            "eval_suite": suite,
-                        }
-                    )
-
-        df = pd.DataFrame(rows)
-        progress.update(task, completed=True)
-
-    # Show preview
-    console.print("\n[bold cyan]👁️ Preview[/bold cyan]")
-
-    table = Table(show_header=True, header_style="bold magenta", box=box.ROUNDED)
-    table.add_column("#", style="dim", width=4)
-    table.add_column("Model", style="cyan", no_wrap=True)
-    table.add_column("Task", style="green")
-    table.add_column("n_shot", justify="right", style="yellow")
-    table.add_column("Suite", style="magenta")
-
-    # Show first 10 rows
-    for idx, (_, row) in enumerate(df.head(10).iterrows(), 1):
-        table.add_row(
-            str(idx),
-            str(row["model_path"]),
-            str(row["task_path"]),
-            str(row["n_shot"]),
-            str(row["eval_suite"]),
-        )
-
-    if len(df) > 10:
-        table.add_row("...", "...", "...", "...")
-
-    console.print(table)
-    console.print(f"\n[bold]Total configurations: {len(df)}[/bold]")
-
-    # Summary statistics
-    console.print("\n[bold cyan]📊 Summary[/bold cyan]")
-    console.print(f" • Models: {len(models)}")
-    console.print(f" • Tasks: {len(task_configs)}")
-    console.print(f" • Total evaluations: {len(df)}")
-
-    # Save confirmation
-    save = questionary.confirm(
-        f"\nSave configuration to {output_path}?", default=True, style=custom_style
-    ).ask()
-
-    if save is None:
-        console.print("\n[yellow]Cancelled by user.[/yellow]")
-        return
-
-    if save:
-        # Ensure directory exists
-        output_dir = Path(output_path).parent
-        if output_dir != Path("."):
-            output_dir.mkdir(parents=True, exist_ok=True)
-
-        with Progress(
-            SpinnerColumn(),
-            TextColumn("[progress.description]{task.description}"),
-            transient=True,
-        ) as progress:
-            task = progress.add_task(f"Saving to {output_path}...", total=None)
-            df.to_csv(output_path, index=False)
-            progress.update(task, completed=True)
-
-        console.print(f"\n[green]✅ Configuration saved to {output_path}[/green]")
-        console.print("\n[bold]Next steps:[/bold]")
-        console.print(f" 1. Review the configuration: [cyan]cat {output_path}[/cyan]")
-        console.print(
-            f" 2. Run evaluation: [cyan]oellm schedule-eval --eval_csv_path {output_path}[/cyan]"
-        )
-    else:
-        console.print("\n[yellow]Configuration not saved.[/yellow]")
diff --git a/oellm/main.py b/oellm/main.py
index c1ffac2..c1d7f5d 100644
--- a/oellm/main.py
+++ b/oellm/main.py
@@ -351,25 +351,6 @@ def schedule_evals(
     )
 
-
-def build_csv(
-    output_path: str = "eval_config.csv",
-    *,
-    verbose: bool = False,
-) -> None:
-    """
-    Build a CSV file for evaluation with per-task n_shot configurations using the interactive builder.
-
-    Args:
-        output_path: Path where the CSV file will be saved.
-        verbose: Enable verbose logging.
-    """
-    _setup_logging(verbose)
-
-    from oellm.interactive_csv_builder import build_csv_interactive
-
-    build_csv_interactive(output_path)
-
-
 def collect_results(
     results_dir: str,
     output_csv: str = "eval_results.csv",
@@ -651,7 +632,6 @@ def main():
     auto_cli(
         {
             "schedule-eval": schedule_evals,
-            "build-csv": build_csv,
             "collect-results": collect_results,
             "clean-cache": lambda: clear_task_cache(),
         },
diff --git a/pyproject.toml b/pyproject.toml
index d699cba..42855e0 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -15,7 +15,6 @@ dependencies = [
     "pydantic<2.12",
    "huggingface_hub",
    "pyyaml",
-    "questionary",
 ]
 
 [project.optional-dependencies]