diff --git a/.gitignore b/.gitignore index 4fb9589..44aa472 100644 --- a/.gitignore +++ b/.gitignore @@ -66,3 +66,7 @@ coverage/ examples/**/analysis_*.json examples/**/context.json examples/**/*.log + +# Copilot runtime outputs +.context/copilot/ +.context/logs/ diff --git a/core/python/context/cli.py b/core/python/context/cli.py new file mode 100644 index 0000000..6ef75a0 --- /dev/null +++ b/core/python/context/cli.py @@ -0,0 +1,526 @@ +""" +CLI interface for Context runtime. + +Provides commands for managing and executing context-based LLM workflows. +""" + +import json +import os +import pathlib +import re +import uuid +from datetime import datetime, timezone +from typing import Literal, Optional + +import httpx +import typer +from pydantic import BaseModel, Field, field_validator, computed_field + +app = typer.Typer(help="Context: Lightweight execution abstraction for LLM requests") +copilot_app = typer.Typer(help="Copilot-style interface for one-off LLM runs") +app.add_typer(copilot_app, name="copilot") + + +# Pricing table for token estimation (USD per token) +MODEL_PRICING = { + "gpt-4o-mini": { + "input": 0.00000015, # $0.15 per 1M tokens + "output": 0.0000006, # $0.60 per 1M tokens + }, + "gpt-4o": { + "input": 0.0000025, # $2.50 per 1M tokens + "output": 0.00001, # $10.00 per 1M tokens + }, + "gpt-4": { + "input": 0.00003, # $30 per 1M tokens + "output": 0.00006, # $60 per 1M tokens + }, +} + + +class UsageMetadata(BaseModel): + """Token usage metadata.""" + prompt_tokens: int = 0 + completion_tokens: int = 0 + total_tokens: int = 0 + + +class LLMResponse(BaseModel): + """LLM response structure.""" + content: str + usage: UsageMetadata + cost_usd: float + + +class CopilotRunLog(BaseModel): + """Structured log for copilot run.""" + prompt_id: str + timestamp_start: datetime + timestamp_end: datetime + user: str + prompt: str + instructions_source: Literal["flag", "file", "default"] + model: str + budget_usd: float + estimated_max_tokens: int + usage: 
Optional[UsageMetadata] = None + cost_usd: Optional[float] = None + output_path: Optional[str] = None + error: Optional[str] = None + + +def write_log(log: CopilotRunLog, log_dir: pathlib.Path = None) -> pathlib.Path: + """ + Write copilot run log to JSON file. + + Args: + log: CopilotRunLog instance + log_dir: Directory to write logs (defaults to .context/logs/copilot) + + Returns: + Path to log file + """ + if log_dir is None: + log_dir = pathlib.Path.cwd() / ".context" / "logs" / "copilot" + + log_dir.mkdir(parents=True, exist_ok=True) + log_path = log_dir / f"{log.prompt_id}.json" + + # Custom serialization to handle datetime + log_dict = log.model_dump() + log_dict["timestamp_start"] = log.timestamp_start.isoformat() + log_dict["timestamp_end"] = log.timestamp_end.isoformat() + + log_path.write_text(json.dumps(log_dict, indent=2)) + + return log_path + + +def parse_prompt_hints(prompt: str) -> dict: + """ + Extract high-level task hints from natural-language prompt. + + Uses simple regex patterns to detect task types. + Returns a dict with detected task hints. 
+ """ + prompt_lower = prompt.lower() + hints = { + "task_type": "general", + "keywords": [], + } + + # Detect planner/planning requests + if re.search(r'\b(plan|planner|planning|schedule|agenda)\b', prompt_lower): + hints["task_type"] = "planner" + hints["keywords"].append("planning") + + # Detect analysis requests (supports both American and British spellings) + elif re.search(r'\b(analy[sz]e|analysis|examine|inspect|investigate)\b', prompt_lower): + hints["task_type"] = "analysis" + hints["keywords"].append("analysis") + + # Detect generation/creation requests + elif re.search(r'\b(build|create|generate|make|develop)\b', prompt_lower): + hints["task_type"] = "generation" + hints["keywords"].append("generation") + + # Detect summarization requests (supports both American and British spellings) + elif re.search(r'\b(summari[sz]e|summary|brief|overview)\b', prompt_lower): + hints["task_type"] = "summarization" + hints["keywords"].append("summarization") + + return hints + + +def budget_to_max_tokens(budget_usd: float, model: str = "gpt-4o-mini") -> int: + """ + Convert USD budget to approximate max_tokens with safety margin. + + Uses a simple pricing estimate based on average input/output token costs. + Applies 20% safety margin to ensure we don't exceed budget. 
+ """ + pricing = MODEL_PRICING.get(model, MODEL_PRICING["gpt-4o-mini"]) + + # Use weighted average: assume 70% output tokens, 30% input tokens + avg_price_per_token = (0.3 * pricing["input"]) + (0.7 * pricing["output"]) + + # Calculate max tokens with 20% safety margin + max_tokens = int((budget_usd * 0.8) / avg_price_per_token) + + # Ensure at least 1 token + if max_tokens < 1: + max_tokens = 1 + + return max_tokens + + +def calculate_cost(usage: UsageMetadata, model: str = "gpt-4o-mini") -> float: + """Calculate USD cost from usage metadata.""" + pricing = MODEL_PRICING.get(model, MODEL_PRICING["gpt-4o-mini"]) + + input_cost = usage.prompt_tokens * pricing["input"] + output_cost = usage.completion_tokens * pricing["output"] + + return input_cost + output_cost + + +def call_litellm( + prompt: str, + model: str, + max_tokens: int, + user_instructions: str = "", + virtual_key: Optional[str] = None, + proxy_url: str = "http://localhost:4000", +) -> LLMResponse: + """ + Call LiteLLM proxy to execute the prompt. + + Args: + prompt: The main user prompt + model: Model identifier + max_tokens: Maximum tokens for completion + user_instructions: Optional user-provided instructions + virtual_key: Virtual key for authentication + proxy_url: LiteLLM proxy URL + + Returns: + LLMResponse with content, usage, and cost + + Raises: + Exception: If LiteLLM call fails + """ + # Build messages payload + messages = [] + + # Add system message if needed + system_msg = "You are a helpful assistant." 
+ messages.append({"role": "system", "content": system_msg}) + + # Add user instructions if provided + if user_instructions: + messages.append({"role": "user", "content": f"Instructions: {user_instructions}"}) + + # Add main prompt + messages.append({"role": "user", "content": prompt}) + + # Prepare request payload + payload = { + "model": model, + "messages": messages, + "max_tokens": max_tokens, + } + + # Prepare headers + headers = { + "Content-Type": "application/json", + } + if virtual_key: + headers["Authorization"] = f"Bearer {virtual_key}" + + # Make HTTP POST to LiteLLM proxy + # Allow configurable timeout for long-running LLM requests + timeout_env = os.getenv("CONTEXT_HTTP_TIMEOUT", "180") + try: + timeout_seconds = float(timeout_env) + except ValueError: + timeout_seconds = 180.0 + + try: + with httpx.Client(timeout=timeout_seconds) as client: + response = client.post( + f"{proxy_url}/chat/completions", + json=payload, + headers=headers, + ) + response.raise_for_status() + data = response.json() + + # Parse response + content = data["choices"][0]["message"]["content"] + usage_data = data.get("usage", {}) + usage = UsageMetadata( + prompt_tokens=usage_data.get("prompt_tokens", 0), + completion_tokens=usage_data.get("completion_tokens", 0), + total_tokens=usage_data.get("total_tokens", 0), + ) + + # Calculate cost + cost_usd = calculate_cost(usage, model) + + return LLMResponse(content=content, usage=usage, cost_usd=cost_usd) + + except httpx.HTTPStatusError as e: + raise Exception(f"LiteLLM API error: {e.response.status_code} - {e.response.text}") from e + except httpx.RequestError as e: + raise Exception(f"LiteLLM connection error: {e}") from e + except (KeyError, IndexError) as e: + raise Exception(f"Invalid LiteLLM response format: {e}") from e + + +def generate_dashboard( + prompt: str, + llm_response: LLMResponse, + task_type: str = "general", + output_path: pathlib.Path = None, +) -> pathlib.Path: + """ + Generate a Markdown dashboard file from LLM 
response. + + Args: + prompt: Original prompt + llm_response: LLM response + task_type: Type of task (planner, analysis, etc.) + output_path: Path to write dashboard + + Returns: + Path to generated dashboard file + """ + content = llm_response.content + + # Create structured markdown based on task type + if task_type == "planner": + # Extract sections for planner + markdown = f"""# Planning Tool + +## Request +{prompt} + +## Plan +{content} + +## Notes +- Generated by Context Copilot +- Budget estimate based on LLM usage +""" + else: + # Generic format for other task types + markdown = f"""# Task: {task_type.capitalize()} + +## Request +{prompt} + +## Response +{content} + +## Metadata +- Task Type: {task_type} +- Generated by Context Copilot +""" + + # Write to file + output_path.parent.mkdir(parents=True, exist_ok=True) + output_path.write_text(markdown) + + return output_path + + +class CopilotRunConfig(BaseModel): + """Configuration for a copilot run.""" + + # Required fields + prompt: str = Field(..., description="Natural language prompt describing the task") + user: str = Field(..., description="Username for this run") + budget: float = Field(..., description="USD budget cap for this run", gt=0.0) + + # Optional fields + instructions: Optional[str] = Field(None, description="Custom instructions") + instructions_file: Optional[pathlib.Path] = Field(None, description="Path to instructions file") + + # Derived/computed fields (set during initialization) + prompt_id: uuid.UUID = Field(default_factory=uuid.uuid4, description="Unique identifier for this run") + model: str = Field(default="gpt-4o-mini", description="LLM model to use") + mode: Literal["one_off"] = Field(default="one_off", description="Execution mode") + + # Prompt analysis hints + prompt_hints: dict = Field(default_factory=dict, description="Parsed hints from prompt") + + @field_validator("budget") + @classmethod + def validate_budget(cls, v: float) -> float: + """Validate budget is positive.""" + 
if v <= 0: + raise ValueError("Budget must be greater than 0") + return v + + @computed_field + @property + def user_instructions(self) -> str: + """ + Resolve user instructions from flag or file. + + Priority: + 1. --instructions flag + 2. --instructions-file (read from file) + 3. Empty string (default) + """ + if self.instructions is not None: + return self.instructions + + if self.instructions_file is not None: + try: + return self.instructions_file.read_text() + except FileNotFoundError: + raise ValueError(f"Instructions file not found: {self.instructions_file}") + except PermissionError: + raise ValueError(f"Permission denied reading instructions file: {self.instructions_file}") + except UnicodeDecodeError: + raise ValueError(f"Invalid encoding in instructions file: {self.instructions_file}") + + return "" + + def model_post_init(self, __context) -> None: + """Post-initialization processing.""" + # Validate mutual exclusivity of instructions flags + if self.instructions is not None and self.instructions_file is not None: + raise ValueError("Cannot specify both --instructions and --instructions-file") + + # Parse prompt hints + self.prompt_hints = parse_prompt_hints(self.prompt) + + # Override model from environment if set + env_model = os.getenv("COPILOT_MODEL") + if env_model: + self.model = env_model + + +@copilot_app.command("run") +def copilot_run( + prompt: str = typer.Option(..., "--prompt", help="Natural language prompt describing the task"), + user: str = typer.Option(..., "--user", help="Username for this run"), + budget: float = typer.Option(..., "--budget", help="USD budget cap for this run"), + instructions: Optional[str] = typer.Option(None, "--instructions", help="Custom instructions"), + instructions_file: Optional[pathlib.Path] = typer.Option(None, "--instructions-file", help="Path to instructions file"), +): + """ + Execute a one-off copilot run with the specified prompt. 
+ + Example: + context copilot run --prompt "build me a custom weekend planning tool" --user matthew --budget 0.05 + """ + timestamp_start = datetime.now(timezone.utc) + config = None + error_msg = None + llm_response = None + output_path = None + max_tokens = 0 + instructions_source = "default" # Initialize before try block + + try: + # Parse and validate configuration + config = CopilotRunConfig( + prompt=prompt, + user=user, + budget=budget, + instructions=instructions, + instructions_file=instructions_file, + ) + + # Determine instructions source + if instructions is not None: + instructions_source = "flag" + elif instructions_file is not None: + instructions_source = "file" + else: + instructions_source = "default" + + # Print minimal configuration info (avoid sensitive data) + typer.echo(f"Prompt ID: {config.prompt_id}") + typer.echo(f"Model: {config.model}") + typer.echo(f"Budget: ${config.budget}") + typer.echo("") + + # Get LiteLLM proxy URL + proxy_url = os.getenv("LITELLM_PROXY_URL", "http://localhost:4000") + typer.echo(f"LiteLLM Proxy: {proxy_url}") + + # Get user virtual key + virtual_key_env = f"CONTEXT_VIRTUAL_KEY_{user.upper()}" + virtual_key = os.getenv(virtual_key_env) + if not virtual_key: + raise ValueError( + f"Virtual key not found. Please set {virtual_key_env} environment variable." 
+ ) + + # Convert budget to max_tokens + max_tokens = budget_to_max_tokens(config.budget, config.model) + typer.echo(f"Estimated max tokens: {max_tokens}") + typer.echo("") + + # Call LiteLLM + typer.echo("Calling LiteLLM...") + llm_response = call_litellm( + prompt=config.prompt, + model=config.model, + max_tokens=max_tokens, + user_instructions=config.user_instructions, + virtual_key=virtual_key, + proxy_url=proxy_url, + ) + + typer.echo(f"✓ LLM call successful") + typer.echo(f" Tokens used: {llm_response.usage.total_tokens}") + typer.echo(f" Cost: ${llm_response.cost_usd:.6f}") + typer.echo("") + + # Generate dashboard + output_dir = pathlib.Path.cwd() / ".context" / "copilot" + output_path = output_dir / f"{config.prompt_id}.md" + + generate_dashboard( + prompt=config.prompt, + llm_response=llm_response, + task_type=config.prompt_hints["task_type"], + output_path=output_path, + ) + + typer.echo(f"✓ Dashboard generated: {output_path}") + typer.echo("") + + # Show preview of response + typer.echo("Response preview:") + typer.echo("-" * 60) + preview = llm_response.content[:500] + if len(llm_response.content) > 500: + preview += "..." 
+ typer.echo(preview) + typer.echo("-" * 60) + + except Exception as e: + error_msg = str(e) + typer.echo(f"Error: {e}", err=True) + finally: + # Always write log, even on failure + timestamp_end = datetime.now(timezone.utc) + + if config is not None: + log = CopilotRunLog( + prompt_id=str(config.prompt_id), + timestamp_start=timestamp_start, + timestamp_end=timestamp_end, + user=config.user, + prompt=config.prompt, + instructions_source=instructions_source, + model=config.model, + budget_usd=config.budget, + estimated_max_tokens=max_tokens, + usage=llm_response.usage if llm_response else None, + cost_usd=llm_response.cost_usd if llm_response else None, + output_path=str(output_path) if output_path else None, + error=error_msg, + ) + + log_path = write_log(log) + typer.echo("") + typer.echo(f"✓ Log written: {log_path}") + + if error_msg: + raise typer.Exit(code=1) + + +def main(): + """Entry point for the CLI.""" + app() + + +if __name__ == "__main__": + main() diff --git a/core/python/pyproject.toml b/core/python/pyproject.toml index b80e0af..47aae0f 100644 --- a/core/python/pyproject.toml +++ b/core/python/pyproject.toml @@ -24,6 +24,20 @@ classifiers = [ "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", ] +dependencies = [ + "typer>=0.9.0", + "pydantic>=2.0.0", + "httpx>=0.24.0", +] + +[project.optional-dependencies] +dev = [ + "pytest>=7.0.0", + "pytest-mock>=3.10.0", +] + +[project.scripts] +context = "context.cli:app" [project.urls] Homepage = "https://github.com/gitbrainlab/context" @@ -32,3 +46,10 @@ Documentation = "https://github.com/gitbrainlab/context/blob/main/docs" [tool.setuptools] packages = ["context"] +package-dir = {"" = "."} + +[tool.pytest.ini_options] +testpaths = ["tests"] +python_files = ["test_*.py"] +python_classes = ["Test*"] +python_functions = ["test_*"] diff --git a/core/python/tests/test_cli.py b/core/python/tests/test_cli.py new file mode 100644 index 0000000..62b34ec --- /dev/null +++ 
b/core/python/tests/test_cli.py @@ -0,0 +1,624 @@ +""" +Tests for Context CLI functionality. +""" + +import json +import os +import pathlib +import tempfile +from datetime import datetime, timezone +from unittest.mock import patch + +import pytest +from typer.testing import CliRunner + +from context.cli import ( + app, + CopilotRunConfig, + parse_prompt_hints, + budget_to_max_tokens, + calculate_cost, + UsageMetadata, + LLMResponse, + generate_dashboard, +) + + +runner = CliRunner() + + +class TestPromptParsing: + """Tests for prompt parsing functionality.""" + + def test_parse_planner_hint(self): + """Test detection of planner task type.""" + hints = parse_prompt_hints("build me a custom weekend planning tool") + assert hints["task_type"] == "planner" + assert "planning" in hints["keywords"] + + def test_parse_analysis_hint(self): + """Test detection of analysis task type.""" + hints = parse_prompt_hints("analyze this dataset") + assert hints["task_type"] == "analysis" + assert "analysis" in hints["keywords"] + + def test_parse_generation_hint(self): + """Test detection of generation task type.""" + hints = parse_prompt_hints("create a new application") + assert hints["task_type"] == "generation" + assert "generation" in hints["keywords"] + + def test_parse_summarization_hint(self): + """Test detection of summarization task type.""" + hints = parse_prompt_hints("summarize this document") + assert hints["task_type"] == "summarization" + assert "summarization" in hints["keywords"] + + def test_parse_general_hint(self): + """Test fallback to general task type.""" + hints = parse_prompt_hints("some random task") + assert hints["task_type"] == "general" + + +class TestCopilotRunConfig: + """Tests for CopilotRunConfig model.""" + + def test_valid_config(self): + """Test valid configuration.""" + config = CopilotRunConfig( + prompt="build me a custom weekend planning tool", + user="matthew", + budget=0.05, + ) + assert config.prompt == "build me a custom weekend planning 
tool" + assert config.user == "matthew" + assert config.budget == 0.05 + assert config.instructions is None + assert config.instructions_file is None + + def test_derived_fields(self): + """Test derived fields are set correctly.""" + config = CopilotRunConfig( + prompt="build me a custom weekend planning tool", + user="matthew", + budget=0.05, + ) + # Check prompt_id is a valid UUID + assert config.prompt_id is not None + assert str(config.prompt_id) # Can be converted to string + + # Check default model + assert config.model == "gpt-4o-mini" + + # Check mode + assert config.mode == "one_off" + + # Check prompt hints are parsed + assert config.prompt_hints["task_type"] == "planner" + + def test_user_instructions_from_flag(self): + """Test user_instructions resolved from flag.""" + config = CopilotRunConfig( + prompt="test", + user="matthew", + budget=0.05, + instructions="custom instructions", + ) + assert config.user_instructions == "custom instructions" + + def test_user_instructions_from_file(self): + """Test user_instructions resolved from file.""" + with tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.txt') as f: + f.write("file instructions") + temp_path = pathlib.Path(f.name) + + try: + config = CopilotRunConfig( + prompt="test", + user="matthew", + budget=0.05, + instructions_file=temp_path, + ) + assert config.user_instructions == "file instructions" + finally: + temp_path.unlink() + + def test_user_instructions_default(self): + """Test user_instructions defaults to empty.""" + config = CopilotRunConfig( + prompt="test", + user="matthew", + budget=0.05, + ) + assert config.user_instructions == "" + + def test_model_override_from_env(self): + """Test model can be overridden from environment.""" + with patch.dict(os.environ, {"COPILOT_MODEL": "gpt-4"}): + config = CopilotRunConfig( + prompt="test", + user="matthew", + budget=0.05, + ) + assert config.model == "gpt-4" + + def test_budget_must_be_positive(self): + """Test budget must be greater 
than 0.""" + with pytest.raises(ValueError, match="greater than 0"): + CopilotRunConfig( + prompt="test", + user="matthew", + budget=0.0, + ) + + def test_negative_budget_fails(self): + """Test negative budget fails validation.""" + with pytest.raises(ValueError): + CopilotRunConfig( + prompt="test", + user="matthew", + budget=-0.05, + ) + + def test_instructions_mutual_exclusivity(self): + """Test cannot specify both instructions and instructions_file.""" + with pytest.raises(ValueError, match="Cannot specify both"): + CopilotRunConfig( + prompt="test", + user="matthew", + budget=0.05, + instructions="custom", + instructions_file=pathlib.Path("/tmp/test.txt"), + ) + + def test_instructions_flag_only(self): + """Test can specify only instructions flag.""" + config = CopilotRunConfig( + prompt="test", + user="matthew", + budget=0.05, + instructions="custom instructions", + ) + assert config.instructions == "custom instructions" + assert config.instructions_file is None + + def test_instructions_file_only(self): + """Test can specify only instructions_file flag.""" + config = CopilotRunConfig( + prompt="test", + user="matthew", + budget=0.05, + instructions_file=pathlib.Path("/tmp/test.txt"), + ) + assert config.instructions is None + assert config.instructions_file == pathlib.Path("/tmp/test.txt") + + +class TestCopilotRunCLI: + """Tests for copilot run CLI command.""" + + def test_help_command(self): + """Test help command.""" + result = runner.invoke(app, ["copilot", "run", "--help"]) + assert result.exit_code == 0 + assert "Execute a one-off copilot run" in result.stdout + + @patch('context.cli.call_litellm') + def test_valid_run_command(self, mock_call_litellm): + """Test valid run command with mocked LiteLLM.""" + # Mock LiteLLM response + mock_usage = UsageMetadata( + prompt_tokens=100, + completion_tokens=200, + total_tokens=300, + ) + mock_response = LLMResponse( + content="Weekend planning suggestions...", + usage=mock_usage, + cost_usd=0.0001, + ) + 
mock_call_litellm.return_value = mock_response + + # Set required environment variable + with patch.dict(os.environ, {"CONTEXT_VIRTUAL_KEY_MATTHEW": "test-key"}): + result = runner.invoke(app, [ + "copilot", "run", + "--prompt", "build me a custom weekend planning tool", + "--user", "matthew", + "--budget", "0.05", + ]) + + assert result.exit_code == 0 + assert "LLM call successful" in result.stdout + assert "Dashboard generated" in result.stdout + + def test_missing_virtual_key(self): + """Test error when virtual key is missing.""" + result = runner.invoke(app, [ + "copilot", "run", + "--prompt", "test", + "--user", "matthew", + "--budget", "0.05", + ]) + assert result.exit_code == 1 + assert "Virtual key not found" in result.stdout + + def test_invalid_budget(self): + """Test invalid budget fails.""" + result = runner.invoke(app, [ + "copilot", "run", + "--prompt", "test", + "--user", "matthew", + "--budget", "0", + ]) + assert result.exit_code == 1 + assert "Error:" in result.stdout + + def test_missing_required_args(self): + """Test missing required arguments fails.""" + result = runner.invoke(app, [ + "copilot", "run", + "--prompt", "test", + ]) + assert result.exit_code != 0 + + @patch('context.cli.call_litellm') + def test_instructions_flag(self, mock_call_litellm): + """Test with instructions flag.""" + # Mock LiteLLM response + mock_usage = UsageMetadata(prompt_tokens=50, completion_tokens=100, total_tokens=150) + mock_response = LLMResponse(content="Response", usage=mock_usage, cost_usd=0.00005) + mock_call_litellm.return_value = mock_response + + with patch.dict(os.environ, {"CONTEXT_VIRTUAL_KEY_MATTHEW": "test-key"}): + result = runner.invoke(app, [ + "copilot", "run", + "--prompt", "test", + "--user", "matthew", + "--budget", "0.05", + "--instructions", "custom instructions", + ]) + + assert result.exit_code == 0 + assert "LLM call successful" in result.stdout + + @patch('context.cli.call_litellm') + def test_instructions_file_flag(self, 
mock_call_litellm): + """Test with instructions-file flag.""" + # Mock LiteLLM response + mock_usage = UsageMetadata(prompt_tokens=50, completion_tokens=100, total_tokens=150) + mock_response = LLMResponse(content="Response", usage=mock_usage, cost_usd=0.00005) + mock_call_litellm.return_value = mock_response + + # Create temp file + with tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.txt') as f: + f.write("test instructions") + temp_path = f.name + + try: + with patch.dict(os.environ, {"CONTEXT_VIRTUAL_KEY_MATTHEW": "test-key"}): + result = runner.invoke(app, [ + "copilot", "run", + "--prompt", "test", + "--user", "matthew", + "--budget", "0.05", + "--instructions-file", temp_path, + ]) + + assert result.exit_code == 0 + assert "LLM call successful" in result.stdout + finally: + pathlib.Path(temp_path).unlink() + + def test_both_instructions_flags_fails(self): + """Test both instructions flags fails.""" + result = runner.invoke(app, [ + "copilot", "run", + "--prompt", "test", + "--user", "matthew", + "--budget", "0.05", + "--instructions", "custom", + "--instructions-file", "/tmp/test.txt", + ]) + assert result.exit_code == 1 + assert "Error:" in result.stdout + + +class TestBudgetCalculations: + """Tests for budget and cost calculations.""" + + def test_budget_to_max_tokens_gpt4o_mini(self): + """Test budget to max tokens for gpt-4o-mini.""" + max_tokens = budget_to_max_tokens(0.05, "gpt-4o-mini") + assert max_tokens > 0 + # With 0.05 budget and pricing, should get reasonable token count + assert max_tokens > 50000 # At least 50k tokens + + def test_budget_to_max_tokens_gpt4(self): + """Test budget to max tokens for gpt-4.""" + max_tokens = budget_to_max_tokens(0.05, "gpt-4") + assert max_tokens > 0 + # GPT-4 is more expensive, should get fewer tokens + assert max_tokens < 2000 # Less than 2k tokens + + def test_calculate_cost(self): + """Test cost calculation from usage.""" + usage = UsageMetadata( + prompt_tokens=100, + completion_tokens=200, + 
total_tokens=300, + ) + cost = calculate_cost(usage, "gpt-4o-mini") + assert cost > 0 + # Should be very small for gpt-4o-mini + assert cost < 0.001 # Less than 0.1 cents + + +class TestLLMResponse: + """Tests for LLM response handling.""" + + def test_llm_response_creation(self): + """Test creating LLM response.""" + usage = UsageMetadata( + prompt_tokens=100, + completion_tokens=200, + total_tokens=300, + ) + response = LLMResponse( + content="Test response", + usage=usage, + cost_usd=0.0001, + ) + assert response.content == "Test response" + assert response.usage.total_tokens == 300 + assert response.cost_usd == 0.0001 + + +class TestDashboardGeneration: + """Tests for dashboard generation.""" + + def test_generate_dashboard_planner(self): + """Test generating planner dashboard.""" + usage = UsageMetadata( + prompt_tokens=100, + completion_tokens=200, + total_tokens=300, + ) + llm_response = LLMResponse( + content="Weekend activities:\n1. Hiking\n2. Museum visit", + usage=usage, + cost_usd=0.0001, + ) + + with tempfile.TemporaryDirectory() as tmpdir: + output_path = pathlib.Path(tmpdir) / "test.md" + result_path = generate_dashboard( + prompt="build me a weekend planner", + llm_response=llm_response, + task_type="planner", + output_path=output_path, + ) + + assert result_path == output_path + assert output_path.exists() + + content = output_path.read_text() + assert "Planning Tool" in content + assert "Plan" in content + assert "Hiking" in content + + def test_generate_dashboard_general(self): + """Test generating general dashboard.""" + usage = UsageMetadata( + prompt_tokens=50, + completion_tokens=100, + total_tokens=150, + ) + llm_response = LLMResponse( + content="Analysis complete", + usage=usage, + cost_usd=0.00005, + ) + + with tempfile.TemporaryDirectory() as tmpdir: + output_path = pathlib.Path(tmpdir) / "test.md" + result_path = generate_dashboard( + prompt="analyze data", + llm_response=llm_response, + task_type="analysis", + output_path=output_path, 
+ ) + + assert result_path == output_path + assert output_path.exists() + + content = output_path.read_text() + assert "Task: Analysis" in content + assert "Analysis complete" in content + + +class TestLogging: + """Tests for structured logging.""" + + def test_copilot_run_log_creation(self): + """Test creating CopilotRunLog.""" + from context.cli import CopilotRunLog + + log = CopilotRunLog( + prompt_id="test-123", + timestamp_start=datetime.now(timezone.utc), + timestamp_end=datetime.now(timezone.utc), + user="matthew", + prompt="test prompt", + instructions_source="flag", + model="gpt-4o-mini", + budget_usd=0.05, + estimated_max_tokens=1000, + usage=UsageMetadata(prompt_tokens=100, completion_tokens=200, total_tokens=300), + cost_usd=0.0001, + output_path="/tmp/test.md", + error=None, + ) + + assert log.prompt_id == "test-123" + assert log.user == "matthew" + assert log.usage.total_tokens == 300 + assert log.error is None + + def test_copilot_run_log_with_error(self): + """Test creating CopilotRunLog with error.""" + from context.cli import CopilotRunLog + + log = CopilotRunLog( + prompt_id="test-123", + timestamp_start=datetime.now(timezone.utc), + timestamp_end=datetime.now(timezone.utc), + user="matthew", + prompt="test prompt", + instructions_source="default", + model="gpt-4o-mini", + budget_usd=0.05, + estimated_max_tokens=1000, + usage=None, + cost_usd=None, + output_path=None, + error="LiteLLM connection error", + ) + + assert log.error == "LiteLLM connection error" + assert log.usage is None + assert log.cost_usd is None + + def test_write_log(self): + """Test writing log to file.""" + from context.cli import CopilotRunLog, write_log + + log = CopilotRunLog( + prompt_id="test-456", + timestamp_start=datetime.now(timezone.utc), + timestamp_end=datetime.now(timezone.utc), + user="matthew", + prompt="test prompt", + instructions_source="file", + model="gpt-4o-mini", + budget_usd=0.05, + estimated_max_tokens=1000, + usage=UsageMetadata(prompt_tokens=100, 
completion_tokens=200, total_tokens=300), + cost_usd=0.0001, + output_path="/tmp/test.md", + error=None, + ) + + with tempfile.TemporaryDirectory() as tmpdir: + log_dir = pathlib.Path(tmpdir) / "logs" + log_path = write_log(log, log_dir) + + assert log_path.exists() + assert log_path.name == "test-456.json" + + # Verify log content + log_data = json.loads(log_path.read_text()) + assert log_data["prompt_id"] == "test-456" + assert log_data["user"] == "matthew" + assert log_data["usage"]["total_tokens"] == 300 + + @patch('context.cli.call_litellm') + def test_cli_writes_log_on_success(self, mock_call_litellm): + """Test that CLI writes log on successful run.""" + # Mock LiteLLM response + mock_usage = UsageMetadata(prompt_tokens=100, completion_tokens=200, total_tokens=300) + mock_response = LLMResponse(content="Test response", usage=mock_usage, cost_usd=0.0001) + mock_call_litellm.return_value = mock_response + + with patch.dict(os.environ, {"CONTEXT_VIRTUAL_KEY_MATTHEW": "test-key"}): + result = runner.invoke(app, [ + "copilot", "run", + "--prompt", "test", + "--user", "matthew", + "--budget", "0.05", + ]) + + assert result.exit_code == 0 + assert "Log written" in result.stdout + + def test_cli_writes_log_on_failure(self): + """Test that CLI writes log on failure.""" + # Missing virtual key should cause failure + result = runner.invoke(app, [ + "copilot", "run", + "--prompt", "test", + "--user", "matthew", + "--budget", "0.05", + ]) + + assert result.exit_code == 1 + assert "Log written" in result.stdout + + +class TestPromptParsingBritishSpellings: + """Tests for British spelling support in prompt parsing.""" + + def test_parse_analyse_british(self): + """Test detection of British spelling 'analyse'.""" + hints = parse_prompt_hints("analyse this dataset") + assert hints["task_type"] == "analysis" + assert "analysis" in hints["keywords"] + + def test_parse_summarise_british(self): + """Test detection of British spelling 'summarise'.""" + hints = 
parse_prompt_hints("summarise this document") + assert hints["task_type"] == "summarization" + assert "summarization" in hints["keywords"] + + +class TestBudgetEdgeCases: + """Tests for edge cases in budget calculations.""" + + def test_very_small_budget_returns_minimum_tokens(self): + """Test that very small budget returns at least 1 token.""" + max_tokens = budget_to_max_tokens(0.0001, "gpt-4o-mini") + assert max_tokens >= 1 + + def test_zero_budget_returns_minimum_tokens(self): + """Test that zero budget returns at least 1 token.""" + max_tokens = budget_to_max_tokens(0.0, "gpt-4o-mini") + assert max_tokens >= 1 + + def test_unknown_model_uses_fallback_pricing(self): + """Test that unknown model falls back to gpt-4o-mini pricing.""" + max_tokens_unknown = budget_to_max_tokens(0.05, "unknown-model") + max_tokens_default = budget_to_max_tokens(0.05, "gpt-4o-mini") + assert max_tokens_unknown == max_tokens_default + + +class TestInstructionsFileErrorHandling: + """Tests for instructions file error handling.""" + + def test_missing_instructions_file_raises_error(self): + """Test that missing instructions file raises ValueError.""" + with pytest.raises(ValueError, match="Instructions file not found"): + config = CopilotRunConfig( + prompt="test", + user="matthew", + budget=0.05, + instructions_file=pathlib.Path("/nonexistent/file.txt"), + ) + # Access the property to trigger file read + _ = config.user_instructions + + def test_valid_instructions_file_works(self): + """Test that valid instructions file is read correctly.""" + with tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.txt') as f: + f.write("test instructions content") + temp_path = pathlib.Path(f.name) + + try: + config = CopilotRunConfig( + prompt="test", + user="matthew", + budget=0.05, + instructions_file=temp_path, + ) + assert config.user_instructions == "test instructions content" + finally: + temp_path.unlink() diff --git a/docs/copilot-cli.md b/docs/copilot-cli.md new file mode 100644 
index 0000000..a096638
--- /dev/null
+++ b/docs/copilot-cli.md
@@ -0,0 +1,205 @@
+# Copilot CLI
+
+The Copilot CLI provides a command-line interface for running one-off LLM tasks with the Context runtime.
+
+## Installation
+
+```bash
+cd core/python
+pip install -e .
+```
+
+## Quick Start
+
+### Basic Usage
+
+```bash
+context copilot run \
+  --prompt "build me a custom weekend planning tool" \
+  --user matthew \
+  --budget 0.05
+```
+
+### With Custom Instructions
+
+```bash
+context copilot run \
+  --prompt "analyze this quarterly sales data" \
+  --user matthew \
+  --budget 0.10 \
+  --instructions "Focus on trends and anomalies"
+```
+
+### With Instructions File
+
+```bash
+context copilot run \
+  --prompt "create a project plan" \
+  --user matthew \
+  --budget 0.05 \
+  --instructions-file ./instructions.txt
+```
+
+## Configuration
+
+### Environment Variables
+
+- `LITELLM_PROXY_URL` - LiteLLM proxy endpoint (default: `http://localhost:4000`)
+- `CONTEXT_VIRTUAL_KEY_<USERNAME>` - User-specific virtual key for LiteLLM authentication (required)
+- `COPILOT_MODEL` - Override default model (default: `gpt-4o-mini`)
+
+### Example Setup
+
+```bash
+export LITELLM_PROXY_URL="http://localhost:4000"
+export CONTEXT_VIRTUAL_KEY_MATTHEW="sk-your-key-here"
+export COPILOT_MODEL="gpt-4o-mini"  # Optional
+```
+
+## Features
+
+### Automatic Task Detection
+
+The CLI automatically detects task types from your prompt:
+- **Planner** - Planning, scheduling, agenda tasks
+- **Analysis** - Data analysis, examination tasks
+- **Generation** - Creation, building, development tasks
+- **Summarization** - Summary, overview tasks
+- **General** - All other tasks
+
+### Budget Management
+
+Specify a USD budget cap for each run. The CLI converts your budget to estimated max tokens with a safety margin:
+
+```bash
+--budget 0.05  # $0.05 budget cap
+```
+
+### Output Files
+
+Each run generates two output files:
+
+1. **Dashboard** - Markdown file with formatted results
+   - Location: `.context/copilot/{prompt_id}.md`
+   - Content: Task-specific formatting (e.g., planner layout for planning tasks)
+
+2. **Log** - JSON file with run metadata
+   - Location: `.context/logs/copilot/{prompt_id}.json`
+   - Content: Full run details including usage, cost, and errors
+
+### Error Handling
+
+Logs are created even when runs fail, ensuring you have visibility into all executions:
+
+```json
+{
+  "prompt_id": "abc-123",
+  "error": "LiteLLM connection error",
+  "cost_usd": null,
+  ...
+}
+```
+
+## Command Reference
+
+### `context copilot run`
+
+Execute a one-off copilot run.
+
+**Required Arguments:**
+- `--prompt TEXT` - Natural language prompt describing the task
+- `--user TEXT` - Username for this run
+- `--budget FLOAT` - USD budget cap (must be > 0)
+
+**Optional Arguments:**
+- `--instructions TEXT` - Custom instructions
+- `--instructions-file PATH` - Path to instructions file
+
+**Note:** You cannot specify both `--instructions` and `--instructions-file`.
+
+## Examples
+
+### Weekend Planner
+
+```bash
+context copilot run \
+  --prompt "build me a custom weekend planning tool" \
+  --user matthew \
+  --budget 0.05
+```
+
+Output:
+- Dashboard: `.context/copilot/{prompt_id}.md` with Activities, Costs, Notes
+- Log: `.context/logs/copilot/{prompt_id}.json`
+
+### Data Analysis
+
+```bash
+context copilot run \
+  --prompt "analyze Q3 sales trends and identify anomalies" \
+  --user sarah \
+  --budget 0.10 \
+  --instructions "Focus on regional variations"
+```
+
+## Pricing
+
+The CLI uses approximate pricing for token estimation:
+
+| Model | Input (per 1M tokens) | Output (per 1M tokens) |
+|-------|----------------------|------------------------|
+| gpt-4o-mini | $0.15 | $0.60 |
+| gpt-4o | $2.50 | $10.00 |
+| gpt-4 | $30.00 | $60.00 |
+
+Actual costs are calculated from usage and included in the log file.
+
+## Troubleshooting
+
+### "Virtual key not found"
+
+Ensure you've set the environment variable for your user:
+
+```bash
+export CONTEXT_VIRTUAL_KEY_<USERNAME>="sk-your-key-here"
+```
+
+The username should be uppercase in the environment variable name.
+
+### "LiteLLM connection error"
+
+Check that your LiteLLM proxy is running:
+
+```bash
+curl http://localhost:4000/health
+```
+
+Or set `LITELLM_PROXY_URL` if using a different endpoint.
+
+### Budget validation error
+
+Budget must be greater than 0:
+
+```bash
+--budget 0.05  # Valid
+--budget 0     # Invalid
+```
+
+## Development
+
+### Running Tests
+
+```bash
+cd core/python
+python -m pytest tests/test_cli.py -v
+```
+
+### Test Coverage
+
+The CLI has comprehensive test coverage including:
+- Configuration validation
+- Prompt parsing
+- Budget calculations
+- LiteLLM integration (mocked)
+- Dashboard generation
+- Logging functionality