diff --git a/docs/docs.json b/docs/docs.json index 131ab74b..fabb3e50 100644 --- a/docs/docs.json +++ b/docs/docs.json @@ -29,7 +29,7 @@ "navigation": { "versions": [ { - "version": "0.4.70", + "version": "0.4.71", "groups": [ { "group": "Get Started", diff --git a/docs/index.mdx b/docs/index.mdx index 7b8c20f4..3f1d0989 100644 --- a/docs/index.mdx +++ b/docs/index.mdx @@ -5,7 +5,7 @@ icon: "book" --- -**Version 0.4.70** - Latest stable release +**Version 0.4.71** - Latest stable release diff --git a/docs/reference/cli/build.mdx b/docs/reference/cli/build.mdx index db4261a0..37ec022d 100644 --- a/docs/reference/cli/build.mdx +++ b/docs/reference/cli/build.mdx @@ -98,17 +98,24 @@ They’re recorded in the lock file as placeholders under `environment.variables Minimal structure you’ll see: ```yaml -version: "1.0" -image: "my-env:dev@sha256:..." +version: "1.2" +image: "my-env:0.1.0@sha256:..." build: generatedAt: "2025-01-01T12:00:00Z" hudVersion: "0.x.y" directory: "my-env" version: "0.1.0" sourceHash: "..." + baseImage: "python:3.11-slim" + platform: "linux/amd64" environment: initializeMs: 450 toolCount: 3 + runtime: + python: "3.11.6" + cuda: null + cudnn: null + pytorch: null variables: provided: { API_KEY: "${API_KEY}" } required: ["OTHER_KEY"] @@ -116,6 +123,7 @@ tools: - name: setup description: Initialize environment inputSchema: { type: object } + internalTools: ["board", "seed"] ``` ## Next Steps diff --git a/hud/cli/__init__.py b/hud/cli/__init__.py index 1812a223..d8dc5eca 100644 --- a/hud/cli/__init__.py +++ b/hud/cli/__init__.py @@ -397,7 +397,7 @@ def dev( new: bool = typer.Option( False, "--new", - help="Show Cursor installation link for new server setup", + help="Create a new dev trace on hud.ai (opens in browser)", ), ) -> None: """🔥 Development mode - run MCP server with hot-reload. @@ -416,6 +416,7 @@ def dev( Examples: hud dev # Auto-detect in current directory + hud dev --new # Create live dev trace on hud.ai hud dev controller # Run specific module hud dev --inspector # Launch MCP Inspector hud dev --interactive # Launch interactive testing mode @@ -439,7 +440,7 @@ def dev( watch, docker=docker, docker_args=docker_args, - new=new, + new_trace=new, ) diff --git a/hud/cli/build.py b/hud/cli/build.py index b590c4ba..b7f169a1 100644 --- a/hud/cli/build.py +++ b/hud/cli/build.py @@ -291,6 +291,121 @@ def extract_env_vars_from_dockerfile(dockerfile_path: Path) -> tuple[list[str], return required, optional +def parse_base_image(dockerfile_path: Path) -> str | None: + """Extract the base image from the first FROM directive in Dockerfile. + + For multi-stage builds, returns the image from the first FROM. Strips any + trailing AS segment. + """ + try: + if not dockerfile_path.exists(): + return None + for raw_line in dockerfile_path.read_text().splitlines(): + line = raw_line.strip() + if not line or line.startswith("#"): + continue + if line.upper().startswith("FROM "): + rest = line[5:].strip() + # Remove stage alias if present + lower = rest.lower() + if " as " in lower: + # Split using the original case string at the index of lower-case match + idx = lower.index(" as ") + rest = rest[:idx] + return rest.strip() + except Exception: + return None + return None + + +def collect_runtime_metadata(image: str, *, verbose: bool = False) -> dict[str, str | None]: + """Probe container to capture Python/CUDA/cuDNN/PyTorch versions. + + Runs a tiny Python snippet inside the built image using docker run. 
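+    Tries the python binary first and falls back to python3; returns an empty
+    dict if the probe cannot run or its output cannot be parsed.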
+ """ + hud_console = HUDConsole() + + runtime_script = ( + "import json, platform\n" + "info = {'python': platform.python_version()}\n" + "try:\n" + " import torch\n" + " info['pytorch'] = getattr(torch, '__version__', None)\n" + " cuda_version = None\n" + " try:\n" + " cuda_version = getattr(getattr(torch, 'version', None), 'cuda', None)\n" + " except Exception:\n" + " cuda_version = None\n" + " if cuda_version:\n" + " info['cuda'] = cuda_version\n" + " try:\n" + " cudnn_version = torch.backends.cudnn.version()\n" + " except Exception:\n" + " cudnn_version = None\n" + " if cudnn_version:\n" + " info['cudnn'] = str(cudnn_version)\n" + "except Exception:\n" + " pass\n" + "info.setdefault('pytorch', None)\n" + "info.setdefault('cuda', None)\n" + "info.setdefault('cudnn', None)\n" + "print(json.dumps(info))\n" + ) + + for binary in ("python", "python3"): + cmd = [ + "docker", + "run", + "--rm", + image, + binary, + "-c", + runtime_script, + ] + try: + result = subprocess.run( # noqa: S603 + cmd, capture_output=True, text=True, check=False + ) + except FileNotFoundError: + return {} + + if result.returncode != 0: + if verbose: + hud_console.debug( + f"Runtime probe failed with {binary}: {result.stderr.strip() or 'no stderr'}" + ) + continue + + output = (result.stdout or "").strip() + if not output: + return {} + + try: + data = json.loads(output.splitlines()[-1]) + except json.JSONDecodeError: + if verbose: + hud_console.debug( + "Runtime probe returned non-JSON output; skipping metadata capture" + ) + return {} + + if not isinstance(data, dict): + if verbose: + hud_console.debug( + "Runtime probe returned JSON that is not an object; skipping metadata capture" + ) + return {} + + return { + "python": data.get("python"), + "cuda": data.get("cuda"), + "cudnn": data.get("cudnn"), + "pytorch": data.get("pytorch"), + } + + return {} + + async def analyze_mcp_environment( image: str, verbose: bool = False, env_vars: dict[str, str] | None = None ) -> dict[str, Any]: @@ -325,17 +440,60 @@ async def analyze_mcp_environment( initialized = True initialize_ms = int((time.time() - start_time) * 1000) - # Delegate to standard analysis helper for consistency + # Delegate to standard analysis helper full_analysis = await client.analyze_environment() - # Normalize to build's expected fields + # Normalize and enrich with internalTools if a hub map is present tools_list = full_analysis.get("tools", []) - return { + hub_map = full_analysis.get("hub_tools", {}) or full_analysis.get("hubTools", {}) + + normalized_tools: list[dict[str, Any]] = [] + internal_total = 0 + for t in tools_list: + # Extract core fields (support object or dict forms) + if hasattr(t, "name"): + name = getattr(t, "name", None) + description = getattr(t, "description", None) + input_schema = getattr(t, "inputSchema", None) + existing_internal = getattr(t, "internalTools", None) + else: + name = t.get("name") + description = t.get("description") + # accept either inputSchema or input_schema + input_schema = t.get("inputSchema") or t.get("input_schema") + # accept either internalTools or internal_tools + existing_internal = t.get("internalTools") or t.get("internal_tools") + + tool_entry: dict[str, Any] = {"name": name} + if description: + tool_entry["description"] = description + if input_schema: + tool_entry["inputSchema"] = input_schema + + # Merge internal tools: preserve any existing declaration and add hub_map[name] + merged_internal: list[str] = [] + if isinstance(existing_internal, list): + merged_internal.extend([str(x) for x in 
existing_internal]) + if isinstance(hub_map, dict) and name in hub_map and isinstance(hub_map[name], list): + merged_internal.extend([str(x) for x in hub_map[name]]) + if merged_internal: + # Deduplicate while preserving order + merged_internal = list(dict.fromkeys(merged_internal)) + tool_entry["internalTools"] = merged_internal + internal_total += len(merged_internal) + + normalized_tools.append(tool_entry) + + result = { "initializeMs": initialize_ms, "toolCount": len(tools_list), - "tools": tools_list, + "internalToolCount": internal_total, + "tools": normalized_tools, "success": True, } + if hub_map: + result["hub_tools"] = hub_map + return result except TimeoutError: from hud.shared.exceptions import HudException @@ -562,7 +720,9 @@ def build_environment( finally: loop.close() - hud_console.success(f"Analyzed environment: {analysis['toolCount']} tools found") + # Show analysis results including hub tools + tool_msg = f"Analyzed environment: {analysis['toolCount']} tools found" + hud_console.success(tool_msg) # Extract environment variables from Dockerfile dockerfile_path = env_dir / "Dockerfile" @@ -604,9 +764,14 @@ def build_environment( if image_tag: base_name = image_tag.split(":")[0] if ":" in image_tag else image_tag + # Collect runtime metadata and compute base image/platform + runtime_info = collect_runtime_metadata(temp_tag, verbose=verbose) + base_image = parse_base_image(dockerfile_path) + effective_platform = platform if platform is not None else "linux/amd64" + # Create lock file content with images subsection at top lock_content = { - "version": "1.1", # Lock file format version + "version": "1.2", # Lock file format version "images": { "local": f"{base_name}:{new_version}", # Local tag with version "full": None, # Will be set with digest after build @@ -619,6 +784,8 @@ def build_environment( "version": new_version, # Fast source fingerprint for change detection "sourceHash": compute_source_hash(env_dir), + "baseImage": base_image, + "platform": effective_platform, }, "environment": { "initializeMs": analysis["initializeMs"], @@ -626,6 +793,11 @@ def build_environment( }, } + if runtime_info: + lock_content["environment"]["runtime"] = runtime_info + internal_count = int(analysis.get("internalToolCount", 0) or 0) + lock_content["environment"]["internalToolCount"] = internal_count + # Add environment variables section if any exist # Include env vars from .env file as well env_vars_from_file = set(env_from_file.keys()) if env_from_file else set() @@ -662,14 +834,23 @@ def build_environment( # Add tools with full schemas for RL config generation if analysis["tools"]: - lock_content["tools"] = [ - { + tools_serialized: list[dict[str, Any]] = [] + for tool in analysis["tools"]: + entry: dict[str, Any] = { "name": tool["name"], + # Preserve legacy shape: always include description/inputSchema "description": tool.get("description", ""), "inputSchema": tool.get("inputSchema", {}), } - for tool in analysis["tools"] - ] + if tool.get("internalTools"): + entry["internalTools"] = tool.get("internalTools") + tools_serialized.append(entry) + lock_content["tools"] = tools_serialized + + # Add hub tools if present (analyze_environment returns hub_tools with snake_case) + hub_tools = analysis.get("hub_tools") or analysis.get("hubTools") + if hub_tools: + lock_content["hubTools"] = hub_tools # Write lock file lock_path = env_dir / "hud.lock.yaml" diff --git a/hud/cli/dev.py b/hud/cli/dev.py index 4b8ac174..38bccda1 100644 --- a/hud/cli/dev.py +++ b/hud/cli/dev.py @@ -28,8 +28,8 @@ def 
show_dev_server_info( inspector: bool, interactive: bool, env_dir: Path | None = None, - new: bool = False, docker_mode: bool = False, + telemetry: dict[str, Any] | None = None, ) -> str: """Show consistent server info for both Python and Docker modes. @@ -68,6 +68,15 @@ def show_dev_server_info( f"{hud_console.sym.ITEM} Eval API: http://localhost:{port}/eval (POST)" ) + # Show debugging URLs from telemetry + if telemetry: + if "live_url" in telemetry: + hud_console.info(f"{hud_console.sym.ITEM} Live URL: {telemetry['live_url']}") + if "vnc_url" in telemetry: + hud_console.info(f"{hud_console.sym.ITEM} VNC URL: {telemetry['vnc_url']}") + if "cdp_url" in telemetry: + hud_console.info(f"{hud_console.sym.ITEM} CDP URL: {telemetry['cdp_url']}") + # Check for VNC (browser environment) if env_dir and (env_dir / "environment" / "server.py").exists(): try: @@ -138,7 +147,7 @@ async def run_mcp_module( verbose: bool, inspector: bool, interactive: bool, - new: bool = False, + new_trace: bool = False, ) -> None: """Run an MCP module directly.""" # Check if this is a reload (not first run) @@ -236,9 +245,9 @@ async def run_mcp_module( # Show server info only on first run if not is_reload: - # Try dynamic trace first for HTTP mode (only if --new) + # Try dynamic trace first for HTTP mode (only if --new flag is set) live_trace_url: str | None = None - if transport == "http" and new: + if transport == "http" and new_trace: try: local_mcp_config: dict[str, dict[str, Any]] = { "hud": { @@ -254,11 +263,13 @@ async def run_mcp_module( build_status=False, environment_name=mcp_server.name or "mcp-server", ) + except SystemExit: + raise # Let API key requirement exits through except Exception: # noqa: S110 pass # Show UI using shared flow logic - if transport == "http" and live_trace_url and new: + if transport == "http" and live_trace_url: # Minimal UI with live trace from hud.cli.flows.dev import generate_cursor_deeplink, show_dev_ui @@ -281,7 +292,6 @@ async def run_mcp_module( inspector=inspector, interactive=interactive, env_dir=Path.cwd().parent if (Path.cwd().parent / "environment").exists() else None, - new=new, ) # Check if there's an environment backend and remind user to start it (first run only) @@ -401,7 +411,7 @@ def run_with_reload( verbose: bool, inspector: bool, interactive: bool, - new: bool = False, + new_trace: bool = False, ) -> None: """Run module with file watching and auto-reload.""" try: @@ -445,7 +455,7 @@ def handle_signal(signum: int, frame: Any) -> None: if verbose: cmd.append("--verbose") - if new: + if new_trace and is_first_run: cmd.append("--new") if verbose: @@ -519,7 +529,7 @@ def run_docker_dev_server( inspector: bool, interactive: bool, docker_args: list[str], - new: bool = False, + new_trace: bool = False, ) -> None: """Run MCP server in Docker with volume mounts, expose via local HTTP proxy.""" import atexit @@ -631,6 +641,10 @@ def signal_handler(signum: int, frame: Any) -> None: if "@" in image_name: image_name = image_name.split("@")[0] + # Extract debugging ports from lock file + debugging_ports = lock_data.get("environment", {}).get("debuggingPorts", []) + telemetry = lock_data.get("environment", {}).get("telemetry", {}) + except Exception as e: hud_console.error(f"Failed to read lock file: {e}") raise typer.Exit(1) from e @@ -661,6 +675,12 @@ def signal_handler(signum: int, frame: Any) -> None: "-e", "HUD_DEV=1", ] + + # Add debugging port mappings if available + if debugging_ports: + hud_console.info(f"Exposing debugging ports: {', '.join(map(str, 
debugging_ports))}") + for port_num in debugging_ports: + base_args.extend(["-p", f"{port_num}:{port_num}"]) combined_args = [*base_args, *docker_args] if docker_args else base_args docker_cmd = create_docker_run_command( image_name, @@ -676,13 +696,13 @@ def signal_handler(signum: int, frame: Any) -> None: } } - # Attempt to create dynamic trace early (before any UI) + # Attempt to create dynamic trace early (before any UI) if --new flag is set import asyncio as _asy from hud.cli.flows.dev import create_dynamic_trace, generate_cursor_deeplink, show_dev_ui live_trace_url: str | None = None - if new: + if new_trace: try: local_mcp_config: dict[str, dict[str, Any]] = { "hud": { @@ -697,11 +717,13 @@ def signal_handler(signum: int, frame: Any) -> None: environment_name=image_name, ) ) + except SystemExit: + raise # Let API key requirement exits through except Exception: # noqa: S110 pass # Show appropriate UI - if live_trace_url and new: + if live_trace_url: # Minimal UI with live trace cursor_deeplink = generate_cursor_deeplink(image_name, port) show_dev_ui( @@ -724,8 +746,8 @@ def signal_handler(signum: int, frame: Any) -> None: inspector=inspector, interactive=interactive, env_dir=env_dir, - new=new, docker_mode=True, + telemetry=telemetry, ) hud_console.dim_info( "", @@ -822,7 +844,7 @@ def run_mcp_dev_server( watch: list[str] | None, docker: bool = False, docker_args: list[str] | None = None, - new: bool = False, + new_trace: bool = False, ) -> None: """Run MCP development server with hot-reload.""" docker_args = docker_args or [] @@ -847,12 +869,12 @@ def run_mcp_dev_server( hud_console.note("Detected Dockerfile - using Docker mode with volume mounts") hud_console.dim_info("Tip", "Use 'hud dev --help' to see all options") hud_console.info("") - run_docker_dev_server(port, verbose, inspector, interactive, docker_args, new) + run_docker_dev_server(port, verbose, inspector, interactive, docker_args, new_trace) return # Route to Docker mode if explicitly requested if docker: - run_docker_dev_server(port, verbose, inspector, interactive, docker_args, new) + run_docker_dev_server(port, verbose, inspector, interactive, docker_args, new_trace) return transport = "stdio" if stdio else "http" @@ -896,6 +918,8 @@ def run_mcp_dev_server( is_child = os.environ.get("_HUD_DEV_CHILD") == "1" if is_child: - asyncio.run(run_mcp_module(module, transport, port, verbose, False, False, new)) + asyncio.run(run_mcp_module(module, transport, port, verbose, False, False, new_trace)) else: - run_with_reload(module, watch_paths, transport, port, verbose, inspector, interactive, new) + run_with_reload( + module, watch_paths, transport, port, verbose, inspector, interactive, new_trace + ) diff --git a/hud/cli/flows/dev.py b/hud/cli/flows/dev.py index 3e1ecd47..8072cf01 100644 --- a/hud/cli/flows/dev.py +++ b/hud/cli/flows/dev.py @@ -26,6 +26,7 @@ async def create_dynamic_trace( - mcp_config: points to the local MCP config (same as Cursor) - build_status: True if Docker mode (built image), False if basic Python mode - environment_name: Name of the environment/server/image + - git_info: Repository information (if available) Returns the full URL to the live trace when successful, otherwise None. 
""" @@ -33,17 +34,37 @@ async def create_dynamic_trace( # Endpoint TBD; use a sensible default path that the backend can wire up url = f"{api_base}/dev/dynamic-traces" + # Get git repository information + from hud.cli.utils.git import get_git_info + + git_info = get_git_info() + payload = { "mcp_config": mcp_config, "build_status": bool(build_status), "environment_name": environment_name, } - # Best-effort; if missing API key, log and continue + # Add git info if available + if git_info and git_info.get("remote_url"): + payload["git_info"] = git_info + logger.info("Detected git repository: %s", git_info.get("remote_url")) + else: + logger.info("No git repository detected") + + # Require API key for dev mode api_key = settings.api_key if not api_key: - logger.warning("Skipping dynamic trace creation; missing HUD_API_KEY") - return None, None + hud_console.error("HUD_API_KEY is required for hud dev command") + hud_console.info("") + hud_console.info("Please set your API key using one of these methods:") + hud_console.info(" 1. Set environment variable: export HUD_API_KEY=your_key") + hud_console.info(" 2. Use hud set command: hud set api_key your_key") + hud_console.info("") + hud_console.info("Get your API key at: https://hud.ai/settings") + import sys + + sys.exit(1) try: resp = await make_request("POST", url=url, json=payload, api_key=api_key) diff --git a/hud/cli/flows/tests/__init__.py b/hud/cli/flows/tests/__init__.py new file mode 100644 index 00000000..ef9800e2 --- /dev/null +++ b/hud/cli/flows/tests/__init__.py @@ -0,0 +1 @@ +"""Tests for CLI flows.""" diff --git a/hud/cli/flows/tests/test_dev.py b/hud/cli/flows/tests/test_dev.py new file mode 100644 index 00000000..2d0a6841 --- /dev/null +++ b/hud/cli/flows/tests/test_dev.py @@ -0,0 +1,126 @@ +"""Tests for CLI flows dev module.""" + +from __future__ import annotations + +import base64 +import json +from unittest import mock + +import pytest + +from hud.cli.flows.dev import generate_cursor_deeplink + + +class TestGenerateCursorDeeplink: + """Test Cursor deeplink generation.""" + + def test_generate_deeplink_basic(self): + """Test basic deeplink generation.""" + result = generate_cursor_deeplink("my-server", 8000) + + assert result.startswith("cursor://anysphere.cursor-deeplink/mcp/install?") + assert "name=my-server" in result + assert "config=" in result + + def test_generate_deeplink_config_content(self): + """Test that config contains correct URL.""" + result = generate_cursor_deeplink("test-server", 9999) + + # Extract and decode the config + config_part = result.split("config=")[1] + decoded = base64.b64decode(config_part).decode() + config = json.loads(decoded) + + assert config["url"] == "http://localhost:9999/mcp" + + def test_generate_deeplink_different_ports(self): + """Test deeplink generation with different ports.""" + result_8000 = generate_cursor_deeplink("server", 8000) + result_3000 = generate_cursor_deeplink("server", 3000) + + # Decode configs + config_8000 = json.loads(base64.b64decode(result_8000.split("config=")[1])) + config_3000 = json.loads(base64.b64decode(result_3000.split("config=")[1])) + + assert "8000" in config_8000["url"] + assert "3000" in config_3000["url"] + + def test_generate_deeplink_special_characters_in_name(self): + """Test deeplink with special characters in server name.""" + # Server name with special characters should still work + result = generate_cursor_deeplink("my-cool_server.v2", 8000) + + assert "name=my-cool_server.v2" in result + + +class TestCreateDynamicTrace: + """Test dynamic 
trace creation.""" + + @pytest.mark.asyncio + @mock.patch("hud.cli.flows.dev.make_request") + @mock.patch("hud.cli.utils.git.get_git_info") + @mock.patch("hud.cli.flows.dev.settings") + async def test_create_dynamic_trace_success(self, mock_settings, mock_git, mock_request): + """Test successful trace creation.""" + from hud.cli.flows.dev import create_dynamic_trace + + mock_settings.hud_api_url = "https://api.hud.ai" + mock_settings.api_key = "test-key" + mock_git.return_value = {"remote_url": "https://github.com/user/repo"} + mock_request.return_value = {"id": "trace-123"} + + trace_id, url = await create_dynamic_trace( + mcp_config={"server": {"url": "http://localhost:8000"}}, + build_status=True, + environment_name="test-env", + ) + + assert trace_id == "trace-123" + assert url == "https://hud.ai/trace/trace-123" + mock_request.assert_called_once() + + @pytest.mark.asyncio + @mock.patch("hud.cli.flows.dev.make_request") + @mock.patch("hud.cli.utils.git.get_git_info") + @mock.patch("hud.cli.flows.dev.settings") + async def test_create_dynamic_trace_no_git(self, mock_settings, mock_git, mock_request): + """Test trace creation without git info.""" + from hud.cli.flows.dev import create_dynamic_trace + + mock_settings.hud_api_url = "https://api.hud.ai" + mock_settings.api_key = "test-key" + mock_git.return_value = {} # No remote_url + mock_request.return_value = {"id": "trace-456"} + + trace_id, _ = await create_dynamic_trace( + mcp_config={"server": {"url": "http://localhost:8000"}}, + build_status=False, + environment_name="test-env", + ) + + assert trace_id == "trace-456" + # Verify git_info was not included in payload + call_args = mock_request.call_args + assert "git_info" not in call_args.kwargs.get("json", {}) + + @pytest.mark.asyncio + @mock.patch("hud.cli.flows.dev.make_request") + @mock.patch("hud.cli.utils.git.get_git_info") + @mock.patch("hud.cli.flows.dev.settings") + async def test_create_dynamic_trace_api_error(self, mock_settings, mock_git, mock_request): + """Test trace creation when API fails.""" + from hud.cli.flows.dev import create_dynamic_trace + + mock_settings.hud_api_url = "https://api.hud.ai" + mock_settings.api_key = "test-key" + mock_git.return_value = {} + mock_request.side_effect = Exception("API Error") + + trace_id, url = await create_dynamic_trace( + mcp_config={"server": {}}, + build_status=True, + environment_name="test-env", + ) + + assert trace_id is None + assert url is None diff --git a/hud/cli/init.py b/hud/cli/init.py index d9885187..d07cb486 100644 --- a/hud/cli/init.py +++ b/hud/cli/init.py @@ -14,16 +14,15 @@ from hud.utils.hud_console import HUDConsole -# Presets mapping to environment folders in public SDK repo +# Presets mapping to public GitHub repositories under hud-evals org GITHUB_OWNER = "hud-evals" -GITHUB_REPO = "hud-python" GITHUB_BRANCH = "main" PRESET_MAP: dict[str, str | None] = { - "blank": "blank", - "deep-research": "deepresearch", - "browser": "browser", - "rubrics": "rubrics", + "blank": "hud-blank", + "deep-research": "hud-deepresearch", + "browser": "hud-browser", + "rubrics": "hud-rubrics", } SKIP_DIR_NAMES = {"node_modules", "__pycache__", "dist", "build", ".next", ".git"} @@ -90,8 +89,8 @@ def _prompt_for_preset() -> str: try: choices = [ {"name": "blank", "message": "blank"}, - {"name": "deep-research", "message": "deep-research"}, {"name": "browser", "message": "browser"}, + {"name": "deep-research", "message": "deep-research"}, {"name": "rubrics", "message": "rubrics"}, ] display_choices = [c["message"] for c in 
choices] @@ -108,10 +107,10 @@ def _prompt_for_preset() -> str: return "blank" -def _download_tarball_subdir( - owner: str, repo: str, ref: str, subdir: str, dest_dir: Path, files_created: list[str] +def _download_tarball_repo( + owner: str, repo: str, ref: str, dest_dir: Path, files_created: list[str] ) -> None: - """Download a GitHub tarball and extract only a subdirectory.""" + """Download a GitHub tarball and extract the entire repository.""" tarball_url = f"https://codeload.github.com/{owner}/{repo}/tar.gz/{ref}" token = os.getenv("GITHUB_TOKEN") @@ -140,16 +139,17 @@ def _download_tarball_subdir( if not members: return top = members[0].name.split("/", 1)[0] - target_prefix = f"{top}/environments/{subdir.strip('/')}" for member in members: name = member.name - if not (name == target_prefix or name.startswith(target_prefix + "/")): + if name == top: continue - rel_path = name[len(target_prefix) :].lstrip("/") + if not name.startswith(top + "/"): + continue + + rel_path = name[len(top) + 1 :] if not rel_path: - dest_dir.mkdir(parents=True, exist_ok=True) continue out_path = (dest_dir / rel_path).resolve() @@ -194,9 +194,9 @@ def create_environment( target_dir = Path.cwd() / name if directory == "." else Path(directory) / name if preset_normalized not in PRESET_MAP: + available = ", ".join(sorted(PRESET_MAP.keys())) hud_console.warning( - f"Unknown preset '{preset_normalized}', defaulting to 'blank' " - "(available: blank, deep-research, browser, rubrics)" + f"Unknown preset '{preset_normalized}', defaulting to 'blank' (available: {available})" ) preset_normalized = "blank" @@ -210,17 +210,14 @@ def create_environment( hud_console.warning(f"Overwriting existing files in {target_dir}") # Download preset from GitHub - env_folder = PRESET_MAP[preset_normalized] - if env_folder is None: - hud_console.error("Internal error: preset mapping missing folder name") + repo_name = PRESET_MAP[preset_normalized] + if repo_name is None: + hud_console.error("Internal error: preset mapping missing repo name") raise typer.Exit(1) hud_console.header(f"Initializing HUD Environment: {name} (preset: {preset_normalized})") - hud_console.section_title("Downloading template from public SDK") - source_url = ( - f"https://github.com/{GITHUB_OWNER}/{GITHUB_REPO}/tree/" - f"{GITHUB_BRANCH}/environments/{env_folder}" - ) + hud_console.section_title("Downloading template from GitHub") + source_url = f"https://github.com/{GITHUB_OWNER}/{repo_name}" hud_console.info("Source: " + source_url) target_dir.mkdir(parents=True, exist_ok=True) @@ -228,11 +225,10 @@ def create_environment( started = time.time() files_created_dl: list[str] = [] try: - _download_tarball_subdir( + _download_tarball_repo( owner=GITHUB_OWNER, - repo=GITHUB_REPO, + repo=repo_name, ref=GITHUB_BRANCH, - subdir=env_folder, dest_dir=target_dir, files_created=files_created_dl, ) diff --git a/hud/cli/tests/test_build.py b/hud/cli/tests/test_build.py index ec05b814..9a7bb77b 100644 --- a/hud/cli/tests/test_build.py +++ b/hud/cli/tests/test_build.py @@ -334,6 +334,7 @@ class TestBuildEnvironment: """Test the main build_environment function.""" @mock.patch("hud.cli.build.build_docker_image") + @mock.patch("hud.cli.build.collect_runtime_metadata") @mock.patch("hud.cli.build.analyze_mcp_environment") @mock.patch("hud.cli.build.save_to_registry") @mock.patch("hud.cli.build.get_docker_image_id") @@ -344,6 +345,7 @@ def test_build_environment_success( mock_get_id, mock_save_registry, mock_analyze, + mock_collect_runtime, mock_build_docker, tmp_path, ): @@ -378,6 
+380,12 @@ def test_build_environment_success( ], } mock_get_id.return_value = "sha256:abc123" + mock_collect_runtime.return_value = { + "python": "3.11.6", + "cuda": None, + "cudnn": None, + "pytorch": None, + } # Mock final rebuild mock_result = mock.Mock() @@ -398,9 +406,76 @@ def test_build_environment_success( assert lock_data["images"]["full"] == "test-env:0.1.0@sha256:abc123" assert lock_data["images"]["local"] == "test-env:0.1.0" assert lock_data["build"]["version"] == "0.1.0" + assert lock_data["build"]["baseImage"] == "python:3.11" + assert lock_data["build"]["platform"] == "linux/amd64" assert lock_data["environment"]["toolCount"] == 2 + assert lock_data["environment"]["runtime"]["python"] == "3.11.6" assert len(lock_data["tools"]) == 2 + @mock.patch("hud.cli.build.build_docker_image") + @mock.patch("hud.cli.build.collect_runtime_metadata") + @mock.patch("hud.cli.build.analyze_mcp_environment") + @mock.patch("hud.cli.build.save_to_registry") + @mock.patch("hud.cli.build.get_docker_image_id") + @mock.patch("subprocess.run") + def test_build_environment_internal_tools( + self, + mock_run, + mock_get_id, + mock_save_registry, + mock_analyze, + mock_collect_runtime, + mock_build_docker, + tmp_path, + ): + """Dispatcher tools should include internalTools in lock, with count.""" + env_dir = tmp_path / "env-int" + env_dir.mkdir() + (env_dir / "pyproject.toml").write_text(""" +[tool.hud] +image = "test/env:dev" +""") + dockerfile = env_dir / "Dockerfile" + dockerfile.write_text(""" +FROM python:3.11 +""") + + mock_build_docker.return_value = True + mock_analyze.return_value = { + "success": True, + "toolCount": 1, + "internalToolCount": 2, + "initializeMs": 500, + "tools": [ + { + "name": "setup", + "description": "setup dispatcher", + "inputSchema": {"type": "object"}, + "internalTools": ["board", "seed"], + } + ], + } + mock_get_id.return_value = "sha256:fff111" + mock_collect_runtime.return_value = { + "python": "3.11.6", + "cuda": None, + "cudnn": None, + "pytorch": None, + } + + mock_result = mock.Mock() + mock_result.returncode = 0 + mock_run.return_value = mock_result + + build_environment(str(env_dir), "env-int:latest") + + lock_file = env_dir / "hud.lock.yaml" + with open(lock_file) as f: + data = yaml.safe_load(f) + assert data["environment"]["internalToolCount"] == 2 + assert data["tools"][0]["name"] == "setup" + assert data["tools"][0]["internalTools"] == ["board", "seed"] + def test_build_environment_no_directory(self): """Test build when directory doesn't exist.""" with pytest.raises(typer.Exit): diff --git a/hud/cli/tests/test_dev.py b/hud/cli/tests/test_dev.py new file mode 100644 index 00000000..d1027303 --- /dev/null +++ b/hud/cli/tests/test_dev.py @@ -0,0 +1,163 @@ +"""Tests for CLI dev module.""" + +from __future__ import annotations + +from unittest import mock + +from hud.cli.dev import auto_detect_module, should_use_docker_mode + + +class TestShouldUseDockerMode: + """Test Docker mode detection.""" + + def test_docker_mode_with_dockerfile(self, tmp_path): + """Test detection when Dockerfile exists.""" + dockerfile = tmp_path / "Dockerfile" + dockerfile.write_text("FROM python:3.11") + + assert should_use_docker_mode(tmp_path) is True + + def test_no_docker_mode_without_dockerfile(self, tmp_path): + """Test detection when Dockerfile doesn't exist.""" + assert should_use_docker_mode(tmp_path) is False + + def test_docker_mode_empty_dockerfile(self, tmp_path): + """Test detection with empty Dockerfile.""" + dockerfile = tmp_path / "Dockerfile" + 
dockerfile.write_text("") + + assert should_use_docker_mode(tmp_path) is True + + +class TestAutoDetectModule: + """Test MCP module auto-detection.""" + + def test_detect_module_from_init_with_mcpserver(self, tmp_path, monkeypatch): + """Test detection from __init__.py with MCPServer.""" + monkeypatch.chdir(tmp_path) + + init_file = tmp_path / "__init__.py" + init_file.write_text(""" +from hud.server import MCPServer +mcp = MCPServer(name='test') +""") + + module_name, extra_path = auto_detect_module() + + assert module_name == tmp_path.name + assert extra_path is None + + def test_detect_module_from_init_with_fastmcp(self, tmp_path, monkeypatch): + """Test detection from __init__.py with FastMCP.""" + monkeypatch.chdir(tmp_path) + + init_file = tmp_path / "__init__.py" + init_file.write_text(""" +from fastmcp import FastMCP +mcp = FastMCP(name='test') +""") + + module_name, extra_path = auto_detect_module() + + assert module_name == tmp_path.name + assert extra_path is None + + def test_detect_module_from_main_py(self, tmp_path, monkeypatch): + """Test detection from main.py with MCPServer.""" + monkeypatch.chdir(tmp_path) + + # Need both __init__.py and main.py + init_file = tmp_path / "__init__.py" + init_file.write_text("") + + main_file = tmp_path / "main.py" + main_file.write_text(""" +from hud.server import MCPServer +mcp = MCPServer(name='test') +""") + + module_name, extra_path = auto_detect_module() + + assert module_name == f"{tmp_path.name}.main" + assert extra_path == tmp_path.parent + + def test_no_detection_without_mcp(self, tmp_path, monkeypatch): + """Test no detection when mcp not defined.""" + monkeypatch.chdir(tmp_path) + + init_file = tmp_path / "__init__.py" + init_file.write_text("# Just a comment") + + module_name, extra_path = auto_detect_module() + + assert module_name is None + assert extra_path is None + + def test_no_detection_empty_dir(self, tmp_path, monkeypatch): + """Test no detection in empty directory.""" + monkeypatch.chdir(tmp_path) + + module_name, extra_path = auto_detect_module() + + assert module_name is None + assert extra_path is None + + +class TestShowDevServerInfo: + """Test dev server info display.""" + + @mock.patch("hud.cli.dev.hud_console") + def test_show_dev_server_info_http(self, mock_console): + """Test showing server info for HTTP transport.""" + from hud.cli.dev import show_dev_server_info + + result = show_dev_server_info( + server_name="test-server", + port=8000, + transport="http", + inspector=False, + interactive=False, + ) + + # Returns cursor deeplink + assert result.startswith("cursor://") + assert "test-server" in result + + # Console should have been called + assert mock_console.section_title.called + assert mock_console.info.called + + @mock.patch("hud.cli.dev.hud_console") + def test_show_dev_server_info_stdio(self, mock_console): + """Test showing server info for stdio transport.""" + from hud.cli.dev import show_dev_server_info + + result = show_dev_server_info( + server_name="test-server", + port=8000, + transport="stdio", + inspector=False, + interactive=False, + ) + + # Returns cursor deeplink + assert result.startswith("cursor://") + + @mock.patch("hud.cli.dev.hud_console") + def test_show_dev_server_info_with_telemetry(self, mock_console): + """Test showing server info with telemetry URLs.""" + from hud.cli.dev import show_dev_server_info + + result = show_dev_server_info( + server_name="browser-env", + port=8000, + transport="http", + inspector=False, + interactive=False, + telemetry={ + "live_url": 
"https://hud.ai/trace/123", + "vnc_url": "http://localhost:5900", + }, + ) + + assert result.startswith("cursor://") diff --git a/hud/cli/tests/test_init.py b/hud/cli/tests/test_init.py new file mode 100644 index 00000000..40f5889b --- /dev/null +++ b/hud/cli/tests/test_init.py @@ -0,0 +1,124 @@ +"""Tests for CLI init module.""" + +from __future__ import annotations + +from hud.cli.init import _replace_placeholders + + +class TestReplacePlaceholders: + """Test placeholder replacement in template files.""" + + def test_replace_in_pyproject(self, tmp_path): + """Test replacing placeholders in pyproject.toml.""" + # Create server directory structure + server_dir = tmp_path / "server" + server_dir.mkdir() + + pyproject = server_dir / "pyproject.toml" + pyproject.write_text(""" +[project] +name = "blank" +description = "blank environment" +""") + + modified = _replace_placeholders(tmp_path, "my-cool-env") + + # Normalize paths for cross-platform comparison + modified_normalized = [p.replace("\\", "/") for p in modified] + assert "server/pyproject.toml" in modified_normalized + content = pyproject.read_text() + assert "my_cool_env" in content + assert "blank" not in content + + def test_replace_in_readme(self, tmp_path): + """Test replacing placeholders in README.md.""" + readme = tmp_path / "README.md" + readme.write_text("# blank\n\nThis is the blank environment.") + + modified = _replace_placeholders(tmp_path, "test-env") + + assert "README.md" in modified + content = readme.read_text() + assert "test_env" in content + assert "blank" not in content + + def test_replace_in_tasks_json(self, tmp_path): + """Test replacing placeholders in tasks.json.""" + tasks = tmp_path / "tasks.json" + tasks.write_text('{"name": "blank", "tasks": []}') + + modified = _replace_placeholders(tmp_path, "my-tasks") + + assert "tasks.json" in modified + content = tasks.read_text() + assert "my_tasks" in content + + def test_no_replace_in_non_placeholder_files(self, tmp_path): + """Test that non-placeholder files are not modified.""" + other_file = tmp_path / "other.py" + other_file.write_text("# blank comment") + + modified = _replace_placeholders(tmp_path, "test") + + assert "other.py" not in modified + content = other_file.read_text() + assert "blank" in content # Should be unchanged + + def test_skip_pycache_directories(self, tmp_path): + """Test that __pycache__ directories are skipped.""" + pycache = tmp_path / "__pycache__" + pycache.mkdir() + + cached_file = pycache / "module.pyc" + cached_file.write_text("blank") + + modified = _replace_placeholders(tmp_path, "test") + + # __pycache__ files should not be in modified list + assert not any("__pycache__" in f for f in modified) + + def test_normalize_special_characters(self, tmp_path): + """Test that environment name is normalized for Python identifiers.""" + server_dir = tmp_path / "server" + server_dir.mkdir() + + pyproject = server_dir / "pyproject.toml" + pyproject.write_text('name = "blank"') + + _replace_placeholders(tmp_path, "my cool-env.v2!") + + content = pyproject.read_text() + # Special characters should be replaced with underscores + assert "my_cool_env_v2_" in content + + def test_no_changes_when_no_placeholder(self, tmp_path): + """Test that files without placeholder are not modified.""" + server_dir = tmp_path / "server" + server_dir.mkdir() + + pyproject = server_dir / "pyproject.toml" + pyproject.write_text('name = "other-name"') + + modified = _replace_placeholders(tmp_path, "test") + + assert "server/pyproject.toml" not in modified + + def 
test_nested_directory_structure(self, tmp_path): + """Test replacement in nested directory structure.""" + # Create nested structure + server_dir = tmp_path / "server" + server_dir.mkdir() + (server_dir / "pyproject.toml").write_text('name = "blank"') + + env_dir = tmp_path / "environment" + env_dir.mkdir() + (env_dir / "pyproject.toml").write_text('name = "blank"') + (env_dir / "README.md").write_text("# blank environment") + + modified = _replace_placeholders(tmp_path, "nested-test") + + # Normalize paths for cross-platform comparison + modified_normalized = [p.replace("\\", "/") for p in modified] + assert "server/pyproject.toml" in modified_normalized + assert "environment/pyproject.toml" in modified_normalized + assert "environment/README.md" in modified_normalized diff --git a/hud/cli/tests/test_main_module.py b/hud/cli/tests/test_main_module.py index 4034d94d..42647b8c 100644 --- a/hud/cli/tests/test_main_module.py +++ b/hud/cli/tests/test_main_module.py @@ -2,6 +2,7 @@ from __future__ import annotations +import os import subprocess import sys @@ -20,11 +21,16 @@ def test_main_module_imports_correctly(self): def test_main_module_executes(self): """Test that running the module as main executes correctly.""" # Use subprocess to run the module as __main__ and check it doesn't crash - # We expect it to show help/error since we're not providing arguments + # Use --version flag for a quick, deterministic test that doesn't require user input + env = {**os.environ, "HUD_SKIP_VERSION_CHECK": "1"} result = subprocess.run( - [sys.executable, "-m", "hud.cli"], capture_output=True, text=True, timeout=10 + [sys.executable, "-m", "hud.cli", "--version"], + capture_output=True, + text=True, + timeout=30, + env=env, ) - # Should exit with an error code but not crash - # (The actual main function will show help or error for missing args) - assert result.returncode != 0 # CLI should exit with error for no args + # Should exit successfully with version info + assert result.returncode == 0 + assert "version" in result.stdout.lower() or "hud" in result.stdout.lower() diff --git a/hud/cli/utils/git.py b/hud/cli/utils/git.py new file mode 100644 index 00000000..864e12f8 --- /dev/null +++ b/hud/cli/utils/git.py @@ -0,0 +1,136 @@ +"""Git utilities for extracting repository information.""" + +from __future__ import annotations + +import logging +import subprocess +from pathlib import Path +from typing import Any + +logger = logging.getLogger(__name__) + + +def get_git_remote_url(cwd: Path | None = None) -> str | None: + """ + Get the git remote origin URL for the current repository. + + Args: + cwd: Working directory (defaults to current directory) + + Returns: + Git remote URL if available, None otherwise + """ + cwd = cwd or Path.cwd() + + try: + # Check if we're in a git repository + subprocess.run( + ["git", "rev-parse", "--git-dir"], # noqa: S607 + cwd=cwd, + capture_output=True, + check=True, + ) + + # Get the remote origin URL + result = subprocess.run( + ["git", "config", "--get", "remote.origin.url"], # noqa: S607 + cwd=cwd, + capture_output=True, + text=True, + check=True, + ) + + url = result.stdout.strip() + if url: + return normalize_github_url(url) + return None + + except subprocess.CalledProcessError: + # Not a git repository or no remote origin + return None + except Exception as e: + logger.debug("Error getting git remote URL: %s", e) + return None + + +def normalize_github_url(url: str) -> str: + """ + Normalize various git URL formats to standard HTTPS GitHub URL. 
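+    Unrecognized URL formats are returned unchanged apart from stripping a trailing .git suffix.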
+ + Examples: + git@github.com:user/repo.git -> https://github.com/user/repo + https://github.com/user/repo.git -> https://github.com/user/repo + git://github.com/user/repo.git -> https://github.com/user/repo + + Args: + url: Git remote URL in any format + + Returns: + Normalized HTTPS GitHub URL + """ + # Remove trailing .git + if url.endswith(".git"): + url = url[:-4] + + # Handle SSH format (git@github.com:user/repo) + if url.startswith("git@github.com:"): + url = url.replace("git@github.com:", "https://github.com/") + + # Handle git:// protocol + elif url.startswith("git://"): + url = url.replace("git://", "https://") + + # Ensure HTTPS + elif not url.startswith("https://") and "github.com:" in url: + parts = url.split("github.com:") + url = f"https://github.com/{parts[1]}" + + return url + + +def get_git_info(cwd: Path | None = None) -> dict[str, Any]: + """ + Get comprehensive git repository information. + + Args: + cwd: Working directory (defaults to current directory) + + Returns: + Dictionary with git info including: + - remote_url: The remote origin URL + - branch: Current branch name + - commit: Current commit hash (short) + """ + cwd = cwd or Path.cwd() + info: dict[str, Any] = {} + + # Get remote URL + info["remote_url"] = get_git_remote_url(cwd) + + try: + # Get current branch + result = subprocess.run( + ["git", "rev-parse", "--abbrev-ref", "HEAD"], # noqa: S607 + cwd=cwd, + capture_output=True, + text=True, + check=True, + ) + info["branch"] = result.stdout.strip() + + # Get current commit (short hash) + result = subprocess.run( + ["git", "rev-parse", "--short", "HEAD"], # noqa: S607 + cwd=cwd, + capture_output=True, + text=True, + check=True, + ) + info["commit"] = result.stdout.strip() + + except subprocess.CalledProcessError: + pass + except Exception as e: + logger.debug("Error getting git info: %s", e) + + return info diff --git a/hud/cli/utils/tests/test_git.py b/hud/cli/utils/tests/test_git.py new file mode 100644 index 00000000..fea9e153 --- /dev/null +++ b/hud/cli/utils/tests/test_git.py @@ -0,0 +1,142 @@ +"""Tests for git utilities.""" + +from __future__ import annotations + +from unittest import mock + +from hud.cli.utils.git import get_git_info, get_git_remote_url, normalize_github_url + + +class TestNormalizeGithubUrl: + """Test GitHub URL normalization.""" + + def test_normalize_ssh_url(self): + """Test normalizing SSH format URL.""" + url = "git@github.com:user/repo.git" + result = normalize_github_url(url) + assert result == "https://github.com/user/repo" + + def test_normalize_https_with_git_suffix(self): + """Test normalizing HTTPS URL with .git suffix.""" + url = "https://github.com/user/repo.git" + result = normalize_github_url(url) + assert result == "https://github.com/user/repo" + + def test_normalize_git_protocol(self): + """Test normalizing git:// protocol URL.""" + url = "git://github.com/user/repo.git" + result = normalize_github_url(url) + assert result == "https://github.com/user/repo" + + def test_normalize_already_clean(self): + """Test URL that's already normalized.""" + url = "https://github.com/user/repo" + result = normalize_github_url(url) + assert result == "https://github.com/user/repo" + + def test_normalize_with_github_com_colon(self): + """Test URL with github.com: format.""" + url = "ssh://github.com:user/repo.git" + result = normalize_github_url(url) + assert result == "https://github.com/user/repo" + + +class TestGetGitRemoteUrl: + """Test getting git remote URL.""" + + @mock.patch("subprocess.run") + def 
test_get_remote_url_success(self, mock_run): + """Test successfully getting remote URL.""" + # First call checks if we're in a git repo + mock_run.side_effect = [ + mock.Mock(returncode=0), # git rev-parse --git-dir + mock.Mock(returncode=0, stdout="git@github.com:user/repo.git\n"), # git config + ] + + result = get_git_remote_url() + assert result == "https://github.com/user/repo" + + @mock.patch("subprocess.run") + def test_get_remote_url_not_git_repo(self, mock_run): + """Test when not in a git repository.""" + from subprocess import CalledProcessError + + mock_run.side_effect = CalledProcessError(128, "git") + + result = get_git_remote_url() + assert result is None + + @mock.patch("subprocess.run") + def test_get_remote_url_no_remote(self, mock_run): + """Test when no remote origin exists.""" + from subprocess import CalledProcessError + + mock_run.side_effect = [ + mock.Mock(returncode=0), # git rev-parse --git-dir + CalledProcessError(1, "git"), # git config fails + ] + + result = get_git_remote_url() + assert result is None + + @mock.patch("subprocess.run") + def test_get_remote_url_empty(self, mock_run): + """Test when remote URL is empty.""" + mock_run.side_effect = [ + mock.Mock(returncode=0), + mock.Mock(returncode=0, stdout=""), + ] + + result = get_git_remote_url() + assert result is None + + +class TestGetGitInfo: + """Test getting comprehensive git info.""" + + @mock.patch("hud.cli.utils.git.get_git_remote_url") + @mock.patch("subprocess.run") + def test_get_git_info_success(self, mock_run, mock_get_url): + """Test successfully getting all git info.""" + mock_get_url.return_value = "https://github.com/user/repo" + mock_run.side_effect = [ + mock.Mock(returncode=0, stdout="main\n"), # branch + mock.Mock(returncode=0, stdout="abc1234\n"), # commit + ] + + result = get_git_info() + + assert result["remote_url"] == "https://github.com/user/repo" + assert result["branch"] == "main" + assert result["commit"] == "abc1234" + + @mock.patch("hud.cli.utils.git.get_git_remote_url") + @mock.patch("subprocess.run") + def test_get_git_info_no_remote(self, mock_run, mock_get_url): + """Test git info when no remote exists.""" + mock_get_url.return_value = None + mock_run.side_effect = [ + mock.Mock(returncode=0, stdout="feature-branch\n"), + mock.Mock(returncode=0, stdout="def5678\n"), + ] + + result = get_git_info() + + assert result["remote_url"] is None + assert result["branch"] == "feature-branch" + assert result["commit"] == "def5678" + + @mock.patch("hud.cli.utils.git.get_git_remote_url") + @mock.patch("subprocess.run") + def test_get_git_info_subprocess_error(self, mock_run, mock_get_url): + """Test git info when subprocess fails.""" + from subprocess import CalledProcessError + + mock_get_url.return_value = "https://github.com/user/repo" + mock_run.side_effect = CalledProcessError(1, "git") + + result = get_git_info() + + assert result["remote_url"] == "https://github.com/user/repo" + assert "branch" not in result + assert "commit" not in result diff --git a/hud/telemetry/async_context.py b/hud/telemetry/async_context.py index 223f621c..90a00723 100644 --- a/hud/telemetry/async_context.py +++ b/hud/telemetry/async_context.py @@ -70,6 +70,7 @@ def __init__( job_id: str | None = None, task_id: str | None = None, group_id: str | None = None, + trace_id: str | None = None, ) -> None: self.name = name self.root = root @@ -77,7 +78,7 @@ def __init__( self.job_id = job_id self.task_id = task_id self.group_id = group_id - self.task_run_id = str(uuid.uuid4()) + self.task_run_id = trace_id if 
trace_id else str(uuid.uuid4()) self.trace_obj = Trace(self.task_run_id, name, job_id, task_id, group_id) self._otel_trace = None @@ -260,6 +261,7 @@ def async_trace( job_id: str | None = None, task_id: str | None = None, group_id: str | None = None, + trace_id: str | None = None, ) -> AsyncTrace: """Create an async trace context for telemetry tracking. @@ -274,6 +276,7 @@ def async_trace( job_id: Optional job ID to associate with this trace task_id: Optional task ID for custom task identifiers group_id: Optional group ID to associate with this trace + trace_id: Optional trace ID (auto-generated if not provided) Returns: AsyncTrace context manager @@ -302,6 +305,7 @@ def async_trace( job_id=job_id, task_id=task_id, group_id=group_id, + trace_id=trace_id, ) diff --git a/hud/telemetry/trace.py b/hud/telemetry/trace.py index 8442240c..2aa19080 100644 --- a/hud/telemetry/trace.py +++ b/hud/telemetry/trace.py @@ -96,10 +96,11 @@ def trace( job_id: str | None = None, task_id: str | None = None, group_id: str | None = None, + trace_id: str | None = None, ) -> Generator[Trace, None, None]: """Start a HUD trace context for telemetry tracking. - A unique task_run_id is automatically generated for each trace. + A unique task_run_id is automatically generated for each trace unless provided. Args: name: Descriptive name for this trace/task @@ -108,6 +109,7 @@ def trace( job_id: Optional job ID to associate with this trace task_id: Optional task ID (for custom task identifiers) group_id: Optional group ID to associate with this trace + trace_id: Optional trace ID (auto-generated if not provided) Yields: Trace: The trace object with logging capabilities @@ -129,20 +131,24 @@ def trace( # Ensure telemetry is configured configure_telemetry() - # Only generate task_run_id if using HUD backend - # For custom OTLP backends, we don't need it - from hud.settings import get_settings - - settings = get_settings() - - if settings.telemetry_enabled and settings.api_key: - task_run_id = str(uuid.uuid4()) + # Use provided trace_id or generate one + if trace_id: + task_run_id = trace_id else: - # Use a placeholder for custom backends - logger.warning( - "HUD API key is not set, using a placeholder for the task run ID. If this looks wrong, check your API key." # noqa: E501 - ) - task_run_id = str(uuid.uuid4()) + # Only generate task_run_id if using HUD backend + # For custom OTLP backends, we don't need it + from hud.settings import get_settings + + settings = get_settings() + + if settings.telemetry_enabled and settings.api_key: + task_run_id = str(uuid.uuid4()) + else: + # Use a placeholder for custom backends + logger.warning( + "HUD API key is not set, using a placeholder for the task run ID. If this looks wrong, check your API key." 
# noqa: E501 + ) + task_run_id = str(uuid.uuid4()) # Create trace object trace_obj = Trace(task_run_id, name, job_id, task_id, group_id) diff --git a/hud/tests/test_datasets_extended.py b/hud/tests/test_datasets_extended.py index ff4f9a89..69afb59b 100644 --- a/hud/tests/test_datasets_extended.py +++ b/hud/tests/test_datasets_extended.py @@ -238,36 +238,23 @@ async def test_run_dataset_with_metadata(self): @pytest.mark.asyncio async def test_run_dataset_exception_handling(self): """Test exception handling during task execution.""" - # Track execution - executed_tasks = [] - - # Create mock agent instances with proper run behavior - mock_agents = [] - for i in range(3): - agent = AsyncMock() - if i == 1: # Second task should fail - agent.run.side_effect = RuntimeError("Task 2 failed") - else: - agent.run.return_value = {"result": f"success-{i + 1}"} - mock_agents.append(agent) - - # Create a mock agent class that returns our prepared instances - agent_creation_count = 0 + # Track execution by task index + executed_task_indices: set[int] = set() + # Create a mock agent class where behavior depends on the task being run def create_mock_agent(**kwargs): - nonlocal agent_creation_count - agent = mock_agents[agent_creation_count] - agent_creation_count += 1 - - # Track when run is called - original_run = agent.run + agent = AsyncMock() - async def tracked_run(*args, **kwargs): - executed_tasks.append(agent_creation_count - 1) - return await original_run(*args, **kwargs) + async def mock_run(task, **run_kwargs): + # Extract task index from prompt "Task {i}" + task_idx = int(task.prompt.split()[-1]) + executed_task_indices.add(task_idx) - agent.run = tracked_run + if task_idx == 1: # Second task (index 1) should fail + raise RuntimeError("Task 2 failed") + return {"result": f"success-{task_idx + 1}"} + agent.run = mock_run return agent # Mock the agent class itself - runner calls agent_class.create() @@ -294,8 +281,8 @@ async def tracked_run(*args, **kwargs): results = await run_dataset("error_run", tasks, mock_agent_class) # type: ignore # All tasks should be attempted - assert len(executed_tasks) == 3 - assert executed_tasks == [0, 1, 2] + assert len(executed_task_indices) == 3 + assert executed_task_indices == {0, 1, 2} # First and third should succeed assert results[0] == {"result": "success-1"} diff --git a/hud/utils/tests/test_version.py b/hud/utils/tests/test_version.py index 71b968f8..df8f2b38 100644 --- a/hud/utils/tests/test_version.py +++ b/hud/utils/tests/test_version.py @@ -5,4 +5,4 @@ def test_import(): """Test that the package can be imported.""" import hud - assert hud.__version__ == "0.4.70" + assert hud.__version__ == "0.4.71" diff --git a/hud/version.py b/hud/version.py index 24cebad9..297ec9c8 100644 --- a/hud/version.py +++ b/hud/version.py @@ -4,4 +4,4 @@ from __future__ import annotations -__version__ = "0.4.70" +__version__ = "0.4.71" diff --git a/pyproject.toml b/pyproject.toml index 49700fbd..c1b9cce4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "hud-python" -version = "0.4.70" +version = "0.4.71" description = "SDK for the HUD platform." readme = "README.md" requires-python = ">=3.11, <3.13"