diff --git a/docs/docs.json b/docs/docs.json
index 131ab74b..fabb3e50 100644
--- a/docs/docs.json
+++ b/docs/docs.json
@@ -29,7 +29,7 @@
"navigation": {
"versions": [
{
- "version": "0.4.70",
+ "version": "0.4.71",
"groups": [
{
"group": "Get Started",
diff --git a/docs/index.mdx b/docs/index.mdx
index 7b8c20f4..3f1d0989 100644
--- a/docs/index.mdx
+++ b/docs/index.mdx
@@ -5,7 +5,7 @@ icon: "book"
---
-**Version 0.4.70** - Latest stable release
+**Version 0.4.71** - Latest stable release
diff --git a/docs/reference/cli/build.mdx b/docs/reference/cli/build.mdx
index db4261a0..37ec022d 100644
--- a/docs/reference/cli/build.mdx
+++ b/docs/reference/cli/build.mdx
@@ -98,17 +98,24 @@ They’re recorded in the lock file as placeholders under `environment.variables
Minimal structure you’ll see:
```yaml
-version: "1.0"
-image: "my-env:dev@sha256:..."
+version: "1.2"
+image: "my-env:0.1.0@sha256:..."
build:
generatedAt: "2025-01-01T12:00:00Z"
hudVersion: "0.x.y"
directory: "my-env"
version: "0.1.0"
sourceHash: "..."
+ baseImage: "python:3.11-slim"
+ platform: "linux/amd64"
environment:
initializeMs: 450
toolCount: 3
+ runtime:
+ python: "3.11.6"
+ cuda: null
+ cudnn: null
+ pytorch: null
variables:
provided: { API_KEY: "${API_KEY}" }
required: ["OTHER_KEY"]
@@ -116,6 +123,7 @@ tools:
- name: setup
description: Initialize environment
inputSchema: { type: object }
+ internalTools: ["board", "seed"]
```
## Next Steps
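Below is a minimal sketch of reading the new v1.2 fields programmatically, assuming a `hud.lock.yaml` shaped like the sample above (PyYAML and the path are assumptions for illustration):

```python
# Sketch: inspect the v1.2 lock file fields shown above.
from pathlib import Path

import yaml

lock = yaml.safe_load(Path("my-env/hud.lock.yaml").read_text())

print(lock["build"].get("baseImage"))      # e.g. "python:3.11-slim"
print(lock["build"].get("platform"))       # e.g. "linux/amd64"
print(lock["environment"].get("runtime"))  # e.g. {"python": "3.11.6", "cuda": None, ...}

for tool in lock.get("tools", []):
    print(tool["name"], tool.get("internalTools", []))
```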
diff --git a/hud/cli/__init__.py b/hud/cli/__init__.py
index 1812a223..d8dc5eca 100644
--- a/hud/cli/__init__.py
+++ b/hud/cli/__init__.py
@@ -397,7 +397,7 @@ def dev(
new: bool = typer.Option(
False,
"--new",
- help="Show Cursor installation link for new server setup",
+ help="Create a new dev trace on hud.ai (opens in browser)",
),
) -> None:
"""🔥 Development mode - run MCP server with hot-reload.
@@ -416,6 +416,7 @@ def dev(
Examples:
hud dev # Auto-detect in current directory
+ hud dev --new # Create live dev trace on hud.ai
hud dev controller # Run specific module
hud dev --inspector # Launch MCP Inspector
hud dev --interactive # Launch interactive testing mode
@@ -439,7 +440,7 @@ def dev(
watch,
docker=docker,
docker_args=docker_args,
- new=new,
+ new_trace=new,
)
diff --git a/hud/cli/build.py b/hud/cli/build.py
index b590c4ba..b7f169a1 100644
--- a/hud/cli/build.py
+++ b/hud/cli/build.py
@@ -291,6 +291,121 @@ def extract_env_vars_from_dockerfile(dockerfile_path: Path) -> tuple[list[str],
return required, optional
+def parse_base_image(dockerfile_path: Path) -> str | None:
+ """Extract the base image from the first FROM directive in Dockerfile.
+
+ For multi-stage builds, returns the image from the first FROM. Strips any
+ trailing AS segment.
+ """
+ try:
+ if not dockerfile_path.exists():
+ return None
+ for raw_line in dockerfile_path.read_text().splitlines():
+ line = raw_line.strip()
+ if not line or line.startswith("#"):
+ continue
+ if line.upper().startswith("FROM "):
+ rest = line[5:].strip()
+ # Remove stage alias if present
+ lower = rest.lower()
+ if " as " in lower:
+ # Slice the original-case string at the offset found in the lowercased copy
+ idx = lower.index(" as ")
+ rest = rest[:idx]
+ return rest.strip()
+ except Exception:
+ return None
+ return None
+
+
+def collect_runtime_metadata(image: str, *, verbose: bool = False) -> dict[str, str | None]:
+ """Probe container to capture Python/CUDA/cuDNN/PyTorch versions.
+
+ Runs a tiny Python snippet inside the built image using docker run. Returns an
+ empty dict if Docker is unavailable or the probe yields no usable output.
+ """
+ hud_console = HUDConsole()
+
+ runtime_script = (
+ "import json, platform\n"
+ "info = {'python': platform.python_version()}\n"
+ "try:\n"
+ " import torch\n"
+ " info['pytorch'] = getattr(torch, '__version__', None)\n"
+ " cuda_version = None\n"
+ " try:\n"
+ " cuda_version = getattr(getattr(torch, 'version', None), 'cuda', None)\n"
+ " except Exception:\n"
+ " cuda_version = None\n"
+ " if cuda_version:\n"
+ " info['cuda'] = cuda_version\n"
+ " try:\n"
+ " cudnn_version = torch.backends.cudnn.version()\n"
+ " except Exception:\n"
+ " cudnn_version = None\n"
+ " if cudnn_version:\n"
+ " info['cudnn'] = str(cudnn_version)\n"
+ "except Exception:\n"
+ " pass\n"
+ "info.setdefault('pytorch', None)\n"
+ "info.setdefault('cuda', None)\n"
+ "info.setdefault('cudnn', None)\n"
+ "print(json.dumps(info))\n"
+ )
+
+ for binary in ("python", "python3"):
+ cmd = [
+ "docker",
+ "run",
+ "--rm",
+ image,
+ binary,
+ "-c",
+ runtime_script,
+ ]
+ try:
+ result = subprocess.run( # noqa: S603
+ cmd, capture_output=True, text=True, check=False
+ )
+ except FileNotFoundError:
+ return {}
+
+ if result.returncode != 0:
+ if verbose:
+ hud_console.debug(
+ f"Runtime probe failed with {binary}: {result.stderr.strip() or 'no stderr'}"
+ )
+ continue
+
+ output = (result.stdout or "").strip()
+ if not output:
+ return {}
+
+ try:
+ data = json.loads(output.splitlines()[-1])
+ except json.JSONDecodeError:
+ if verbose:
+ hud_console.debug(
+ "Runtime probe returned non-JSON output; skipping metadata capture"
+ )
+ return {}
+
+ if not isinstance(data, dict):
+ if verbose:
+ hud_console.debug(
+ "Runtime probe returned JSON that is not an object; skipping metadata capture"
+ )
+ return {}
+
+ return {
+ "python": data.get("python"),
+ "cuda": data.get("cuda"),
+ "cudnn": data.get("cudnn"),
+ "pytorch": data.get("pytorch"),
+ }
+
+ return {}
+
+
async def analyze_mcp_environment(
image: str, verbose: bool = False, env_vars: dict[str, str] | None = None
) -> dict[str, Any]:
@@ -325,17 +440,60 @@ async def analyze_mcp_environment(
initialized = True
initialize_ms = int((time.time() - start_time) * 1000)
- # Delegate to standard analysis helper for consistency
+ # Delegate to standard analysis helper
full_analysis = await client.analyze_environment()
- # Normalize to build's expected fields
+ # Normalize and enrich with internalTools if a hub map is present
tools_list = full_analysis.get("tools", [])
- return {
+ hub_map = full_analysis.get("hub_tools", {}) or full_analysis.get("hubTools", {})
+
+ normalized_tools: list[dict[str, Any]] = []
+ internal_total = 0
+ for t in tools_list:
+ # Extract core fields (support object or dict forms)
+ if hasattr(t, "name"):
+ name = getattr(t, "name", None)
+ description = getattr(t, "description", None)
+ input_schema = getattr(t, "inputSchema", None)
+ existing_internal = getattr(t, "internalTools", None)
+ else:
+ name = t.get("name")
+ description = t.get("description")
+ # accept either inputSchema or input_schema
+ input_schema = t.get("inputSchema") or t.get("input_schema")
+ # accept either internalTools or internal_tools
+ existing_internal = t.get("internalTools") or t.get("internal_tools")
+
+ tool_entry: dict[str, Any] = {"name": name}
+ if description:
+ tool_entry["description"] = description
+ if input_schema:
+ tool_entry["inputSchema"] = input_schema
+
+ # Merge internal tools: preserve any existing declaration and add hub_map[name]
+ merged_internal: list[str] = []
+ if isinstance(existing_internal, list):
+ merged_internal.extend([str(x) for x in existing_internal])
+ if isinstance(hub_map, dict) and name in hub_map and isinstance(hub_map[name], list):
+ merged_internal.extend([str(x) for x in hub_map[name]])
+ if merged_internal:
+ # Deduplicate while preserving order
+ merged_internal = list(dict.fromkeys(merged_internal))
+ tool_entry["internalTools"] = merged_internal
+ internal_total += len(merged_internal)
+
+ normalized_tools.append(tool_entry)
+
+ result = {
"initializeMs": initialize_ms,
"toolCount": len(tools_list),
- "tools": tools_list,
+ "internalToolCount": internal_total,
+ "tools": normalized_tools,
"success": True,
}
+ if hub_map:
+ result["hub_tools"] = hub_map
+ return result
except TimeoutError:
from hud.shared.exceptions import HudException
@@ -562,7 +720,9 @@ def build_environment(
finally:
loop.close()
- hud_console.success(f"Analyzed environment: {analysis['toolCount']} tools found")
+ # Show analysis results
+ tool_msg = f"Analyzed environment: {analysis['toolCount']} tools found"
+ hud_console.success(tool_msg)
# Extract environment variables from Dockerfile
dockerfile_path = env_dir / "Dockerfile"
@@ -604,9 +764,14 @@ def build_environment(
if image_tag:
base_name = image_tag.split(":")[0] if ":" in image_tag else image_tag
+ # Collect runtime metadata and compute base image/platform
+ runtime_info = collect_runtime_metadata(temp_tag, verbose=verbose)
+ base_image = parse_base_image(dockerfile_path)
+ effective_platform = platform if platform is not None else "linux/amd64"
+
# Create lock file content with images subsection at top
lock_content = {
- "version": "1.1", # Lock file format version
+ "version": "1.2", # Lock file format version
"images": {
"local": f"{base_name}:{new_version}", # Local tag with version
"full": None, # Will be set with digest after build
@@ -619,6 +784,8 @@ def build_environment(
"version": new_version,
# Fast source fingerprint for change detection
"sourceHash": compute_source_hash(env_dir),
+ "baseImage": base_image,
+ "platform": effective_platform,
},
"environment": {
"initializeMs": analysis["initializeMs"],
@@ -626,6 +793,11 @@ def build_environment(
},
}
+ if runtime_info:
+ lock_content["environment"]["runtime"] = runtime_info
+ internal_count = int(analysis.get("internalToolCount", 0) or 0)
+ lock_content["environment"]["internalToolCount"] = internal_count
+
# Add environment variables section if any exist
# Include env vars from .env file as well
env_vars_from_file = set(env_from_file.keys()) if env_from_file else set()
@@ -662,14 +834,23 @@ def build_environment(
# Add tools with full schemas for RL config generation
if analysis["tools"]:
- lock_content["tools"] = [
- {
+ tools_serialized: list[dict[str, Any]] = []
+ for tool in analysis["tools"]:
+ entry: dict[str, Any] = {
"name": tool["name"],
+ # Preserve legacy shape: always include description/inputSchema
"description": tool.get("description", ""),
"inputSchema": tool.get("inputSchema", {}),
}
- for tool in analysis["tools"]
- ]
+ if tool.get("internalTools"):
+ entry["internalTools"] = tool.get("internalTools")
+ tools_serialized.append(entry)
+ lock_content["tools"] = tools_serialized
+
+ # Add hub tools if present (analyze_environment returns hub_tools with snake_case)
+ hub_tools = analysis.get("hub_tools") or analysis.get("hubTools")
+ if hub_tools:
+ lock_content["hubTools"] = hub_tools
# Write lock file
lock_path = env_dir / "hud.lock.yaml"
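The two new helpers are ordinary functions and degrade gracefully (to `None` / `{}`) when Docker or the Dockerfile is unavailable. A hedged usage sketch, with placeholder paths and tags:

```python
# Sketch: how the new build helpers feed build.baseImage/platform and
# environment.runtime (directory and image tag are placeholders).
from pathlib import Path

from hud.cli.build import collect_runtime_metadata, parse_base_image

env_dir = Path("my-env")
base_image = parse_base_image(env_dir / "Dockerfile")  # first FROM, stage alias stripped
runtime = collect_runtime_metadata("my-env:dev", verbose=True)  # {} if the probe fails

print(base_image)  # e.g. "python:3.11-slim"
print(runtime)     # e.g. {"python": "3.11.6", "cuda": None, "cudnn": None, "pytorch": None}
```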
diff --git a/hud/cli/dev.py b/hud/cli/dev.py
index 4b8ac174..38bccda1 100644
--- a/hud/cli/dev.py
+++ b/hud/cli/dev.py
@@ -28,8 +28,8 @@ def show_dev_server_info(
inspector: bool,
interactive: bool,
env_dir: Path | None = None,
- new: bool = False,
docker_mode: bool = False,
+ telemetry: dict[str, Any] | None = None,
) -> str:
"""Show consistent server info for both Python and Docker modes.
@@ -68,6 +68,15 @@ def show_dev_server_info(
f"{hud_console.sym.ITEM} Eval API: http://localhost:{port}/eval (POST)"
)
+ # Show debugging URLs from telemetry
+ if telemetry:
+ if "live_url" in telemetry:
+ hud_console.info(f"{hud_console.sym.ITEM} Live URL: {telemetry['live_url']}")
+ if "vnc_url" in telemetry:
+ hud_console.info(f"{hud_console.sym.ITEM} VNC URL: {telemetry['vnc_url']}")
+ if "cdp_url" in telemetry:
+ hud_console.info(f"{hud_console.sym.ITEM} CDP URL: {telemetry['cdp_url']}")
+
# Check for VNC (browser environment)
if env_dir and (env_dir / "environment" / "server.py").exists():
try:
@@ -138,7 +147,7 @@ async def run_mcp_module(
verbose: bool,
inspector: bool,
interactive: bool,
- new: bool = False,
+ new_trace: bool = False,
) -> None:
"""Run an MCP module directly."""
# Check if this is a reload (not first run)
@@ -236,9 +245,9 @@ async def run_mcp_module(
# Show server info only on first run
if not is_reload:
- # Try dynamic trace first for HTTP mode (only if --new)
+ # Try dynamic trace first for HTTP mode (only if --new flag is set)
live_trace_url: str | None = None
- if transport == "http" and new:
+ if transport == "http" and new_trace:
try:
local_mcp_config: dict[str, dict[str, Any]] = {
"hud": {
@@ -254,11 +263,13 @@ async def run_mcp_module(
build_status=False,
environment_name=mcp_server.name or "mcp-server",
)
+ except SystemExit:
+ raise  # Propagate SystemExit from the API key requirement
except Exception: # noqa: S110
pass
# Show UI using shared flow logic
- if transport == "http" and live_trace_url and new:
+ if transport == "http" and live_trace_url:
# Minimal UI with live trace
from hud.cli.flows.dev import generate_cursor_deeplink, show_dev_ui
@@ -281,7 +292,6 @@ async def run_mcp_module(
inspector=inspector,
interactive=interactive,
env_dir=Path.cwd().parent if (Path.cwd().parent / "environment").exists() else None,
- new=new,
)
# Check if there's an environment backend and remind user to start it (first run only)
@@ -401,7 +411,7 @@ def run_with_reload(
verbose: bool,
inspector: bool,
interactive: bool,
- new: bool = False,
+ new_trace: bool = False,
) -> None:
"""Run module with file watching and auto-reload."""
try:
@@ -445,7 +455,7 @@ def handle_signal(signum: int, frame: Any) -> None:
if verbose:
cmd.append("--verbose")
- if new:
+ if new_trace and is_first_run:
cmd.append("--new")
if verbose:
@@ -519,7 +529,7 @@ def run_docker_dev_server(
inspector: bool,
interactive: bool,
docker_args: list[str],
- new: bool = False,
+ new_trace: bool = False,
) -> None:
"""Run MCP server in Docker with volume mounts, expose via local HTTP proxy."""
import atexit
@@ -631,6 +641,10 @@ def signal_handler(signum: int, frame: Any) -> None:
if "@" in image_name:
image_name = image_name.split("@")[0]
+ # Extract debugging ports and telemetry URLs from the lock file
+ debugging_ports = lock_data.get("environment", {}).get("debuggingPorts", [])
+ telemetry = lock_data.get("environment", {}).get("telemetry", {})
+
except Exception as e:
hud_console.error(f"Failed to read lock file: {e}")
raise typer.Exit(1) from e
@@ -661,6 +675,12 @@ def signal_handler(signum: int, frame: Any) -> None:
"-e",
"HUD_DEV=1",
]
+
+ # Add debugging port mappings if available
+ if debugging_ports:
+ hud_console.info(f"Exposing debugging ports: {', '.join(map(str, debugging_ports))}")
+ for port_num in debugging_ports:
+ base_args.extend(["-p", f"{port_num}:{port_num}"])
combined_args = [*base_args, *docker_args] if docker_args else base_args
docker_cmd = create_docker_run_command(
image_name,
@@ -676,13 +696,13 @@ def signal_handler(signum: int, frame: Any) -> None:
}
}
- # Attempt to create dynamic trace early (before any UI)
+ # Attempt to create dynamic trace early (before any UI) if --new flag is set
import asyncio as _asy
from hud.cli.flows.dev import create_dynamic_trace, generate_cursor_deeplink, show_dev_ui
live_trace_url: str | None = None
- if new:
+ if new_trace:
try:
local_mcp_config: dict[str, dict[str, Any]] = {
"hud": {
@@ -697,11 +717,13 @@ def signal_handler(signum: int, frame: Any) -> None:
environment_name=image_name,
)
)
+ except SystemExit:
+ raise  # Propagate SystemExit from the API key requirement
except Exception: # noqa: S110
pass
# Show appropriate UI
- if live_trace_url and new:
+ if live_trace_url:
# Minimal UI with live trace
cursor_deeplink = generate_cursor_deeplink(image_name, port)
show_dev_ui(
@@ -724,8 +746,8 @@ def signal_handler(signum: int, frame: Any) -> None:
inspector=inspector,
interactive=interactive,
env_dir=env_dir,
- new=new,
docker_mode=True,
+ telemetry=telemetry,
)
hud_console.dim_info(
"",
@@ -822,7 +844,7 @@ def run_mcp_dev_server(
watch: list[str] | None,
docker: bool = False,
docker_args: list[str] | None = None,
- new: bool = False,
+ new_trace: bool = False,
) -> None:
"""Run MCP development server with hot-reload."""
docker_args = docker_args or []
@@ -847,12 +869,12 @@ def run_mcp_dev_server(
hud_console.note("Detected Dockerfile - using Docker mode with volume mounts")
hud_console.dim_info("Tip", "Use 'hud dev --help' to see all options")
hud_console.info("")
- run_docker_dev_server(port, verbose, inspector, interactive, docker_args, new)
+ run_docker_dev_server(port, verbose, inspector, interactive, docker_args, new_trace)
return
# Route to Docker mode if explicitly requested
if docker:
- run_docker_dev_server(port, verbose, inspector, interactive, docker_args, new)
+ run_docker_dev_server(port, verbose, inspector, interactive, docker_args, new_trace)
return
transport = "stdio" if stdio else "http"
@@ -896,6 +918,8 @@ def run_mcp_dev_server(
is_child = os.environ.get("_HUD_DEV_CHILD") == "1"
if is_child:
- asyncio.run(run_mcp_module(module, transport, port, verbose, False, False, new))
+ asyncio.run(run_mcp_module(module, transport, port, verbose, False, False, new_trace))
else:
- run_with_reload(module, watch_paths, transport, port, verbose, inspector, interactive, new)
+ run_with_reload(
+ module, watch_paths, transport, port, verbose, inspector, interactive, new_trace
+ )
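The Docker dev flow above keys its port forwarding and URL display off two optional lock-file entries. A sketch of the shape it expects, written as the dict the code reads (key names come from the `lock_data.get(...)` calls; values are illustrative):

```python
# Sketch: optional lock-file entries consumed by run_docker_dev_server.
lock_data = {
    "environment": {
        "debuggingPorts": [5900, 9222],  # each becomes a "-p <port>:<port>" mapping
        "telemetry": {                   # printed as Live/VNC/CDP URLs on startup
            "live_url": "https://hud.ai/trace/abc123",
            "vnc_url": "http://localhost:5900",
            "cdp_url": "http://localhost:9222",
        },
    },
}

debugging_ports = lock_data.get("environment", {}).get("debuggingPorts", [])
telemetry = lock_data.get("environment", {}).get("telemetry", {})
```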
diff --git a/hud/cli/flows/dev.py b/hud/cli/flows/dev.py
index 3e1ecd47..8072cf01 100644
--- a/hud/cli/flows/dev.py
+++ b/hud/cli/flows/dev.py
@@ -26,6 +26,7 @@ async def create_dynamic_trace(
- mcp_config: points to the local MCP config (same as Cursor)
- build_status: True if Docker mode (built image), False if basic Python mode
- environment_name: Name of the environment/server/image
+ - git_info: repository metadata, detected automatically and attached when available
Returns the full URL to the live trace when successful, otherwise None.
"""
@@ -33,17 +34,37 @@ async def create_dynamic_trace(
# Endpoint TBD; use a sensible default path that the backend can wire up
url = f"{api_base}/dev/dynamic-traces"
+ # Get git repository information
+ from hud.cli.utils.git import get_git_info
+
+ git_info = get_git_info()
+
payload = {
"mcp_config": mcp_config,
"build_status": bool(build_status),
"environment_name": environment_name,
}
- # Best-effort; if missing API key, log and continue
+ # Add git info if available
+ if git_info and git_info.get("remote_url"):
+ payload["git_info"] = git_info
+ logger.info("Detected git repository: %s", git_info.get("remote_url"))
+ else:
+ logger.info("No git repository detected")
+
+ # Require API key for dev mode
api_key = settings.api_key
if not api_key:
- logger.warning("Skipping dynamic trace creation; missing HUD_API_KEY")
- return None, None
+ hud_console.error("HUD_API_KEY is required for hud dev command")
+ hud_console.info("")
+ hud_console.info("Please set your API key using one of these methods:")
+ hud_console.info(" 1. Set environment variable: export HUD_API_KEY=your_key")
+ hud_console.info(" 2. Use hud set command: hud set api_key your_key")
+ hud_console.info("")
+ hud_console.info("Get your API key at: https://hud.ai/settings")
+ import sys
+
+ sys.exit(1)
try:
resp = await make_request("POST", url=url, json=payload, api_key=api_key)
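For reference, a sketch of the request body `create_dynamic_trace` now posts to `/dev/dynamic-traces` (field names from the code above; the git values are illustrative, and `git_info` is attached only when a remote is detected):

```python
# Sketch: payload assembled by create_dynamic_trace (values are placeholders).
payload = {
    "mcp_config": {"hud": {"url": "http://localhost:8765/mcp"}},
    "build_status": True,
    "environment_name": "my-env",
    "git_info": {  # omitted when get_git_info() finds no remote_url
        "remote_url": "https://github.com/user/repo",
        "branch": "main",
        "commit": "abc1234",
    },
}
```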
diff --git a/hud/cli/flows/tests/__init__.py b/hud/cli/flows/tests/__init__.py
new file mode 100644
index 00000000..ef9800e2
--- /dev/null
+++ b/hud/cli/flows/tests/__init__.py
@@ -0,0 +1 @@
+"""Tests for CLI flows."""
diff --git a/hud/cli/flows/tests/test_dev.py b/hud/cli/flows/tests/test_dev.py
new file mode 100644
index 00000000..2d0a6841
--- /dev/null
+++ b/hud/cli/flows/tests/test_dev.py
@@ -0,0 +1,126 @@
+"""Tests for CLI flows dev module."""
+
+from __future__ import annotations
+
+import base64
+import json
+from unittest import mock
+
+import pytest
+
+from hud.cli.flows.dev import generate_cursor_deeplink
+
+
+class TestGenerateCursorDeeplink:
+ """Test Cursor deeplink generation."""
+
+ def test_generate_deeplink_basic(self):
+ """Test basic deeplink generation."""
+ result = generate_cursor_deeplink("my-server", 8000)
+
+ assert result.startswith("cursor://anysphere.cursor-deeplink/mcp/install?")
+ assert "name=my-server" in result
+ assert "config=" in result
+
+ def test_generate_deeplink_config_content(self):
+ """Test that config contains correct URL."""
+ result = generate_cursor_deeplink("test-server", 9999)
+
+ # Extract and decode the config
+ config_part = result.split("config=")[1]
+ decoded = base64.b64decode(config_part).decode()
+ config = json.loads(decoded)
+
+ assert config["url"] == "http://localhost:9999/mcp"
+
+ def test_generate_deeplink_different_ports(self):
+ """Test deeplink generation with different ports."""
+ result_8000 = generate_cursor_deeplink("server", 8000)
+ result_3000 = generate_cursor_deeplink("server", 3000)
+
+ # Decode configs
+ config_8000 = json.loads(base64.b64decode(result_8000.split("config=")[1]))
+ config_3000 = json.loads(base64.b64decode(result_3000.split("config=")[1]))
+
+ assert "8000" in config_8000["url"]
+ assert "3000" in config_3000["url"]
+
+ def test_generate_deeplink_special_characters_in_name(self):
+ """Test deeplink with special characters in server name."""
+ # Server name with special characters should still work
+ result = generate_cursor_deeplink("my-cool_server.v2", 8000)
+
+ assert "name=my-cool_server.v2" in result
+
+
+class TestCreateDynamicTrace:
+ """Test dynamic trace creation."""
+
+ @pytest.mark.asyncio
+ @mock.patch("hud.cli.flows.dev.make_request")
+ @mock.patch("hud.cli.utils.git.get_git_info")
+ @mock.patch("hud.cli.flows.dev.settings")
+ async def test_create_dynamic_trace_success(self, mock_settings, mock_git, mock_request):
+ """Test successful trace creation."""
+ from hud.cli.flows.dev import create_dynamic_trace
+
+ mock_settings.hud_api_url = "https://api.hud.ai"
+ mock_settings.api_key = "test-key"
+ mock_git.return_value = {"remote_url": "https://github.com/user/repo"}
+ mock_request.return_value = {"id": "trace-123"}
+
+ trace_id, url = await create_dynamic_trace(
+ mcp_config={"server": {"url": "http://localhost:8000"}},
+ build_status=True,
+ environment_name="test-env",
+ )
+
+ assert trace_id == "trace-123"
+ assert url == "https://hud.ai/trace/trace-123"
+ mock_request.assert_called_once()
+
+ @pytest.mark.asyncio
+ @mock.patch("hud.cli.flows.dev.make_request")
+ @mock.patch("hud.cli.utils.git.get_git_info")
+ @mock.patch("hud.cli.flows.dev.settings")
+ async def test_create_dynamic_trace_no_git(self, mock_settings, mock_git, mock_request):
+ """Test trace creation without git info."""
+ from hud.cli.flows.dev import create_dynamic_trace
+
+ mock_settings.hud_api_url = "https://api.hud.ai"
+ mock_settings.api_key = "test-key"
+ mock_git.return_value = {} # No remote_url
+ mock_request.return_value = {"id": "trace-456"}
+
+ trace_id, _ = await create_dynamic_trace(
+ mcp_config={"server": {"url": "http://localhost:8000"}},
+ build_status=False,
+ environment_name="test-env",
+ )
+
+ assert trace_id == "trace-456"
+ # Verify git_info was not included in payload
+ call_args = mock_request.call_args
+ assert "git_info" not in call_args.kwargs.get("json", {})
+
+ @pytest.mark.asyncio
+ @mock.patch("hud.cli.flows.dev.make_request")
+ @mock.patch("hud.cli.utils.git.get_git_info")
+ @mock.patch("hud.cli.flows.dev.settings")
+ async def test_create_dynamic_trace_api_error(self, mock_settings, mock_git, mock_request):
+ """Test trace creation when API fails."""
+ from hud.cli.flows.dev import create_dynamic_trace
+
+ mock_settings.hud_api_url = "https://api.hud.ai"
+ mock_settings.api_key = "test-key"
+ mock_git.return_value = {}
+ mock_request.side_effect = Exception("API Error")
+
+ trace_id, url = await create_dynamic_trace(
+ mcp_config={"server": {}},
+ build_status=True,
+ environment_name="test-env",
+ )
+
+ assert trace_id is None
+ assert url is None
diff --git a/hud/cli/init.py b/hud/cli/init.py
index d9885187..d07cb486 100644
--- a/hud/cli/init.py
+++ b/hud/cli/init.py
@@ -14,16 +14,15 @@
from hud.utils.hud_console import HUDConsole
-# Presets mapping to environment folders in public SDK repo
+# Presets mapping to public GitHub repositories under hud-evals org
GITHUB_OWNER = "hud-evals"
-GITHUB_REPO = "hud-python"
GITHUB_BRANCH = "main"
PRESET_MAP: dict[str, str | None] = {
- "blank": "blank",
- "deep-research": "deepresearch",
- "browser": "browser",
- "rubrics": "rubrics",
+ "blank": "hud-blank",
+ "deep-research": "hud-deepresearch",
+ "browser": "hud-browser",
+ "rubrics": "hud-rubrics",
}
SKIP_DIR_NAMES = {"node_modules", "__pycache__", "dist", "build", ".next", ".git"}
@@ -90,8 +89,8 @@ def _prompt_for_preset() -> str:
try:
choices = [
{"name": "blank", "message": "blank"},
- {"name": "deep-research", "message": "deep-research"},
{"name": "browser", "message": "browser"},
+ {"name": "deep-research", "message": "deep-research"},
{"name": "rubrics", "message": "rubrics"},
]
display_choices = [c["message"] for c in choices]
@@ -108,10 +107,10 @@ def _prompt_for_preset() -> str:
return "blank"
-def _download_tarball_subdir(
- owner: str, repo: str, ref: str, subdir: str, dest_dir: Path, files_created: list[str]
+def _download_tarball_repo(
+ owner: str, repo: str, ref: str, dest_dir: Path, files_created: list[str]
) -> None:
- """Download a GitHub tarball and extract only a subdirectory."""
+ """Download a GitHub tarball and extract the entire repository."""
tarball_url = f"https://codeload.github.com/{owner}/{repo}/tar.gz/{ref}"
token = os.getenv("GITHUB_TOKEN")
@@ -140,16 +139,17 @@ def _download_tarball_subdir(
if not members:
return
top = members[0].name.split("/", 1)[0]
- target_prefix = f"{top}/environments/{subdir.strip('/')}"
for member in members:
name = member.name
- if not (name == target_prefix or name.startswith(target_prefix + "/")):
+ if name == top:
continue
- rel_path = name[len(target_prefix) :].lstrip("/")
+ if not name.startswith(top + "/"):
+ continue
+
+ rel_path = name[len(top) + 1 :]
if not rel_path:
- dest_dir.mkdir(parents=True, exist_ok=True)
continue
out_path = (dest_dir / rel_path).resolve()
@@ -194,9 +194,9 @@ def create_environment(
target_dir = Path.cwd() / name if directory == "." else Path(directory) / name
if preset_normalized not in PRESET_MAP:
+ available = ", ".join(sorted(PRESET_MAP.keys()))
hud_console.warning(
- f"Unknown preset '{preset_normalized}', defaulting to 'blank' "
- "(available: blank, deep-research, browser, rubrics)"
+ f"Unknown preset '{preset_normalized}', defaulting to 'blank' (available: {available})"
)
preset_normalized = "blank"
@@ -210,17 +210,14 @@ def create_environment(
hud_console.warning(f"Overwriting existing files in {target_dir}")
# Download preset from GitHub
- env_folder = PRESET_MAP[preset_normalized]
- if env_folder is None:
- hud_console.error("Internal error: preset mapping missing folder name")
+ repo_name = PRESET_MAP[preset_normalized]
+ if repo_name is None:
+ hud_console.error("Internal error: preset mapping missing repo name")
raise typer.Exit(1)
hud_console.header(f"Initializing HUD Environment: {name} (preset: {preset_normalized})")
- hud_console.section_title("Downloading template from public SDK")
- source_url = (
- f"https://github.com/{GITHUB_OWNER}/{GITHUB_REPO}/tree/"
- f"{GITHUB_BRANCH}/environments/{env_folder}"
- )
+ hud_console.section_title("Downloading template from GitHub")
+ source_url = f"https://github.com/{GITHUB_OWNER}/{repo_name}"
hud_console.info("Source: " + source_url)
target_dir.mkdir(parents=True, exist_ok=True)
@@ -228,11 +225,10 @@ def create_environment(
started = time.time()
files_created_dl: list[str] = []
try:
- _download_tarball_subdir(
+ _download_tarball_repo(
owner=GITHUB_OWNER,
- repo=GITHUB_REPO,
+ repo=repo_name,
ref=GITHUB_BRANCH,
- subdir=env_folder,
dest_dir=target_dir,
files_created=files_created_dl,
)
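Because each preset now lives in its own repository, the tarball URL is built per repo rather than per subdirectory of `hud-python`. A small sketch of the mapping (the preset choice is only an example):

```python
# Sketch: preset -> codeload tarball URL, following _download_tarball_repo above.
GITHUB_OWNER = "hud-evals"
GITHUB_BRANCH = "main"
PRESET_MAP = {"blank": "hud-blank", "browser": "hud-browser"}

repo = PRESET_MAP["browser"]
tarball_url = f"https://codeload.github.com/{GITHUB_OWNER}/{repo}/tar.gz/{GITHUB_BRANCH}"
print(tarball_url)  # https://codeload.github.com/hud-evals/hud-browser/tar.gz/main
```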
diff --git a/hud/cli/tests/test_build.py b/hud/cli/tests/test_build.py
index ec05b814..9a7bb77b 100644
--- a/hud/cli/tests/test_build.py
+++ b/hud/cli/tests/test_build.py
@@ -334,6 +334,7 @@ class TestBuildEnvironment:
"""Test the main build_environment function."""
@mock.patch("hud.cli.build.build_docker_image")
+ @mock.patch("hud.cli.build.collect_runtime_metadata")
@mock.patch("hud.cli.build.analyze_mcp_environment")
@mock.patch("hud.cli.build.save_to_registry")
@mock.patch("hud.cli.build.get_docker_image_id")
@@ -344,6 +345,7 @@ def test_build_environment_success(
mock_get_id,
mock_save_registry,
mock_analyze,
+ mock_collect_runtime,
mock_build_docker,
tmp_path,
):
@@ -378,6 +380,12 @@ def test_build_environment_success(
],
}
mock_get_id.return_value = "sha256:abc123"
+ mock_collect_runtime.return_value = {
+ "python": "3.11.6",
+ "cuda": None,
+ "cudnn": None,
+ "pytorch": None,
+ }
# Mock final rebuild
mock_result = mock.Mock()
@@ -398,9 +406,76 @@ def test_build_environment_success(
assert lock_data["images"]["full"] == "test-env:0.1.0@sha256:abc123"
assert lock_data["images"]["local"] == "test-env:0.1.0"
assert lock_data["build"]["version"] == "0.1.0"
+ assert lock_data["build"]["baseImage"] == "python:3.11"
+ assert lock_data["build"]["platform"] == "linux/amd64"
assert lock_data["environment"]["toolCount"] == 2
+ assert lock_data["environment"]["runtime"]["python"] == "3.11.6"
assert len(lock_data["tools"]) == 2
+ @mock.patch("hud.cli.build.build_docker_image")
+ @mock.patch("hud.cli.build.collect_runtime_metadata")
+ @mock.patch("hud.cli.build.analyze_mcp_environment")
+ @mock.patch("hud.cli.build.save_to_registry")
+ @mock.patch("hud.cli.build.get_docker_image_id")
+ @mock.patch("subprocess.run")
+ def test_build_environment_internal_tools(
+ self,
+ mock_run,
+ mock_get_id,
+ mock_save_registry,
+ mock_analyze,
+ mock_collect_runtime,
+ mock_build_docker,
+ tmp_path,
+ ):
+ """Dispatcher tools should include internalTools in lock, with count."""
+ env_dir = tmp_path / "env-int"
+ env_dir.mkdir()
+ (env_dir / "pyproject.toml").write_text("""
+[tool.hud]
+image = "test/env:dev"
+""")
+ dockerfile = env_dir / "Dockerfile"
+ dockerfile.write_text("""
+FROM python:3.11
+""")
+
+ mock_build_docker.return_value = True
+ mock_analyze.return_value = {
+ "success": True,
+ "toolCount": 1,
+ "internalToolCount": 2,
+ "initializeMs": 500,
+ "tools": [
+ {
+ "name": "setup",
+ "description": "setup dispatcher",
+ "inputSchema": {"type": "object"},
+ "internalTools": ["board", "seed"],
+ }
+ ],
+ }
+ mock_get_id.return_value = "sha256:fff111"
+ mock_collect_runtime.return_value = {
+ "python": "3.11.6",
+ "cuda": None,
+ "cudnn": None,
+ "pytorch": None,
+ }
+
+ mock_result = mock.Mock()
+ mock_result.returncode = 0
+ mock_run.return_value = mock_result
+
+ build_environment(str(env_dir), "env-int:latest")
+
+ lock_file = env_dir / "hud.lock.yaml"
+ with open(lock_file) as f:
+ data = yaml.safe_load(f)
+ assert data["environment"]["internalToolCount"] == 2
+ assert data["tools"][0]["name"] == "setup"
+ assert data["tools"][0]["internalTools"] == ["board", "seed"]
+
def test_build_environment_no_directory(self):
"""Test build when directory doesn't exist."""
with pytest.raises(typer.Exit):
diff --git a/hud/cli/tests/test_dev.py b/hud/cli/tests/test_dev.py
new file mode 100644
index 00000000..d1027303
--- /dev/null
+++ b/hud/cli/tests/test_dev.py
@@ -0,0 +1,163 @@
+"""Tests for CLI dev module."""
+
+from __future__ import annotations
+
+from unittest import mock
+
+from hud.cli.dev import auto_detect_module, should_use_docker_mode
+
+
+class TestShouldUseDockerMode:
+ """Test Docker mode detection."""
+
+ def test_docker_mode_with_dockerfile(self, tmp_path):
+ """Test detection when Dockerfile exists."""
+ dockerfile = tmp_path / "Dockerfile"
+ dockerfile.write_text("FROM python:3.11")
+
+ assert should_use_docker_mode(tmp_path) is True
+
+ def test_no_docker_mode_without_dockerfile(self, tmp_path):
+ """Test detection when Dockerfile doesn't exist."""
+ assert should_use_docker_mode(tmp_path) is False
+
+ def test_docker_mode_empty_dockerfile(self, tmp_path):
+ """Test detection with empty Dockerfile."""
+ dockerfile = tmp_path / "Dockerfile"
+ dockerfile.write_text("")
+
+ assert should_use_docker_mode(tmp_path) is True
+
+
+class TestAutoDetectModule:
+ """Test MCP module auto-detection."""
+
+ def test_detect_module_from_init_with_mcpserver(self, tmp_path, monkeypatch):
+ """Test detection from __init__.py with MCPServer."""
+ monkeypatch.chdir(tmp_path)
+
+ init_file = tmp_path / "__init__.py"
+ init_file.write_text("""
+from hud.server import MCPServer
+mcp = MCPServer(name='test')
+""")
+
+ module_name, extra_path = auto_detect_module()
+
+ assert module_name == tmp_path.name
+ assert extra_path is None
+
+ def test_detect_module_from_init_with_fastmcp(self, tmp_path, monkeypatch):
+ """Test detection from __init__.py with FastMCP."""
+ monkeypatch.chdir(tmp_path)
+
+ init_file = tmp_path / "__init__.py"
+ init_file.write_text("""
+from fastmcp import FastMCP
+mcp = FastMCP(name='test')
+""")
+
+ module_name, extra_path = auto_detect_module()
+
+ assert module_name == tmp_path.name
+ assert extra_path is None
+
+ def test_detect_module_from_main_py(self, tmp_path, monkeypatch):
+ """Test detection from main.py with MCPServer."""
+ monkeypatch.chdir(tmp_path)
+
+ # Need both __init__.py and main.py
+ init_file = tmp_path / "__init__.py"
+ init_file.write_text("")
+
+ main_file = tmp_path / "main.py"
+ main_file.write_text("""
+from hud.server import MCPServer
+mcp = MCPServer(name='test')
+""")
+
+ module_name, extra_path = auto_detect_module()
+
+ assert module_name == f"{tmp_path.name}.main"
+ assert extra_path == tmp_path.parent
+
+ def test_no_detection_without_mcp(self, tmp_path, monkeypatch):
+ """Test no detection when mcp not defined."""
+ monkeypatch.chdir(tmp_path)
+
+ init_file = tmp_path / "__init__.py"
+ init_file.write_text("# Just a comment")
+
+ module_name, extra_path = auto_detect_module()
+
+ assert module_name is None
+ assert extra_path is None
+
+ def test_no_detection_empty_dir(self, tmp_path, monkeypatch):
+ """Test no detection in empty directory."""
+ monkeypatch.chdir(tmp_path)
+
+ module_name, extra_path = auto_detect_module()
+
+ assert module_name is None
+ assert extra_path is None
+
+
+class TestShowDevServerInfo:
+ """Test dev server info display."""
+
+ @mock.patch("hud.cli.dev.hud_console")
+ def test_show_dev_server_info_http(self, mock_console):
+ """Test showing server info for HTTP transport."""
+ from hud.cli.dev import show_dev_server_info
+
+ result = show_dev_server_info(
+ server_name="test-server",
+ port=8000,
+ transport="http",
+ inspector=False,
+ interactive=False,
+ )
+
+ # Returns cursor deeplink
+ assert result.startswith("cursor://")
+ assert "test-server" in result
+
+ # Console should have been called
+ assert mock_console.section_title.called
+ assert mock_console.info.called
+
+ @mock.patch("hud.cli.dev.hud_console")
+ def test_show_dev_server_info_stdio(self, mock_console):
+ """Test showing server info for stdio transport."""
+ from hud.cli.dev import show_dev_server_info
+
+ result = show_dev_server_info(
+ server_name="test-server",
+ port=8000,
+ transport="stdio",
+ inspector=False,
+ interactive=False,
+ )
+
+ # Returns cursor deeplink
+ assert result.startswith("cursor://")
+
+ @mock.patch("hud.cli.dev.hud_console")
+ def test_show_dev_server_info_with_telemetry(self, mock_console):
+ """Test showing server info with telemetry URLs."""
+ from hud.cli.dev import show_dev_server_info
+
+ result = show_dev_server_info(
+ server_name="browser-env",
+ port=8000,
+ transport="http",
+ inspector=False,
+ interactive=False,
+ telemetry={
+ "live_url": "https://hud.ai/trace/123",
+ "vnc_url": "http://localhost:5900",
+ },
+ )
+
+ assert result.startswith("cursor://")
diff --git a/hud/cli/tests/test_init.py b/hud/cli/tests/test_init.py
new file mode 100644
index 00000000..40f5889b
--- /dev/null
+++ b/hud/cli/tests/test_init.py
@@ -0,0 +1,124 @@
+"""Tests for CLI init module."""
+
+from __future__ import annotations
+
+from hud.cli.init import _replace_placeholders
+
+
+class TestReplacePlaceholders:
+ """Test placeholder replacement in template files."""
+
+ def test_replace_in_pyproject(self, tmp_path):
+ """Test replacing placeholders in pyproject.toml."""
+ # Create server directory structure
+ server_dir = tmp_path / "server"
+ server_dir.mkdir()
+
+ pyproject = server_dir / "pyproject.toml"
+ pyproject.write_text("""
+[project]
+name = "blank"
+description = "blank environment"
+""")
+
+ modified = _replace_placeholders(tmp_path, "my-cool-env")
+
+ # Normalize paths for cross-platform comparison
+ modified_normalized = [p.replace("\\", "/") for p in modified]
+ assert "server/pyproject.toml" in modified_normalized
+ content = pyproject.read_text()
+ assert "my_cool_env" in content
+ assert "blank" not in content
+
+ def test_replace_in_readme(self, tmp_path):
+ """Test replacing placeholders in README.md."""
+ readme = tmp_path / "README.md"
+ readme.write_text("# blank\n\nThis is the blank environment.")
+
+ modified = _replace_placeholders(tmp_path, "test-env")
+
+ assert "README.md" in modified
+ content = readme.read_text()
+ assert "test_env" in content
+ assert "blank" not in content
+
+ def test_replace_in_tasks_json(self, tmp_path):
+ """Test replacing placeholders in tasks.json."""
+ tasks = tmp_path / "tasks.json"
+ tasks.write_text('{"name": "blank", "tasks": []}')
+
+ modified = _replace_placeholders(tmp_path, "my-tasks")
+
+ assert "tasks.json" in modified
+ content = tasks.read_text()
+ assert "my_tasks" in content
+
+ def test_no_replace_in_non_placeholder_files(self, tmp_path):
+ """Test that non-placeholder files are not modified."""
+ other_file = tmp_path / "other.py"
+ other_file.write_text("# blank comment")
+
+ modified = _replace_placeholders(tmp_path, "test")
+
+ assert "other.py" not in modified
+ content = other_file.read_text()
+ assert "blank" in content # Should be unchanged
+
+ def test_skip_pycache_directories(self, tmp_path):
+ """Test that __pycache__ directories are skipped."""
+ pycache = tmp_path / "__pycache__"
+ pycache.mkdir()
+
+ cached_file = pycache / "module.pyc"
+ cached_file.write_text("blank")
+
+ modified = _replace_placeholders(tmp_path, "test")
+
+ # __pycache__ files should not be in modified list
+ assert not any("__pycache__" in f for f in modified)
+
+ def test_normalize_special_characters(self, tmp_path):
+ """Test that environment name is normalized for Python identifiers."""
+ server_dir = tmp_path / "server"
+ server_dir.mkdir()
+
+ pyproject = server_dir / "pyproject.toml"
+ pyproject.write_text('name = "blank"')
+
+ _replace_placeholders(tmp_path, "my cool-env.v2!")
+
+ content = pyproject.read_text()
+ # Special characters should be replaced with underscores
+ assert "my_cool_env_v2_" in content
+
+ def test_no_changes_when_no_placeholder(self, tmp_path):
+ """Test that files without placeholder are not modified."""
+ server_dir = tmp_path / "server"
+ server_dir.mkdir()
+
+ pyproject = server_dir / "pyproject.toml"
+ pyproject.write_text('name = "other-name"')
+
+ modified = _replace_placeholders(tmp_path, "test")
+
+ assert "server/pyproject.toml" not in modified
+
+ def test_nested_directory_structure(self, tmp_path):
+ """Test replacement in nested directory structure."""
+ # Create nested structure
+ server_dir = tmp_path / "server"
+ server_dir.mkdir()
+ (server_dir / "pyproject.toml").write_text('name = "blank"')
+
+ env_dir = tmp_path / "environment"
+ env_dir.mkdir()
+ (env_dir / "pyproject.toml").write_text('name = "blank"')
+ (env_dir / "README.md").write_text("# blank environment")
+
+ modified = _replace_placeholders(tmp_path, "nested-test")
+
+ # Normalize paths for cross-platform comparison
+ modified_normalized = [p.replace("\\", "/") for p in modified]
+ assert "server/pyproject.toml" in modified_normalized
+ assert "environment/pyproject.toml" in modified_normalized
+ assert "environment/README.md" in modified_normalized
diff --git a/hud/cli/tests/test_main_module.py b/hud/cli/tests/test_main_module.py
index 4034d94d..42647b8c 100644
--- a/hud/cli/tests/test_main_module.py
+++ b/hud/cli/tests/test_main_module.py
@@ -2,6 +2,7 @@
from __future__ import annotations
+import os
import subprocess
import sys
@@ -20,11 +21,16 @@ def test_main_module_imports_correctly(self):
def test_main_module_executes(self):
"""Test that running the module as main executes correctly."""
# Use subprocess to run the module as __main__ and check it doesn't crash
- # We expect it to show help/error since we're not providing arguments
+ # Use --version flag for a quick, deterministic test that doesn't require user input
+ env = {**os.environ, "HUD_SKIP_VERSION_CHECK": "1"}
result = subprocess.run(
- [sys.executable, "-m", "hud.cli"], capture_output=True, text=True, timeout=10
+ [sys.executable, "-m", "hud.cli", "--version"],
+ capture_output=True,
+ text=True,
+ timeout=30,
+ env=env,
)
- # Should exit with an error code but not crash
- # (The actual main function will show help or error for missing args)
- assert result.returncode != 0 # CLI should exit with error for no args
+ # Should exit successfully with version info
+ assert result.returncode == 0
+ assert "version" in result.stdout.lower() or "hud" in result.stdout.lower()
diff --git a/hud/cli/utils/git.py b/hud/cli/utils/git.py
new file mode 100644
index 00000000..864e12f8
--- /dev/null
+++ b/hud/cli/utils/git.py
@@ -0,0 +1,136 @@
+"""Git utilities for extracting repository information."""
+
+from __future__ import annotations
+
+import logging
+import subprocess
+from pathlib import Path
+from typing import Any
+
+logger = logging.getLogger(__name__)
+
+
+def get_git_remote_url(cwd: Path | None = None) -> str | None:
+ """
+ Get the git remote origin URL for the current repository.
+
+ Args:
+ cwd: Working directory (defaults to current directory)
+
+ Returns:
+ Git remote URL if available, None otherwise
+ """
+ cwd = cwd or Path.cwd()
+
+ try:
+ # Check if we're in a git repository
+ subprocess.run(
+ ["git", "rev-parse", "--git-dir"], # noqa: S607
+ cwd=cwd,
+ capture_output=True,
+ check=True,
+ )
+
+ # Get the remote origin URL
+ result = subprocess.run(
+ ["git", "config", "--get", "remote.origin.url"], # noqa: S607
+ cwd=cwd,
+ capture_output=True,
+ text=True,
+ check=True,
+ )
+
+ url = result.stdout.strip()
+ if url:
+ return normalize_github_url(url)
+ return None
+
+ except subprocess.CalledProcessError:
+ # Not a git repository or no remote origin
+ return None
+ except Exception as e:
+ logger.debug("Error getting git remote URL: %s", e)
+ return None
+
+
+def normalize_github_url(url: str) -> str:
+ """
+ Normalize various git URL formats to standard HTTPS GitHub URL.
+
+ Examples:
+ git@github.com:user/repo.git -> https://github.com/user/repo
+ https://github.com/user/repo.git -> https://github.com/user/repo
+ git://github.com/user/repo.git -> https://github.com/user/repo
+
+ Args:
+ url: Git remote URL in any format
+
+ Returns:
+ Normalized HTTPS GitHub URL
+ """
+ # Remove trailing .git
+ if url.endswith(".git"):
+ url = url[:-4]
+
+ # Handle SSH format (git@github.com:user/repo)
+ if url.startswith("git@github.com:"):
+ url = url.replace("git@github.com:", "https://github.com/")
+
+ # Handle git:// protocol
+ elif url.startswith("git://"):
+ url = url.replace("git://", "https://")
+
+ # Ensure HTTPS
+ elif not url.startswith("https://") and "github.com:" in url:
+ parts = url.split("github.com:")
+ url = f"https://github.com/{parts[1]}"
+
+ return url
+
+
+def get_git_info(cwd: Path | None = None) -> dict[str, Any]:
+ """
+ Get comprehensive git repository information.
+
+ Args:
+ cwd: Working directory (defaults to current directory)
+
+ Returns:
+ Dictionary with git info including:
+ - remote_url: The remote origin URL
+ - branch: Current branch name
+ - commit: Current commit hash (short)
+ """
+ cwd = cwd or Path.cwd()
+ info: dict[str, Any] = {}
+
+ # Get remote URL
+ info["remote_url"] = get_git_remote_url(cwd)
+
+ try:
+ # Get current branch
+ result = subprocess.run(
+ ["git", "rev-parse", "--abbrev-ref", "HEAD"], # noqa: S607
+ cwd=cwd,
+ capture_output=True,
+ text=True,
+ check=True,
+ )
+ info["branch"] = result.stdout.strip()
+
+ # Get current commit (short hash)
+ result = subprocess.run(
+ ["git", "rev-parse", "--short", "HEAD"], # noqa: S607
+ cwd=cwd,
+ capture_output=True,
+ text=True,
+ check=True,
+ )
+ info["commit"] = result.stdout.strip()
+
+ except subprocess.CalledProcessError:
+ pass
+ except Exception as e:
+ logger.debug("Error getting git info: %s", e)
+
+ return info
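A quick usage sketch of the new helpers (import path as defined above; results depend on the repository state of the working directory):

```python
# Sketch: using the git utilities added in this diff.
from hud.cli.utils.git import get_git_info, normalize_github_url

print(normalize_github_url("git@github.com:user/repo.git"))
# -> https://github.com/user/repo

info = get_git_info()  # branch/commit keys are absent when git calls fail
print(info.get("remote_url"), info.get("branch"), info.get("commit"))
```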
diff --git a/hud/cli/utils/tests/test_git.py b/hud/cli/utils/tests/test_git.py
new file mode 100644
index 00000000..fea9e153
--- /dev/null
+++ b/hud/cli/utils/tests/test_git.py
@@ -0,0 +1,142 @@
+"""Tests for git utilities."""
+
+from __future__ import annotations
+
+from unittest import mock
+
+from hud.cli.utils.git import get_git_info, get_git_remote_url, normalize_github_url
+
+
+class TestNormalizeGithubUrl:
+ """Test GitHub URL normalization."""
+
+ def test_normalize_ssh_url(self):
+ """Test normalizing SSH format URL."""
+ url = "git@github.com:user/repo.git"
+ result = normalize_github_url(url)
+ assert result == "https://github.com/user/repo"
+
+ def test_normalize_https_with_git_suffix(self):
+ """Test normalizing HTTPS URL with .git suffix."""
+ url = "https://github.com/user/repo.git"
+ result = normalize_github_url(url)
+ assert result == "https://github.com/user/repo"
+
+ def test_normalize_git_protocol(self):
+ """Test normalizing git:// protocol URL."""
+ url = "git://github.com/user/repo.git"
+ result = normalize_github_url(url)
+ assert result == "https://github.com/user/repo"
+
+ def test_normalize_already_clean(self):
+ """Test URL that's already normalized."""
+ url = "https://github.com/user/repo"
+ result = normalize_github_url(url)
+ assert result == "https://github.com/user/repo"
+
+ def test_normalize_with_github_com_colon(self):
+ """Test URL with github.com: format."""
+ url = "ssh://github.com:user/repo.git"
+ result = normalize_github_url(url)
+ assert result == "https://github.com/user/repo"
+
+
+class TestGetGitRemoteUrl:
+ """Test getting git remote URL."""
+
+ @mock.patch("subprocess.run")
+ def test_get_remote_url_success(self, mock_run):
+ """Test successfully getting remote URL."""
+ # First call checks if we're in a git repo
+ mock_run.side_effect = [
+ mock.Mock(returncode=0), # git rev-parse --git-dir
+ mock.Mock(returncode=0, stdout="git@github.com:user/repo.git\n"), # git config
+ ]
+
+ result = get_git_remote_url()
+ assert result == "https://github.com/user/repo"
+
+ @mock.patch("subprocess.run")
+ def test_get_remote_url_not_git_repo(self, mock_run):
+ """Test when not in a git repository."""
+ from subprocess import CalledProcessError
+
+ mock_run.side_effect = CalledProcessError(128, "git")
+
+ result = get_git_remote_url()
+ assert result is None
+
+ @mock.patch("subprocess.run")
+ def test_get_remote_url_no_remote(self, mock_run):
+ """Test when no remote origin exists."""
+ from subprocess import CalledProcessError
+
+ mock_run.side_effect = [
+ mock.Mock(returncode=0), # git rev-parse --git-dir
+ CalledProcessError(1, "git"), # git config fails
+ ]
+
+ result = get_git_remote_url()
+ assert result is None
+
+ @mock.patch("subprocess.run")
+ def test_get_remote_url_empty(self, mock_run):
+ """Test when remote URL is empty."""
+ mock_run.side_effect = [
+ mock.Mock(returncode=0),
+ mock.Mock(returncode=0, stdout=""),
+ ]
+
+ result = get_git_remote_url()
+ assert result is None
+
+
+class TestGetGitInfo:
+ """Test getting comprehensive git info."""
+
+ @mock.patch("hud.cli.utils.git.get_git_remote_url")
+ @mock.patch("subprocess.run")
+ def test_get_git_info_success(self, mock_run, mock_get_url):
+ """Test successfully getting all git info."""
+ mock_get_url.return_value = "https://github.com/user/repo"
+ mock_run.side_effect = [
+ mock.Mock(returncode=0, stdout="main\n"), # branch
+ mock.Mock(returncode=0, stdout="abc1234\n"), # commit
+ ]
+
+ result = get_git_info()
+
+ assert result["remote_url"] == "https://github.com/user/repo"
+ assert result["branch"] == "main"
+ assert result["commit"] == "abc1234"
+
+ @mock.patch("hud.cli.utils.git.get_git_remote_url")
+ @mock.patch("subprocess.run")
+ def test_get_git_info_no_remote(self, mock_run, mock_get_url):
+ """Test git info when no remote exists."""
+ mock_get_url.return_value = None
+ mock_run.side_effect = [
+ mock.Mock(returncode=0, stdout="feature-branch\n"),
+ mock.Mock(returncode=0, stdout="def5678\n"),
+ ]
+
+ result = get_git_info()
+
+ assert result["remote_url"] is None
+ assert result["branch"] == "feature-branch"
+ assert result["commit"] == "def5678"
+
+ @mock.patch("hud.cli.utils.git.get_git_remote_url")
+ @mock.patch("subprocess.run")
+ def test_get_git_info_subprocess_error(self, mock_run, mock_get_url):
+ """Test git info when subprocess fails."""
+ from subprocess import CalledProcessError
+
+ mock_get_url.return_value = "https://github.com/user/repo"
+ mock_run.side_effect = CalledProcessError(1, "git")
+
+ result = get_git_info()
+
+ assert result["remote_url"] == "https://github.com/user/repo"
+ assert "branch" not in result
+ assert "commit" not in result
diff --git a/hud/telemetry/async_context.py b/hud/telemetry/async_context.py
index 223f621c..90a00723 100644
--- a/hud/telemetry/async_context.py
+++ b/hud/telemetry/async_context.py
@@ -70,6 +70,7 @@ def __init__(
job_id: str | None = None,
task_id: str | None = None,
group_id: str | None = None,
+ trace_id: str | None = None,
) -> None:
self.name = name
self.root = root
@@ -77,7 +78,7 @@ def __init__(
self.job_id = job_id
self.task_id = task_id
self.group_id = group_id
- self.task_run_id = str(uuid.uuid4())
+ self.task_run_id = trace_id if trace_id else str(uuid.uuid4())
self.trace_obj = Trace(self.task_run_id, name, job_id, task_id, group_id)
self._otel_trace = None
@@ -260,6 +261,7 @@ def async_trace(
job_id: str | None = None,
task_id: str | None = None,
group_id: str | None = None,
+ trace_id: str | None = None,
) -> AsyncTrace:
"""Create an async trace context for telemetry tracking.
@@ -274,6 +276,7 @@ def async_trace(
job_id: Optional job ID to associate with this trace
task_id: Optional task ID for custom task identifiers
group_id: Optional group ID to associate with this trace
+ trace_id: Optional trace ID (auto-generated if not provided)
Returns:
AsyncTrace context manager
@@ -302,6 +305,7 @@ def async_trace(
job_id=job_id,
task_id=task_id,
group_id=group_id,
+ trace_id=trace_id,
)
diff --git a/hud/telemetry/trace.py b/hud/telemetry/trace.py
index 8442240c..2aa19080 100644
--- a/hud/telemetry/trace.py
+++ b/hud/telemetry/trace.py
@@ -96,10 +96,11 @@ def trace(
job_id: str | None = None,
task_id: str | None = None,
group_id: str | None = None,
+ trace_id: str | None = None,
) -> Generator[Trace, None, None]:
"""Start a HUD trace context for telemetry tracking.
- A unique task_run_id is automatically generated for each trace.
+ A unique task_run_id is automatically generated for each trace unless a trace_id is provided.
Args:
name: Descriptive name for this trace/task
@@ -108,6 +109,7 @@ def trace(
job_id: Optional job ID to associate with this trace
task_id: Optional task ID (for custom task identifiers)
group_id: Optional group ID to associate with this trace
+ trace_id: Optional trace ID (auto-generated if not provided)
Yields:
Trace: The trace object with logging capabilities
@@ -129,20 +131,24 @@ def trace(
# Ensure telemetry is configured
configure_telemetry()
- # Only generate task_run_id if using HUD backend
- # For custom OTLP backends, we don't need it
- from hud.settings import get_settings
-
- settings = get_settings()
-
- if settings.telemetry_enabled and settings.api_key:
- task_run_id = str(uuid.uuid4())
+ # Use provided trace_id or generate one
+ if trace_id:
+ task_run_id = trace_id
else:
- # Use a placeholder for custom backends
- logger.warning(
- "HUD API key is not set, using a placeholder for the task run ID. If this looks wrong, check your API key." # noqa: E501
- )
- task_run_id = str(uuid.uuid4())
+ # Only generate task_run_id if using HUD backend
+ # For custom OTLP backends, we don't need it
+ from hud.settings import get_settings
+
+ settings = get_settings()
+
+ if settings.telemetry_enabled and settings.api_key:
+ task_run_id = str(uuid.uuid4())
+ else:
+ # Use a placeholder for custom backends
+ logger.warning(
+ "HUD API key is not set, using a placeholder for the task run ID. If this looks wrong, check your API key." # noqa: E501
+ )
+ task_run_id = str(uuid.uuid4())
# Create trace object
trace_obj = Trace(task_run_id, name, job_id, task_id, group_id)
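Both trace entry points now accept an explicit `trace_id`, so a run can be attached to a pre-created trace (for example one returned by the dev flow) instead of a fresh UUID. A minimal sketch, assuming the remaining parameters keep their defaults:

```python
# Sketch: pinning the task_run_id via the new trace_id parameter.
import uuid

from hud.telemetry.trace import trace

known_id = str(uuid.uuid4())  # placeholder for an externally created trace ID

with trace("my-task", trace_id=known_id):
    pass  # telemetry recorded here is attributed to known_id
```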
diff --git a/hud/tests/test_datasets_extended.py b/hud/tests/test_datasets_extended.py
index ff4f9a89..69afb59b 100644
--- a/hud/tests/test_datasets_extended.py
+++ b/hud/tests/test_datasets_extended.py
@@ -238,36 +238,23 @@ async def test_run_dataset_with_metadata(self):
@pytest.mark.asyncio
async def test_run_dataset_exception_handling(self):
"""Test exception handling during task execution."""
- # Track execution
- executed_tasks = []
-
- # Create mock agent instances with proper run behavior
- mock_agents = []
- for i in range(3):
- agent = AsyncMock()
- if i == 1: # Second task should fail
- agent.run.side_effect = RuntimeError("Task 2 failed")
- else:
- agent.run.return_value = {"result": f"success-{i + 1}"}
- mock_agents.append(agent)
-
- # Create a mock agent class that returns our prepared instances
- agent_creation_count = 0
+ # Track execution by task index
+ executed_task_indices: set[int] = set()
+ # Create a mock agent class where behavior depends on the task being run
def create_mock_agent(**kwargs):
- nonlocal agent_creation_count
- agent = mock_agents[agent_creation_count]
- agent_creation_count += 1
-
- # Track when run is called
- original_run = agent.run
+ agent = AsyncMock()
- async def tracked_run(*args, **kwargs):
- executed_tasks.append(agent_creation_count - 1)
- return await original_run(*args, **kwargs)
+ async def mock_run(task, **run_kwargs):
+ # Extract task index from prompt "Task {i}"
+ task_idx = int(task.prompt.split()[-1])
+ executed_task_indices.add(task_idx)
- agent.run = tracked_run
+ if task_idx == 1: # Second task (index 1) should fail
+ raise RuntimeError("Task 2 failed")
+ return {"result": f"success-{task_idx + 1}"}
+ agent.run = mock_run
return agent
# Mock the agent class itself - runner calls agent_class.create()
@@ -294,8 +281,8 @@ async def tracked_run(*args, **kwargs):
results = await run_dataset("error_run", tasks, mock_agent_class) # type: ignore
# All tasks should be attempted
- assert len(executed_tasks) == 3
- assert executed_tasks == [0, 1, 2]
+ assert len(executed_task_indices) == 3
+ assert executed_task_indices == {0, 1, 2}
# First and third should succeed
assert results[0] == {"result": "success-1"}
diff --git a/hud/utils/tests/test_version.py b/hud/utils/tests/test_version.py
index 71b968f8..df8f2b38 100644
--- a/hud/utils/tests/test_version.py
+++ b/hud/utils/tests/test_version.py
@@ -5,4 +5,4 @@ def test_import():
"""Test that the package can be imported."""
import hud
- assert hud.__version__ == "0.4.70"
+ assert hud.__version__ == "0.4.71"
diff --git a/hud/version.py b/hud/version.py
index 24cebad9..297ec9c8 100644
--- a/hud/version.py
+++ b/hud/version.py
@@ -4,4 +4,4 @@
from __future__ import annotations
-__version__ = "0.4.70"
+__version__ = "0.4.71"
diff --git a/pyproject.toml b/pyproject.toml
index 49700fbd..c1b9cce4 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
[project]
name = "hud-python"
-version = "0.4.70"
+version = "0.4.71"
description = "SDK for the HUD platform."
readme = "README.md"
requires-python = ">=3.11, <3.13"