Hud dev additions #216
Changes from all commits
```diff
@@ -291,6 +291,121 @@ def extract_env_vars_from_dockerfile(dockerfile_path: Path) -> tuple[list[str], list[str]]:
     return required, optional
 
 
+def parse_base_image(dockerfile_path: Path) -> str | None:
+    """Extract the base image from the first FROM directive in Dockerfile.
+
+    For multi-stage builds, returns the image from the first FROM. Strips any
+    trailing AS <stage> segment.
+    """
+    try:
+        if not dockerfile_path.exists():
+            return None
+        for raw_line in dockerfile_path.read_text().splitlines():
+            line = raw_line.strip()
+            if not line or line.startswith("#"):
+                continue
+            if line.upper().startswith("FROM "):
+                rest = line[5:].strip()
+                # Remove stage alias if present
+                lower = rest.lower()
+                if " as " in lower:
+                    # Split using the original case string at the index of lower-case match
+                    idx = lower.index(" as ")
+                    rest = rest[:idx]
+                return rest.strip()
+    except Exception:
+        return None
+    return None
+
+
+def collect_runtime_metadata(image: str, *, verbose: bool = False) -> dict[str, str | None]:
+    """Probe container to capture Python/CUDA/cuDNN/PyTorch versions.
+
+    Runs a tiny Python snippet inside the built image using docker run.
+    """
+    hud_console = HUDConsole()
+
+    runtime_script = (
+        "import json, platform\n"
+        "info = {'python': platform.python_version()}\n"
+        "try:\n"
+        "    import torch\n"
+        "    info['pytorch'] = getattr(torch, '__version__', None)\n"
+        "    cuda_version = None\n"
+        "    try:\n"
+        "        cuda_version = getattr(getattr(torch, 'version', None), 'cuda', None)\n"
+        "    except Exception:\n"
+        "        cuda_version = None\n"
+        "    if cuda_version:\n"
+        "        info['cuda'] = cuda_version\n"
+        "    try:\n"
+        "        cudnn_version = torch.backends.cudnn.version()\n"
+        "    except Exception:\n"
+        "        cudnn_version = None\n"
+        "    if cudnn_version:\n"
+        "        info['cudnn'] = str(cudnn_version)\n"
+        "except Exception:\n"
+        "    pass\n"
+        "info.setdefault('pytorch', None)\n"
+        "info.setdefault('cuda', None)\n"
+        "info.setdefault('cudnn', None)\n"
+        "print(json.dumps(info))\n"
+    )
+
+    for binary in ("python", "python3"):
+        cmd = [
+            "docker",
+            "run",
+            "--rm",
+            image,
+            binary,
+            "-c",
+            runtime_script,
+        ]
+        try:
+            result = subprocess.run(  # noqa: S603
+                cmd, capture_output=True, text=True, check=False
+            )
+        except FileNotFoundError:
+            return {}
+
+        if result.returncode != 0:
+            if verbose:
+                hud_console.debug(
+                    f"Runtime probe failed with {binary}: {result.stderr.strip() or 'no stderr'}"
+                )
+            continue
+
+        output = (result.stdout or "").strip()
+        if not output:
+            return {}
```
Bug: Early return prevents fallback to python3 binary

When the probe exits successfully but prints nothing, the `if not output:` branch returns `{}` immediately instead of continuing the loop, so the `python3` fallback is never attempted for images whose `python` binary produces no output.
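A minimal fix sketch, not part of this PR: treat empty output like a failed run and fall through to the next interpreter:

```python
        output = (result.stdout or "").strip()
        if not output:
            if verbose:
                hud_console.debug(f"Runtime probe with {binary} produced no output")
            continue  # was: return {}, which skipped the python3 fallback
```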
```diff
+        try:
+            data = json.loads(output.splitlines()[-1])
+        except json.JSONDecodeError:
+            if verbose:
+                hud_console.debug(
+                    "Runtime probe returned non-JSON output; skipping metadata capture"
+                )
+            return {}
+
+        if not isinstance(data, dict):
+            if verbose:
+                hud_console.debug(
+                    "Runtime probe returned JSON that is not an object; skipping metadata capture"
+                )
+            return {}
+
+        return {
+            "python": data.get("python"),
+            "cuda": data.get("cuda"),
+            "cudnn": data.get("cudnn"),
+            "pytorch": data.get("pytorch"),
+        }
+
+    return {}
+
+
 async def analyze_mcp_environment(
     image: str, verbose: bool = False, env_vars: dict[str, str] | None = None
 ) -> dict[str, Any]:
```
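For orientation, a rough sketch of how the two new helpers behave; the Dockerfile contents, image tag, and version values below are hypothetical:

```python
from pathlib import Path

# Hypothetical Dockerfile beginning with: FROM python:3.11-slim AS base
parse_base_image(Path("Dockerfile"))
# -> 'python:3.11-slim'

# Probing a hypothetical CUDA-enabled image; all four keys are always
# present, with None values when torch/CUDA are absent from the image.
collect_runtime_metadata("my-env:dev", verbose=True)
# -> {'python': '3.11.9', 'cuda': '12.1', 'cudnn': '8902', 'pytorch': '2.3.1'}
```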
```diff
@@ -325,17 +440,60 @@ async def analyze_mcp_environment(
         initialized = True
         initialize_ms = int((time.time() - start_time) * 1000)
 
-        # Delegate to standard analysis helper for consistency
+        # Delegate to standard analysis helper
         full_analysis = await client.analyze_environment()
 
-        # Normalize to build's expected fields
+        # Normalize and enrich with internalTools if a hub map is present
         tools_list = full_analysis.get("tools", [])
-        return {
+        hub_map = full_analysis.get("hub_tools", {}) or full_analysis.get("hubTools", {})
+
+        normalized_tools: list[dict[str, Any]] = []
+        internal_total = 0
+        for t in tools_list:
+            # Extract core fields (support object or dict forms)
+            if hasattr(t, "name"):
+                name = getattr(t, "name", None)
+                description = getattr(t, "description", None)
+                input_schema = getattr(t, "inputSchema", None)
+                existing_internal = getattr(t, "internalTools", None)
+            else:
+                name = t.get("name")
+                description = t.get("description")
+                # accept either inputSchema or input_schema
+                input_schema = t.get("inputSchema") or t.get("input_schema")
+                # accept either internalTools or internal_tools
+                existing_internal = t.get("internalTools") or t.get("internal_tools")
+
+            tool_entry: dict[str, Any] = {"name": name}
+            if description:
+                tool_entry["description"] = description
+            if input_schema:
+                tool_entry["inputSchema"] = input_schema
+
+            # Merge internal tools: preserve any existing declaration and add hub_map[name]
+            merged_internal: list[str] = []
+            if isinstance(existing_internal, list):
+                merged_internal.extend([str(x) for x in existing_internal])
+            if isinstance(hub_map, dict) and name in hub_map and isinstance(hub_map[name], list):
+                merged_internal.extend([str(x) for x in hub_map[name]])
+            if merged_internal:
+                # Deduplicate while preserving order
+                merged_internal = list(dict.fromkeys(merged_internal))
+                tool_entry["internalTools"] = merged_internal
+                internal_total += len(merged_internal)
+
+            normalized_tools.append(tool_entry)
+
+        result = {
             "initializeMs": initialize_ms,
             "toolCount": len(tools_list),
-            "tools": tools_list,
+            "internalToolCount": internal_total,
+            "tools": normalized_tools,
             "success": True,
         }
+        if hub_map:
+            result["hub_tools"] = hub_map
+        return result
     except TimeoutError:
         from hud.shared.exceptions import HudException
```
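The dedup-while-preserving-order step is the core of the merge rule above; a tiny standalone sketch with hypothetical tool and internal-tool names:

```python
# Hypothetical: tool "setup" declares one internal tool itself, and the
# hub map contributes an overlapping list for the same tool name.
existing_internal = ["board"]
hub_entry = ["board", "reset_board"]  # stands in for hub_map["setup"]

merged = [str(x) for x in existing_internal] + [str(x) for x in hub_entry]
merged = list(dict.fromkeys(merged))  # dict keys dedupe, insertion order kept
assert merged == ["board", "reset_board"]
```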
```diff
@@ -562,7 +720,9 @@ def build_environment(
     finally:
         loop.close()
 
-    hud_console.success(f"Analyzed environment: {analysis['toolCount']} tools found")
+    # Show analysis results including hub tools
+    tool_msg = f"Analyzed environment: {analysis['toolCount']} tools found"
+    hud_console.success(tool_msg)
 
     # Extract environment variables from Dockerfile
     dockerfile_path = env_dir / "Dockerfile"
```
```diff
@@ -604,9 +764,14 @@ def build_environment(
     if image_tag:
         base_name = image_tag.split(":")[0] if ":" in image_tag else image_tag
 
+    # Collect runtime metadata and compute base image/platform
+    runtime_info = collect_runtime_metadata(temp_tag, verbose=verbose)
+    base_image = parse_base_image(dockerfile_path)
+    effective_platform = platform if platform is not None else "linux/amd64"
+
     # Create lock file content with images subsection at top
     lock_content = {
-        "version": "1.1",  # Lock file format version
+        "version": "1.2",  # Lock file format version
         "images": {
             "local": f"{base_name}:{new_version}",  # Local tag with version
             "full": None,  # Will be set with digest after build
```
```diff
@@ -619,13 +784,20 @@ def build_environment(
             "version": new_version,
             # Fast source fingerprint for change detection
             "sourceHash": compute_source_hash(env_dir),
+            "baseImage": base_image,
+            "platform": effective_platform,
         },
         "environment": {
             "initializeMs": analysis["initializeMs"],
             "toolCount": analysis["toolCount"],
        },
    }
 
+    if runtime_info:
+        lock_content["environment"]["runtime"] = runtime_info
+    internal_count = int(analysis.get("internalToolCount", 0) or 0)
+    lock_content["environment"]["internalToolCount"] = internal_count
+
     # Add environment variables section if any exist
     # Include env vars from .env file as well
     env_vars_from_file = set(env_from_file.keys()) if env_from_file else set()
```
```diff
@@ -662,14 +834,23 @@ def build_environment(
 
     # Add tools with full schemas for RL config generation
     if analysis["tools"]:
-        lock_content["tools"] = [
-            {
+        tools_serialized: list[dict[str, Any]] = []
+        for tool in analysis["tools"]:
+            entry: dict[str, Any] = {
                 "name": tool["name"],
+                # Preserve legacy shape: always include description/inputSchema
                 "description": tool.get("description", ""),
                 "inputSchema": tool.get("inputSchema", {}),
             }
-            for tool in analysis["tools"]
-        ]
+            if tool.get("internalTools"):
+                entry["internalTools"] = tool.get("internalTools")
+            tools_serialized.append(entry)
+        lock_content["tools"] = tools_serialized
+
+    # Add hub tools if present (analyze_environment returns hub_tools with snake_case)
+    hub_tools = analysis.get("hub_tools") or analysis.get("hubTools")
+    if hub_tools:
+        lock_content["hubTools"] = hub_tools
 
     # Write lock file
     lock_path = env_dir / "hud.lock.yaml"
```
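Taken together, the 1.2 lock file gains `baseImage`, `platform`, `runtime`, `internalToolCount`, per-tool `internalTools`, and a top-level `hubTools` map. A hedged sketch of a resulting `hud.lock.yaml`: all names, versions, and tool entries are hypothetical, and the section holding `version`/`sourceHash` is not visible in the hunk, so its `build` key here is a guess:

```yaml
version: "1.2"
images:
  local: my-env:0.3.0
  full: null              # set with digest after build
build:                    # guessed key; the enclosing section is elided in the diff
  version: 0.3.0
  sourceHash: 9f8a7b6c
  baseImage: python:3.11-slim
  platform: linux/amd64
environment:
  initializeMs: 412
  toolCount: 2
  internalToolCount: 2
  runtime:
    python: 3.11.9
    pytorch: 2.3.1
    cuda: "12.1"
    cudnn: "8902"
tools:
  - name: setup
    description: Prepare the task environment
    inputSchema: {}
    internalTools: [board, reset_board]
hubTools:
  setup: [board, reset_board]
```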
Bug: parse_base_image doesn't handle FROM --platform flag

The `parse_base_image` function correctly strips the `AS <stage>` suffix from FROM directives but doesn't handle the `--platform` flag. When a Dockerfile uses `FROM --platform=linux/amd64 python:3.11`, the function returns `--platform=linux/amd64 python:3.11` instead of just `python:3.11`. This writes incorrect `baseImage` metadata to the lock file. The `--platform` flag is commonly used in multi-architecture Docker builds.
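A minimal fix sketch, assuming any flags precede the image reference: drop leading `--` tokens before stripping the stage alias:

```python
                # Inside parse_base_image, right after: rest = line[5:].strip()
                tokens = rest.split()
                while tokens and tokens[0].startswith("--"):
                    tokens.pop(0)  # discard flags like --platform=linux/amd64
                rest = " ".join(tokens)
```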