Skip to content

Commit 6301f99

Browse files
committed
fix: review findings for context assembler (OPE-172)
CI fixes: - Remove unused import get_repo_or_404 from context.py (F401) - Remove unused import dependency_analyzer from context_assembler.py (F401) Correctness: - Use project StructuredLogger (from services.observability) not stdlib logging.getLogger() -- matches all other routes/services - Guard local_path: skip rule reading when repo path is empty or missing instead of falling back to Path('') which resolves to cwd - Add return type annotation -> dict[str, Any] on assemble_context Async safety: - Wrap blocking get_file_dependencies() in asyncio.to_thread via _load_deps_sync helper (matches project pattern in repos.py) - Wrap blocking Path.read_text() in asyncio.to_thread via _read_rules_file_sync helper Budget enforcement: - _build_package now checks remaining budget BEFORE appending each tier (files, deps, rules). Individual file/dep entries are added only while budget allows, preventing Tier 1+2 from blowing past the token limit before rules are considered. Test: - Add endpoint path assertion: verify api_post called with '/context/assemble' (not just payload check)
1 parent 6730648 commit 6301f99

3 files changed

Lines changed: 56 additions & 35 deletions

File tree

backend/routes/context.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,23 +4,22 @@
44
Uses semantic search + dependency graph + project rules to build a
55
minimal, precise context package for AI coding assistants.
66
"""
7-
import logging
87
import time
8+
from typing import Any
99

1010
from fastapi import APIRouter, Depends, HTTPException
1111
from pydantic import BaseModel, Field
1212

13-
from dependencies import get_repo_or_404, verify_repo_access
13+
from dependencies import verify_repo_access
1414
from middleware.auth import AuthContext, require_auth
1515
from services.observability import (
1616
add_breadcrumb,
1717
capture_exception,
18+
logger,
1819
metrics,
1920
set_operation_context,
2021
)
2122

22-
logger = logging.getLogger(__name__)
23-
2423
router = APIRouter(tags=["context"])
2524

2625

@@ -34,7 +33,7 @@ class AssembleRequest(BaseModel):
3433
async def assemble_context(
3534
request: AssembleRequest,
3635
auth: AuthContext = Depends(require_auth),
37-
):
36+
) -> dict[str, Any]:
3837
"""Assemble task-specific context from semantic search + deps + rules.
3938
4039
Returns a markdown context package sized to fit within token_budget,

backend/services/context_assembler.py

Lines changed: 50 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,12 @@
55
project rules, and returns an assembled context package within a
66
token budget. This is the core of OPE-172.
77
"""
8-
import logging
8+
import asyncio
99
import re
1010
from pathlib import Path
1111
from typing import Any, Dict, List, Optional, Tuple
1212

13-
logger = logging.getLogger(__name__)
13+
from services.observability import logger
1414

1515
# Rule files in priority order (first found wins, same as dna_extractor)
1616
RULES_FILES = [
@@ -59,8 +59,8 @@ def _split_rules_into_sections(content: str) -> List[Dict[str, str]]:
5959
return sections
6060

6161

62-
def _read_rules_file(repo_path: Path) -> Tuple[Optional[str], Optional[str]]:
63-
"""Find and read the first matching rules file in the repo."""
62+
def _read_rules_file_sync(repo_path: Path) -> Tuple[Optional[str], Optional[str]]:
63+
"""Find and read the first matching rules file in the repo (sync)."""
6464
for filename in RULES_FILES:
6565
rules_path = repo_path / filename
6666
if rules_path.exists() and rules_path.is_file():
@@ -69,10 +69,16 @@ def _read_rules_file(repo_path: Path) -> Tuple[Optional[str], Optional[str]]:
6969
if content.strip():
7070
return content, filename
7171
except OSError as exc:
72-
logger.warning("Could not read rules file %s: %s", rules_path, exc)
72+
logger.warning("Could not read rules file", path=str(rules_path), error=str(exc))
7373
return None, None
7474

7575

76+
def _load_deps_sync(repo_id: str) -> List[Dict]:
77+
"""Load file dependencies from Supabase (sync)."""
78+
from services.supabase_service import get_supabase_service
79+
return get_supabase_service().get_file_dependencies(repo_id)
80+
81+
7682
class ContextAssembler:
7783
"""Assembles per-task context from semantic search + deps + rules."""
7884

@@ -88,22 +94,26 @@ async def assemble(
8894
Returns dict with 'context' (markdown string), 'files_found',
8995
'tokens_used', and 'debug' metadata.
9096
"""
91-
from dependencies import indexer, dependency_analyzer, get_repo_or_404
92-
from services.supabase_service import get_supabase_service
97+
from dependencies import indexer, get_repo_or_404
9398

9499
repo = get_repo_or_404(repo_id, user_id)
95-
local_path = Path(repo.get("local_path", ""))
100+
local_path_str = repo.get("local_path", "")
96101

97102
# Step 1: Semantic search for the most relevant files
98103
search_results = await self._search(task, repo_id, indexer)
99104
found_files = self._unique_files(search_results)
100105

101-
# Step 2: Expand with 1-hop dependencies
102-
dep_files = self._expand_deps(found_files, repo_id, get_supabase_service())
106+
# Step 2: Expand with 1-hop dependencies (sync DB call off event loop)
107+
dep_files = await self._expand_deps(found_files, repo_id)
103108

104109
# Step 3: Match relevant rule sections
105110
all_files = list(dict.fromkeys(found_files + dep_files))
106-
rules_content, rules_source = _read_rules_file(local_path)
111+
rules_content: Optional[str] = None
112+
rules_source: Optional[str] = None
113+
if local_path_str and Path(local_path_str).is_dir():
114+
rules_content, rules_source = await asyncio.to_thread(
115+
_read_rules_file_sync, Path(local_path_str),
116+
)
107117
matched_rules = self._match_rules(rules_content, all_files) if rules_content else []
108118

109119
# Step 4: Assemble within token budget
@@ -137,7 +147,7 @@ async def _search(
137147
)
138148
return results
139149
except Exception as exc:
140-
logger.error("Context search failed: %s", exc)
150+
logger.error("Context search failed", error=str(exc))
141151
return []
142152

143153
@staticmethod
@@ -153,14 +163,12 @@ def _unique_files(results: List[Dict]) -> List[str]:
153163
return files
154164

155165
@staticmethod
156-
def _expand_deps(
157-
seed_files: List[str], repo_id: str, db: Any,
158-
) -> List[str]:
166+
async def _expand_deps(seed_files: List[str], repo_id: str) -> List[str]:
159167
"""Add 1-hop imports/dependents for seed files."""
160168
try:
161-
all_deps = db.get_file_dependencies(repo_id)
169+
all_deps = await asyncio.to_thread(_load_deps_sync, repo_id)
162170
except Exception as exc:
163-
logger.warning("Could not load deps for expansion: %s", exc)
171+
logger.warning("Could not load deps for expansion", error=str(exc))
164172
return []
165173

166174
# Build adjacency maps
@@ -225,30 +233,41 @@ def _build_package(
225233
) -> str:
226234
"""Assemble markdown context package within token budget."""
227235
lines: List[str] = [f'## Context for: "{task}"', ""]
236+
remaining = budget - _estimate_tokens("\n".join(lines))
228237

229238
# Tier 1: Relevant files (highest priority)
230-
if found_files:
231-
lines.append("### Relevant files")
239+
if found_files and remaining > 50:
240+
tier_lines = ["### Relevant files"]
232241
for r in search_results:
233242
fp = r.get("file_path", "")
234243
name = r.get("qualified_name", r.get("name", ""))
235244
score = r.get("score", 0)
236245
sig = r.get("signature", "")
237246
pct = f"{score * 100:.0f}%" if isinstance(score, float) else str(score)
238247
desc = sig if sig else name
239-
lines.append(f"- `{fp}` -- {desc} (relevance: {pct})")
240-
lines.append("")
248+
entry = f"- `{fp}` -- {desc} (relevance: {pct})"
249+
entry_tokens = _estimate_tokens(entry)
250+
if entry_tokens <= remaining:
251+
tier_lines.append(entry)
252+
remaining -= entry_tokens
253+
else:
254+
break
255+
tier_lines.append("")
256+
lines.extend(tier_lines)
241257

242258
# Tier 2: Dependency files
243-
if dep_files:
244-
lines.append("### Depends on")
259+
if dep_files and remaining > 50:
260+
tier_lines = ["### Depends on"]
245261
for fp in dep_files[:10]:
246-
lines.append(f"- `{fp}`")
247-
lines.append("")
248-
249-
# Check budget before adding rules
250-
current = _estimate_tokens("\n".join(lines))
251-
remaining = budget - current
262+
entry = f"- `{fp}`"
263+
entry_tokens = _estimate_tokens(entry)
264+
if entry_tokens <= remaining:
265+
tier_lines.append(entry)
266+
remaining -= entry_tokens
267+
else:
268+
break
269+
tier_lines.append("")
270+
lines.extend(tier_lines)
252271

253272
# Tier 3: Matched rules
254273
if matched_rules and remaining > 50:
@@ -262,7 +281,8 @@ def _build_package(
262281
else:
263282
# Truncate the last section to fit
264283
chars_left = remaining * 4
265-
lines.append(section["body"][:chars_left] + "...")
284+
if chars_left > 20:
285+
lines.append(section["body"][:chars_left] + "...")
266286
break
267287
lines.append("")
268288

mcp-server/tests/test_handlers.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,8 @@ async def test_context_for_task_dispatches(self, mock_post):
7777
})
7878
assert len(result) == 1
7979
assert "Context for" in result[0].text
80+
call_path = mock_post.call_args[0][0]
81+
assert call_path == "/context/assemble"
8082
payload = mock_post.call_args[1]["json"]
8183
assert payload["task"] == "add auth to settings"
8284
assert payload["token_budget"] == 1500

0 commit comments

Comments
 (0)