From 67306489c3cc6b9cd046526722ee38e275db1cb4 Mon Sep 17 00:00:00 2001
From: Devanshu Rajesh Chicholikar <chicholikar.d@northeastern.edu>
Date: Tue, 10 Mar 2026 14:37:11 -0400
Subject: [PATCH 1/3] feat: context assembly service -- per-task context
 packaging (OPE-172)

New MCP tool: get_context_for_task
New endpoint: POST /api/v1/context/assemble

Assembly pipeline:
1. Semantic search (existing search_v2) finds top 5 relevant files
2. Dependency expansion adds 1-hop imports/dependents from cached graph
3. Rule matching splits the first rules file found (CLAUDE.md, AGENTS.md,
   .cursorrules, ...) by ## headers, keeps sections mentioning discovered
   files + always-relevant sections
4. Token budget enforcer fills by priority tier (search > deps > rules)

Files:
- backend/services/context_assembler.py (new, ~270 lines)
- backend/routes/context.py (new, ~80 lines)
- backend/dependencies.py (add context_assembler singleton)
- backend/main.py (register context_router)
- mcp-server/handlers.py (add get_context_for_task handler)
- mcp-server/tools.py (add tool schema)
- mcp-server/tests/ (updated expected tools + handler test)

MCP tests: 46 passed. Flake8 clean.
---
 backend/dependencies.py               |   2 +
 backend/main.py                       |   2 +
 backend/routes/context.py             |  82 ++++++++
 backend/services/context_assembler.py | 269 ++++++++++++++++++++++++++
 mcp-server/handlers.py                |  15 ++
 mcp-server/tests/test_handlers.py     |  19 ++
 mcp-server/tests/test_tools.py        |   1 +
 mcp-server/tools.py                   |  42 ++++
 8 files changed, 432 insertions(+)
 create mode 100644 backend/routes/context.py
 create mode 100644 backend/services/context_assembler.py

diff --git a/backend/dependencies.py b/backend/dependencies.py
index 62688b2..3d62bdb 100644
--- a/backend/dependencies.py
+++ b/backend/dependencies.py
@@ -11,6 +11,7 @@
 from services.dependency_analyzer import DependencyAnalyzer
 from services.style_analyzer import StyleAnalyzer
 from services.dna_extractor import DNAExtractor
+from services.context_assembler import ContextAssembler
 from services.rate_limiter import RateLimiter, APIKeyManager
 from services.supabase_service import get_supabase_service
 from services.input_validator import CostController
@@ -25,6 +26,7 @@
 dependency_analyzer = DependencyAnalyzer()
 style_analyzer = StyleAnalyzer()
 dna_extractor = DNAExtractor()
+context_assembler = ContextAssembler()
 
 # Rate limiting and API key management
 rate_limiter = RateLimiter(redis_client=cache.redis if cache.redis else None)
diff --git a/backend/main.py b/backend/main.py
index 09245ab..04ccecf 100644
--- a/backend/main.py
+++ b/backend/main.py
@@ -35,6 +35,7 @@
 from routes.github import router as github_router
 from routes.feedback import router as feedback_router
 from routes.admin import router as admin_router
+from routes.context import router as context_router
 from routes.ws_playground import websocket_playground_index
 from routes.ws_repos import websocket_repo_indexing
 
@@ -106,6 +107,7 @@ async def dispatch(self, request: Request, call_next):
 app.include_router(github_router, prefix=API_PREFIX)
 app.include_router(feedback_router, prefix=API_PREFIX)
 app.include_router(admin_router, prefix=API_PREFIX)
+app.include_router(context_router, prefix=API_PREFIX)
 
 # WebSocket endpoints (versioned)
 app.add_api_websocket_route(f"{API_PREFIX}/ws/index/{{repo_id}}", websocket_index)
diff --git a/backend/routes/context.py b/backend/routes/context.py
new file mode 100644
index 0000000..853476c
--- /dev/null
+++ b/backend/routes/context.py
@@ -0,0 +1,82 @@
+"""Context assembly endpoint.
+
+Provides per-task context packaging via POST /api/v1/context/assemble.
+Uses semantic search + dependency graph + project rules to build a
+minimal, precise context package for AI coding assistants.
+"""
+import logging
+import time
+
+from fastapi import APIRouter, Depends, HTTPException
+from pydantic import BaseModel, Field
+
+from dependencies import get_repo_or_404, verify_repo_access
+from middleware.auth import AuthContext, require_auth
+from services.observability import (
+    add_breadcrumb,
+    capture_exception,
+    metrics,
+    set_operation_context,
+)
+
+logger = logging.getLogger(__name__)
+
+router = APIRouter(tags=["context"])
+
+
+class AssembleRequest(BaseModel):
+    task: str = Field(..., min_length=3, max_length=1000)
+    repo_id: str
+    token_budget: int = Field(default=1500, ge=100, le=10000)
+
+
+@router.post("/context/assemble")
+async def assemble_context(
+    request: AssembleRequest,
+    auth: AuthContext = Depends(require_auth),
+):
+    """Assemble task-specific context from semantic search + deps + rules.
+
+    Returns a markdown context package sized to fit within token_budget,
+    containing only the files, dependencies, and project rules relevant
+    to the given task description.
+    """
+    set_operation_context(
+        "context_assemble",
+        user_id=auth.user_id,
+        repo_id=request.repo_id,
+    )
+    add_breadcrumb("Context assembly requested", category="context", repo_id=request.repo_id)
+
+    verify_repo_access(request.repo_id, auth.user_id)
+
+    from dependencies import context_assembler
+
+    start = time.time()
+    try:
+        result = await context_assembler.assemble(
+            task=request.task,
+            repo_id=request.repo_id,
+            user_id=auth.user_id,
+            token_budget=request.token_budget,
+        )
+
+        elapsed = time.time() - start
+        logger.info(
+            "Context assembled",
+            repo_id=request.repo_id,
+            files=result["files_found"],
+            tokens=result["tokens_used"],
+            budget=request.token_budget,
+            duration_ms=round(elapsed * 1000),
+        )
+        metrics.timing("context_assemble_ms", elapsed * 1000)
+
+        return result
+
+    except HTTPException:
+        raise
+    except Exception as exc:
+        capture_exception(exc, operation="context_assemble", repo_id=request.repo_id)
+        logger.error("Context assembly failed: %s", exc)
+        raise HTTPException(status_code=500, detail="Context assembly failed")
diff --git a/backend/services/context_assembler.py b/backend/services/context_assembler.py
new file mode 100644
index 0000000..8265464
--- /dev/null
+++ b/backend/services/context_assembler.py
@@ -0,0 +1,269 @@
+"""Context assembly service for per-task context packaging.
+
+Takes a task description + repo, finds the most relevant files via
+semantic search, expands with 1-hop dependencies, matches applicable
+project rules, and returns an assembled context package within a
+token budget. This is the core of OPE-172.
+"""
+import logging
+import re
+from pathlib import Path
+from typing import Any, Dict, List, Optional, Tuple
+
+logger = logging.getLogger(__name__)
+
+# Rule files in priority order (first found wins, same as dna_extractor)
+RULES_FILES = [
+    "CLAUDE.md", "AGENTS.md", ".cursorrules",
+    ".codeintel/rules.md", "CONVENTIONS.md",
+    ".github/copilot-instructions.md", "CODING_GUIDELINES.md",
+]
+
+# Sections that apply to every task regardless of file matches
+ALWAYS_RELEVANT_PATTERNS = re.compile(
+    r"(git|commit|workflow|what not to do|never|critical|do not|testing|review)",
+    re.IGNORECASE,
+)
+
+
+def _estimate_tokens(text: str) -> int:
+    """Rough token estimate: 1 token per 4 chars."""
+    return len(text) // 4
+
+
+def _split_rules_into_sections(content: str) -> List[Dict[str, str]]:
+    """Split markdown content by ## headers into discrete sections."""
+    sections: List[Dict[str, str]] = []
+    current_header = ""
+    current_body: List[str] = []
+
+    for line in content.splitlines():
+        if line.startswith("## "):
+            if current_header or current_body:
+                sections.append({
+                    "header": current_header,
+                    "body": "\n".join(current_body).strip(),
+                })
+            current_header = line
+            current_body = []
+        else:
+            current_body.append(line)
+
+    # Capture the last section
+    if current_header or current_body:
+        sections.append({
+            "header": current_header,
+            "body": "\n".join(current_body).strip(),
+        })
+
+    return sections
+
+
+def _read_rules_file(repo_path: Path) -> Tuple[Optional[str], Optional[str]]:
+    """Find and read the first matching rules file in the repo."""
+    for filename in RULES_FILES:
+        rules_path = repo_path / filename
+        if rules_path.exists() and rules_path.is_file():
+            try:
+                content = rules_path.read_text(encoding="utf-8", errors="replace")
+                if content.strip():
+                    return content, filename
+            except OSError as exc:
+                logger.warning("Could not read rules file %s: %s", rules_path, exc)
+    return None, None
+
+
+class ContextAssembler:
+    """Assembles per-task context from semantic search + deps + rules."""
+
+    async def assemble(
+        self,
+        task: str,
+        repo_id: str,
+        user_id: str,
+        token_budget: int = 1500,
+    ) -> Dict[str, Any]:
+        """Build a context package for a specific coding task.
+
+        Returns dict with 'context' (markdown string), 'files_found',
+        'tokens_used', and 'debug' metadata.
+        """
+        from dependencies import indexer, dependency_analyzer, get_repo_or_404
+        from services.supabase_service import get_supabase_service
+
+        repo = get_repo_or_404(repo_id, user_id)
+        local_path = Path(repo.get("local_path", ""))
+
+        # Step 1: Semantic search for the most relevant files
+        search_results = await self._search(task, repo_id, indexer)
+        found_files = self._unique_files(search_results)
+
+        # Step 2: Expand with 1-hop dependencies
+        dep_files = self._expand_deps(found_files, repo_id, get_supabase_service())
+
+        # Step 3: Match relevant rule sections
+        all_files = list(dict.fromkeys(found_files + dep_files))
+        rules_content, rules_source = _read_rules_file(local_path)
+        matched_rules = self._match_rules(rules_content, all_files) if rules_content else []
+
+        # Step 4: Assemble within token budget
+        context_md = self._build_package(
+            task, search_results, found_files, dep_files,
+            matched_rules, token_budget,
+        )
+
+        return {
+            "context": context_md,
+            "files_found": len(all_files),
+            "tokens_used": _estimate_tokens(context_md),
+            "token_budget": token_budget,
+            "rules_source": rules_source,
+            "search_hits": len(search_results),
+            "dep_files_added": len(dep_files),
+            "rule_sections_matched": len(matched_rules),
+        }
+
+    # ------------------------------------------------------------------
+    # Internal helpers
+    # ------------------------------------------------------------------
+
+    async def _search(
+        self, task: str, repo_id: str, indexer: Any, top_k: int = 5,
+    ) -> List[Dict]:
+        """Run semantic search and return top results."""
+        try:
+            results = await indexer.search_v2(
+                query=task, repo_id=repo_id, top_k=top_k, use_reranking=True,
+            )
+            return results
+        except Exception as exc:
+            logger.error("Context search failed: %s", exc)
+            return []
+
+    @staticmethod
+    def _unique_files(results: List[Dict]) -> List[str]:
+        """Extract unique file paths from search results, preserving order."""
+        seen: set[str] = set()
+        files: List[str] = []
+        for r in results:
+            fp = r.get("file_path", "")
+            if fp and fp not in seen:
+                seen.add(fp)
+                files.append(fp)
+        return files
+
+    @staticmethod
+    def _expand_deps(
+        seed_files: List[str], repo_id: str, db: Any,
+    ) -> List[str]:
+        """Add 1-hop imports/dependents for seed files."""
+        try:
+            all_deps = db.get_file_dependencies(repo_id)
+        except Exception as exc:
+            logger.warning("Could not load deps for expansion: %s", exc)
+            return []
+
+        # Build adjacency maps
+        imports_map: Dict[str, List[str]] = {}
+        dependents_map: Dict[str, List[str]] = {}
+        for row in all_deps:
+            fp = row.get("file_path", "")
+            deps = row.get("depends_on", [])
+            imports_map[fp] = deps
+            for dep in deps:
+                dependents_map.setdefault(dep, []).append(fp)
+
+        seed_set = set(seed_files)
+        expanded: List[str] = []
+        for fp in seed_files:
+            for imp in imports_map.get(fp, []):
+                if imp not in seed_set and imp not in expanded:
+                    expanded.append(imp)
+            for dep in dependents_map.get(fp, []):
+                if dep not in seed_set and dep not in expanded:
+                    expanded.append(dep)
+
+        return expanded
+
+    @staticmethod
+    def _match_rules(
+        rules_content: str, files: List[str],
+    ) -> List[Dict[str, str]]:
+        """Return rule sections relevant to the discovered files."""
+        sections = _split_rules_into_sections(rules_content)
+        stems = {Path(f).stem for f in files}
+        names = {Path(f).name for f in files}
+
+        matched: List[Dict[str, str]] = []
+        for section in sections:
+            header = section["header"]
+            body = section["body"]
+            combined = f"{header}\n{body}"
+
+            # Always-relevant sections (git rules, "what not to do", etc.)
+            if ALWAYS_RELEVANT_PATTERNS.search(header):
+                matched.append(section)
+                continue
+
+            # Sections mentioning any discovered file
+            if any(name in combined for name in names):
+                matched.append(section)
+                continue
+            if any(stem in combined for stem in stems if len(stem) > 2):
+                matched.append(section)
+
+        return matched
+
+    @staticmethod
+    def _build_package(
+        task: str,
+        search_results: List[Dict],
+        found_files: List[str],
+        dep_files: List[str],
+        matched_rules: List[Dict[str, str]],
+        budget: int,
+    ) -> str:
+        """Assemble markdown context package within token budget."""
+        lines: List[str] = [f'## Context for: "{task}"', ""]
+
+        # Tier 1: Relevant files (highest priority)
+        if found_files:
+            lines.append("### Relevant files")
+            for r in search_results:
+                fp = r.get("file_path", "")
+                name = r.get("qualified_name", r.get("name", ""))
+                score = r.get("score", 0)
+                sig = r.get("signature", "")
+                pct = f"{score * 100:.0f}%" if isinstance(score, float) else str(score)
+                desc = sig if sig else name
+                lines.append(f"- `{fp}` -- {desc} (relevance: {pct})")
+            lines.append("")
+
+        # Tier 2: Dependency files
+        if dep_files:
+            lines.append("### Depends on")
+            for fp in dep_files[:10]:
+                lines.append(f"- `{fp}`")
+            lines.append("")
+
+        # Check budget before adding rules
+        current = _estimate_tokens("\n".join(lines))
+        remaining = budget - current
+
+        # Tier 3: Matched rules
+        if matched_rules and remaining > 50:
+            lines.append("### Rules that apply")
+            for section in matched_rules:
+                section_text = section["header"] + "\n" + section["body"]
+                section_tokens = _estimate_tokens(section_text)
+                if section_tokens <= remaining:
+                    lines.append(section["body"])
+                    remaining -= section_tokens
+                else:
+                    # Truncate the last section to fit
+                    chars_left = remaining * 4
+                    lines.append(section["body"][:chars_left] + "...")
+                    break
+            lines.append("")
+
+        return "\n".join(lines)
diff --git a/mcp-server/handlers.py b/mcp-server/handlers.py
index da4142b..595263d 100644
--- a/mcp-server/handlers.py
+++ b/mcp-server/handlers.py
@@ -165,6 +165,20 @@ async def _handle_index_repository(args: dict[str, Any]) -> str:
     return "\n".join(lines)
 
 
+async def _handle_get_context_for_task(args: dict[str, Any]) -> str:
+    payload = {
+        "task": args["task_description"],
+        "repo_id": args["repo_id"],
+        "token_budget": args.get("token_budget", 1500),
+    }
+    result = await api_post("/context/assemble", json=payload)
+    context = result.get("context", "No context assembled.")
+    tokens = result.get("tokens_used", 0)
+    budget = result.get("token_budget", 0)
+    files = result.get("files_found", 0)
+    return f"{context}\n\n---\n_{files} files, {tokens}/{budget} tokens_"
+
+
 async def _handle_delete_repository(args: dict[str, Any]) -> str:
     repo_id = args["repo_id"]
     result = await api_delete(f"/repos/{repo_id}")
@@ -185,6 +199,7 @@ async def _handle_delete_repository(args: dict[str, Any]) -> str:
     "get_repo_directories": _handle_get_repo_directories,
     "index_repository": _handle_index_repository,
     "delete_repository": _handle_delete_repository,
+    "get_context_for_task": _handle_get_context_for_task,
 }
 
 
diff --git a/mcp-server/tests/test_handlers.py b/mcp-server/tests/test_handlers.py
index 63648ea..1a130ed 100644
--- a/mcp-server/tests/test_handlers.py
+++ b/mcp-server/tests/test_handlers.py
@@ -62,6 +62,25 @@ async def test_dna_calls_correct_endpoint(self, mock_get):
         call_path = mock_get.call_args[0][0]
         assert "/repos/r1/dna" in call_path
 
+    @pytest.mark.asyncio
+    @patch("handlers.api_post", new_callable=AsyncMock)
+    async def test_context_for_task_dispatches(self, mock_post):
+        mock_post.return_value = {
+            "context": "## Context for: \"test task\"",
+            "tokens_used": 200,
+            "token_budget": 1500,
+            "files_found": 3,
+        }
+        result = await call_tool("get_context_for_task", {
+            "task_description": "add auth to settings",
+            "repo_id": "abc",
+        })
+        assert len(result) == 1
+        assert "Context for" in result[0].text
+        payload = mock_post.call_args[1]["json"]
+        assert payload["task"] == "add auth to settings"
+        assert payload["token_budget"] == 1500
+
     @pytest.mark.asyncio
     @patch("handlers.api_get", new_callable=AsyncMock)
     async def test_none_arguments_handled(self, mock_get):
diff --git a/mcp-server/tests/test_tools.py b/mcp-server/tests/test_tools.py
index bc239fb..25aebea 100644
--- a/mcp-server/tests/test_tools.py
+++ b/mcp-server/tests/test_tools.py
@@ -19,6 +19,7 @@
     "get_repo_directories",
     "index_repository",
     "delete_repository",
+    "get_context_for_task",
 }
 
 
diff --git a/mcp-server/tools.py b/mcp-server/tools.py
index fb13e87..9705383 100644
--- a/mcp-server/tools.py
+++ b/mcp-server/tools.py
@@ -145,6 +145,48 @@ def get_tool_schemas() -> list[types.Tool]:
                 "required": ["repo_id"],
             },
         ),
+        # --- Context assembly ---
+        types.Tool(
+            name="get_context_for_task",
+            description=(
+                "Get precisely assembled context for a specific coding task. "
+                "Returns only the files, dependencies, and project rules "
+                "relevant to your task, within a token budget. Use this "
+                "BEFORE writing any code to get exactly the right context "
+                "for the task at hand -- semantic search finds relevant "
+                "code, dependency expansion adds imports, and rule matching "
+                "includes only the project conventions that apply."
+            ),
+            inputSchema={
+                "type": "object",
+                "properties": {
+                    "task_description": {
+                        "type": "string",
+                        "description": (
+                            "Natural language description of the coding task. "
+                            "Examples: 'add rate limiting to settings endpoints', "
+                            "'implement OAuth for the admin dashboard', "
+                            "'fix the dependency graph for TypeScript repos'"
+                        ),
+                    },
+                    "repo_id": {
+                        "type": "string",
+                        "description": "Repository identifier",
+                    },
+                    "token_budget": {
+                        "type": "integer",
+                        "description": (
+                            "Maximum tokens for the context package "
+                            "(default: 1500). Lower = more focused."
+                        ),
+                        "default": 1500,
+                        "minimum": 100,
+                        "maximum": 10000,
+                    },
+                },
+                "required": ["task_description", "repo_id"],
+            },
+        ),
         # --- Write tools ---
         types.Tool(
             name="add_repository",

From 6301f99fa5bf32c718738cb2220a22c5398a2315 Mon Sep 17 00:00:00 2001
From: Devanshu Rajesh Chicholikar <chicholikar.d@northeastern.edu>
Date: Tue, 10 Mar 2026 14:54:48 -0400
Subject: [PATCH 2/3] fix: review findings for context assembler (OPE-172)

CI fixes:
- Remove unused import get_repo_or_404 from context.py (F401)
- Remove unused import dependency_analyzer from context_assembler.py (F401)

Correctness:
- Use project StructuredLogger (from services.observability) not stdlib
  logging.getLogger() -- matches all other routes/services
- Guard local_path: skip rule reading when repo path is empty or missing
  instead of falling back to Path('') which resolves to cwd
- Add return type annotation -> dict[str, Any] on assemble_context

Async safety:
- Wrap blocking get_file_dependencies() in asyncio.to_thread via
  _load_deps_sync helper (matches project pattern in repos.py)
- Wrap blocking Path.read_text() in asyncio.to_thread via
  _read_rules_file_sync helper

Budget enforcement:
- _build_package now checks remaining budget BEFORE appending each
  tier (files, deps, rules). Individual file/dep entries are added
  only while budget allows, preventing Tier 1+2 from blowing past
  the token limit before rules are considered.

Test:
- Add endpoint path assertion: verify api_post called with
  '/context/assemble' (not just payload check)
---
 backend/routes/context.py             |  9 ++-
 backend/services/context_assembler.py | 80 +++++++++++++++++----------
 mcp-server/tests/test_handlers.py     |  2 +
 3 files changed, 56 insertions(+), 35 deletions(-)

diff --git a/backend/routes/context.py b/backend/routes/context.py
index 853476c..603ed5c 100644
--- a/backend/routes/context.py
+++ b/backend/routes/context.py
@@ -4,23 +4,22 @@
 Uses semantic search + dependency graph + project rules to build a
 minimal, precise context package for AI coding assistants.
 """
-import logging
 import time
+from typing import Any
 
 from fastapi import APIRouter, Depends, HTTPException
 from pydantic import BaseModel, Field
 
-from dependencies import get_repo_or_404, verify_repo_access
+from dependencies import verify_repo_access
 from middleware.auth import AuthContext, require_auth
 from services.observability import (
     add_breadcrumb,
     capture_exception,
+    logger,
     metrics,
     set_operation_context,
 )
 
-logger = logging.getLogger(__name__)
-
 router = APIRouter(tags=["context"])
 
 
@@ -34,7 +33,7 @@ class AssembleRequest(BaseModel):
 async def assemble_context(
     request: AssembleRequest,
     auth: AuthContext = Depends(require_auth),
-):
+) -> dict[str, Any]:
     """Assemble task-specific context from semantic search + deps + rules.
 
     Returns a markdown context package sized to fit within token_budget,
diff --git a/backend/services/context_assembler.py b/backend/services/context_assembler.py
index 8265464..df63fd4 100644
--- a/backend/services/context_assembler.py
+++ b/backend/services/context_assembler.py
@@ -5,12 +5,12 @@
 project rules, and returns an assembled context package within a
 token budget. This is the core of OPE-172.
 """
-import logging
+import asyncio
 import re
 from pathlib import Path
 from typing import Any, Dict, List, Optional, Tuple
 
-logger = logging.getLogger(__name__)
+from services.observability import logger
 
 # Rule files in priority order (first found wins, same as dna_extractor)
 RULES_FILES = [
@@ -59,8 +59,8 @@ def _split_rules_into_sections(content: str) -> List[Dict[str, str]]:
     return sections
 
 
-def _read_rules_file(repo_path: Path) -> Tuple[Optional[str], Optional[str]]:
-    """Find and read the first matching rules file in the repo."""
+def _read_rules_file_sync(repo_path: Path) -> Tuple[Optional[str], Optional[str]]:
+    """Find and read the first matching rules file in the repo (sync)."""
     for filename in RULES_FILES:
         rules_path = repo_path / filename
         if rules_path.exists() and rules_path.is_file():
@@ -69,10 +69,16 @@ def _read_rules_file(repo_path: Path) -> Tuple[Optional[str], Optional[str]]:
                 if content.strip():
                     return content, filename
             except OSError as exc:
-                logger.warning("Could not read rules file %s: %s", rules_path, exc)
+                logger.warning("Could not read rules file", path=str(rules_path), error=str(exc))
     return None, None
 
 
+def _load_deps_sync(repo_id: str) -> List[Dict]:
+    """Load file dependencies from Supabase (sync)."""
+    from services.supabase_service import get_supabase_service
+    return get_supabase_service().get_file_dependencies(repo_id)
+
+
 class ContextAssembler:
     """Assembles per-task context from semantic search + deps + rules."""
 
@@ -88,22 +94,26 @@ async def assemble(
         Returns dict with 'context' (markdown string), 'files_found',
         'tokens_used', and 'debug' metadata.
         """
-        from dependencies import indexer, dependency_analyzer, get_repo_or_404
-        from services.supabase_service import get_supabase_service
+        from dependencies import indexer, get_repo_or_404
 
         repo = get_repo_or_404(repo_id, user_id)
-        local_path = Path(repo.get("local_path", ""))
+        local_path_str = repo.get("local_path", "")
 
         # Step 1: Semantic search for the most relevant files
         search_results = await self._search(task, repo_id, indexer)
         found_files = self._unique_files(search_results)
 
-        # Step 2: Expand with 1-hop dependencies
-        dep_files = self._expand_deps(found_files, repo_id, get_supabase_service())
+        # Step 2: Expand with 1-hop dependencies (sync DB call off event loop)
+        dep_files = await self._expand_deps(found_files, repo_id)
 
         # Step 3: Match relevant rule sections
         all_files = list(dict.fromkeys(found_files + dep_files))
-        rules_content, rules_source = _read_rules_file(local_path)
+        rules_content: Optional[str] = None
+        rules_source: Optional[str] = None
+        if local_path_str and Path(local_path_str).is_dir():
+            rules_content, rules_source = await asyncio.to_thread(
+                _read_rules_file_sync, Path(local_path_str),
+            )
         matched_rules = self._match_rules(rules_content, all_files) if rules_content else []
 
         # Step 4: Assemble within token budget
@@ -137,7 +147,7 @@ async def _search(
             )
             return results
         except Exception as exc:
-            logger.error("Context search failed: %s", exc)
+            logger.error("Context search failed", error=str(exc))
             return []
 
     @staticmethod
@@ -153,14 +163,12 @@ def _unique_files(results: List[Dict]) -> List[str]:
         return files
 
     @staticmethod
-    def _expand_deps(
-        seed_files: List[str], repo_id: str, db: Any,
-    ) -> List[str]:
+    async def _expand_deps(seed_files: List[str], repo_id: str) -> List[str]:
         """Add 1-hop imports/dependents for seed files."""
         try:
-            all_deps = db.get_file_dependencies(repo_id)
+            all_deps = await asyncio.to_thread(_load_deps_sync, repo_id)
         except Exception as exc:
-            logger.warning("Could not load deps for expansion: %s", exc)
+            logger.warning("Could not load deps for expansion", error=str(exc))
             return []
 
         # Build adjacency maps
@@ -225,10 +233,11 @@ def _build_package(
     ) -> str:
         """Assemble markdown context package within token budget."""
         lines: List[str] = [f'## Context for: "{task}"', ""]
+        remaining = budget - _estimate_tokens("\n".join(lines))
 
         # Tier 1: Relevant files (highest priority)
-        if found_files:
-            lines.append("### Relevant files")
+        if found_files and remaining > 50:
+            tier_lines = ["### Relevant files"]
             for r in search_results:
                 fp = r.get("file_path", "")
                 name = r.get("qualified_name", r.get("name", ""))
@@ -236,19 +245,29 @@ def _build_package(
                 sig = r.get("signature", "")
                 pct = f"{score * 100:.0f}%" if isinstance(score, float) else str(score)
                 desc = sig if sig else name
-                lines.append(f"- `{fp}` -- {desc} (relevance: {pct})")
-            lines.append("")
+                entry = f"- `{fp}` -- {desc} (relevance: {pct})"
+                entry_tokens = _estimate_tokens(entry)
+                if entry_tokens <= remaining:
+                    tier_lines.append(entry)
+                    remaining -= entry_tokens
+                else:
+                    break
+            tier_lines.append("")
+            lines.extend(tier_lines)
 
         # Tier 2: Dependency files
-        if dep_files:
-            lines.append("### Depends on")
+        if dep_files and remaining > 50:
+            tier_lines = ["### Depends on"]
             for fp in dep_files[:10]:
-                lines.append(f"- `{fp}`")
-            lines.append("")
-
-        # Check budget before adding rules
-        current = _estimate_tokens("\n".join(lines))
-        remaining = budget - current
+                entry = f"- `{fp}`"
+                entry_tokens = _estimate_tokens(entry)
+                if entry_tokens <= remaining:
+                    tier_lines.append(entry)
+                    remaining -= entry_tokens
+                else:
+                    break
+            tier_lines.append("")
+            lines.extend(tier_lines)
 
         # Tier 3: Matched rules
         if matched_rules and remaining > 50:
@@ -262,7 +281,8 @@ def _build_package(
                 else:
                     # Truncate the last section to fit
                     chars_left = remaining * 4
-                    lines.append(section["body"][:chars_left] + "...")
+                    if chars_left > 20:
+                        lines.append(section["body"][:chars_left] + "...")
                     break
             lines.append("")
 
diff --git a/mcp-server/tests/test_handlers.py b/mcp-server/tests/test_handlers.py
index 1a130ed..4644ca8 100644
--- a/mcp-server/tests/test_handlers.py
+++ b/mcp-server/tests/test_handlers.py
@@ -77,6 +77,8 @@ async def test_context_for_task_dispatches(self, mock_post):
         })
         assert len(result) == 1
         assert "Context for" in result[0].text
+        call_path = mock_post.call_args[0][0]
+        assert call_path == "/context/assemble"
         payload = mock_post.call_args[1]["json"]
         assert payload["task"] == "add auth to settings"
         assert payload["token_budget"] == 1500

From 24323cb8ab9e8365095956df480655a325814ab2 Mon Sep 17 00:00:00 2001
From: Devanshu Rajesh Chicholikar <chicholikar.d@northeastern.edu>
Date: Tue, 10 Mar 2026 15:07:52 -0400
Subject: [PATCH 3/3] fix: deduct tier header tokens from budget in
 _build_package

The '### Relevant files' and '### Depends on' headers plus their
trailing blank lines were appended without subtracting their token
cost from remaining. Three unaccounted headers compound to ~30-40
tokens of budget overrun. Now Tier 1 and Tier 2 deduct header_cost
before iterating entries; the Tier 3 '### Rules that apply' header is
not changed by this patch.

Skipped singleton accessor nitpick: dependency_analyzer.py,
style_analyzer.py, and dna_extractor.py are all plain classes
instantiated in dependencies.py -- no get_*() accessor pattern
exists in the project. ContextAssembler already matches.
---
 backend/services/context_assembler.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/backend/services/context_assembler.py b/backend/services/context_assembler.py
index df63fd4..86fad72 100644
--- a/backend/services/context_assembler.py
+++ b/backend/services/context_assembler.py
@@ -237,7 +237,10 @@ def _build_package(
 
         # Tier 1: Relevant files (highest priority)
         if found_files and remaining > 50:
-            tier_lines = ["### Relevant files"]
+            header = "### Relevant files"
+            header_cost = _estimate_tokens(header) + 1  # +1 for trailing blank line
+            remaining -= header_cost
+            tier_lines = [header]
             for r in search_results:
                 fp = r.get("file_path", "")
                 name = r.get("qualified_name", r.get("name", ""))
@@ -257,7 +260,10 @@ def _build_package(
 
         # Tier 2: Dependency files
         if dep_files and remaining > 50:
-            tier_lines = ["### Depends on"]
+            header = "### Depends on"
+            header_cost = _estimate_tokens(header) + 1
+            remaining -= header_cost
+            tier_lines = [header]
             for fp in dep_files[:10]:
                 entry = f"- `{fp}`"
                 entry_tokens = _estimate_tokens(entry)