rostilos · rostilos · Jan 29, 2026 · Jan 29, 2026 · Jan 30, 2026
diff --git a/...-engine/src/main/java/org/rostilos/codecrow/analysisengine/aiclient/AiAnalysisClient.java b/...-engine/src/main/java/org/rostilos/codecrow/analysisengine/aiclient/AiAnalysisClient.java
@@ -160,7 +160,7 @@ public Map<String, Object> performAnalysis(AiAnalysisRequest request, java.util.
 
     /**
      * Extracts analysis data from nested response structure.
-     * Expected: response -> result -> {comment, issues}
+     * Expected: response -> result -> {comment, issues, inference_stats}
      * Issues can be either a List (array) or Map (object with numeric keys)
      */
     private Map<String, Object> extractAndValidateAnalysisData(Map response) throws IOException {
@@ -176,6 +176,15 @@ private Map<String, Object> extractAndValidateAnalysisData(Map response) throws
             if (result == null) {
                 throw new IOException("Missing 'result' field in AI response");
             }
+
+            // Check for error response from MCP client
+            Object errorFlag = result.get("error");
+            if (Boolean.TRUE.equals(errorFlag) || "true".equals(String.valueOf(errorFlag))) {
+                String errorMessage = result.get("error_message") != null 
+                    ? String.valueOf(result.get("error_message"))
+                    : String.valueOf(result.get("comment"));
+                throw new IOException("Analysis failed: " + errorMessage);
+            }
 
             if (!result.containsKey("comment") || !result.containsKey("issues")) {
                 throw new IOException("Analysis data missing required fields: 'comment' and/or 'issues'");

diff --git a/python-ecosystem/mcp-client/model/models.py b/python-ecosystem/mcp-client/model/models.py
@@ -166,6 +166,9 @@ class CodeReviewIssue(BaseModel):
     suggestedFixDescription: str = Field(description="Description of the suggested fix")
     suggestedFixDiff: Optional[str] = Field(default=None, description="Optional unified diff format patch for the fix")
     isResolved: bool = Field(default=False, description="Whether this issue from previous analysis is resolved")
+    # Resolution tracking fields
+    resolutionExplanation: Optional[str] = Field(default=None, description="Explanation of how the issue was resolved (separate from original reason)")
+    resolvedInCommit: Optional[str] = Field(default=None, description="Commit hash where the issue was resolved")
     # Additional fields preserved from previous issues during reconciliation
     visibility: Optional[str] = Field(default=None, description="Issue visibility status")
     codeSnippet: Optional[str] = Field(default=None, description="Code snippet associated with the issue")

diff --git a/python-ecosystem/mcp-client/service/multi_stage_orchestrator.py b/python-ecosystem/mcp-client/service/multi_stage_orchestrator.py
diff --git a/python-ecosystem/mcp-client/service/rag_client.py b/python-ecosystem/mcp-client/service/rag_client.py
@@ -65,7 +65,9 @@ async def get_pr_context(
         enable_priority_reranking: bool = True,
         min_relevance_score: float = None,
         base_branch: Optional[str] = None,
-        deleted_files: Optional[List[str]] = None
+        deleted_files: Optional[List[str]] = None,
+        pr_number: Optional[int] = None,
+        all_pr_changed_files: Optional[List[str]] = None
     ) -> Dict[str, Any]:
         """
         Get relevant context for PR review with multi-branch support.
@@ -83,6 +85,8 @@ async def get_pr_context(
             min_relevance_score: Minimum relevance threshold (default from RAG_MIN_RELEVANCE_SCORE)
             base_branch: Base branch (PR target, e.g., 'main'). Auto-detected if not provided.
             deleted_files: Files deleted in target branch (excluded from results)
+            pr_number: If set, enables hybrid query with PR-indexed data priority
+            all_pr_changed_files: All files in PR (for exclusion from branch query in hybrid mode)
 
         Returns:
             Dict with context information or empty dict if RAG is disabled
@@ -123,6 +127,12 @@ async def get_pr_context(
                 payload["base_branch"] = base_branch
             if deleted_files:
                 payload["deleted_files"] = deleted_files
+
+            # Add hybrid mode parameters
+            if pr_number:
+                payload["pr_number"] = pr_number
+            if all_pr_changed_files:
+                payload["all_pr_changed_files"] = all_pr_changed_files
 
             client = await self._get_client()
             response = await client.post(
@@ -284,3 +294,109 @@ async def get_deterministic_context(
         except Exception as e:
             logger.error(f"Unexpected error in deterministic RAG query: {e}")
             return {"context": {"chunks": [], "by_identifier": {}, "by_file": {}}}
+
+    # =========================================================================
+    # PR File Indexing Methods (for PR-specific RAG layer)
+    # =========================================================================
+
+    async def index_pr_files(
+        self,
+        workspace: str,
+        project: str,
+        pr_number: int,
+        branch: str,
+        files: List[Dict[str, str]]
+    ) -> Dict[str, Any]:
+        """
+        POST the PR's files to the RAG service's /index/pr-files endpoint for indexing.
+
+        The service is expected to tag the indexed points (pr=true, pr_number=X) so
+        that hybrid queries can prioritize PR data over branch data.
+
+        It is also expected to delete existing points for the same pr_number first.
+
+        Args:
+            workspace: Workspace identifier
+            project: Project identifier
+            pr_number: PR number for metadata tagging
+            branch: Source branch name
+            files: List of {path: str, content: str, change_type: str}
+
+        Returns:
+            Service JSON on success; {"status": "skipped", "chunks_indexed": 0} if RAG is disabled or files is empty; {"status": "error", "error": str} on failure
+        """
+        if not self.enabled:  # RAG switched off: report a no-op instead of calling the service
+            logger.debug("RAG disabled, skipping PR file indexing")
+            return {"status": "skipped", "chunks_indexed": 0}
+
+        if not files:  # covers both None and an empty list
+            logger.debug("No files to index for PR")
+            return {"status": "skipped", "chunks_indexed": 0}
+
+        try:
+            payload = {
+                "workspace": workspace,
+                "project": project,
+                "pr_number": pr_number,
+                "branch": branch,
+                "files": files
+            }
+
+            client = await self._get_client()
+            response = await client.post(
+                f"{self.base_url}/index/pr-files",
+                json=payload,
+                timeout=120.0  # Per-request timeout in seconds; indexing is expected to be slow
+            )
+            response.raise_for_status()  # non-2xx responses are handled by the httpx.HTTPError branch below
+            result = response.json()
+
+            logger.info(f"Indexed PR #{pr_number}: {result.get('chunks_indexed', 0)} chunks from {result.get('files_processed', 0)} files")
+            return result
+
+        except httpx.HTTPError as e:  # best-effort: failures are reported in the result, never raised
+            logger.warning(f"Failed to index PR files: {e}")
+            return {"status": "error", "error": str(e)}
+        except Exception as e:  # anything else (e.g. a non-JSON response body) gets the same error shape
+            logger.error(f"Unexpected error indexing PR files: {e}")
+            return {"status": "error", "error": str(e)}
+
+    async def delete_pr_files(
+        self,
+        workspace: str,
+        project: str,
+        pr_number: int
+    ) -> bool:
+        """
+        Ask the RAG service to delete all indexed points for a specific PR.
+
+        Intended as cleanup of PR-specific data once analysis has completed.
+
+        Args:
+            workspace: Workspace identifier
+            project: Project identifier
+            pr_number: PR number to delete
+
+        Returns:
+            True if the service reports status "deleted" or RAG is disabled; False on any error or other status
+        """
+        if not self.enabled:
+            return True  # a disabled client never indexes, so there is nothing to clean up
+
+        try:
+            client = await self._get_client()
+            response = await client.delete(
+                f"{self.base_url}/index/pr-files/{workspace}/{project}/{pr_number}"  # NOTE(review): segments are interpolated unescaped - confirm identifiers are URL-safe
+            )
+            response.raise_for_status()  # non-2xx responses are handled by the httpx.HTTPError branch below
+            result = response.json()
+
+            logger.info(f"Deleted PR #{pr_number} indexed data")  # NOTE(review): logged before the status check below
+            return result.get("status") == "deleted"
+
+        except httpx.HTTPError as e:  # best-effort: callers get False instead of an exception
+            logger.warning(f"Failed to delete PR files: {e}")
+            return False
+        except Exception as e:  # anything else (e.g. a non-JSON response body) also maps to False
+            logger.error(f"Unexpected error deleting PR files: {e}")
+            return False
diff --git a/python-ecosystem/mcp-client/utils/diff_processor.py b/python-ecosystem/mcp-client/utils/diff_processor.py
@@ -50,7 +50,8 @@ class DiffFile:
     old_path: Optional[str] = None  # For renamed files
     additions: int = 0
     deletions: int = 0
-    content: str = ""
+    content: str = ""  # Diff content (unified diff format)
+    full_content: Optional[str] = None  # Full file content (populated separately if needed)
     hunks: List[str] = field(default_factory=list)
     is_binary: bool = False
     is_skipped: bool = False

diff --git a/python-ecosystem/mcp-client/utils/prompts/prompt_builder.py b/python-ecosystem/mcp-client/utils/prompts/prompt_builder.py
@@ -73,7 +73,8 @@ def build_stage_1_batch_prompt(
         project_rules: str = "",
         rag_context: str = "",
         is_incremental: bool = False,
-        previous_issues: str = ""
+        previous_issues: str = "",
+        all_pr_files: List[str] = None  # All files in this PR for cross-file awareness
     ) -> str:
         """
         Build prompt for Stage 1: Batch File Review.
@@ -103,6 +104,20 @@ def build_stage_1_batch_prompt(
 This is a follow-up review after the PR was updated with new commits.
 The diff above shows ONLY the changes since the last review - focus on these NEW changes.
 For any previous issues listed below, check if they are RESOLVED in the new changes.
+"""
+
+        # Add PR-wide file list for cross-batch awareness
+        pr_files_context = ""
+        if all_pr_files:
+            current_batch_files = [f['path'] for f in files]
+            other_files = [fp for fp in all_pr_files if fp not in current_batch_files]
+            if other_files:
+                pr_files_context = f"""
+## OTHER FILES IN THIS PR (for cross-file awareness)
+This PR also modifies these files (reviewed in other batches):
+{chr(10).join('- ' + fp for fp in other_files[:20])}
+{'... and ' + str(len(other_files) - 20) + ' more files' if len(other_files) > 20 else ''}
+Consider potential interactions with these files when reviewing.
 """
 
         return STAGE_1_BATCH_PROMPT_TEMPLATE.format(
@@ -111,7 +126,8 @@ def build_stage_1_batch_prompt(
             files_context=files_context,
             rag_context=rag_context or "(No additional codebase context available)",
             incremental_instructions=incremental_instructions,
-            previous_issues=previous_issues
+            previous_issues=previous_issues,
+            pr_files_context=pr_files_context
         )
 
     @staticmethod

diff --git a/python-ecosystem/mcp-client/utils/response_parser.py b/python-ecosystem/mcp-client/utils/response_parser.py
@@ -703,21 +703,14 @@ def create_error_response(error_message: str, exception_str: str = "") -> Dict[s
             exception_str: Optional exception details
 
         Returns:
-            Structured error response dictionary with issues as list
+            Structured error response dictionary marked as error (no fake issues)
         """
         full_message = f"{error_message}: {exception_str}" if exception_str else error_message
 
         return {
+            "status": "error",
             "comment": full_message,
-            "issues": [
-                {
-                    "severity": "HIGH",
-                    "category": "ERROR_HANDLING",
-                    "file": "system",
-                    "line": "0",
-                    "reason": full_message,
-                    "suggestedFixDescription": "Check system configuration and connectivity",
-                    "isResolved": False
-                }
-            ]
+            "issues": [],  # Don't create fake issues for errors - let Java handle error state properly
+            "error": True,
+            "error_message": full_message
         }