Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ public Map<String, Object> performAnalysis(AiAnalysisRequest request, java.util.

/**
* Extracts analysis data from nested response structure.
* Expected: response -> result -> {comment, issues}
* Expected: response -> result -> {comment, issues, inference_stats}
* Issues can be either a List (array) or Map (object with numeric keys)
*/
private Map<String, Object> extractAndValidateAnalysisData(Map response) throws IOException {
Expand All @@ -176,6 +176,15 @@ private Map<String, Object> extractAndValidateAnalysisData(Map response) throws
if (result == null) {
throw new IOException("Missing 'result' field in AI response");
}

// Check for error response from MCP client
Object errorFlag = result.get("error");
if (Boolean.TRUE.equals(errorFlag) || "true".equals(String.valueOf(errorFlag))) {
String errorMessage = result.get("error_message") != null
? String.valueOf(result.get("error_message"))
: String.valueOf(result.get("comment"));
throw new IOException("Analysis failed: " + errorMessage);
}

if (!result.containsKey("comment") || !result.containsKey("issues")) {
throw new IOException("Analysis data missing required fields: 'comment' and/or 'issues'");
Expand Down
3 changes: 3 additions & 0 deletions python-ecosystem/mcp-client/model/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,9 @@ class CodeReviewIssue(BaseModel):
suggestedFixDescription: str = Field(description="Description of the suggested fix")
suggestedFixDiff: Optional[str] = Field(default=None, description="Optional unified diff format patch for the fix")
isResolved: bool = Field(default=False, description="Whether this issue from previous analysis is resolved")
# Resolution tracking fields
resolutionExplanation: Optional[str] = Field(default=None, description="Explanation of how the issue was resolved (separate from original reason)")
resolvedInCommit: Optional[str] = Field(default=None, description="Commit hash where the issue was resolved")
# Additional fields preserved from previous issues during reconciliation
visibility: Optional[str] = Field(default=None, description="Issue visibility status")
codeSnippet: Optional[str] = Field(default=None, description="Code snippet associated with the issue")
Expand Down
243 changes: 206 additions & 37 deletions python-ecosystem/mcp-client/service/multi_stage_orchestrator.py

Large diffs are not rendered by default.

118 changes: 117 additions & 1 deletion python-ecosystem/mcp-client/service/rag_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,9 @@ async def get_pr_context(
enable_priority_reranking: bool = True,
min_relevance_score: float = None,
base_branch: Optional[str] = None,
deleted_files: Optional[List[str]] = None
deleted_files: Optional[List[str]] = None,
pr_number: Optional[int] = None,
all_pr_changed_files: Optional[List[str]] = None
) -> Dict[str, Any]:
"""
Get relevant context for PR review with multi-branch support.
Expand All @@ -83,6 +85,8 @@ async def get_pr_context(
min_relevance_score: Minimum relevance threshold (default from RAG_MIN_RELEVANCE_SCORE)
base_branch: Base branch (PR target, e.g., 'main'). Auto-detected if not provided.
deleted_files: Files deleted in target branch (excluded from results)
pr_number: If set, enables hybrid query with PR-indexed data priority
all_pr_changed_files: All files in PR (for exclusion from branch query in hybrid mode)

Returns:
Dict with context information or empty dict if RAG is disabled
Expand Down Expand Up @@ -123,6 +127,12 @@ async def get_pr_context(
payload["base_branch"] = base_branch
if deleted_files:
payload["deleted_files"] = deleted_files

# Add hybrid mode parameters
if pr_number:
payload["pr_number"] = pr_number
if all_pr_changed_files:
payload["all_pr_changed_files"] = all_pr_changed_files

client = await self._get_client()
response = await client.post(
Expand Down Expand Up @@ -284,3 +294,109 @@ async def get_deterministic_context(
except Exception as e:
logger.error(f"Unexpected error in deterministic RAG query: {e}")
return {"context": {"chunks": [], "by_identifier": {}, "by_file": {}}}

# =========================================================================
# PR File Indexing Methods (for PR-specific RAG layer)
# =========================================================================

async def index_pr_files(
    self,
    workspace: str,
    project: str,
    pr_number: int,
    branch: str,
    files: List[Dict[str, str]]
) -> Dict[str, Any]:
    """
    Index PR files into the main collection with PR-specific metadata.

    Files are indexed with metadata (pr=true, pr_number=X) to enable
    hybrid queries that prioritize PR data over branch data.

    Existing PR points for the same pr_number are deleted first.

    This method never raises: transport and unexpected failures are
    logged and reported through the returned dict instead.

    Args:
        workspace: Workspace identifier
        project: Project identifier
        pr_number: PR number for metadata tagging
        branch: Source branch name
        files: List of {path: str, content: str, change_type: str}

    Returns:
        Dict with indexing status and chunk counts:
          - RAG disabled or no files:
            {"status": "skipped", "chunks_indexed": 0}
          - request failed:
            {"status": "error", "error": <message>, "chunks_indexed": 0}
          - otherwise: the JSON body returned by the indexing endpoint,
            passed through unchanged.
    """
    if not self.enabled:
        logger.debug("RAG disabled, skipping PR file indexing")
        return {"status": "skipped", "chunks_indexed": 0}

    if not files:
        logger.debug("No files to index for PR")
        return {"status": "skipped", "chunks_indexed": 0}

    try:
        payload = {
            "workspace": workspace,
            "project": project,
            "pr_number": pr_number,
            "branch": branch,
            "files": files
        }

        client = await self._get_client()
        response = await client.post(
            f"{self.base_url}/index/pr-files",
            json=payload,
            timeout=120.0  # Longer timeout for indexing
        )
        response.raise_for_status()
        result = response.json()

        logger.info(f"Indexed PR #{pr_number}: {result.get('chunks_indexed', 0)} chunks from {result.get('files_processed', 0)} files")
        return result

    except httpx.HTTPError as e:
        logger.warning(f"Failed to index PR files: {e}")
        # Keep the same keys as the "skipped" result so callers can read
        # chunks_indexed without special-casing the failure path.
        return {"status": "error", "error": str(e), "chunks_indexed": 0}
    except Exception as e:
        # Deliberate best-effort boundary: indexing problems are reported
        # via the return value rather than propagated to the caller.
        logger.error(f"Unexpected error indexing PR files: {e}")
        return {"status": "error", "error": str(e), "chunks_indexed": 0}

async def delete_pr_files(
    self,
    workspace: str,
    project: str,
    pr_number: int
) -> bool:
    """
    Delete all indexed points for a specific PR.

    Called after analysis completes to clean up PR-specific data.

    This method never raises: transport and unexpected failures are
    logged and reported as False.

    Args:
        workspace: Workspace identifier
        project: Project identifier
        pr_number: PR number to delete

    Returns:
        True if RAG is disabled (nothing to clean up) or the service
        answered with status "deleted"; False otherwise.
    """
    # Function-scope import so the block does not depend on the file's
    # top-level import list.
    from urllib.parse import quote

    if not self.enabled:
        return True

    # Percent-encode the identifiers: they are path segments, so a raw
    # '/', '?', '#' or space would otherwise change the request target.
    workspace_segment = quote(str(workspace), safe="")
    project_segment = quote(str(project), safe="")
    url = (
        f"{self.base_url}/index/pr-files/"
        f"{workspace_segment}/{project_segment}/{pr_number}"
    )

    try:
        client = await self._get_client()
        response = await client.delete(url)
        response.raise_for_status()
        result = response.json()

        deleted = result.get("status") == "deleted"
        if deleted:
            logger.info(f"Deleted PR #{pr_number} indexed data")
        else:
            # Do not claim success when the service did not confirm it.
            logger.warning(f"PR #{pr_number} indexed data not confirmed deleted: {result}")
        return deleted

    except httpx.HTTPError as e:
        logger.warning(f"Failed to delete PR files: {e}")
        return False
    except Exception as e:
        # Deliberate best-effort boundary: cleanup failures must not
        # break the caller's flow.
        logger.error(f"Unexpected error deleting PR files: {e}")
        return False
3 changes: 2 additions & 1 deletion python-ecosystem/mcp-client/utils/diff_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,8 @@ class DiffFile:
old_path: Optional[str] = None # For renamed files
additions: int = 0
deletions: int = 0
content: str = ""
content: str = "" # Diff content (unified diff format)
full_content: Optional[str] = None # Full file content (populated separately if needed)
hunks: List[str] = field(default_factory=list)
is_binary: bool = False
is_skipped: bool = False
Expand Down
20 changes: 18 additions & 2 deletions python-ecosystem/mcp-client/utils/prompts/prompt_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,8 @@ def build_stage_1_batch_prompt(
project_rules: str = "",
rag_context: str = "",
is_incremental: bool = False,
previous_issues: str = ""
previous_issues: str = "",
all_pr_files: List[str] = None # All files in this PR for cross-file awareness
) -> str:
"""
Build prompt for Stage 1: Batch File Review.
Expand Down Expand Up @@ -103,6 +104,20 @@ def build_stage_1_batch_prompt(
This is a follow-up review after the PR was updated with new commits.
The diff above shows ONLY the changes since the last review - focus on these NEW changes.
For any previous issues listed below, check if they are RESOLVED in the new changes.
"""

# Add PR-wide file list for cross-batch awareness
pr_files_context = ""
if all_pr_files:
current_batch_files = [f['path'] for f in files]
other_files = [fp for fp in all_pr_files if fp not in current_batch_files]
if other_files:
pr_files_context = f"""
## OTHER FILES IN THIS PR (for cross-file awareness)
This PR also modifies these files (reviewed in other batches):
{chr(10).join('- ' + fp for fp in other_files[:20])}
{'... and ' + str(len(other_files) - 20) + ' more files' if len(other_files) > 20 else ''}
Consider potential interactions with these files when reviewing.
"""

return STAGE_1_BATCH_PROMPT_TEMPLATE.format(
Expand All @@ -111,7 +126,8 @@ def build_stage_1_batch_prompt(
files_context=files_context,
rag_context=rag_context or "(No additional codebase context available)",
incremental_instructions=incremental_instructions,
previous_issues=previous_issues
previous_issues=previous_issues,
pr_files_context=pr_files_context
)

@staticmethod
Expand Down
17 changes: 5 additions & 12 deletions python-ecosystem/mcp-client/utils/response_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -703,21 +703,14 @@ def create_error_response(error_message: str, exception_str: str = "") -> Dict[s
exception_str: Optional exception details

Returns:
Structured error response dictionary with issues as list
Structured error response dictionary marked as error (no fake issues)
"""
full_message = f"{error_message}: {exception_str}" if exception_str else error_message

return {
"status": "error",
"comment": full_message,
"issues": [
{
"severity": "HIGH",
"category": "ERROR_HANDLING",
"file": "system",
"line": "0",
"reason": full_message,
"suggestedFixDescription": "Check system configuration and connectivity",
"isResolved": False
}
]
"issues": [], # Don't create fake issues for errors - let Java handle error state properly
"error": True,
"error_message": full_message
}
Loading