Skip to content

Commit a6ec271

Browse files
committed
Check in initial diff engine and semantic analysis
1 parent 5fa2989 commit a6ec271

File tree

13 files changed

+1390
-1
lines changed

13 files changed

+1390
-1
lines changed

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ tree-sitter-java = "0.20"
3232
tree-sitter-python = "0.20"
3333
tree-sitter-javascript = "0.20"
3434
tree-sitter-cpp = "0.20"
35-
tree-sitter-c-sharp = "0.20"
35+
tree-sitter-c = "0.20"
3636

3737
# CLI dependencies
3838
clap = { version = "4.0", features = ["derive"] }

crates/diff-engine/src/changes.rs

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
//! Change classification and analysis
2+
3+
use smart_diff_parser::{Change, ChangeType, CodeElement};
4+
use serde::{Deserialize, Serialize};
5+
6+
/// Stateless classifier that assigns a `ChangeType` to a detected change
/// and offers simple name-based heuristics for split/merge detection.
pub struct ChangeClassifier;
8+
9+
impl ChangeClassifier {
10+
/// Classify a change based on its characteristics
11+
pub fn classify_change(&self, source: Option<&CodeElement>, target: Option<&CodeElement>) -> ChangeType {
12+
match (source, target) {
13+
(None, Some(_)) => ChangeType::Add,
14+
(Some(_), None) => ChangeType::Delete,
15+
(Some(src), Some(tgt)) => {
16+
if src.name != tgt.name {
17+
if src.file_path != tgt.file_path {
18+
ChangeType::CrossFileMove
19+
} else {
20+
ChangeType::Rename
21+
}
22+
} else if src.file_path != tgt.file_path {
23+
ChangeType::CrossFileMove
24+
} else if src.start_line != tgt.start_line {
25+
ChangeType::Move
26+
} else {
27+
ChangeType::Modify
28+
}
29+
}
30+
(None, None) => ChangeType::Modify, // Shouldn't happen
31+
}
32+
}
33+
34+
/// Detect if a change represents a function split
35+
pub fn detect_split(&self, source: &CodeElement, targets: &[CodeElement]) -> bool {
36+
targets.len() > 1 && targets.iter().all(|t| {
37+
t.name.contains(&source.name) || source.name.contains(&t.name)
38+
})
39+
}
40+
41+
/// Detect if changes represent a function merge
42+
pub fn detect_merge(&self, sources: &[CodeElement], target: &CodeElement) -> bool {
43+
sources.len() > 1 && sources.iter().all(|s| {
44+
s.name.contains(&target.name) || target.name.contains(&s.name)
45+
})
46+
}
47+
}

crates/diff-engine/src/engine.rs

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
//! Main diff engine
2+
3+
use crate::matching::FunctionMatcher;
4+
use crate::tree_edit::{TreeEditDistance, EditCost};
5+
use crate::changes::ChangeClassifier;
6+
use crate::refactoring::RefactoringDetector;
7+
use smart_diff_parser::{Function, MatchResult};
8+
use thiserror::Error;
9+
use serde::{Deserialize, Serialize};
10+
11+
/// Main diff engine that orchestrates the comparison process
12+
pub struct DiffEngine {
13+
function_matcher: FunctionMatcher,
14+
tree_edit_distance: TreeEditDistance,
15+
change_classifier: ChangeClassifier,
16+
refactoring_detector: RefactoringDetector,
17+
}
18+
19+
/// Result of diff computation
20+
#[derive(Debug, Serialize, Deserialize)]
21+
pub struct DiffResult {
22+
pub match_result: MatchResult,
23+
pub refactoring_patterns: Vec<crate::refactoring::RefactoringPattern>,
24+
pub execution_time_ms: u64,
25+
pub statistics: DiffStatistics,
26+
}
27+
28+
/// Statistics about the diff computation
29+
#[derive(Debug, Serialize, Deserialize)]
30+
pub struct DiffStatistics {
31+
pub functions_compared: usize,
32+
pub functions_matched: usize,
33+
pub functions_added: usize,
34+
pub functions_removed: usize,
35+
pub functions_modified: usize,
36+
pub average_similarity: f64,
37+
}
38+
39+
/// Diff engine errors
40+
#[derive(Error, Debug)]
41+
pub enum DiffError {
42+
#[error("Comparison failed: {0}")]
43+
ComparisonFailed(String),
44+
45+
#[error("Invalid input: {0}")]
46+
InvalidInput(String),
47+
48+
#[error("Processing error: {0}")]
49+
ProcessingError(String),
50+
}
51+
52+
impl DiffEngine {
53+
pub fn new() -> Self {
54+
Self {
55+
function_matcher: FunctionMatcher::new(0.7),
56+
tree_edit_distance: TreeEditDistance::new(EditCost::default()),
57+
change_classifier: ChangeClassifier,
58+
refactoring_detector: RefactoringDetector::new(),
59+
}
60+
}
61+
62+
/// Compare two sets of functions
63+
pub fn compare_functions(&self, source_functions: &[Function], target_functions: &[Function]) -> Result<DiffResult, DiffError> {
64+
let start_time = std::time::Instant::now();
65+
66+
// Match functions
67+
let match_result = self.function_matcher.match_functions(source_functions, target_functions);
68+
69+
// Detect refactoring patterns
70+
let refactoring_patterns = self.refactoring_detector.detect_patterns(&match_result.changes);
71+
72+
// Calculate statistics
73+
let statistics = self.calculate_statistics(source_functions, target_functions, &match_result);
74+
75+
let execution_time_ms = start_time.elapsed().as_millis() as u64;
76+
77+
Ok(DiffResult {
78+
match_result,
79+
refactoring_patterns,
80+
execution_time_ms,
81+
statistics,
82+
})
83+
}
84+
85+
fn calculate_statistics(&self, source: &[Function], target: &[Function], match_result: &MatchResult) -> DiffStatistics {
86+
let functions_compared = source.len() + target.len();
87+
let functions_matched = match_result.mapping.len();
88+
let functions_added = match_result.unmatched_target.len();
89+
let functions_removed = match_result.unmatched_source.len();
90+
let functions_modified = match_result.changes.iter()
91+
.filter(|c| matches!(c.change_type, smart_diff_parser::ChangeType::Modify))
92+
.count();
93+
94+
DiffStatistics {
95+
functions_compared,
96+
functions_matched,
97+
functions_added,
98+
functions_removed,
99+
functions_modified,
100+
average_similarity: match_result.similarity,
101+
}
102+
}
103+
}

crates/diff-engine/src/matching.rs

Lines changed: 197 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,197 @@
1+
//! Function matching algorithms
2+
3+
use smart_diff_parser::{Function, MatchResult};
4+
use serde::{Deserialize, Serialize};
5+
use std::collections::HashMap;
6+
7+
/// Pairs functions from a source set with functions from a target set based
/// on their computed similarity.
pub struct FunctionMatcher {
    /// Minimum overall similarity a pair must reach to be accepted as a match.
    threshold: f64,
}
11+
12+
/// Similarity score between two functions
13+
#[derive(Debug, Clone, Serialize, Deserialize)]
14+
pub struct SimilarityScore {
15+
pub signature_similarity: f64,
16+
pub body_similarity: f64,
17+
pub context_similarity: f64,
18+
pub overall_similarity: f64,
19+
}
20+
21+
impl FunctionMatcher {
22+
pub fn new(threshold: f64) -> Self {
23+
Self { threshold }
24+
}
25+
26+
/// Match functions between two sets using Hungarian algorithm
27+
pub fn match_functions(&self, source_functions: &[Function], target_functions: &[Function]) -> MatchResult {
28+
let mut result = MatchResult::new();
29+
30+
if source_functions.is_empty() && target_functions.is_empty() {
31+
result.similarity = 1.0;
32+
return result;
33+
}
34+
35+
// Calculate similarity matrix
36+
let similarity_matrix = self.calculate_similarity_matrix(source_functions, target_functions);
37+
38+
// Apply Hungarian algorithm for optimal matching
39+
let matches = self.hungarian_matching(&similarity_matrix);
40+
41+
// Process matches and create result
42+
self.process_matches(source_functions, target_functions, &matches, &mut result);
43+
44+
result.calculate_similarity();
45+
result
46+
}
47+
48+
fn calculate_similarity_matrix(&self, source: &[Function], target: &[Function]) -> Vec<Vec<f64>> {
49+
let mut matrix = Vec::new();
50+
51+
for source_func in source {
52+
let mut row = Vec::new();
53+
for target_func in target {
54+
let similarity = self.calculate_function_similarity(source_func, target_func);
55+
row.push(similarity.overall_similarity);
56+
}
57+
matrix.push(row);
58+
}
59+
60+
matrix
61+
}
62+
63+
/// Calculate similarity between two functions
64+
pub fn calculate_function_similarity(&self, func1: &Function, func2: &Function) -> SimilarityScore {
65+
// Signature similarity (40% weight)
66+
let signature_similarity = func1.signature.similarity(&func2.signature);
67+
68+
// Body similarity using AST structure (40% weight)
69+
let body_similarity = self.calculate_ast_similarity(&func1.body, &func2.body);
70+
71+
// Context similarity (20% weight) - based on surrounding functions, calls, etc.
72+
let context_similarity = self.calculate_context_similarity(func1, func2);
73+
74+
// Weighted overall similarity
75+
let overall_similarity =
76+
signature_similarity * 0.4 +
77+
body_similarity * 0.4 +
78+
context_similarity * 0.2;
79+
80+
SimilarityScore {
81+
signature_similarity,
82+
body_similarity,
83+
context_similarity,
84+
overall_similarity,
85+
}
86+
}
87+
88+
fn calculate_ast_similarity(&self, ast1: &smart_diff_parser::ASTNode, ast2: &smart_diff_parser::ASTNode) -> f64 {
89+
// Simple structural similarity based on node types and tree structure
90+
if ast1.node_type != ast2.node_type {
91+
return 0.0;
92+
}
93+
94+
if ast1.children.is_empty() && ast2.children.is_empty() {
95+
return 1.0;
96+
}
97+
98+
if ast1.children.len() != ast2.children.len() {
99+
return 0.5; // Partial similarity for different child counts
100+
}
101+
102+
let mut total_similarity = 0.0;
103+
for (child1, child2) in ast1.children.iter().zip(ast2.children.iter()) {
104+
total_similarity += self.calculate_ast_similarity(child1, child2);
105+
}
106+
107+
total_similarity / ast1.children.len() as f64
108+
}
109+
110+
fn calculate_context_similarity(&self, func1: &Function, func2: &Function) -> f64 {
111+
// Compare function calls, dependencies, etc.
112+
let calls1 = func1.extract_function_calls();
113+
let calls2 = func2.extract_function_calls();
114+
115+
if calls1.is_empty() && calls2.is_empty() {
116+
return 1.0;
117+
}
118+
119+
let common_calls = calls1.iter()
120+
.filter(|call| calls2.contains(call))
121+
.count();
122+
123+
let total_calls = calls1.len().max(calls2.len());
124+
if total_calls > 0 {
125+
common_calls as f64 / total_calls as f64
126+
} else {
127+
1.0
128+
}
129+
}
130+
131+
fn hungarian_matching(&self, similarity_matrix: &[Vec<f64>]) -> Vec<(usize, usize)> {
132+
// Placeholder implementation - in reality would use Hungarian algorithm
133+
let mut matches = Vec::new();
134+
135+
for (i, row) in similarity_matrix.iter().enumerate() {
136+
if let Some((j, &similarity)) = row.iter().enumerate().max_by(|a, b| a.1.partial_cmp(b.1).unwrap()) {
137+
if similarity >= self.threshold {
138+
matches.push((i, j));
139+
}
140+
}
141+
}
142+
143+
matches
144+
}
145+
146+
fn process_matches(&self, source: &[Function], target: &[Function],
147+
matches: &[(usize, usize)], result: &mut MatchResult) {
148+
let mut matched_source = std::collections::HashSet::new();
149+
let mut matched_target = std::collections::HashSet::new();
150+
151+
for &(source_idx, target_idx) in matches {
152+
let source_func = &source[source_idx];
153+
let target_func = &target[target_idx];
154+
155+
result.mapping.insert(source_func.hash.clone(), target_func.hash.clone());
156+
matched_source.insert(source_idx);
157+
matched_target.insert(target_idx);
158+
159+
// Create change record if functions are different
160+
let similarity = self.calculate_function_similarity(source_func, target_func);
161+
if similarity.overall_similarity < 1.0 {
162+
let change = smart_diff_parser::Change::new(
163+
smart_diff_parser::ChangeType::Modify,
164+
format!("Function '{}' modified (similarity: {:.2})",
165+
source_func.signature.name, similarity.overall_similarity)
166+
).with_confidence(similarity.overall_similarity);
167+
168+
result.changes.push(change);
169+
}
170+
}
171+
172+
// Record unmatched functions
173+
for (i, func) in source.iter().enumerate() {
174+
if !matched_source.contains(&i) {
175+
result.unmatched_source.push(func.hash.clone());
176+
177+
let change = smart_diff_parser::Change::new(
178+
smart_diff_parser::ChangeType::Delete,
179+
format!("Function '{}' deleted", func.signature.name)
180+
);
181+
result.changes.push(change);
182+
}
183+
}
184+
185+
for (i, func) in target.iter().enumerate() {
186+
if !matched_target.contains(&i) {
187+
result.unmatched_target.push(func.hash.clone());
188+
189+
let change = smart_diff_parser::Change::new(
190+
smart_diff_parser::ChangeType::Add,
191+
format!("Function '{}' added", func.signature.name)
192+
);
193+
result.changes.push(change);
194+
}
195+
}
196+
}
197+
}

0 commit comments

Comments
 (0)