|
| 1 | +# Smart Code Diff - Semantic Analysis Engine |
| 2 | + |
| 3 | +Advanced semantic analysis engine that provides comprehensive symbol resolution, cross-file reference tracking, and scope management for multi-language codebases. |
| 4 | + |
| 5 | +## Features |
| 6 | + |
| 7 | +### 🔍 **Symbol Resolution** |
| 8 | +- **Cross-file symbol lookup** with import resolution |
| 9 | +- **Qualified name resolution** (e.g., `MyClass.myMethod`) |
| 10 | +- **Scope-aware symbol visibility** with shadowing detection |
| 11 | +- **Multi-language support** (Java, Python, JavaScript, C/C++) |
| 12 | + |
| 13 | +### 📊 **Symbol Table Management** |
| 14 | +- **Hierarchical scope tracking** with parent-child relationships |
| 15 | +- **Symbol reference tracking** (declarations, definitions, usages, calls, assignments) |
| 16 | +- **Symbol statistics and metrics** for codebase analysis |
| 17 | +- **Efficient symbol lookup** with caching and optimization |
| 18 | + |
| 19 | +### 🌐 **Import Resolution** |
| 20 | +- **Language-specific import parsing**: |
| 21 | + - Java: `import`, `import static`, wildcard imports |
| 22 | + - Python: `import`, `from...import`, aliases |
| 23 | + - JavaScript: ES6 imports, CommonJS require |
| 24 | + - C/C++: `#include` system and local headers |
| 25 | +- **Cross-file dependency tracking** |
| 26 | +- **Import graph construction** for dependency analysis |
| 27 | + |
| 28 | +### 🎯 **Scope Management** |
| 29 | +- **Hierarchical scope resolution** (global → file → class → function → block) |
| 30 | +- **Symbol shadowing detection** and resolution |
| 31 | +- **Scope analysis metrics** (depth, symbol distribution, etc.) |
| 32 | +- **Context-aware symbol lookup** |
| 33 | + |
| 34 | +## Quick Start |
| 35 | + |
| 36 | +### Basic Symbol Resolution |
| 37 | + |
| 38 | +```rust |
| 39 | +use smart_diff_semantic::{SymbolResolver, SymbolResolverConfig}; |
| 40 | +use smart_diff_parser::{TreeSitterParser, Language}; |
| 41 | + |
| 42 | +// Create resolver with default configuration |
| 43 | +let mut resolver = SymbolResolver::with_defaults(); |
| 44 | +let parser = TreeSitterParser::new()?; |
| 45 | + |
| 46 | +// Parse and process a file |
| 47 | +let code = r#" |
| 48 | +public class Calculator { |
| 49 | + public int add(int a, int b) { |
| 50 | + return a + b; |
| 51 | + } |
| 52 | +} |
| 53 | +"#; |
| 54 | + |
| 55 | +let parse_result = parser.parse(code, Language::Java)?; |
| 56 | +resolver.process_file("Calculator.java", &parse_result)?; |
| 57 | + |
| 58 | +// Find symbols |
| 59 | +let symbol = resolver.find_symbol("Calculator", Some("Calculator.java")); |
| 60 | +println!("Found: {:?}", symbol); |
| 61 | +``` |
| 62 | + |
| 63 | +### Cross-File Resolution |
| 64 | + |
| 65 | +```rust |
| 66 | +// Process multiple files (reuses `resolver` from the previous example; each parse result comes from `parser.parse`) |
| 67 | +let files = vec![ |
| 68 | + ("Interface.java".to_string(), interface_parse_result), |
| 69 | + ("Implementation.java".to_string(), impl_parse_result), |
| 70 | +]; |
| 71 | + |
| 72 | +resolver.process_files(files)?; |
| 73 | + |
| 74 | +// Access import graph |
| 75 | +let import_graph = resolver.get_import_graph(); |
| 76 | +for (file, imports) in import_graph { |
| 77 | + println!("{} imports: {:?}", file, imports); |
| 78 | +} |
| 79 | +``` |
| 80 | + |
| 81 | +### Advanced Scope Management |
| 82 | + |
| 83 | +```rust |
| 84 | +use smart_diff_semantic::{ScopeManager, ScopeType}; |
| 85 | + |
| 86 | +let mut scope_manager = ScopeManager::new(smart_diff_parser::Language::Java); |
| 87 | + |
| 88 | +// Create nested scopes |
| 89 | +let global_scope = scope_manager.create_scope( |
| 90 | + ScopeType::Global, |
| 91 | + "file.java".to_string(), |
| 92 | + 1, 100 |
| 93 | +); |
| 94 | +scope_manager.enter_scope(global_scope); |
| 95 | + |
| 96 | +let class_scope = scope_manager.create_scope( |
| 97 | + ScopeType::Class, |
| 98 | + "file.java".to_string(), |
| 99 | + 5, 50 |
| 100 | +); |
| 101 | +scope_manager.enter_scope(class_scope); |
| 102 | + |
| 103 | +// Resolve symbols with scope awareness |
| 104 | +let resolution = scope_manager.resolve_symbol("myVariable"); |
| 105 | +if let Some(res) = resolution { |
| 106 | + println!("Found {} in scope {} (shadowed: {})", |
| 107 | + res.symbol.name, res.scope_id, res.is_shadowed); |
| 108 | +} |
| 109 | +``` |
| 110 | + |
| 111 | +### Symbol Table Statistics |
| 112 | + |
| 113 | +```rust |
| 114 | +let symbol_table = resolver.get_symbol_table(); |
| 115 | +let stats = symbol_table.get_statistics(); |
| 116 | + |
| 117 | +println!("Total symbols: {}", stats.total_symbols); |
| 118 | +println!("Functions: {}", stats.function_count); |
| 119 | +println!("Classes: {}", stats.class_count); |
| 120 | +println!("Average references per symbol: {:.2}", stats.avg_references_per_symbol); |
| 121 | +``` |
| 122 | + |
| 123 | +## Configuration |
| 124 | + |
| 125 | +### SymbolResolverConfig |
| 126 | + |
| 127 | +```rust |
| 128 | +use smart_diff_semantic::{SymbolResolver, SymbolResolverConfig}; |
| 129 | +use std::collections::HashSet; |
| 130 | + |
| 131 | +let config = SymbolResolverConfig { |
| 132 | + resolve_cross_file: true, // Enable cross-file resolution |
| 133 | + track_usages: true, // Track all symbol usages |
| 134 | + resolve_imports: true, // Resolve import statements |
| 135 | + max_resolution_depth: 10, // Maximum recursion depth |
| 136 | + file_extensions: { // Supported file extensions |
| 137 | + let mut ext = HashSet::new(); |
| 138 | + ext.insert("java".to_string()); |
| 139 | + ext.insert("py".to_string()); |
| 140 | + ext.insert("js".to_string()); |
| 141 | + ext.insert("cpp".to_string()); |
| 142 | + ext.insert("c".to_string()); |
| 143 | + ext |
| 144 | + }, |
| 145 | +}; |
| 146 | + |
| 147 | +let resolver = SymbolResolver::new(config); |
| 148 | +``` |
| 149 | + |
| 150 | +## Symbol Types |
| 151 | + |
| 152 | +The engine recognizes various symbol types: |
| 153 | + |
| 154 | +- **`Function`** - Standalone functions |
| 155 | +- **`Method`** - Class/object methods |
| 156 | +- **`Class`** - Class definitions |
| 157 | +- **`Interface`** - Interface definitions |
| 158 | +- **`Variable`** - Local and global variables |
| 159 | +- **`Constant`** - Constants and final variables |
| 160 | +- **`Parameter`** - Function/method parameters |
| 161 | +- **`Field`** - Class/struct fields |
| 162 | +- **`Module`** - Modules and namespaces |
| 163 | +- **`Namespace`** - Namespace declarations |
| 164 | + |
| 165 | +## Reference Types |
| 166 | + |
| 167 | +Symbol references are categorized by usage: |
| 168 | + |
| 169 | +- **`Declaration`** - Symbol declaration |
| 170 | +- **`Definition`** - Symbol definition (implementation) |
| 171 | +- **`Usage`** - General symbol usage |
| 172 | +- **`Call`** - Function/method calls |
| 173 | +- **`Assignment`** - Variable assignments |
| 174 | + |
| 175 | +## Scope Types |
| 176 | + |
| 177 | +Hierarchical scope management supports: |
| 178 | + |
| 179 | +- **`Global`** - Global/file-level scope |
| 180 | +- **`File`** - File-specific scope |
| 181 | +- **`Class`** - Class/interface scope |
| 182 | +- **`Function`** - Function/method scope |
| 183 | +- **`Block`** - Block-level scope (loops, conditionals) |
| 184 | +- **`Module`** - Module/namespace scope |
| 185 | + |
| 186 | +## Import Resolution |
| 187 | + |
| 188 | +### Java |
| 189 | +```java |
| 190 | +import java.util.List; // Regular import |
| 191 | +import java.util.*; // Wildcard import |
| 192 | +import static java.lang.Math.PI; // Static import |
| 193 | +``` |
| 194 | + |
| 195 | +### Python |
| 196 | +```python |
| 197 | +import os # Module import |
| 198 | +import numpy as np # Import with alias |
| 199 | +from collections import defaultdict # From import |
| 200 | +from datetime import datetime as dt # From import with alias |
| 201 | +``` |
| 202 | + |
| 203 | +### JavaScript |
| 204 | +```javascript |
| 205 | +import React from 'react'; // ES6 default import |
| 206 | +import { useState } from 'react'; // ES6 named import |
| 207 | +import * as React from 'react'; // ES6 namespace (wildcard) import |
| 208 | +const fs = require('fs'); // CommonJS require |
| 209 | +``` |
| 210 | + |
| 211 | +### C/C++ |
| 212 | +```c |
| 213 | +#include <stdio.h> // System header |
| 214 | +#include "myheader.h" // Local header |
| 215 | +``` |
| 216 | + |
| 217 | +## Examples |
| 218 | + |
| 219 | +Run the comprehensive demo: |
| 220 | + |
| 221 | +```bash |
| 222 | +cargo run --example symbol_resolution_demo |
| 223 | +``` |
| 224 | + |
| 225 | +This demonstrates: |
| 226 | +- Basic symbol resolution |
| 227 | +- Cross-file reference tracking |
| 228 | +- Import statement parsing |
| 229 | +- Scope management |
| 230 | +- Symbol statistics |
| 231 | + |
| 232 | +## Testing |
| 233 | + |
| 234 | +Run the test suite: |
| 235 | + |
| 236 | +```bash |
| 237 | +cargo test -p smart-diff-semantic |
| 238 | +``` |
| 239 | + |
| 240 | +Tests cover: |
| 241 | +- Symbol resolution algorithms |
| 242 | +- Import parsing for all languages |
| 243 | +- Scope management and shadowing |
| 244 | +- Cross-file reference resolution |
| 245 | +- Symbol table operations |
| 246 | + |
| 247 | +## Performance |
| 248 | + |
| 249 | +The semantic analysis engine is optimized for: |
| 250 | +- **Memory efficiency** with symbol deduplication |
| 251 | +- **Fast lookups** using hash maps and caching |
| 252 | +- **Scalable processing** for large codebases |
| 253 | +- **Incremental updates** for real-time analysis |
| 254 | + |
| 255 | +## Architecture |
| 256 | + |
| 257 | +```text |
| 258 | +SymbolResolver |
| 259 | +├── SymbolTable (hierarchical symbol storage) |
| 260 | +├── ScopeManager (scope hierarchy management) |
| 261 | +├── ImportResolver (cross-file dependency tracking) |
| 262 | +└── ReferenceTracker (usage and call tracking) |
| 263 | +``` |
| 264 | + |
| 265 | +The engine builds on the parser crate (`smart_diff_parser`), consuming its parse results to provide comprehensive semantic analysis capabilities for the Smart Code Diff tool. |
0 commit comments