Skip to content

Commit f4c375a

Browse files
committed
Check in initial diff engine and semantic analysis
1 parent a6ec271 commit f4c375a

File tree

6 files changed

+208
-50
lines changed

6 files changed

+208
-50
lines changed

crates/parser/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ tree-sitter-java.workspace = true
2121
tree-sitter-python.workspace = true
2222
tree-sitter-javascript.workspace = true
2323
tree-sitter-cpp.workspace = true
24-
tree-sitter-c-sharp.workspace = true
24+
tree-sitter-c.workspace = true
2525

2626
# Additional dependencies
2727
once_cell = "1.19"

crates/parser/src/ast.rs

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,25 +20,33 @@ pub enum NodeType {
2020
Module,
2121
Class,
2222
Interface,
23-
23+
2424
// Functions and methods
2525
Function,
2626
Method,
2727
Constructor,
28-
28+
2929
// Statements
3030
Block,
3131
IfStatement,
3232
WhileLoop,
3333
ForLoop,
34-
34+
ReturnStatement,
35+
ExpressionStatement,
36+
3537
// Expressions
3638
BinaryExpression,
3739
UnaryExpression,
3840
CallExpression,
41+
AssignmentExpression,
3942
Identifier,
4043
Literal,
41-
44+
45+
// Declarations
46+
VariableDeclaration,
47+
ParameterDeclaration,
48+
FieldDeclaration,
49+
4250
// Other
4351
Comment,
4452
Unknown,

crates/parser/src/language.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ pub enum Language {
1212
JavaScript,
1313
TypeScript,
1414
Cpp,
15-
CSharp,
15+
C,
1616
Rust,
1717
Go,
1818
Unknown,
@@ -25,8 +25,8 @@ impl Language {
2525
"py" | "pyw" => Language::Python,
2626
"js" | "jsx" => Language::JavaScript,
2727
"ts" | "tsx" => Language::TypeScript,
28-
"cpp" | "cc" | "cxx" | "c++" | "hpp" | "h" => Language::Cpp,
29-
"cs" => Language::CSharp,
28+
"cpp" | "cc" | "cxx" | "c++" | "hpp" => Language::Cpp,
29+
"c" | "h" => Language::C,
3030
"rs" => Language::Rust,
3131
"go" => Language::Go,
3232
_ => Language::Unknown,
@@ -40,7 +40,7 @@ impl Language {
4040
Language::JavaScript => Some("javascript"),
4141
Language::TypeScript => Some("typescript"),
4242
Language::Cpp => Some("cpp"),
43-
Language::CSharp => Some("c_sharp"),
43+
Language::C => Some("c"),
4444
Language::Rust => Some("rust"),
4545
Language::Go => Some("go"),
4646
Language::Unknown => None,

crates/parser/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
pub mod ast;
77
pub mod function;
88
pub mod language;
9+
pub mod language_config;
910
pub mod matching;
1011
pub mod parser;
1112
pub mod tree_sitter;

crates/parser/src/parser.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ impl Parser for DefaultParser {
5656
Language::Python,
5757
Language::JavaScript,
5858
Language::Cpp,
59-
Language::CSharp,
59+
Language::C,
6060
]
6161
}
6262
}

crates/parser/src/tree_sitter.rs

Lines changed: 189 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -2,87 +2,236 @@
22
33
use crate::ast::{ASTNode, NodeType, NodeMetadata};
44
use crate::language::Language;
5+
use crate::language_config::{LanguageConfig, LANGUAGE_CONFIGS};
56
use crate::parser::{ParseError, ParseResult, Parser};
67
use std::collections::HashMap;
8+
use once_cell::sync::Lazy;
79

810
/// Tree-sitter based parser implementation.
///
/// Holds a map from `Language` to a `tree_sitter::Parser`; `new` fills it
/// with one parser per entry of `LANGUAGE_CONFIGS`.
911
pub struct TreeSitterParser {
1012
parsers: HashMap<Language, tree_sitter::Parser>,
1113
}
1214

15+
/// Global language configurations
16+
static LANGUAGE_CONFIGS: Lazy<HashMap<Language, fn() -> tree_sitter::Language>> = Lazy::new(|| {
17+
let mut configs = HashMap::new();
18+
configs.insert(Language::Java, || tree_sitter_java::language());
19+
configs.insert(Language::Python, || tree_sitter_python::language());
20+
configs.insert(Language::JavaScript, || tree_sitter_javascript::language());
21+
configs.insert(Language::Cpp, || tree_sitter_cpp::language());
22+
configs.insert(Language::C, || tree_sitter_c::language());
23+
configs
24+
});
25+
1326
impl TreeSitterParser {
1427
pub fn new() -> Result<Self, ParseError> {
1528
let mut parsers = HashMap::new();
16-
29+
1730
// Initialize parsers for supported languages
18-
// Note: This is a placeholder - actual tree-sitter integration would be more complex
19-
31+
for (&language, language_fn) in LANGUAGE_CONFIGS.iter() {
32+
let mut parser = tree_sitter::Parser::new();
33+
parser.set_language(language_fn())
34+
.map_err(|e| ParseError::TreeSitterError(format!("Failed to set language {:?}: {}", language, e)))?;
35+
parsers.insert(language, parser);
36+
}
37+
2038
Ok(Self { parsers })
2139
}
40+
41+
/// Get available languages
42+
pub fn supported_languages() -> Vec<Language> {
43+
LANGUAGE_CONFIGS.keys().cloned().collect()
44+
}
2245

2346
fn convert_tree_sitter_node(&self, node: &tree_sitter::Node, source: &str) -> ASTNode {
24-
let node_type = self.map_node_type(node.kind());
47+
let node_kind = node.kind();
48+
let node_type = self.map_node_type(node_kind);
2549
let text = node.utf8_text(source.as_bytes()).unwrap_or("");
26-
50+
51+
let mut attributes = HashMap::new();
52+
53+
// Extract name/identifier information based on node type
54+
self.extract_node_attributes(node, source, &mut attributes);
55+
56+
// Add basic node information
57+
attributes.insert("kind".to_string(), node_kind.to_string());
58+
if !text.trim().is_empty() && text.len() < 100 { // Avoid storing very long text
59+
attributes.insert("text".to_string(), text.trim().to_string());
60+
}
61+
2762
let metadata = NodeMetadata {
28-
line: node.start_position().row,
29-
column: node.start_position().column,
63+
line: node.start_position().row + 1, // Convert to 1-based line numbers
64+
column: node.start_position().column + 1, // Convert to 1-based column numbers
3065
original_text: text.to_string(),
31-
attributes: HashMap::new(),
66+
attributes,
3267
};
33-
68+
3469
let mut ast_node = ASTNode::new(node_type, metadata);
35-
36-
// Convert children
70+
71+
// Convert children, filtering out some noise nodes
3772
for i in 0..node.child_count() {
3873
if let Some(child) = node.child(i) {
39-
ast_node.add_child(self.convert_tree_sitter_node(&child, source));
74+
// Skip certain noise nodes like punctuation
75+
if !self.should_skip_node(child.kind()) {
76+
ast_node.add_child(self.convert_tree_sitter_node(&child, source));
77+
}
4078
}
4179
}
42-
80+
4381
ast_node
4482
}
83+
84+
/// Check if a node should be skipped during AST conversion
85+
fn should_skip_node(&self, kind: &str) -> bool {
86+
matches!(kind,
87+
"(" | ")" | "{" | "}" | "[" | "]" | ";" | "," | "." |
88+
"whitespace" | "comment" // We handle comments separately
89+
)
90+
}
91+
92+
/// Collect parse errors from the tree
93+
fn collect_parse_errors(&self, node: &tree_sitter::Node, source: &str, errors: &mut Vec<String>) {
94+
if node.is_error() {
95+
let text = node.utf8_text(source.as_bytes()).unwrap_or("<error>");
96+
errors.push(format!(
97+
"Parse error at line {}, column {}: {}",
98+
node.start_position().row + 1,
99+
node.start_position().column + 1,
100+
text
101+
));
102+
}
103+
104+
if node.is_missing() {
105+
errors.push(format!(
106+
"Missing node at line {}, column {}",
107+
node.start_position().row + 1,
108+
node.start_position().column + 1
109+
));
110+
}
111+
112+
// Recursively check children
113+
for i in 0..node.child_count() {
114+
if let Some(child) = node.child(i) {
115+
self.collect_parse_errors(&child, source, errors);
116+
}
117+
}
118+
}
45119

46120
fn map_node_type(&self, kind: &str) -> NodeType {
47-
match kind {
48-
"program" | "source_file" => NodeType::Program,
49-
"class_declaration" | "class_definition" => NodeType::Class,
50-
"function_declaration" | "function_definition" | "method_declaration" => NodeType::Function,
51-
"if_statement" => NodeType::IfStatement,
52-
"while_statement" => NodeType::WhileLoop,
53-
"for_statement" => NodeType::ForLoop,
54-
"block" | "compound_statement" => NodeType::Block,
55-
"binary_expression" => NodeType::BinaryExpression,
56-
"unary_expression" => NodeType::UnaryExpression,
57-
"call_expression" => NodeType::CallExpression,
58-
"identifier" => NodeType::Identifier,
59-
"string_literal" | "number_literal" | "boolean_literal" => NodeType::Literal,
60-
"comment" => NodeType::Comment,
61-
_ => NodeType::Unknown,
121+
use crate::language_config::NODE_TYPE_MAPPINGS;
122+
NODE_TYPE_MAPPINGS.get(kind)
123+
.copied()
124+
.unwrap_or(NodeType::Unknown)
125+
}
126+
127+
/// Extract attributes from a tree-sitter node
128+
fn extract_node_attributes(&self, node: &tree_sitter::Node, source: &str, attributes: &mut HashMap<String, String>) {
129+
let node_kind = node.kind();
130+
131+
// Try to extract name/identifier from common field names
132+
for field_name in &["name", "identifier", "declarator", "property"] {
133+
if let Some(name_node) = node.child_by_field_name(field_name) {
134+
if let Ok(name) = name_node.utf8_text(source.as_bytes()) {
135+
attributes.insert("name".to_string(), name.to_string());
136+
break;
137+
}
138+
}
139+
}
140+
141+
// Special handling for different node types
142+
match node_kind {
143+
"call_expression" => {
144+
// Extract function name from call expression
145+
if let Some(function_node) = node.child_by_field_name("function") {
146+
if let Ok(name) = function_node.utf8_text(source.as_bytes()) {
147+
attributes.insert("function_name".to_string(), name.to_string());
148+
}
149+
}
150+
151+
// Extract arguments count
152+
let args_count = node.children(&mut node.walk())
153+
.filter(|child| child.kind() == "arguments")
154+
.map(|args_node| args_node.child_count())
155+
.next()
156+
.unwrap_or(0);
157+
attributes.insert("args_count".to_string(), args_count.to_string());
158+
}
159+
160+
"method_declaration" | "function_declaration" | "function_definition" => {
161+
// Extract parameter count
162+
if let Some(params_node) = node.child_by_field_name("parameters") {
163+
let param_count = params_node.child_count();
164+
attributes.insert("param_count".to_string(), param_count.to_string());
165+
}
166+
167+
// Extract return type if available
168+
if let Some(type_node) = node.child_by_field_name("type") {
169+
if let Ok(return_type) = type_node.utf8_text(source.as_bytes()) {
170+
attributes.insert("return_type".to_string(), return_type.to_string());
171+
}
172+
}
173+
}
174+
175+
"variable_declaration" | "field_declaration" => {
176+
// Extract variable type
177+
if let Some(type_node) = node.child_by_field_name("type") {
178+
if let Ok(var_type) = type_node.utf8_text(source.as_bytes()) {
179+
attributes.insert("type".to_string(), var_type.to_string());
180+
}
181+
}
182+
}
183+
184+
"class_declaration" | "class_definition" => {
185+
// Extract superclass if available
186+
if let Some(superclass_node) = node.child_by_field_name("superclass") {
187+
if let Ok(superclass) = superclass_node.utf8_text(source.as_bytes()) {
188+
attributes.insert("superclass".to_string(), superclass.to_string());
189+
}
190+
}
191+
}
192+
193+
_ => {}
62194
}
63195
}
64196
}
65197

66198
impl Parser for TreeSitterParser {
67199
fn parse(&self, content: &str, language: Language) -> Result<ParseResult, ParseError> {
68-
// Placeholder implementation
69-
// In a real implementation, this would use the appropriate tree-sitter parser
70-
Err(ParseError::UnsupportedLanguage(language))
200+
let parser = self.parsers.get(&language)
201+
.ok_or_else(|| ParseError::UnsupportedLanguage(language.clone()))?;
202+
203+
// Parse the content
204+
let tree = parser.parse(content, None)
205+
.ok_or_else(|| ParseError::ParseFailed("Failed to parse content".to_string()))?;
206+
207+
let root_node = tree.root_node();
208+
209+
// Convert tree-sitter tree to our AST
210+
let ast = self.convert_tree_sitter_node(&root_node, content);
211+
212+
// Collect any parse errors
213+
let mut errors = Vec::new();
214+
let mut warnings = Vec::new();
215+
216+
if root_node.has_error() {
217+
self.collect_parse_errors(&root_node, content, &mut errors);
218+
}
219+
220+
Ok(ParseResult {
221+
ast,
222+
language,
223+
errors,
224+
warnings,
225+
})
71226
}
72-
227+
73228
fn parse_file<P: AsRef<std::path::Path>>(&self, path: P) -> Result<ParseResult, ParseError> {
74229
let content = std::fs::read_to_string(&path)?;
75230
let language = crate::language::LanguageDetector::detect(&path, &content);
76231
self.parse(&content, language)
77232
}
78-
233+
79234
fn supported_languages(&self) -> Vec<Language> {
80-
vec![
81-
Language::Java,
82-
Language::Python,
83-
Language::JavaScript,
84-
Language::Cpp,
85-
Language::CSharp,
86-
]
235+
Self::supported_languages()
87236
}
88237
}

0 commit comments

Comments
 (0)