diff --git a/desloppify/languages/csharp/_parse_helpers.py b/desloppify/languages/csharp/_parse_helpers.py index 1703f3bd9..6df98573e 100644 --- a/desloppify/languages/csharp/_parse_helpers.py +++ b/desloppify/languages/csharp/_parse_helpers.py @@ -28,20 +28,39 @@ def find_matching_brace(content: str, open_pos: int) -> int | None: depth = 0 in_string: str | None = None escape = False - for i in range(open_pos, len(content)): + i = open_pos + length = len(content) + while i < length: ch = content[i] if in_string: if escape: escape = False + i += 1 continue if ch == "\\": escape = True + i += 1 continue if ch == in_string: in_string = None + i += 1 continue if ch in ("'", '"'): in_string = ch + i += 1 + continue + if ch == "/" and i + 1 < length and content[i + 1] == "*": + i += 2 + while i + 1 < length: + if content[i] == "*" and content[i + 1] == "/": + i += 2 + break + i += 1 + continue + if ch == "/" and i + 1 < length and content[i + 1] == "/": + i += 2 + while i < length and content[i] != "\n": + i += 1 continue if ch == "{": depth += 1 @@ -49,6 +68,7 @@ def find_matching_brace(content: str, open_pos: int) -> int | None: depth -= 1 if depth == 0: return i + i += 1 return None diff --git a/desloppify/languages/csharp/tests/test_csharp_parse_helpers.py b/desloppify/languages/csharp/tests/test_csharp_parse_helpers.py index 1585fbb82..00cac8b47 100644 --- a/desloppify/languages/csharp/tests/test_csharp_parse_helpers.py +++ b/desloppify/languages/csharp/tests/test_csharp_parse_helpers.py @@ -50,6 +50,14 @@ def test_empty_body(self): content = "{}" assert find_matching_brace(content, 0) == 1 + def test_skips_block_comments_with_unbalanced_braces(self): + content = "{ /* } }} {{{ */ return 1; }" + assert find_matching_brace(content, 0) == len(content) - 1 + + def test_skips_line_comments_with_unbalanced_braces(self): + content = "{ // } }}\n return 1; }" + assert find_matching_brace(content, 0) == len(content) - 1 + class TestFindExpressionEnd: def test_simple(self): diff --git a/desloppify/languages/cxx/_parse_helpers.py b/desloppify/languages/cxx/_parse_helpers.py index 8cbc9b30d..477d8fe69 100644 --- a/desloppify/languages/cxx/_parse_helpers.py +++ b/desloppify/languages/cxx/_parse_helpers.py @@ -8,20 +8,39 @@ def find_matching_brace(content: str, open_pos: int) -> int | None: depth = 0 in_string: str | None = None escape = False - for i in range(open_pos, len(content)): + i = open_pos + length = len(content) + while i < length: ch = content[i] if in_string: if escape: escape = False + i += 1 continue if ch == "\\": escape = True + i += 1 continue if ch == in_string: in_string = None + i += 1 continue if ch in ("'", '"'): in_string = ch + i += 1 + continue + if ch == "/" and i + 1 < length and content[i + 1] == "*": + i += 2 + while i + 1 < length: + if content[i] == "*" and content[i + 1] == "/": + i += 2 + break + i += 1 + continue + if ch == "/" and i + 1 < length and content[i + 1] == "/": + i += 2 + while i < length and content[i] != "\n": + i += 1 continue if ch == "{": depth += 1 @@ -29,6 +48,7 @@ def find_matching_brace(content: str, open_pos: int) -> int | None: depth -= 1 if depth == 0: return i + i += 1 return None diff --git a/desloppify/languages/cxx/tests/test_extractors.py b/desloppify/languages/cxx/tests/test_extractors.py index cb00f64a7..a785453ba 100644 --- a/desloppify/languages/cxx/tests/test_extractors.py +++ b/desloppify/languages/cxx/tests/test_extractors.py @@ -43,4 +43,30 @@ def test_find_cxx_files_includes_common_header_only_extensions(tmp_path): def test_cxx_extractors_use_local_brace_helper(): - assert cxx_extractors.find_matching_brace.__module__ == "desloppify.languages.cxx._parse_helpers" \ No newline at end of file + assert cxx_extractors.find_matching_brace.__module__ == "desloppify.languages.cxx._parse_helpers" + + +def test_extract_function_with_unbalanced_brace_in_comment(tmp_path): + source = tmp_path / "test.cpp" + source.write_text( + """void validateInput() { + /* Validate against old schema: + { "type": "required" } + See ticket #1234 for context } + */ + if (input.isValid()) { + return; + } + throw std::runtime_error("Invalid"); +} +""", + encoding="utf-8", + ) + + functions = extract_all_cxx_functions([str(source)]) + + assert len(functions) == 1 + func = functions[0] + assert func.name == "validateInput" + assert func.end_line == 10 + assert "input.isValid()" in func.body diff --git a/desloppify/languages/dart/tests/test_extractors.py b/desloppify/languages/dart/tests/test_extractors.py new file mode 100644 index 000000000..a328fd2a3 --- /dev/null +++ b/desloppify/languages/dart/tests/test_extractors.py @@ -0,0 +1,32 @@ +"""Tests for Dart regex-based function extraction.""" + +from __future__ import annotations + +from desloppify.languages.dart.extractors import extract_dart_functions + + +def test_extract_dart_functions_ignores_unbalanced_braces_inside_comments(tmp_path): + source = tmp_path / "lib" / "app.dart" + source.parent.mkdir(parents=True, exist_ok=True) + source.write_text( + """void validateInput() { + /* Validate against old schema: + { "type": "required" } + See ticket #1234 for context } + */ + if (input.isValid()) { + return; + } + throw StateError("Invalid"); +} +""", + encoding="utf-8", + ) + + functions = extract_dart_functions(str(source)) + + assert len(functions) == 1 + func = functions[0] + assert func.name == "validateInput" + assert func.end_line == 10 + assert "input.isValid()" in func.body diff --git a/desloppify/languages/rust/extractors.py b/desloppify/languages/rust/extractors.py index dc51a5e1f..8efa15ca4 100644 --- a/desloppify/languages/rust/extractors.py +++ b/desloppify/languages/rust/extractors.py @@ -81,7 +81,8 @@ def _find_matching_brace(content: str, open_pos: int) -> int | None: depth = 0 in_string: str | None = None i = open_pos - while i < len(content): + length = len(content) + while i < length: char = content[i] if in_string: if char == "\\": @@ -95,6 +96,19 @@ def _find_matching_brace(content: str, open_pos: int) -> int | None: in_string = char i += 1 continue + if char == "/" and i + 1 < length and content[i + 1] == "*": + i += 2 + while i + 1 < length: + if content[i] == "*" and content[i + 1] == "/": + i += 2 + break + i += 1 + continue + if char == "/" and i + 1 < length and content[i + 1] == "/": + i += 2 + while i < length and content[i] != "\n": + i += 1 + continue if char == "{": depth += 1 elif char == "}": diff --git a/desloppify/languages/rust/tests/test_extractors.py b/desloppify/languages/rust/tests/test_extractors.py index de23a3449..5a11c7e92 100644 --- a/desloppify/languages/rust/tests/test_extractors.py +++ b/desloppify/languages/rust/tests/test_extractors.py @@ -75,3 +75,30 @@ def test_normalize_rust_body_strips_comments_and_logging(): assert "// comment" not in normalized assert "println!" not in normalized assert "let value = 1;" in normalized + + +def test_extract_rust_functions_ignores_unbalanced_braces_inside_comments(tmp_path): + filepath = _write( + tmp_path, + "src/lib.rs", + """ +pub fn validate_input() { + /* Validate against old schema: + { "type": "required" } + See ticket #1234 for context } + */ + if input_is_valid() { + return; + } + panic!("Invalid"); +} +""", + ) + + functions = extract_rust_functions(filepath) + + assert len(functions) == 1 + func = functions[0] + assert func.name == "validate_input" + assert func.end_line == 11 + assert "input_is_valid()" in func.body