Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 22 additions & 4 deletions codebase_rag/parsers/call_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -501,9 +501,19 @@ def _process_calls_in_functions(
call_name_cache=call_name_cache,
)
continue
if func_qn := self._build_nested_qualified_name(
func_node, module_qn, func_name, lang_config
):
# (H) A C++ free function inside a namespace is bound by the definition
# (H) pass via build_qualified_name (qn `module.ns.fn`); _build_nested...
# (H) ignores namespace_definition ancestors and would drop the namespace
# (H) (`module.fn`), dangling the CALLS source. Use the same builder so
# (H) caller and node qns agree.
func_qn = (
cpp_utils.build_qualified_name(func_node, module_qn, func_name)
if language == cs.SupportedLanguage.CPP
else self._build_nested_qualified_name(
func_node, module_qn, func_name, lang_config
)
)
if func_qn:
filtered = (
self._filter_calls_in_node(all_call_nodes, call_starts, func_node)
if all_call_nodes is not None and call_starts is not None
Expand Down Expand Up @@ -744,7 +754,15 @@ def _process_calls_in_classes(
class_name = self._get_class_name_for_node(class_node, language)
if not class_name:
continue
class_qn = f"{module_qn}{cs.SEPARATOR_DOT}{class_name}"
# (H) A C++ class inside a namespace is bound by the definition pass via
# (H) build_qualified_name (qn `module.ns.Class`); the bare join would drop
# (H) the namespace, dangling every inline method's CALLS source. Use the
# (H) same builder so the class qn (and thus method caller qns) agree.
class_qn = (
cpp_utils.build_qualified_name(class_node, module_qn, class_name)
if language == cs.SupportedLanguage.CPP
else f"{module_qn}{cs.SEPARATOR_DOT}{class_name}"
)
if body_node := class_node.child_by_field_name(cs.FIELD_BODY):
self._process_methods_in_class(
body_node,
Expand Down
77 changes: 77 additions & 0 deletions codebase_rag/tests/test_cpp_namespace_call_caller_qn.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
from __future__ import annotations

from pathlib import Path
from unittest.mock import MagicMock

from codebase_rag.tests.conftest import (
get_nodes,
get_qualified_names,
get_relationships,
run_updater,
)

# (H) A free function and an inline class method, both inside a namespace, each
# (H) calling a namespaced free function. The definition pass binds their nodes
# (H) WITH the enclosing namespace (qn `...ns.free_caller`, `...ns.K.method`), but
# (H) the call pass built the caller qn WITHOUT the namespace (`...free_caller`,
# (H) `...K.method`), so every such CALLS edge's source dangled (matched no node)
# (H) and the call was lost. On real namespaced C++ (e.g. all of leveldb, in
# (H) `namespace leveldb`) this silently dropped the bulk of cross-file calls. The
# (H) caller qn must include the enclosing namespace, matching the node.
CPP_SOURCE = """
namespace acme {

int callee(int x) { return x + 1; }

int free_caller(int a) { return callee(a); }

class K {
public:
int method(int b) { return callee(b); }
};

} // namespace acme
"""


def test_namespaced_callers_attribute_calls_to_namespaced_qn(
    temp_repo: Path,
    mock_ingestor: MagicMock,
) -> None:
    # (H) Write the namespaced fixture into a fresh project dir and ingest it.
    project = temp_repo / "cpp_ns_calls"
    project.mkdir()
    (project / "sample.cpp").write_text(CPP_SOURCE, encoding="utf-8")

    run_updater(project, mock_ingestor)

    def first_qn_with_suffix(label: str, suffix: str) -> str | None:
        # (H) First qualified name among the `label` nodes ending in `suffix`,
        # (H) or None when no such node was recorded.
        for qn in get_qualified_names(get_nodes(mock_ingestor, label)):
            if qn.endswith(suffix):
                return qn
        return None

    free_qn = first_qn_with_suffix("Function", ".acme.free_caller")
    method_qn = first_qn_with_suffix("Method", ".acme.K.method")
    assert free_qn is not None, "no ns.free_caller Function node"
    assert method_qn is not None, "no ns.K.method Method node"

    # (H) ensure_relationship_batch(from_spec, rel_type, to_spec): from_spec[2] is
    # (H) the caller qn, to_spec[2] the callee qn. Collect every caller qn whose
    # (H) CALLS edge targets a qn ending in `.callee`.
    callers_of_callee = set()
    for call in get_relationships(mock_ingestor, "CALLS"):
        if str(call.args[2][2]).endswith(".callee"):
            callers_of_callee.add(call.args[0][2])

    # (H) Both namespaced callers must appear as CALLS sources under the exact qn
    # (H) of their node.
    for caller_qn in (free_qn, method_qn):
        assert caller_qn in callers_of_callee, (
            f"expected CALLS from {caller_qn} to callee; got {sorted(callers_of_callee)}"
        )
75 changes: 75 additions & 0 deletions codebase_rag/tests/test_cpp_retrieval_eval.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
from pathlib import Path

import pytest

from evals import constants as ec
from evals.cpp_retrieval import (
cgr_cpp_call_edges,
oracle_cpp_call_edges,
score_cpp_retrieval,
)
from evals.oracles import cpp_available

# (H) Skip marker for the oracle-backed tests below: the decorated test is skipped
# (H) whenever cpp_available() returns a falsy value.
needs_clang = pytest.mark.skipif(
    not cpp_available(), reason="libclang (clang.cindex) not importable"
)


def _make_project(root: Path) -> None:
    # (H) Create `root` (parents included, reuse allowed) and write the two-file
    # (H) C++ fixture into it.
    root.mkdir(parents=True, exist_ok=True)
    # (H) The sources carry no #includes, so the fixture parses cleanly whether or
    # (H) not an SDK libc++ is discoverable and coverage stays deterministic in any
    # (H) CI. Every decl sits inside a namespace, exercising the namespaced
    # (H) caller-qn path (free functions and an inline method) that the libclang
    # (H) oracle grades cgr against.
    lib_source = (
        "namespace demo {\n"
        "int add(int a, int b) { return a + b; }\n"
        "int mul(int a, int b) { return a * b; }\n"
        "int orphan(int a) { return a; }\n"
        "}\n"
    )
    main_source = (
        "namespace demo {\n"
        "int add(int a, int b);\n"
        "int mul(int a, int b);\n"
        "int compute(int x) { return add(x, x) + mul(x, x); }\n"
        "class Runner {\n"
        " public:\n"
        " int run(int x) { return compute(x); }\n"
        "};\n"
        "}\n"
    )
    sources = {"lib.cc": lib_source, "main.cc": main_source}
    for filename, text in sources.items():
        root.joinpath(filename).write_text(text, encoding="utf-8")


@needs_clang
def test_oracle_captures_first_party_cpp_calls(tmp_path: Path) -> None:
    _make_project(tmp_path)
    edges, declared, covered = oracle_cpp_call_edges(tmp_path)

    # (H) add(), mul() (in compute), compute() (in Runner::run) are first-party.
    for callee_name in ("add", "mul", "compute"):
        assert ("main.cc", callee_name) in edges
    # (H) orphan is defined but never called -> never a call edge.
    assert ("lib.cc", "orphan") not in edges

    expected_declared = {"add", "mul", "compute", "run", "orphan"}
    assert expected_declared <= declared
    # (H) Both header-free sources parse cleanly, so both are graded.
    expected_covered = {"main.cc", "lib.cc"}
    assert expected_covered <= covered


@needs_clang
def test_cgr_matches_oracle_on_clean_cpp_project(tmp_path: Path) -> None:
    # (H) On the header-free fixture, cgr's call edges must equal the oracle's,
    # (H) with cgr restricted to the oracle's declared names and covered files.
    _make_project(tmp_path)
    oracle_edges, declared_names, covered_files = oracle_cpp_call_edges(tmp_path)
    cgr_edges = cgr_cpp_call_edges(
        tmp_path, tmp_path.name, declared_names, covered_files
    )
    assert cgr_edges == oracle_edges


def test_score_cpp_retrieval_prf() -> None:
    # (H) The two edge sets share one edge and each holds one edge the other
    # (H) lacks, so the graded row must report tp, fp and fn of 1 each.
    first_edges = {("a.cc", "f"), ("a.cc", "g")}
    second_edges = {("a.cc", "f"), ("b.cc", "h")}
    result = score_cpp_retrieval(first_edges, second_edges)

    row = next(
        filter(lambda r: r["label"] == ec.CPP_RETRIEVAL_LABEL, result.rows)
    )
    counts = tuple(row[key] for key in ("tp", "fp", "fn"))
    assert counts == (1, 1, 1)
70 changes: 70 additions & 0 deletions evals/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -630,6 +630,76 @@ parse and drops the following declaration, present through the latest 0.24.2);
tracked in issue #555 for an upstream report. It is rooted in the grammar, not in
cgr's resolution logic, so it is reported here, not hidden.

## Multi-language retrieval (C++) — C++ CALLS vs `libclang`

The same harness applied to C++: for each first-party C++ function or member
function, which files call it. cgr's C++ `CALLS` edges, reduced to
`(caller_file, callee_simple_name)`, are graded against call sites extracted by
`libclang`, over the same first-party name universe (free functions, function
templates, and member functions; constructors/destructors are excluded because
cgr models object creation as `INSTANTIATES`). cgr parses C++ with tree-sitter by
default (`CPP_FRONTEND=libclang` is off), so `libclang` is an independent oracle.
Overloads collapse under the `(file, simple-name)` metric, so they need no
disambiguation.

```bash
uv run python -m evals.cpp_retrieval --target <cpp-sources> --define LEVELDB_PLATFORM_POSIX=1
```

Requires `libclang`. C++ standard headers must be parsed by a `libclang` whose
clang version matches the active SDK's `libc++`; the bundled pip wheel's older
clang cannot, so the oracle prefers a system `libclang`
(`/Library/Developer/CommandLineTools/usr/lib/libclang.dylib` on macOS) and pins
it before the first parse. No `compile_commands.json` is needed: each source is
parsed directly with the SDK sysroot, the SDK's `libc++` headers (which must
precede clang's builtin resource headers), and every first-party header directory
added as an include path. A build normally supplies platform macros (e.g.
`LEVELDB_PLATFORM_POSIX`); pass them with `--define`. A translation unit that still
emits an error diagnostic **abstains**: it is left out of the covered set, the cgr
side is graded on the same covered files only, and the graded count is logged.
To avoid crediting or
penalizing calls whose simple name merely collides with a first-party symbol, the
oracle grades a call only when `libclang` resolves its callee to a **first-party
declaration** (`child.referenced`), so a `std::string::size()` call is never
counted as a first-party `size` edge. Pinned by
`codebase_rag/tests/test_cpp_retrieval_eval.py`, where cgr's C++ call graph matches
the `libclang` oracle on a header-free namespaced fixture.

Running it on a real project (`leveldb`, 40 of 42 core sources parsed cleanly; the
other two are Windows-only or need gmock) gives precision **0.96**, recall
**0.82**, F1 0.88 — recall up from **0.54** before the fix below.

**The dominant gap was a real cgr bug: the call pass dropped the namespace from
the caller qn.** The definition pass binds a C++ free function or class inside a
`namespace` to a namespaced qualified name (`module.ns.fn`, `module.ns.Class`),
but the call pass built the enclosing caller's qn without the namespace
(`module.fn`, `module.Class.method`). Every such `CALLS` edge's source therefore
pointed at a node that does not exist (904 of 1227 C++ call sources dangled on
`leveldb`, whose code all lives in `namespace leveldb`), so the call never
attached. The fix
routes both the free-function and class qns through the same
`cpp_utils.build_qualified_name` the definition pass uses, so caller and node qns
always agree (RED test `test_cpp_namespace_call_caller_qn.py`). Dangling sources
fell to 251 and recall rose 0.54 → 0.82.

The remaining tail is documented, not scoped away:

- **Operator overloads** (`operator=` ×25, `operator[]`, `operator==`/`!=`):
`libclang` records `a = b` and `a[i]` as calls to the overloaded operator
methods, while cgr models them as `builtin.cpp.*` operator calls — a metric
difference, not a misresolution.
- **Trie-fallback misresolution of external calls** (the ~30 false positives:
`size`, `data`, `empty`, `clear`, `begin`, `end`): when a call's simple name
collides with a first-party method, cgr's name-only trie fallback binds the
external `std::` call to the same-named first-party method. The oracle correctly
treats it as external, so it surfaces as a cgr false positive.
- **Receiver-type method dispatch and out-of-line static methods** (`DB::Open`):
resolving `obj->method()` to the right class needs C++ receiver type inference
(C++ is not yet in the typed-language set that builds a local-variable type
map), the same deeper gap as the Go/Java/Rust tails.

The last two share one root cause: cgr has no C++ receiver type inference, so it
resolves member calls by name alone. The eval keeps surfacing this gap; it is left
as follow-on work, not hidden.

## Semantic search — query to function relevance

cgr's semantic search embeds each function's source and retrieves by cosine
Expand Down
23 changes: 23 additions & 0 deletions evals/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,29 @@ class Category(StrEnum):
C_RETRIEVAL_TITLE = "cgr multi-language retrieval: C CALLS vs libclang oracle"
C_CALL_EDGE_REPR = "{file} -> {name}"

CPP_SOURCE_GLOBS: tuple[str, ...] = ("*.cc", "*.cpp", "*.cxx")
CPP_HEADER_GLOBS: tuple[str, ...] = ("*.h", "*.hpp", "*.hh", "*.hxx")
# (H) Derived from the two glob tuples (leading `*` stripped, sources first, then
# (H) headers) so the suffix list cannot drift from the globs when an extension is
# (H) added or removed.
CPP_SUFFIXES: tuple[str, ...] = tuple(
    pattern.removeprefix("*") for pattern in (*CPP_SOURCE_GLOBS, *CPP_HEADER_GLOBS)
)
CLANG_CPP_STD = "-std=c++17"
CLANG_CPP_LANG_FLAG = "-x"
CLANG_CPP_LANG = "c++"
CLANG_DEFINE_FLAG = "-D"
# (H) Apple ships a libclang whose version matches the active macOS SDK's libc++,
# (H) which the pip `libclang` wheel does not; C++ standard headers need that match
# (H) to parse. Probed in order; first existing path wins, else the bundled default.
LIBCLANG_CANDIDATES: tuple[str, ...] = (
"/Library/Developer/CommandLineTools/usr/lib/libclang.dylib",
)
# (H) libc++ headers live under <sdk>/usr/include/c++/v1 and MUST precede the clang
# (H) builtin resource headers, else libc++'s <cstddef> finds the C <stddef.h> first.
CLANG_LIBCXX_SUBPATH = "usr/include/c++/v1"
CPP_RETRIEVAL_SCORES_FILENAME = "cpp_retrieval_scores.csv"
CPP_RETRIEVAL_DIFF_FILENAME = "cpp_retrieval_diff.json"
CPP_RETRIEVAL_DIFF_PREFIX = "cpp-retrieval:"
CPP_RETRIEVAL_LABEL = "graph"
CPP_RETRIEVAL_TITLE = "cgr multi-language retrieval: C++ CALLS vs libclang oracle"
CPP_CALL_EDGE_REPR = "{file} -> {name}"

# (H) Semantic-search relevance eval: does cgr's embedding ranking retrieve the
# (H) right function for a natural-language query? Uses cgr's own embedder over
# (H) function source extracted from the captured graph; graded as recall@k on
Expand Down
Loading
Loading