Skip to content

Commit 82ef4ef

Browse files
committed
feat: add estimated_functions to analyze response (function-level indexing)
Indexing is function-level, not file-level, and tier limits are function-based (2K free, 20K pro, 500K enterprise). However, the analyze endpoint only returned file counts, so users couldn't compare a repo against their limits.

Now each directory entry includes estimated_functions (file_count * 25, the same multiplier RepoValidator uses for tier checks). The response also includes total_estimated_functions for the whole repo.

Effect-TS example:
- packages/effect: 958 files, ~23,950 functions
- packages/schema: 203 files, ~5,075 functions
- Total: 1,767 files, ~44,175 functions

A user on the Pro tier (20K limit) can immediately see they need to pick a subset.

24 tests pass (1 new for function estimation).
1 parent 44cce98 commit 82ef4ef

2 files changed

Lines changed: 45 additions & 3 deletions

File tree

backend/routes/repos.py

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -227,15 +227,32 @@ async def _fetch_directory_tree(
227227
key = top
228228
dir_counts[key] = dir_counts.get(key, 0) + 1
229229

230-
# Build sorted directory list
230+
# Indexing is function-level, not file-level. Estimate function counts
231+
# using the same multiplier the tier system uses for limit checks.
232+
avg_fn = RepoValidator.AVG_FUNCTIONS_PER_FILE # 25
233+
234+
# Build sorted directory list with estimated function counts
231235
directories = sorted(
232-
[{"name": d, "path": d, "file_count": c} for d, c in dir_counts.items() if d != "(root)"],
236+
[
237+
{
238+
"name": d, "path": d,
239+
"file_count": c,
240+
"estimated_functions": c * avg_fn,
241+
}
242+
for d, c in dir_counts.items() if d != "(root)"
243+
],
233244
key=lambda x: -x["file_count"],
234245
)
235246

236247
root_files = dir_counts.get("(root)", 0)
237248
if root_files > 0:
238-
directories.append({"name": "(root files)", "path": ".", "file_count": root_files})
249+
directories.append({
250+
"name": "(root files)", "path": ".",
251+
"file_count": root_files,
252+
"estimated_functions": root_files * avg_fn,
253+
})
254+
255+
total_estimated = total_files * avg_fn
239256

240257
# Suggest directory picker for large repos
241258
suggestion = None
@@ -245,6 +262,7 @@ async def _fetch_directory_tree(
245262
return {
246263
"directories": directories,
247264
"total_files": total_files,
265+
"total_estimated_functions": total_estimated,
248266
"total_directories": len(directories),
249267
"truncated": truncated,
250268
"suggestion": suggestion,

backend/tests/test_analyze_repo.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,30 @@ async def test_flat_repo_groups_by_top_dir(self):
130130
assert "src" in dir_names
131131
assert result["total_files"] == 3 # README.md has no code ext
132132

133+
@pytest.mark.asyncio
134+
async def test_includes_estimated_functions(self):
135+
tree = _make_tree([
136+
"src/main.py",
137+
"src/utils.py",
138+
"lib/helpers.ts",
139+
])
140+
with patch("routes.repos.httpx.AsyncClient") as mock_client:
141+
mock_resp = MagicMock()
142+
mock_resp.status_code = 200
143+
mock_resp.json.return_value = tree
144+
mock_client.return_value.__aenter__ = AsyncMock(return_value=MagicMock(
145+
get=AsyncMock(return_value=mock_resp)
146+
))
147+
148+
result = await _fetch_directory_tree("owner", "repo", "main")
149+
150+
# 3 files * 25 avg functions per file = 75
151+
assert result["total_estimated_functions"] == 75
152+
src_dir = next(d for d in result["directories"] if d["name"] == "src")
153+
assert src_dir["estimated_functions"] == 50 # 2 files * 25
154+
lib_dir = next(d for d in result["directories"] if d["name"] == "lib")
155+
assert lib_dir["estimated_functions"] == 25 # 1 file * 25
156+
133157
@pytest.mark.asyncio
134158
async def test_monorepo_groups_at_package_level(self):
135159
tree = _make_tree([

0 commit comments

Comments
 (0)