feat: add estimated_functions to analyze response (function-level indexing)

DevanshuNEU · DevanshuNEU · commit 82ef4ef43362 · 2026-02-28T17:42:29.000-05:00
Indexing is function-level, not file-level, and tier limits are
function-based (2K free, 20K pro, 500K enterprise). The analyze endpoint,
however, only returned file counts, so users couldn't compare a repo
against their limits.

Now each directory entry includes estimated_functions, computed as
file_count * RepoValidator.AVG_FUNCTIONS_PER_FILE (25, the same multiplier
RepoValidator uses for tier checks). The response also includes
total_estimated_functions for the whole repo.

Effect-TS example:
  packages/effect: 958 files, ~23,950 functions
  packages/schema: 203 files, ~5,075 functions
  Total: 1,767 files, ~44,175 functions

A user on the Pro tier (20K limit) can immediately see that they need to
pick a subset.

24 tests pass (1 new for function estimation).
diff --git a/backend/routes/repos.py b/backend/routes/repos.py
@@ -227,15 +227,32 @@ async def _fetch_directory_tree(
                 key = top
             dir_counts[key] = dir_counts.get(key, 0) + 1
 
-    # Build sorted directory list
+    # Indexing is function-level, not file-level. Estimate function counts
+    # using the same multiplier the tier system uses for limit checks.
+    avg_fn = RepoValidator.AVG_FUNCTIONS_PER_FILE  # 25
+
+    # Build sorted directory list with estimated function counts
     directories = sorted(
-        [{"name": d, "path": d, "file_count": c} for d, c in dir_counts.items() if d != "(root)"],
+        [
+            {
+                "name": d, "path": d,
+                "file_count": c,
+                "estimated_functions": c * avg_fn,
+            }
+            for d, c in dir_counts.items() if d != "(root)"
+        ],
         key=lambda x: -x["file_count"],
     )
 
     root_files = dir_counts.get("(root)", 0)
     if root_files > 0:
-        directories.append({"name": "(root files)", "path": ".", "file_count": root_files})
+        directories.append({
+            "name": "(root files)", "path": ".",
+            "file_count": root_files,
+            "estimated_functions": root_files * avg_fn,
+        })
+
+    total_estimated = total_files * avg_fn
 
     # Suggest directory picker for large repos
     suggestion = None
@@ -245,6 +262,7 @@ async def _fetch_directory_tree(
     return {
         "directories": directories,
         "total_files": total_files,
+        "total_estimated_functions": total_estimated,
         "total_directories": len(directories),
         "truncated": truncated,
         "suggestion": suggestion,
diff --git a/backend/tests/test_analyze_repo.py b/backend/tests/test_analyze_repo.py
@@ -130,6 +130,30 @@ async def test_flat_repo_groups_by_top_dir(self):
         assert "src" in dir_names
         assert result["total_files"] == 3  # README.md has no code ext
 
+    @pytest.mark.asyncio
+    async def test_includes_estimated_functions(self):
+        tree = _make_tree([
+            "src/main.py",
+            "src/utils.py",
+            "lib/helpers.ts",
+        ])
+        with patch("routes.repos.httpx.AsyncClient") as mock_client:
+            mock_resp = MagicMock()
+            mock_resp.status_code = 200
+            mock_resp.json.return_value = tree
+            mock_client.return_value.__aenter__ = AsyncMock(return_value=MagicMock(
+                get=AsyncMock(return_value=mock_resp)
+            ))
+
+            result = await _fetch_directory_tree("owner", "repo", "main")
+
+        # 3 files * 25 avg functions per file = 75
+        assert result["total_estimated_functions"] == 75
+        src_dir = next(d for d in result["directories"] if d["name"] == "src")
+        assert src_dir["estimated_functions"] == 50  # 2 files * 25
+        lib_dir = next(d for d in result["directories"] if d["name"] == "lib")
+        assert lib_dir["estimated_functions"] == 25  # 1 file * 25
+
     @pytest.mark.asyncio
     async def test_monorepo_groups_at_package_level(self):
         tree = _make_tree([