Skip to content

Commit 6354dcc

Browse files
committed
Merge branch 'main' into feat/repo-cards-redesign
2 parents a29e143 + cdc7c47 commit 6354dcc

9 files changed

Lines changed: 999 additions & 43 deletions

backend/requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ hiredis>=2.3.0
1818
tree-sitter>=0.23.0
1919
tree-sitter-python>=0.23.0
2020
tree-sitter-javascript>=0.23.0
21+
tree-sitter-typescript>=0.23.0
2122

2223
# AI/ML
2324
openai>=1.54.0

backend/routes/analysis.py

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -21,19 +21,21 @@ class ImpactRequest(BaseModel):
2121
@router.get("/{repo_id}/dependencies")
2222
async def get_dependency_graph(
2323
repo_id: str,
24+
force: bool = False,
2425
auth: AuthContext = Depends(require_auth)
2526
):
26-
"""Get dependency graph for repository."""
27+
"""Get dependency graph for repository. Use force=true to rebuild from scratch."""
2728
try:
2829
repo = get_repo_or_404(repo_id, auth.user_id)
2930

30-
cached_graph = dependency_analyzer.load_from_cache(repo_id)
31-
if cached_graph:
32-
logger.debug("Using cached dependency graph", repo_id=repo_id)
33-
return {**cached_graph, "cached": True}
31+
if not force:
32+
cached_graph = dependency_analyzer.load_from_cache(repo_id)
33+
if cached_graph:
34+
logger.debug("Using cached dependency graph", repo_id=repo_id)
35+
return {**cached_graph, "cached": True}
3436

35-
logger.info("Building fresh dependency graph", repo_id=repo_id)
36-
graph_data = dependency_analyzer.build_dependency_graph(repo["local_path"])
37+
logger.info("Building fresh dependency graph", repo_id=repo_id, include_paths=repo.get("include_paths"))
38+
graph_data = dependency_analyzer.build_dependency_graph(repo["local_path"], include_paths=repo.get("include_paths"))
3739
dependency_analyzer.save_to_cache(repo_id, graph_data)
3840

3941
return {**graph_data, "cached": False}
@@ -64,7 +66,7 @@ async def analyze_impact(
6466
graph_data = dependency_analyzer.load_from_cache(repo_id)
6567
if not graph_data:
6668
logger.info("Building dependency graph for impact analysis", repo_id=repo_id)
67-
graph_data = dependency_analyzer.build_dependency_graph(repo["local_path"])
69+
graph_data = dependency_analyzer.build_dependency_graph(repo["local_path"], include_paths=repo.get("include_paths"))
6870
dependency_analyzer.save_to_cache(repo_id, graph_data)
6971

7072
impact = dependency_analyzer.get_file_impact(
@@ -94,7 +96,7 @@ async def get_repository_insights(
9496
graph_data = dependency_analyzer.load_from_cache(repo_id)
9597
if not graph_data:
9698
logger.info("Building dependency graph for insights", repo_id=repo_id)
97-
graph_data = dependency_analyzer.build_dependency_graph(repo["local_path"])
99+
graph_data = dependency_analyzer.build_dependency_graph(repo["local_path"], include_paths=repo.get("include_paths"))
98100
dependency_analyzer.save_to_cache(repo_id, graph_data)
99101

100102
return {

backend/routes/repos.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -602,6 +602,14 @@ async def _run_async_indexing(
602602

603603
repo_manager.update_status(repo_id, "indexing")
604604

605+
# Persist include_paths (or clear it if re-indexing full repo)
606+
from services.supabase_service import get_supabase_service
607+
db = get_supabase_service()
608+
db.update_repository(repo_id, {"include_paths": include_paths})
609+
610+
# Clear stale dependency cache so next graph build uses new include_paths
611+
db.clear_file_dependencies(repo_id)
612+
605613
# Publish initial progress to confirm connection
606614
if publisher:
607615
publisher.publish_progress(repo_id, 0, 1, 0, "Starting...")

backend/services/dependency_analyzer.py

Lines changed: 51 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,11 @@
99
# Tree-sitter
1010
import tree_sitter_python as tspython
1111
import tree_sitter_javascript as tsjavascript
12+
try:
13+
import tree_sitter_typescript as tstypescript
14+
_HAS_TS_PARSER = True
15+
except ModuleNotFoundError:
16+
_HAS_TS_PARSER = False
1217
from tree_sitter import Language, Parser
1318

1419
from services.observability import logger, metrics
@@ -18,13 +23,24 @@ class DependencyAnalyzer:
1823
"""Analyze code dependencies and build dependency graph"""
1924

2025
def __init__(self):
21-
# Initialize parsers
26+
js_lang = Language(tsjavascript.language())
27+
# Use proper TS parser if available, fall back to JS parser
28+
if _HAS_TS_PARSER:
29+
ts_lang = Language(tstypescript.language_typescript())
30+
tsx_lang = Language(tstypescript.language_tsx())
31+
else:
32+
logger.warning("tree-sitter-typescript not installed, falling back to JS parser for TS/TSX")
33+
ts_lang = js_lang
34+
tsx_lang = js_lang
35+
36+
self.has_ts_parser = _HAS_TS_PARSER
2237
self.parsers = {
2338
'python': Parser(Language(tspython.language())),
24-
'javascript': Parser(Language(tsjavascript.language())),
25-
'typescript': Parser(Language(tsjavascript.language())),
39+
'javascript': Parser(js_lang),
40+
'typescript': Parser(ts_lang),
41+
'tsx': Parser(tsx_lang),
2642
}
27-
logger.info("DependencyAnalyzer initialized")
43+
logger.info("DependencyAnalyzer initialized", ts_parser=self.has_ts_parser)
2844

2945
def _detect_language(self, file_path: str) -> str:
3046
"""Detect language from file extension"""
@@ -34,7 +50,7 @@ def _detect_language(self, file_path: str) -> str:
3450
'.js': 'javascript',
3551
'.jsx': 'javascript',
3652
'.ts': 'typescript',
37-
'.tsx': 'typescript',
53+
'.tsx': 'tsx',
3854
}
3955
return lang_map.get(ext, 'unknown')
4056

@@ -122,10 +138,22 @@ def analyze_file_dependencies(self, file_path: str) -> Dict:
122138
logger.error("Error analyzing file", file_path=file_path, error=str(e))
123139
return {"file": str(file_path), "imports": [], "language": language, "error": str(e)}
124140

125-
def build_dependency_graph(self, repo_path: str) -> Dict:
126-
"""Build complete dependency graph for repository"""
141+
def build_dependency_graph(self, repo_path: str, include_paths: List[str] = None) -> Dict:
142+
"""Build dependency graph. If include_paths set, only analyze those dirs."""
127143
repo_path = Path(repo_path)
128144

145+
# Sanitize include_paths from DB (could be corrupt jsonb)
146+
if include_paths:
147+
cleaned = []
148+
for p in include_paths:
149+
if not isinstance(p, str):
150+
continue
151+
p = p.replace('\\', '/').strip().strip('/')
152+
if not p or '..' in p.split('/'):
153+
continue
154+
cleaned.append(p)
155+
include_paths = cleaned or None
156+
129157
# Discover code files
130158
code_files = []
131159
extensions = {'.py', '.js', '.jsx', '.ts', '.tsx'}
@@ -136,8 +164,16 @@ def build_dependency_graph(self, repo_path: str) -> Dict:
136164
continue
137165
if any(skip in file_path.parts for skip in skip_dirs):
138166
continue
139-
if file_path.suffix in extensions:
140-
code_files.append(file_path)
167+
if file_path.suffix not in extensions:
168+
continue
169+
if include_paths:
170+
rel_parts = file_path.relative_to(repo_path).parts
171+
if not any(
172+
rel_parts[:len(Path(p).parts)] == Path(p).parts
173+
for p in include_paths
174+
):
175+
continue
176+
code_files.append(file_path)
141177

142178
logger.info("Building dependency graph", file_count=len(code_files))
143179

@@ -236,6 +272,10 @@ def _resolve_import_to_file(
236272
source_path = Path(source_file)
237273
source_dir = source_path.parent
238274

275+
# TS imports use .js extension but actual file is .ts on disk
276+
if import_path.endswith('.js') or import_path.endswith('.jsx'):
277+
import_path = re.sub(r'\.(jsx?)$', '', import_path)
278+
239279
# Relative imports
240280
if import_path.startswith('.'):
241281
clean_import = import_path.lstrip('./')
@@ -250,7 +290,7 @@ def _resolve_import_to_file(
250290
else:
251291
potential_base = source_dir / clean_import
252292

253-
extensions = ['', '.ts', '.tsx', '.js', '.jsx', '.py']
293+
extensions = ['', '.ts', '.tsx', '.d.ts', '.js', '.jsx', '.py']
254294

255295
for ext in extensions:
256296
# Build the potential path
@@ -269,7 +309,7 @@ def _resolve_import_to_file(
269309
if not import_path.startswith('.'):
270310
module_path = import_path.replace('.', '/')
271311

272-
for ext in ['.py', '.js', '.ts']:
312+
for ext in ['', '.ts', '.tsx', '.d.ts', '.js', '.jsx', '.py']:
273313
test_path = module_path + ext
274314
if test_path in internal_files:
275315
return test_path

backend/services/indexer_optimized.py

Lines changed: 22 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -58,30 +58,29 @@ def __init__(self):
5858
# Initialize search enhancer
5959
self.search_enhancer = SearchEnhancer(self.openai_client)
6060

61-
# Initialize Pinecone
62-
pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))
63-
64-
index_name = os.getenv("PINECONE_INDEX_NAME", "codeintel")
65-
66-
# Check if index exists and has correct dimensions
67-
existing_indexes = pc.list_indexes().names()
68-
if index_name in existing_indexes:
69-
# Use existing index (dimension already set)
70-
index_info = pc.describe_index(index_name)
71-
logger.info("Using existing Pinecone index", index=index_name, dimension=index_info.dimension)
72-
else:
73-
logger.info("Creating Pinecone index", index=index_name, dimension=EMBEDDING_DIMENSIONS)
74-
pc.create_index(
75-
name=index_name,
76-
dimension=EMBEDDING_DIMENSIONS,
77-
metric="cosine",
78-
spec=ServerlessSpec(
79-
cloud="aws",
80-
region="us-east-1"
61+
# Initialize Pinecone (lazy-safe: app starts even if Pinecone is unreachable)
62+
self.index = None
63+
self._pinecone_index_name = os.getenv("PINECONE_INDEX_NAME", "codeintel")
64+
try:
65+
pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))
66+
existing_indexes = pc.list_indexes().names()
67+
if self._pinecone_index_name in existing_indexes:
68+
index_info = pc.describe_index(self._pinecone_index_name)
69+
logger.info("Using existing Pinecone index", index=self._pinecone_index_name, dimension=index_info.dimension)
70+
else:
71+
logger.info("Creating Pinecone index", index=self._pinecone_index_name, dimension=EMBEDDING_DIMENSIONS)
72+
pc.create_index(
73+
name=self._pinecone_index_name,
74+
dimension=EMBEDDING_DIMENSIONS,
75+
metric="cosine",
76+
spec=ServerlessSpec(
77+
cloud="aws",
78+
region="us-east-1"
79+
)
8180
)
82-
)
83-
84-
self.index = pc.Index(index_name)
81+
self.index = pc.Index(self._pinecone_index_name)
82+
except Exception as e:
83+
logger.error("Pinecone initialization failed -- search/indexing unavailable until reconnect", error=str(e))
8584

8685
# Initialize tree-sitter parsers
8786
self.parsers = {

0 commit comments

Comments
 (0)