@app.exception_handler(Exception)
async def generic_exception_handler(request: Request, exc: Exception):
    """
    Catch-all handler for unhandled exceptions.

    Forwards the exception to ``services.sentry.capture_http_exception`` with
    status code 500 and always answers with a generic JSON 500 body, so no
    internal details leak to the client.

    Args:
        request: The incoming request that triggered the exception.
        exc: The unhandled exception.

    Returns:
        JSONResponse with status 500 and ``{"detail": "Internal server error"}``.
    """
    try:
        # Imported lazily, as in the original handler.
        from services.sentry import capture_http_exception
        capture_http_exception(request, exc, 500)
    except Exception:
        # Error reporting is best-effort: a failure while importing or calling
        # the reporter must not prevent the client from receiving the 500 body.
        # The original error was not reported, so record both it and the
        # reporting failure in the standard log.
        import logging

        fallback_log = logging.getLogger(__name__)
        fallback_log.exception("Failed to report unhandled exception")
        fallback_log.error("Unhandled exception", exc_info=exc)

    return JSONResponse(
        status_code=500,
        content={"detail": "Internal server error"}
    )
{repo_id}") + logger.info("Building fresh dependency graph", repo_id=repo_id) graph_data = dependency_analyzer.build_dependency_graph(repo["local_path"]) dependency_analyzer.save_to_cache(repo_id, graph_data) @@ -62,7 +63,7 @@ async def analyze_impact( # Get or build graph graph_data = dependency_analyzer.load_from_cache(repo_id) if not graph_data: - print(f"🔄 Building dependency graph for impact analysis") + logger.info("Building dependency graph for impact analysis", repo_id=repo_id) graph_data = dependency_analyzer.build_dependency_graph(repo["local_path"]) dependency_analyzer.save_to_cache(repo_id, graph_data) @@ -89,7 +90,7 @@ async def get_repository_insights( # Get or build graph graph_data = dependency_analyzer.load_from_cache(repo_id) if not graph_data: - print(f"🔄 Building dependency graph for insights") + logger.info("Building dependency graph for insights", repo_id=repo_id) graph_data = dependency_analyzer.build_dependency_graph(repo["local_path"]) dependency_analyzer.save_to_cache(repo_id, graph_data) @@ -121,11 +122,11 @@ async def get_style_analysis( # Try cache first cached_style = style_analyzer.load_from_cache(repo_id) if cached_style: - print(f"✅ Using cached code style for {repo_id}") + logger.debug("Using cached code style", repo_id=repo_id) return {**cached_style, "cached": True} # Analyze fresh - print(f"🔄 Analyzing code style for {repo_id}") + logger.info("Analyzing code style", repo_id=repo_id) style_data = style_analyzer.analyze_repository_style(repo["local_path"]) style_analyzer.save_to_cache(repo_id, style_data) diff --git a/backend/routes/playground.py b/backend/routes/playground.py index 51aa40d..2f3b2b0 100644 --- a/backend/routes/playground.py +++ b/backend/routes/playground.py @@ -6,6 +6,7 @@ from dependencies import indexer, cache, repo_manager from services.input_validator import InputValidator +from services.observability import logger router = APIRouter(prefix="/playground", tags=["Playground"]) @@ -39,9 +40,9 @@ async def 
load_demo_repos(): DEMO_REPO_IDS["express"] = repo["id"] elif "react" in name_lower: DEMO_REPO_IDS["react"] = repo["id"] - print(f"📦 Loaded demo repos: {list(DEMO_REPO_IDS.keys())}") + logger.info("Loaded demo repos", repos=list(DEMO_REPO_IDS.keys())) except Exception as e: - print(f"⚠️ Could not load demo repos: {e}") + logger.warning("Could not load demo repos", error=str(e)) def _check_rate_limit(ip: str) -> tuple[bool, int]: diff --git a/backend/routes/repos.py b/backend/routes/repos.py index 4bc0bea..42041ce 100644 --- a/backend/routes/repos.py +++ b/backend/routes/repos.py @@ -12,6 +12,7 @@ ) from services.input_validator import InputValidator from middleware.auth import require_auth, AuthContext +from services.observability import logger, capture_exception router = APIRouter(prefix="/repos", tags=["Repositories"]) @@ -100,7 +101,7 @@ async def index_repository( last_commit = repo_manager.get_last_indexed_commit(repo_id) if incremental and last_commit: - print(f"🔄 Using INCREMENTAL indexing (last: {last_commit[:8]})") + logger.info("Using INCREMENTAL indexing", repo_id=repo_id, last_commit=last_commit[:8]) total_functions = await indexer.incremental_index_repository( repo_id, repo["local_path"], @@ -108,7 +109,7 @@ async def index_repository( ) index_type = "incremental" else: - print(f"📦 Using FULL indexing") + logger.info("Using FULL indexing", repo_id=repo_id) total_functions = await indexer.index_repository(repo_id, repo["local_path"]) index_type = "full" @@ -204,8 +205,10 @@ async def progress_callback(files_processed: int, functions_indexed: int, total_ pass except WebSocketDisconnect: - print(f"WebSocket disconnected for repo {repo_id}") + logger.debug("WebSocket disconnected", repo_id=repo_id) except Exception as e: + logger.error("WebSocket indexing error", repo_id=repo_id, error=str(e)) + capture_exception(e, operation="websocket_indexing", repo_id=repo_id) try: await websocket.send_json({"type": "error", "message": str(e)}) except Exception: diff 
--git a/backend/services/cache.py b/backend/services/cache.py index 11f0c29..a59e74f 100644 --- a/backend/services/cache.py +++ b/backend/services/cache.py @@ -9,6 +9,8 @@ import os from dotenv import load_dotenv +from services.observability import logger, metrics + load_dotenv() # Configuration @@ -30,7 +32,7 @@ def __init__(self): socket_connect_timeout=5, socket_timeout=5 ) - print(f"✅ Redis connected via URL!") + logger.info("Redis connected via URL") else: self.redis = redis.Redis( host=REDIS_HOST, @@ -40,12 +42,12 @@ def __init__(self): socket_connect_timeout=5, socket_timeout=5 ) - print(f"✅ Redis connected to {REDIS_HOST}:{REDIS_PORT}") + logger.info("Redis connected", host=REDIS_HOST, port=REDIS_PORT) # Test connection self.redis.ping() except redis.ConnectionError as e: - print(f"⚠️ Redis not available - running without cache: {e}") + logger.warning("Redis not available - running without cache", error=str(e)) self.redis = None def _make_key(self, prefix: str, *args) -> str: @@ -64,9 +66,12 @@ def get_search_results(self, query: str, repo_id: str) -> Optional[List[Dict]]: key = self._make_key("search", repo_id, query) cached = self.redis.get(key) if cached: + metrics.increment("cache_hits") return json.loads(cached) + metrics.increment("cache_misses") except Exception as e: - print(f"Cache read error: {e}") + logger.error("Cache read error", operation="get_search_results", error=str(e)) + metrics.increment("cache_errors") return None @@ -85,7 +90,8 @@ def set_search_results( key = self._make_key("search", repo_id, query) self.redis.setex(key, ttl, json.dumps(results)) except Exception as e: - print(f"Cache write error: {e}") + logger.error("Cache write error", operation="set_search_results", error=str(e)) + metrics.increment("cache_errors") def get_embedding(self, text: str) -> Optional[List[float]]: """Get cached embedding""" @@ -98,7 +104,8 @@ def get_embedding(self, text: str) -> Optional[List[float]]: if cached: return json.loads(cached) except 
Exception as e: - print(f"Cache read error: {e}") + logger.error("Cache read error", operation="get_embedding", error=str(e)) + metrics.increment("cache_errors") return None @@ -111,7 +118,8 @@ def set_embedding(self, text: str, embedding: List[float], ttl: int = 86400): key = self._make_key("emb", text[:100]) self.redis.setex(key, ttl, json.dumps(embedding)) except Exception as e: - print(f"Cache write error: {e}") + logger.error("Cache write error", operation="set_embedding", error=str(e)) + metrics.increment("cache_errors") def invalidate_repo(self, repo_id: str): """Invalidate all cache for a repository""" @@ -123,6 +131,6 @@ def invalidate_repo(self, repo_id: str): keys = self.redis.keys(pattern) if keys: self.redis.delete(*keys) - print(f"Invalidated {len(keys)} cache entries") + logger.info("Cache invalidated", repo_id=repo_id, keys_removed=len(keys)) except Exception as e: - print(f"Cache invalidation error: {e}") + logger.error("Cache invalidation error", repo_id=repo_id, error=str(e)) diff --git a/backend/services/dependency_analyzer.py b/backend/services/dependency_analyzer.py index f95319d..47d57aa 100644 --- a/backend/services/dependency_analyzer.py +++ b/backend/services/dependency_analyzer.py @@ -11,6 +11,8 @@ import tree_sitter_javascript as tsjavascript from tree_sitter import Language, Parser +from services.observability import logger, capture_exception, track_time, metrics + class DependencyAnalyzer: """Analyze code dependencies and build dependency graph""" @@ -22,7 +24,7 @@ def __init__(self): 'javascript': Parser(Language(tsjavascript.language())), 'typescript': Parser(Language(tsjavascript.language())), } - print("✅ DependencyAnalyzer initialized!") + logger.info("DependencyAnalyzer initialized") def _detect_language(self, file_path: str) -> str: """Detect language from file extension""" @@ -117,7 +119,7 @@ def analyze_file_dependencies(self, file_path: str) -> Dict: } except Exception as e: - print(f"Error analyzing {file_path}: {e}") + 
logger.error("Error analyzing file", file_path=file_path, error=str(e)) return {"file": str(file_path), "imports": [], "language": language, "error": str(e)} def build_dependency_graph(self, repo_path: str) -> Dict: @@ -137,7 +139,7 @@ def build_dependency_graph(self, repo_path: str) -> Dict: if file_path.suffix in extensions: code_files.append(file_path) - print(f"📊 Building dependency graph for {len(code_files)} files...") + logger.info("Building dependency graph", file_count=len(code_files)) # Analyze each file file_dependencies = {} @@ -157,12 +159,12 @@ def build_dependency_graph(self, repo_path: str) -> Dict: # DEBUG: Show sample of what we're working with sample_files = list(internal_files)[:3] - print(f"📁 Sample internal files: {sample_files}") + logger.debug("Sample internal files", sample=sample_files) # Find a file with imports to debug for f, imports in list(file_dependencies.items())[:5]: if imports: - print(f"📄 {f} imports: {imports[:3]}") + logger.debug("Sample file imports", file=f, imports=imports[:3]) break # Create nodes @@ -198,12 +200,13 @@ def build_dependency_graph(self, repo_path: str) -> Dict: else: failed_count += 1 - print(f"🔗 Resolved {resolved_count} internal imports, {failed_count} external") + logger.info("Import resolution complete", resolved=resolved_count, external=failed_count) # Calculate metrics graph_metrics = self._calculate_graph_metrics(file_dependencies, edges) - print(f"✅ Graph built: {len(nodes)} nodes, {len(edges)} edges") + logger.info("Dependency graph built", nodes=len(nodes), edges=len(edges)) + metrics.increment("dependency_graphs_built") return { "nodes": nodes, @@ -440,7 +443,7 @@ def save_to_cache(self, repo_id: str, graph_data: Dict): db.clear_file_dependencies(repo_id) # Bulk insert new dependencies - print(f"💾 Saving {len(file_deps)} file dependencies to Supabase") + logger.info("Saving file dependencies to Supabase", repo_id=repo_id, count=len(file_deps)) db.upsert_file_dependencies(repo_id, file_deps) # Save 
repository insights @@ -457,7 +460,7 @@ def save_to_cache(self, repo_id: str, graph_data: Dict): } db.upsert_repository_insights(repo_id, insights) - print(f"✅ Cached dependency graph for {repo_id} in Supabase") + logger.info("Cached dependency graph in Supabase", repo_id=repo_id) def load_from_cache(self, repo_id: str) -> Dict: """Load dependency graph from Supabase cache""" @@ -467,7 +470,7 @@ def load_from_cache(self, repo_id: str) -> Dict: # Get file dependencies file_deps = db.get_file_dependencies(repo_id) - print(f"🔍 Loading cache for {repo_id}: found {len(file_deps) if file_deps else 0} file dependencies") + logger.debug("Loading cache", repo_id=repo_id, found=len(file_deps) if file_deps else 0) if not file_deps: return None @@ -492,7 +495,7 @@ def load_from_cache(self, repo_id: str) -> Dict: "total_edges": len(edges) } - print(f"✅ Loaded cached dependency graph for {repo_id} from Supabase") + logger.info("Loaded cached dependency graph", repo_id=repo_id) return { "dependencies": dependencies, diff --git a/backend/services/indexer_optimized.py b/backend/services/indexer_optimized.py index 06f1f07..df579ee 100644 --- a/backend/services/indexer_optimized.py +++ b/backend/services/indexer_optimized.py @@ -31,6 +31,9 @@ # Search enhancement from services.search_enhancer import SearchEnhancer +# Observability +from services.observability import logger, trace_operation, track_time, capture_exception, add_breadcrumb, metrics + load_dotenv() # Configuration @@ -64,9 +67,9 @@ def __init__(self): if index_name in existing_indexes: # Use existing index (dimension already set) index_info = pc.describe_index(index_name) - print(f"📊 Using existing Pinecone index: {index_name} (dim={index_info.dimension})") + logger.info("Using existing Pinecone index", index=index_name, dimension=index_info.dimension) else: - print(f"Creating Pinecone index: {index_name} with dimension {EMBEDDING_DIMENSIONS}") + logger.info("Creating Pinecone index", index=index_name, 
dimension=EMBEDDING_DIMENSIONS) pc.create_index( name=index_name, dimension=EMBEDDING_DIMENSIONS, @@ -86,7 +89,7 @@ def __init__(self): 'typescript': self._create_parser(Language(tsjavascript.language())), } - print(f"✅ OptimizedCodeIndexer initialized! (model: {EMBEDDING_MODEL})") + logger.info("OptimizedCodeIndexer initialized", model=EMBEDDING_MODEL) def _create_parser(self, language) -> Parser: """Create a tree-sitter parser""" @@ -149,7 +152,8 @@ async def _create_embeddings_batch(self, texts: List[str]) -> List[List[float]]: return [item.embedding for item in response.data] except Exception as e: - print(f"❌ Error creating batch embeddings: {e}") + logger.error("Error creating batch embeddings", error=str(e), batch_size=len(texts)) + capture_exception(e, operation="create_embeddings", batch_size=len(texts)) # Return zero vectors on error return [[0.0] * EMBEDDING_DIMENSIONS for _ in texts] @@ -194,22 +198,26 @@ def _extract_functions(self, tree_node, source_code: bytes) -> List[Dict]: async def index_repository(self, repo_id: str, repo_path: str): """Index all code in a repository - OPTIMIZED VERSION""" + from services.observability import set_operation_context + + set_operation_context("indexing", repo_id=repo_id) + add_breadcrumb("Starting repository indexing", category="indexing", repo_id=repo_id) + start_time = time.time() - print(f"\n🚀 Starting optimized indexing for repo: {repo_id}") - print(f"📂 Path: {repo_path}") + logger.info("Starting optimized indexing", repo_id=repo_id, path=repo_path) # Discover code files code_files = self._discover_code_files(repo_path) - print(f"📄 Found {len(code_files)} code files") + logger.info("Code files discovered", repo_id=repo_id, file_count=len(code_files)) if not code_files: - print("⚠️ No code files found") + logger.warning("No code files found", repo_id=repo_id) return 0 # Extract all functions from all files (parallel) all_functions_data = [] - print(f"\n🔍 Extracting functions from files...") + 
add_breadcrumb("Extracting functions", category="indexing", file_count=len(code_files)) for i in range(0, len(code_files), self.FILE_BATCH_SIZE): batch = code_files[i:i + self.FILE_BATCH_SIZE] @@ -225,18 +233,18 @@ async def index_repository(self, repo_id: str, repo_path: str): if isinstance(result, list): all_functions_data.extend(result) - print(f" Processed {min(i + self.FILE_BATCH_SIZE, len(code_files))}/{len(code_files)} files, " - f"{len(all_functions_data)} functions extracted") + processed = min(i + self.FILE_BATCH_SIZE, len(code_files)) + logger.debug("File batch processed", processed=processed, total=len(code_files), functions=len(all_functions_data)) if not all_functions_data: - print("⚠️ No functions extracted") + logger.warning("No functions extracted", repo_id=repo_id) return 0 - print(f"\n✅ Total functions extracted: {len(all_functions_data)}") + logger.info("Functions extracted", repo_id=repo_id, count=len(all_functions_data)) + add_breadcrumb("Functions extracted", category="indexing", count=len(all_functions_data)) # Generate embeddings in BATCHES (this is the key optimization) - print(f"\n🧠 Generating embeddings in batches of {self.EMBEDDING_BATCH_SIZE}...") - print(f" Using model: {EMBEDDING_MODEL}") + logger.info("Generating embeddings", batch_size=self.EMBEDDING_BATCH_SIZE, model=EMBEDDING_MODEL) # Create rich embedding texts using search enhancer embedding_texts = [ @@ -245,15 +253,16 @@ async def index_repository(self, repo_id: str, repo_path: str): ] all_embeddings = [] - for i in range(0, len(embedding_texts), self.EMBEDDING_BATCH_SIZE): - batch_texts = embedding_texts[i:i + self.EMBEDDING_BATCH_SIZE] - batch_embeddings = await self._create_embeddings_batch(batch_texts) - all_embeddings.extend(batch_embeddings) - - print(f" Generated {len(all_embeddings)}/{len(embedding_texts)} embeddings") + with track_time("embedding_generation", repo_id=repo_id, total=len(embedding_texts)): + for i in range(0, len(embedding_texts), 
self.EMBEDDING_BATCH_SIZE): + batch_texts = embedding_texts[i:i + self.EMBEDDING_BATCH_SIZE] + batch_embeddings = await self._create_embeddings_batch(batch_texts) + all_embeddings.extend(batch_embeddings) + + logger.debug("Embeddings generated", progress=len(all_embeddings), total=len(embedding_texts)) # Prepare vectors for Pinecone - print(f"\n💾 Preparing vectors for Pinecone...") + add_breadcrumb("Uploading to Pinecone", category="indexing", vector_count=len(all_functions_data)) vectors_to_upsert = [] for func_data, embedding in zip(all_functions_data, all_embeddings): @@ -277,17 +286,24 @@ async def index_repository(self, repo_id: str, repo_path: str): }) # Upsert to Pinecone in batches - print(f"\n☁️ Uploading to Pinecone in batches of {self.PINECONE_UPSERT_BATCH}...") - for i in range(0, len(vectors_to_upsert), self.PINECONE_UPSERT_BATCH): - batch = vectors_to_upsert[i:i + self.PINECONE_UPSERT_BATCH] - self.index.upsert(vectors=batch) - print(f" Uploaded {min(i + self.PINECONE_UPSERT_BATCH, len(vectors_to_upsert))}/{len(vectors_to_upsert)} vectors") + with track_time("pinecone_upload", repo_id=repo_id, vectors=len(vectors_to_upsert)): + for i in range(0, len(vectors_to_upsert), self.PINECONE_UPSERT_BATCH): + batch = vectors_to_upsert[i:i + self.PINECONE_UPSERT_BATCH] + self.index.upsert(vectors=batch) + logger.debug("Vectors uploaded", progress=min(i + self.PINECONE_UPSERT_BATCH, len(vectors_to_upsert)), total=len(vectors_to_upsert)) elapsed = time.time() - start_time - print(f"\n✅ Indexing complete!") - print(f" • Total functions: {len(all_functions_data)}") - print(f" • Time taken: {elapsed:.2f}s") - print(f" • Speed: {len(all_functions_data)/elapsed:.1f} functions/sec") + speed = len(all_functions_data) / elapsed if elapsed > 0 else 0 + + logger.info( + "Indexing complete", + repo_id=repo_id, + functions=len(all_functions_data), + duration_s=round(elapsed, 2), + speed=round(speed, 1) + ) + metrics.increment("indexing_completed") + 
metrics.timing("indexing_duration_s", elapsed) return len(all_functions_data) @@ -321,7 +337,7 @@ async def _extract_functions_from_file( return functions except Exception as e: - print(f"❌ Error processing {file_path}: {e}") + logger.error("Error processing file", file_path=file_path, error=str(e)) return [] async def semantic_search( @@ -342,12 +358,15 @@ async def semantic_search( use_query_expansion: Expand query with related terms use_reranking: Rerank results with keyword boosting """ + start_time = time.time() + metrics.increment("search_requests") + try: # Step 1: Query expansion (adds related programming terms) search_query = query if use_query_expansion: search_query = await self.search_enhancer.expand_query(query) - print(f"🔍 Expanded query: {search_query[:100]}...") + logger.debug("Query expanded", original=query[:50], expanded=search_query[:100]) # Step 2: Generate query embedding query_embeddings = await self._create_embeddings_batch([search_query]) @@ -383,10 +402,16 @@ async def semantic_search( formatted_results ) + elapsed = time.time() - start_time + logger.info("Search completed", repo_id=repo_id, results=len(formatted_results), duration_ms=round(elapsed*1000, 2)) + metrics.timing("search_latency_ms", elapsed * 1000) + return formatted_results[:max_results] except Exception as e: - print(f"❌ Error searching: {e}") + capture_exception(e, operation="search", repo_id=repo_id, query=query[:100]) + logger.error("Search failed", repo_id=repo_id, error=str(e)) + metrics.increment("search_errors") return [] async def explain_code( @@ -434,7 +459,8 @@ async def explain_code( return response.choices[0].message.content except Exception as e: - print(f"❌ Error explaining code: {e}") + logger.error("Error explaining code", file_path=file_path, error=str(e)) + capture_exception(e, operation="explain_code", file_path=file_path) return f"Error: {str(e)}" async def index_repository_with_progress( @@ -445,12 +471,12 @@ async def index_repository_with_progress( ): 
"""Index repository with real-time progress updates""" start_time = time.time() - print(f"\n🚀 Starting optimized indexing with progress for repo: {repo_id}") + logger.info("Starting optimized indexing with progress", repo_id=repo_id) # Discover code files code_files = self._discover_code_files(repo_path) total_files = len(code_files) - print(f"📄 Found {total_files} code files") + logger.info("Found code files", repo_id=repo_id, total_files=total_files) if not code_files: await progress_callback(0, 0, 0) @@ -460,7 +486,7 @@ async def index_repository_with_progress( all_functions_data = [] files_processed = 0 - print(f"\n🔍 Extracting functions from files...") + logger.debug("Extracting functions from files") for i in range(0, len(code_files), self.FILE_BATCH_SIZE): batch = code_files[i:i + self.FILE_BATCH_SIZE] @@ -481,14 +507,16 @@ async def index_repository_with_progress( # Send progress update await progress_callback(files_processed, len(all_functions_data), total_files) - print(f" Processed {files_processed}/{total_files} files, " - f"{len(all_functions_data)} functions extracted") + logger.debug("Processing files", + processed=files_processed, + total=total_files, + functions_extracted=len(all_functions_data)) if not all_functions_data: return 0 # Generate embeddings in BATCHES - print(f"\n🧠 Generating embeddings in batches of {self.EMBEDDING_BATCH_SIZE}...") + logger.debug("Generating embeddings in batches", batch_size=self.EMBEDDING_BATCH_SIZE) # Create rich embedding texts using search enhancer embedding_texts = [ @@ -502,10 +530,10 @@ async def index_repository_with_progress( batch_embeddings = await self._create_embeddings_batch(batch_texts) all_embeddings.extend(batch_embeddings) - print(f" Generated {len(all_embeddings)}/{len(embedding_texts)} embeddings") + logger.debug("Embeddings generated", completed=len(all_embeddings), total=len(embedding_texts)) # Prepare vectors for Pinecone - print(f"\n💾 Uploading to Pinecone...") + logger.debug("Uploading to 
Pinecone") vectors_to_upsert = [] for func_data, embedding in zip(all_functions_data, all_embeddings): @@ -534,10 +562,11 @@ async def index_repository_with_progress( self.index.upsert(vectors=batch) elapsed = time.time() - start_time - print(f"\n✅ Indexing complete!") - print(f" • Total functions: {len(all_functions_data)}") - print(f" • Time taken: {elapsed:.2f}s") - print(f" • Speed: {len(all_functions_data)/elapsed:.1f} functions/sec") + logger.info("Indexing with progress complete", + repo_id=repo_id, + total_functions=len(all_functions_data), + duration_s=round(elapsed, 2), + speed=round(len(all_functions_data)/elapsed, 1) if elapsed > 0 else 0) return len(all_functions_data) @@ -552,14 +581,13 @@ async def incremental_index_repository( import time start_time = time.time() - print(f"\n🔄 Starting INCREMENTAL indexing for repo: {repo_id}") - print(f"📍 Last indexed commit: {last_commit_sha[:8]}") + logger.info("Starting INCREMENTAL indexing", repo_id=repo_id, last_commit=last_commit_sha[:8]) try: repo = git.Repo(repo_path) current_commit = repo.head.commit.hexsha - print(f"📍 Current commit: {current_commit[:8]}") + logger.debug("Current commit", current_commit=current_commit[:8]) # Get changed files if last_commit_sha: @@ -567,7 +595,7 @@ async def incremental_index_repository( changed_files = diff.split('\n') if diff else [] else: # No previous commit, index everything - print("⚠️ No previous commit - doing full index") + logger.warning("No previous commit - doing full index") return await self.index_repository(repo_id, repo_path) # Filter for code files only @@ -577,10 +605,10 @@ async def incremental_index_repository( if Path(f).suffix in code_extensions ] - print(f"📄 Found {len(changed_files)} total changes, {len(changed_code_files)} code files") + logger.info("Found changed files", total_changes=len(changed_files), code_files=len(changed_code_files)) if not changed_code_files: - print("✅ No code changes detected - skipping indexing") + logger.info("No code 
changes detected - skipping indexing") return 0 # Extract functions from changed files @@ -589,19 +617,19 @@ async def incremental_index_repository( for file_path in changed_code_files: full_path = Path(repo_path) / file_path if not full_path.exists(): - print(f"⚠️ File deleted: {file_path} - skipping") + logger.debug("File deleted - skipping", file_path=file_path) continue functions = await self._extract_functions_from_file(repo_id, str(full_path)) all_functions_data.extend(functions) - print(f" Processed {file_path}: {len(functions)} functions") + logger.debug("Processed changed file", file_path=file_path, functions=len(functions)) if not all_functions_data: - print("✅ No functions to index") + logger.info("No functions to index") return 0 # Generate embeddings in batches - print(f"\n🧠 Generating embeddings for {len(all_functions_data)} functions...") + logger.debug("Generating embeddings", function_count=len(all_functions_data)) # Create rich embedding texts using search enhancer embedding_texts = [ @@ -645,16 +673,17 @@ async def incremental_index_repository( elapsed = time.time() - start_time - print(f"\n✅ Incremental indexing complete!") - print(f" • Changed files: {len(changed_code_files)}") - print(f" • Functions updated: {len(all_functions_data)}") - print(f" • Time taken: {elapsed:.2f}s") - print(f" • Speed: {len(all_functions_data)/elapsed:.1f} functions/sec") - print(f" • 🚀 INCREMENTAL SPEEDUP: ~{100/elapsed:.0f}x faster than full re-index!") + logger.info("Incremental indexing complete", + repo_id=repo_id, + changed_files=len(changed_code_files), + functions_updated=len(all_functions_data), + duration_s=round(elapsed, 2), + speed=round(len(all_functions_data)/elapsed, 1) if elapsed > 0 else 0) return len(all_functions_data) except Exception as e: - print(f"❌ Incremental indexing error: {e}") - print("Falling back to full index...") + logger.error("Incremental indexing error - falling back to full index", + repo_id=repo_id, error=str(e)) + 
class StructuredLogger:
    """
    Structured logger that outputs JSON in production, pretty logs in development.

    Records at ERROR and above are written to stderr, everything else to
    stdout. Records below the configured level (from ``LOG_LEVEL``) are dropped.

    Usage:
        logger.info("User logged in", user_id="abc", ip="1.2.3.4")
        logger.error("Failed to index", repo_id="xyz", error=str(e))

    NOTE(review): context set through ``set_context`` is stored on the
    instance and stays attached to every later record until ``clear_context``
    is called. With a single shared module-level logger that means it is
    visible to all callers -- confirm this is intended for concurrent requests.
    """

    def __init__(self, name: str = "codeintel"):
        """Create a logger whose records carry ``name`` as the ``service`` field."""
        self.name = name
        # LOG_LEVEL is free-form environment input. Only accept names that
        # resolve to a numeric logging level; a name such as "basic_format"
        # resolves to a string attribute and would make every level
        # comparison in _log raise.
        resolved = getattr(logging, LOG_LEVEL.upper(), logging.INFO)
        self.level = resolved if isinstance(resolved, int) else logging.INFO
        self._context: Dict[str, Any] = {}

    @staticmethod
    def _json_safe(value: Any) -> Any:
        """Return ``value`` unchanged if it is a JSON primitive, else ``str(value)``."""
        if value is None or isinstance(value, (str, int, float, bool)):
            return value
        return str(value)

    def _format_message(self, level: str, message: str, **kwargs) -> str:
        """
        Format log message based on environment.

        Production: one JSON object with ``timestamp`` (timezone-aware UTC,
        ISO 8601), ``level``, ``service``, ``message``, followed by the
        persistent context and the per-call fields.
        Development: ``[LEVEL] message [ctx: k=v | ...] k=v | ...``.
        """
        if not IS_PRODUCTION:
            # Pretty format for development
            extras = " | ".join(f"{k}={v}" for k, v in kwargs.items())
            ctx = " | ".join(f"{k}={v}" for k, v in self._context.items())
            parts = [f"[{level}] {message}"]
            if ctx:
                parts.append(f"[ctx: {ctx}]")
            if extras:
                parts.append(extras)
            return " ".join(parts)

        # Local import: the module only imports `datetime` from datetime.
        from datetime import timezone

        data = {
            # datetime.utcnow() is deprecated and yields a naive value; emit
            # an explicit UTC offset instead.
            "timestamp": datetime.now(timezone.utc).isoformat(),
            "level": level,
            "service": self.name,
            "message": message,
            **self._context,
            **kwargs
        }

        # JSON for production (easy to parse in log aggregators).
        # A log call must never crash its caller, so values json cannot encode
        # (Path, exceptions, sets, ...) are deliberately stringified.
        try:
            return json.dumps(data, default=str)
        except (TypeError, ValueError):
            # `default` is not consulted for unsupported dict keys or circular
            # references; flatten every non-primitive value to its str() form.
            return json.dumps({k: self._json_safe(v) for k, v in data.items()})

    def _log(self, level: str, level_num: int, message: str, **kwargs):
        """Format and print one record if ``level_num`` passes the level filter."""
        if level_num < self.level:
            return

        formatted = self._format_message(level, message, **kwargs)

        # Use stderr for errors, stdout for rest
        output = sys.stderr if level_num >= logging.ERROR else sys.stdout
        print(formatted, file=output)

    def set_context(self, **kwargs):
        """Set persistent context for all subsequent logs"""
        self._context.update(kwargs)

    def clear_context(self):
        """Clear all context"""
        self._context = {}

    def debug(self, message: str, **kwargs):
        """Log ``message`` at DEBUG level with optional structured fields."""
        self._log("DEBUG", logging.DEBUG, message, **kwargs)

    def info(self, message: str, **kwargs):
        """Log ``message`` at INFO level with optional structured fields."""
        self._log("INFO", logging.INFO, message, **kwargs)

    def warning(self, message: str, **kwargs):
        """Log ``message`` at WARNING level with optional structured fields."""
        self._log("WARNING", logging.WARNING, message, **kwargs)

    def error(self, message: str, **kwargs):
        """Log ``message`` at ERROR level (stderr) with optional structured fields."""
        self._log("ERROR", logging.ERROR, message, **kwargs)

    def critical(self, message: str, **kwargs):
        """Log ``message`` at CRITICAL level (stderr) with optional structured fields."""
        self._log("CRITICAL", logging.CRITICAL, message, **kwargs)
+ + Args: + error: The exception to capture + **context: Additional context to attach + """ + try: + import sentry_sdk + with sentry_sdk.push_scope() as scope: + for key, value in context.items(): + scope.set_extra(key, value) + sentry_sdk.capture_exception(error) + + # Also log it + logger.error( + f"Exception captured: {type(error).__name__}: {str(error)}", + **context + ) + except ImportError: + logger.error(f"Exception: {error}", **context) + + +def capture_message(message: str, level: str = "info", **context): + """Capture a message (not exception) to Sentry""" + try: + import sentry_sdk + with sentry_sdk.push_scope() as scope: + for key, value in context.items(): + scope.set_extra(key, value) + sentry_sdk.capture_message(message, level=level) + except ImportError: + pass + + +# ============================================================================= +# PERFORMANCE TRACKING +# ============================================================================= + +@contextmanager +def track_time(operation: str, **tags): + """ + Context manager to track operation duration. + + Usage: + with track_time("embedding_batch", batch_size=100): + embeddings = await create_embeddings(texts) + + Logs duration and creates Sentry span if available. 
+ """ + start = time.perf_counter() + + # Start Sentry span if available + span = None + try: + import sentry_sdk + span = sentry_sdk.start_span(op=operation, description=operation) + for key, value in tags.items(): + span.set_tag(key, str(value)) + except ImportError: + pass + + add_breadcrumb(f"Started: {operation}", category="performance", **tags) + + try: + yield + finally: + duration = time.perf_counter() - start + duration_ms = round(duration * 1000, 2) + + # Log completion + logger.debug(f"{operation} completed", duration_ms=duration_ms, **tags) + + # Finish Sentry span + if span: + span.finish() + + add_breadcrumb( + f"Completed: {operation}", + category="performance", + duration_ms=duration_ms, + **tags + ) + + +def trace_operation(operation: str): + """ + Decorator to trace an entire function/method. + + Usage: + @trace_operation("index_repository") + async def index_repository(repo_id: str): + ... + """ + def decorator(func): + @wraps(func) + async def async_wrapper(*args, **kwargs): + # Extract useful context from kwargs + context = {k: v for k, v in kwargs.items() + if k in ('repo_id', 'user_id', 'query', 'file_path')} + + set_operation_context(operation, **context) + add_breadcrumb(f"Starting {operation}", category="function", **context) + + start = time.perf_counter() + try: + result = await func(*args, **kwargs) + duration = time.perf_counter() - start + logger.info( + f"{operation} completed successfully", + duration_s=round(duration, 2), + **context + ) + return result + except Exception as e: + duration = time.perf_counter() - start + capture_exception(e, operation=operation, duration_s=round(duration, 2), **context) + raise + + @wraps(func) + def sync_wrapper(*args, **kwargs): + context = {k: v for k, v in kwargs.items() + if k in ('repo_id', 'user_id', 'query', 'file_path')} + + set_operation_context(operation, **context) + add_breadcrumb(f"Starting {operation}", category="function", **context) + + start = time.perf_counter() + try: + result = 
func(*args, **kwargs) + duration = time.perf_counter() - start + logger.info( + f"{operation} completed successfully", + duration_s=round(duration, 2), + **context + ) + return result + except Exception as e: + duration = time.perf_counter() - start + capture_exception(e, operation=operation, duration_s=round(duration, 2), **context) + raise + + # Return appropriate wrapper based on function type + import asyncio + if asyncio.iscoroutinefunction(func): + return async_wrapper + return sync_wrapper + + return decorator + + +# ============================================================================= +# SIMPLE METRICS (in-memory counters) +# ============================================================================= + +class Metrics: + """ + Simple in-memory metrics counters. + + Usage: + metrics.increment("search_requests", repo_id="abc") + metrics.timing("search_latency_ms", 150) + metrics.get_stats() # Returns all metrics + """ + + def __init__(self): + self._counters: Dict[str, int] = {} + self._timings: Dict[str, list] = {} + + def increment(self, name: str, value: int = 1, **tags): + """Increment a counter""" + key = f"{name}" + self._counters[key] = self._counters.get(key, 0) + value + + def timing(self, name: str, value_ms: float): + """Record a timing measurement""" + if name not in self._timings: + self._timings[name] = [] + self._timings[name].append(value_ms) + # Keep only last 1000 timings + if len(self._timings[name]) > 1000: + self._timings[name] = self._timings[name][-1000:] + + def get_stats(self) -> Dict: + """Get all metrics with basic stats""" + stats = { + "counters": self._counters.copy(), + "timings": {} + } + + for name, values in self._timings.items(): + if values: + stats["timings"][name] = { + "count": len(values), + "avg_ms": round(sum(values) / len(values), 2), + "min_ms": round(min(values), 2), + "max_ms": round(max(values), 2) + } + + return stats + + def reset(self): + """Reset all metrics""" + self._counters = {} + self._timings = 
{} + + +# Global metrics instance +metrics = Metrics() diff --git a/backend/services/performance_metrics.py b/backend/services/performance_metrics.py index 31516b8..77363ec 100644 --- a/backend/services/performance_metrics.py +++ b/backend/services/performance_metrics.py @@ -7,6 +7,8 @@ from collections import deque import time +from services.observability import logger + class PerformanceMetrics: """Track performance metrics for monitoring""" @@ -19,7 +21,7 @@ def __init__(self): self.cache_misses = 0 self.total_searches = 0 - print("✅ PerformanceMetrics initialized!") + logger.debug("PerformanceMetrics initialized") def record_indexing(self, repo_id: str, duration: float, function_count: int): """Record indexing performance""" diff --git a/backend/services/repo_manager.py b/backend/services/repo_manager.py index c846122..bcdd6b7 100644 --- a/backend/services/repo_manager.py +++ b/backend/services/repo_manager.py @@ -8,6 +8,7 @@ import git from pathlib import Path from services.supabase_service import get_supabase_service +from services.observability import logger, capture_exception, metrics class RepositoryManager: @@ -29,7 +30,7 @@ def _sync_existing_repos(self): if not self.repos_dir.exists(): return - print("🔄 Syncing repositories...") + logger.info("Syncing repositories from disk") for repo_path in self.repos_dir.iterdir(): if not repo_path.is_dir() or repo_path.name.startswith('.'): @@ -39,7 +40,7 @@ def _sync_existing_repos(self): # Check if already in DB existing = self.db.get_repository(repo_path.name) if existing: - print(f"✅ Repo exists in DB: {existing['name']}") + logger.debug("Repo exists in DB", name=existing['name']) continue # Try to open as git repo @@ -73,10 +74,10 @@ def _sync_existing_repos(self): file_count * 20 # Estimate function count ) - print(f"✅ Synced repo from disk: {name} ({repo_path.name})") + logger.info("Synced repo from disk", name=name, repo_id=repo_path.name) except Exception as e: - print(f"⚠️ Error syncing {repo_path.name}: 
{e}") + logger.warning("Error syncing repo", repo=repo_path.name, error=str(e)) def list_repos(self) -> List[dict]: """List all repositories from Supabase""" @@ -106,7 +107,8 @@ def add_repo(self, name: str, git_url: str, branch: str = "main", user_id: Optio try: # Clone the repository - print(f"Cloning {git_url} to {local_path}...") + logger.info("Cloning repository", git_url=git_url, local_path=str(local_path)) + metrics.increment("repos_cloned") git.Repo.clone_from(git_url, local_path, branch=branch, depth=1) # Create DB record with ownership diff --git a/backend/services/search_enhancer.py b/backend/services/search_enhancer.py index f2ec11a..fb25dff 100644 --- a/backend/services/search_enhancer.py +++ b/backend/services/search_enhancer.py @@ -8,6 +8,8 @@ from openai import AsyncOpenAI import os +from services.observability import logger, capture_exception + class SearchEnhancer: """Enhances search quality through various techniques""" @@ -56,7 +58,8 @@ async def expand_query(self, query: str) -> str: return f"{query} {expanded}" except Exception as e: - print(f"⚠️ Query expansion failed: {e}") + logger.warning("Query expansion failed", error=str(e), query=query[:50]) + capture_exception(e, operation="query_expansion", query=query[:50]) return query def extract_docstring(self, code: str, language: str) -> str: diff --git a/backend/services/sentry.py b/backend/services/sentry.py new file mode 100644 index 0000000..12508b2 --- /dev/null +++ b/backend/services/sentry.py @@ -0,0 +1,161 @@ +""" +Sentry Error Tracking Integration +Provides production error visibility and performance monitoring + +NOTE: This module initializes Sentry. For logging and tracing, +use the observability module: from services.observability import get_logger, trace_operation +""" +import os +from typing import Optional + + +def init_sentry() -> bool: + """ + Initialize Sentry SDK if SENTRY_DSN is configured. 
+ + Returns: + bool: True if Sentry was initialized, False otherwise + """ + sentry_dsn = os.getenv("SENTRY_DSN") + + if not sentry_dsn: + print("ℹ️ Sentry DSN not configured - error tracking disabled") + return False + + try: + import sentry_sdk + from sentry_sdk.integrations.fastapi import FastApiIntegration + from sentry_sdk.integrations.starlette import StarletteIntegration + + environment = os.getenv("ENVIRONMENT", "development") + + sentry_sdk.init( + dsn=sentry_dsn, + environment=environment, + + # Performance monitoring - sample rate based on environment + traces_sample_rate=0.1 if environment == "production" else 1.0, + + # Profile sampled transactions + profiles_sample_rate=0.1 if environment == "production" else 1.0, + + # Send PII for debugging (user IDs, emails) + send_default_pii=True, + + # Integrations + integrations=[ + FastApiIntegration(transaction_style="endpoint"), + StarletteIntegration(transaction_style="endpoint"), + ], + + # Filter noisy events + before_send=_filter_events, + + # Debug mode for development + debug=environment == "development", + + # Attach stack traces to messages + attach_stacktrace=True, + + # Include local variables in stack traces + include_local_variables=True, + ) + + print(f"✅ Sentry initialized (environment: {environment})") + return True + + except ImportError: + print("⚠️ sentry-sdk not installed - error tracking disabled") + return False + except Exception as e: + print(f"⚠️ Failed to initialize Sentry: {e}") + return False + + +def _filter_events(event, hint): + """Filter out noisy events before sending to Sentry.""" + + # Don't send health check errors + request_url = event.get("request", {}).get("url", "") + if "/health" in request_url: + return None + + # Don't send 404s for common bot paths + exception_values = event.get("exception", {}).get("values", []) + if exception_values: + exception_value = str(exception_values[0].get("value", "")) + bot_paths = ["/wp-admin", "/wp-login", "/.env", "/config", "/admin", 
"/phpmyadmin", "/.git"] + if any(path in exception_value for path in bot_paths): + return None + + # Don't send validation errors (they're expected) + if exception_values: + exception_type = exception_values[0].get("type", "") + if exception_type in ("RequestValidationError", "ValidationError"): + return None + + return event + + +# ============================================================================ +# LEGACY FUNCTIONS - Use observability module for new code +# ============================================================================ + +def set_user_context(user_id: Optional[str] = None, email: Optional[str] = None): + """ + Set user context for error tracking. + + DEPRECATED: Use from services.observability import set_user_context + """ + try: + import sentry_sdk + sentry_sdk.set_user({"id": user_id, "email": email}) + except ImportError: + pass + + +def capture_exception(error: Exception, **extra_context): + """ + Manually capture an exception with additional context. + + DEPRECATED: Use from services.observability import capture_exception + """ + try: + import sentry_sdk + with sentry_sdk.push_scope() as scope: + for key, value in extra_context.items(): + scope.set_extra(key, value) + sentry_sdk.capture_exception(error) + except ImportError: + pass + + +def capture_message(message: str, level: str = "info", **extra_context): + """ + Capture a message (not an exception) for tracking. + + DEPRECATED: Use from services.observability import get_logger + """ + try: + import sentry_sdk + with sentry_sdk.push_scope() as scope: + for key, value in extra_context.items(): + scope.set_extra(key, value) + sentry_sdk.capture_message(message, level=level) + except ImportError: + pass + + +def set_operation_context(operation: str, **tags): + """ + Set operation context for the current scope. 
+ + DEPRECATED: Use from services.observability import trace_operation + """ + try: + import sentry_sdk + sentry_sdk.set_tag("operation", operation) + for key, value in tags.items(): + sentry_sdk.set_tag(key, str(value)) + except ImportError: + pass diff --git a/backend/services/style_analyzer.py b/backend/services/style_analyzer.py index 9c7d904..28a584b 100644 --- a/backend/services/style_analyzer.py +++ b/backend/services/style_analyzer.py @@ -12,6 +12,8 @@ import tree_sitter_javascript as tsjavascript from tree_sitter import Language, Parser +from services.observability import logger + class StyleAnalyzer: """Analyze code style and team patterns""" @@ -23,7 +25,7 @@ def __init__(self): 'javascript': Parser(Language(tsjavascript.language())), 'typescript': Parser(Language(tsjavascript.language())), } - print("✅ StyleAnalyzer initialized!") + logger.debug("StyleAnalyzer initialized") def _detect_language(self, file_path: str) -> str: """Detect language from extension""" @@ -141,7 +143,7 @@ def analyze_repository_style(self, repo_path: str) -> Dict: """Analyze coding style patterns across repository""" repo_path = Path(repo_path) - print(f"🎨 Analyzing code style for repository...") + logger.info("Analyzing code style for repository") # Discover code files code_files = [] @@ -197,7 +199,7 @@ def analyze_repository_style(self, repo_path: str) -> Dict: language_dist[language] += 1 except Exception as e: - print(f"Error analyzing {file_path}: {e}") + logger.warning("Error analyzing file", file_path=str(file_path), error=str(e)) continue # Analyze naming conventions @@ -212,10 +214,10 @@ def analyze_repository_style(self, repo_path: str) -> Dict: total_functions = len(function_names) total_classes = len(class_names) - print(f"✅ Style analysis complete!") - print(f" • {total_files} files analyzed") - print(f" • {total_functions} functions found") - print(f" • {total_classes} classes found") + logger.info("Style analysis complete", + files_analyzed=total_files, + 
functions_found=total_functions, + classes_found=total_classes) return { "summary": { @@ -281,7 +283,7 @@ def save_to_cache(self, repo_id: str, style_data: Dict): db.upsert_code_style(repo_id, language, analysis) - print(f"✅ Cached code style analysis for {repo_id} in Supabase") + logger.debug("Cached code style analysis in Supabase", repo_id=repo_id) def load_from_cache(self, repo_id: str) -> Dict: """Load style analysis from Supabase cache""" @@ -314,7 +316,7 @@ def load_from_cache(self, repo_id: str) -> Dict: if style.get("patterns"): patterns = style["patterns"] - print(f"✅ Loaded cached code style for {repo_id} from Supabase") + logger.debug("Loaded cached code style from Supabase", repo_id=repo_id) return { "languages": languages, diff --git a/backend/services/supabase_service.py b/backend/services/supabase_service.py index 75ecd67..59aee91 100644 --- a/backend/services/supabase_service.py +++ b/backend/services/supabase_service.py @@ -9,6 +9,8 @@ from dotenv import load_dotenv import uuid +from services.observability import logger + load_dotenv() @@ -29,7 +31,7 @@ def __init__(self): persist_session=False ) self.client: Client = create_client(supabase_url, supabase_key, options) - print("✅ Supabase service initialized!") + logger.info("Supabase service initialized") # ===== REPOSITORIES ===== @@ -120,17 +122,16 @@ def upsert_file_dependencies(self, repo_id: str, dependencies: List[Dict]) -> No for dep in dependencies: dep["repo_id"] = repo_id - # Upsert with explicit conflict resolution result = self.client.table("file_dependencies").upsert( dependencies, on_conflict="repo_id,file_path" ).execute() - print(f"💾 Upserted {len(result.data) if result.data else 0} file dependencies") + logger.debug("Upserted file dependencies", count=len(result.data) if result.data else 0) def get_file_dependencies(self, repo_id: str) -> List[Dict]: """Get all file dependencies for a repo""" result = self.client.table("file_dependencies").select("*").eq("repo_id", 
repo_id).execute() - print(f"🔍 Query file_dependencies for {repo_id}: found {len(result.data) if result.data else 0} rows") + logger.debug("Queried file dependencies", repo_id=repo_id, count=len(result.data) if result.data else 0) return result.data or [] def get_file_impact(self, repo_id: str, file_path: str) -> Optional[Dict]: