Skip to content

Commit 84d7cee

Browse files
committed
refactor: consolidate sentry.py + performance_metrics.py into observability.py
Three separate observability systems merged into one: BEFORE: services/sentry.py (175 lines) -- Sentry init, user context, error capture services/performance_metrics.py (79 lines) -- search/indexing stats services/observability.py (366 lines) -- logger, metrics, tracing Two 'metrics' objects with naming collision. 4 duplicate functions across sentry.py and observability.py. 3 files still importing deprecated functions from sentry.py. AFTER: services/observability.py (532 lines) -- everything, one import path Single import: from services.observability import logger, metrics, ... Changes: - Metrics class now includes record_search(), record_indexing(), get_metrics() (absorbed from PerformanceMetrics) - Added init_sentry(), _filter_events(), set_user_context(), capture_http_exception() (moved from sentry.py) - Updated 7 files to import from observability instead of sentry - dependencies.py: removed PerformanceMetrics, uses observability.metrics - Deleted services/sentry.py (175 lines) - Deleted services/performance_metrics.py (79 lines) Net: -92 lines, 2 files deleted, 1 naming collision eliminated. 284 tests pass. Closes OPE-87
1 parent 89375c1 commit 84d7cee

9 files changed

Lines changed: 191 additions & 283 deletions

File tree

backend/dependencies.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,13 +9,13 @@
99
from services.cache import CacheService
1010
from services.dependency_analyzer import DependencyAnalyzer
1111
from services.style_analyzer import StyleAnalyzer
12-
from services.performance_metrics import PerformanceMetrics
1312
from services.dna_extractor import DNAExtractor
1413
from services.rate_limiter import RateLimiter, APIKeyManager
1514
from services.supabase_service import get_supabase_service
1615
from services.input_validator import InputValidator, CostController
1716
from services.user_limits import init_user_limits_service, get_user_limits_service
1817
from services.repo_validator import get_repo_validator
18+
from services.observability import metrics
1919

2020
# Service instances (singleton pattern)
2121
indexer = OptimizedCodeIndexer()
@@ -24,7 +24,6 @@
2424
dependency_analyzer = DependencyAnalyzer()
2525
style_analyzer = StyleAnalyzer()
2626
dna_extractor = DNAExtractor()
27-
metrics = PerformanceMetrics()
2827

2928
# Rate limiting and API key management
3029
rate_limiter = RateLimiter(redis_client=cache.redis if cache.redis else None)

backend/main.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
import os
1616

1717
# Initialize Sentry FIRST (before other imports to catch all errors)
18-
from services.sentry import init_sentry
18+
from services.observability import init_sentry
1919
init_sentry()
2020

2121
# Import API config (single source of truth for versioning)
@@ -146,7 +146,7 @@ async def generic_exception_handler(request: Request, exc: Exception):
146146
Catch-all handler for unhandled exceptions.
147147
Captures to Sentry and returns 500.
148148
"""
149-
from services.sentry import capture_http_exception
149+
from services.observability import capture_http_exception
150150
capture_http_exception(request, exc, 500)
151151

152152
return JSONResponse(

backend/middleware/auth.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -117,15 +117,15 @@ def _authenticate(token: str) -> AuthContext:
117117
ctx = _validate_jwt(token)
118118
if ctx:
119119
# Set Sentry user context for error tracking
120-
from services.sentry import set_user_context
120+
from services.observability import set_user_context
121121
set_user_context(user_id=ctx.user_id, email=ctx.email)
122122
return ctx
123123

124124
# Try API key
125125
ctx = _validate_api_key(token)
126126
if ctx:
127127
# Set Sentry user context for error tracking
128-
from services.sentry import set_user_context
128+
from services.observability import set_user_context
129129
set_user_context(user_id=ctx.user_id or ctx.api_key_name)
130130
return ctx
131131

backend/services/observability.py

Lines changed: 183 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,12 @@
11
"""
22
Observability Module
3-
Centralized logging, tracing, and metrics for CodeIntel
3+
Centralized logging, tracing, metrics, and error tracking for CodeIntel
44
5-
Usage:
6-
from services.observability import logger, trace_operation, track_time
5+
Single import for all observability needs:
6+
from services.observability import logger, metrics, capture_exception, track_time
77
88
logger.info("Starting indexing", repo_id="abc", files=100)
9+
metrics.record_search(duration, cached=True)
910
1011
@trace_operation("indexing")
1112
async def index_repo(repo_id: str):
@@ -23,6 +24,7 @@ async def index_repo(repo_id: str):
2324
from functools import wraps
2425
from contextlib import contextmanager
2526
from datetime import datetime
27+
from collections import deque
2628

2729
# Environment
2830
ENVIRONMENT = os.getenv("ENVIRONMENT", "development")
@@ -301,66 +303,230 @@ def sync_wrapper(*args, **kwargs):
301303
return decorator
302304

303305

304-
# SIMPLE METRICS (in-memory counters)
306+
# METRICS (unified counters + performance tracking)
305307

306308
class Metrics:
307309
"""
308-
Simple in-memory metrics counters.
310+
Unified metrics: generic counters/timings/gauges plus
311+
domain-specific search and indexing performance tracking.
309312
310313
Usage:
311-
metrics.increment("search_requests", repo_id="abc")
314+
metrics.increment("search_requests")
312315
metrics.timing("search_latency_ms", 150)
313-
metrics.get_stats() # Returns all metrics
316+
metrics.record_search(duration, cached=True)
317+
metrics.record_indexing(repo_id, duration, function_count)
318+
metrics.get_metrics() # dashboard-friendly summary
319+
metrics.get_stats() # raw counters/timings/gauges
314320
"""
315321

316322
def __init__(self):
317323
self._counters: Dict[str, int] = {}
318324
self._timings: Dict[str, list] = {}
319325
self._gauges: Dict[str, float] = {}
326+
# Domain-specific tracking (replaces PerformanceMetrics)
327+
self._indexing_times: deque = deque(maxlen=100)
328+
self._search_times: deque = deque(maxlen=100)
329+
self._total_searches: int = 0
330+
self._cache_hits: int = 0
331+
self._cache_misses: int = 0
320332

321333
def increment(self, name: str, value: int = 1, **tags):
322334
"""Increment a counter"""
323-
key = f"{name}"
324-
self._counters[key] = self._counters.get(key, 0) + value
335+
self._counters[name] = self._counters.get(name, 0) + value
325336

326337
def timing(self, name: str, value_ms: float):
327338
"""Record a timing measurement"""
328339
if name not in self._timings:
329340
self._timings[name] = []
330341
self._timings[name].append(value_ms)
331-
# Keep only last 1000 timings
332342
if len(self._timings[name]) > 1000:
333343
self._timings[name] = self._timings[name][-1000:]
334344

335345
def gauge(self, name: str, value: float):
336-
"""Record a point-in-time value (like avg score, current queue size)"""
346+
"""Record a point-in-time value"""
337347
self._gauges[name] = value
338348

349+
def record_indexing(self, repo_id: str, duration: float, function_count: int):
350+
"""Record indexing performance for dashboard metrics."""
351+
self._indexing_times.append({
352+
"repo_id": repo_id,
353+
"duration": duration,
354+
"function_count": function_count,
355+
"speed": function_count / duration if duration > 0 else 0,
356+
"timestamp": datetime.now().isoformat(),
357+
})
358+
359+
def record_search(self, duration: float, cached: bool):
360+
"""Record search performance for dashboard metrics."""
361+
self._search_times.append({
362+
"duration": duration,
363+
"cached": cached,
364+
"timestamp": datetime.now().isoformat(),
365+
})
366+
self._total_searches += 1
367+
if cached:
368+
self._cache_hits += 1
369+
else:
370+
self._cache_misses += 1
371+
372+
def get_metrics(self) -> Dict:
373+
"""Dashboard-friendly performance summary (used by /health and /metrics)."""
374+
indexing_speeds = [m["speed"] for m in self._indexing_times]
375+
search_durations = [m["duration"] for m in self._search_times]
376+
cache_hit_rate = (
377+
(self._cache_hits / self._total_searches * 100)
378+
if self._total_searches > 0 else 0
379+
)
380+
381+
return {
382+
"indexing": {
383+
"total_operations": len(self._indexing_times),
384+
"avg_speed_functions_per_sec": (
385+
sum(indexing_speeds) / len(indexing_speeds)
386+
if indexing_speeds else 0
387+
),
388+
"max_speed": max(indexing_speeds) if indexing_speeds else 0,
389+
"min_speed": min(indexing_speeds) if indexing_speeds else 0,
390+
"recent_operations": list(self._indexing_times)[-10:],
391+
},
392+
"search": {
393+
"total_searches": self._total_searches,
394+
"cache_hit_rate": f"{cache_hit_rate:.1f}%",
395+
"cache_hits": self._cache_hits,
396+
"cache_misses": self._cache_misses,
397+
"avg_duration_ms": (
398+
sum(search_durations) / len(search_durations) * 1000
399+
if search_durations else 0
400+
),
401+
"recent_searches": list(self._search_times)[-10:],
402+
},
403+
"summary": {
404+
"health": "healthy",
405+
"cache_working": cache_hit_rate > 0,
406+
"indexing_performance": (
407+
"good" if (
408+
sum(indexing_speeds) / len(indexing_speeds)
409+
if indexing_speeds else 0
410+
) > 10 else "needs_improvement"
411+
),
412+
},
413+
}
414+
339415
def get_stats(self) -> Dict:
340-
"""Get all metrics with basic stats"""
416+
"""Raw counters, timings, and gauges for internal debugging."""
341417
stats = {
342418
"counters": self._counters.copy(),
343419
"gauges": self._gauges.copy(),
344-
"timings": {}
420+
"timings": {},
345421
}
346-
347422
for name, values in self._timings.items():
348423
if values:
349424
stats["timings"][name] = {
350425
"count": len(values),
351426
"avg_ms": round(sum(values) / len(values), 2),
352427
"min_ms": round(min(values), 2),
353-
"max_ms": round(max(values), 2)
428+
"max_ms": round(max(values), 2),
354429
}
355-
356430
return stats
357431

358432
def reset(self):
359433
"""Reset all metrics"""
360434
self._counters = {}
361435
self._timings = {}
362436
self._gauges = {}
437+
self._indexing_times.clear()
438+
self._search_times.clear()
439+
self._total_searches = 0
440+
self._cache_hits = 0
441+
self._cache_misses = 0
442+
443+
444+
# SENTRY INITIALIZATION (moved from services/sentry.py)
445+
446+
def init_sentry() -> bool:
447+
"""Initialize Sentry SDK if SENTRY_DSN is configured."""
448+
sentry_dsn = os.getenv("SENTRY_DSN")
449+
450+
if not sentry_dsn:
451+
print("[INFO] Sentry DSN not configured - error tracking disabled")
452+
return False
453+
454+
try:
455+
import sentry_sdk
456+
from sentry_sdk.integrations.fastapi import FastApiIntegration
457+
from sentry_sdk.integrations.starlette import StarletteIntegration
458+
459+
environment = os.getenv("ENVIRONMENT", "development")
460+
461+
sentry_sdk.init(
462+
dsn=sentry_dsn,
463+
environment=environment,
464+
traces_sample_rate=0.1 if environment == "production" else 1.0,
465+
profiles_sample_rate=0.1 if environment == "production" else 1.0,
466+
send_default_pii=True,
467+
integrations=[
468+
FastApiIntegration(transaction_style="endpoint"),
469+
StarletteIntegration(transaction_style="endpoint"),
470+
],
471+
before_send=_filter_events,
472+
debug=environment == "development",
473+
attach_stacktrace=True,
474+
include_local_variables=True,
475+
)
476+
477+
print(f"[OK] Sentry initialized (environment: {environment})")
478+
return True
479+
480+
except ImportError:
481+
print("[WARN] sentry-sdk not installed - error tracking disabled")
482+
return False
483+
except Exception as e:
484+
print(f"[WARN] Failed to initialize Sentry: {e}")
485+
return False
486+
487+
488+
def _filter_events(event, hint):
489+
"""Filter out noisy events before sending to Sentry."""
490+
request_url = event.get("request", {}).get("url", "")
491+
if "/health" in request_url:
492+
return None
493+
494+
exception_values = event.get("exception", {}).get("values", [])
495+
if exception_values:
496+
exception_value = str(exception_values[0].get("value", ""))
497+
bot_paths = ["/wp-admin", "/wp-login", "/.env", "/config", "/admin", "/phpmyadmin", "/.git"]
498+
if any(path in exception_value for path in bot_paths):
499+
return None
500+
501+
if exception_values:
502+
exception_type = exception_values[0].get("type", "")
503+
if exception_type in ("RequestValidationError", "ValidationError"):
504+
return None
505+
506+
return event
507+
508+
509+
def set_user_context(user_id: Optional[str] = None, email: Optional[str] = None):
510+
"""Set Sentry user context for error attribution."""
511+
try:
512+
import sentry_sdk
513+
sentry_sdk.set_user({"id": user_id, "email": email})
514+
except ImportError:
515+
pass
516+
517+
518+
def capture_http_exception(request, exc: Exception, status_code: int):
519+
"""Capture HTTP exception with request context for Sentry."""
520+
try:
521+
import sentry_sdk
522+
with sentry_sdk.push_scope() as scope:
523+
scope.set_extra("status_code", status_code)
524+
scope.set_extra("path", str(request.url.path))
525+
scope.set_extra("method", request.method)
526+
sentry_sdk.capture_exception(exc)
527+
except ImportError:
528+
pass
363529

364530

365-
# Global metrics instance
531+
# Global instances
366532
metrics = Metrics()

0 commit comments

Comments
 (0)