Skip to content

Commit d6d88a0

Browse files
committed
fix: review round 4 -- type accuracy, PII enforcement, indexing counter
1. _timings type: Dict[str, list] -> Dict[str, Deque[float]] to match actual deque storage. Added Deque to typing imports. 2. set_user_context: email now only sent to Sentry when SENTRY_SEND_PII is explicitly enabled. Was bypassing the PII flag we added. 3. startup_checks: added SENTRY_SEND_PII and SENTRY_INCLUDE_LOCAL_VARS to OPTIONAL_VARS so missing config gets logged at startup. 4. get_metrics/get_stats: return type -> Dict[str, Any] for precision. 5. _total_indexing_ops: persistent counter replaces len(deque) which was capped at 100. Dashboard now shows true total. Skipped: lazy singleton (4th time), print->logger in init_sentry (runs before logger is ready at bootstrap). 284 tests pass.
1 parent b0acc9c commit d6d88a0

2 files changed

Lines changed: 15 additions & 7 deletions

File tree

backend/config/startup_checks.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@
2323
("COHERE_API_KEY", "Cohere API key for reranking", "Search reranking disabled"),
2424
("VOYAGE_API_KEY", "Voyage AI key for code embeddings", "Using OpenAI embeddings"),
2525
("SENTRY_DSN", "Sentry DSN for error tracking", "Error tracking disabled"),
26+
("SENTRY_SEND_PII", "Send user emails to Sentry", "PII disabled (privacy safe)"),
27+
("SENTRY_INCLUDE_LOCAL_VARS", "Include local vars in Sentry traces", "Local vars excluded"),
2628
("REDIS_HOST", "Redis host for caching", "Using default localhost"),
2729
]
2830

backend/services/observability.py

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ async def index_repo(repo_id: str):
2020
import time
2121
import logging
2222
import json
23-
from typing import Optional, Any, Dict
23+
from typing import Optional, Any, Dict, Deque
2424
from functools import wraps
2525
from contextlib import contextmanager
2626
from datetime import datetime, timezone
@@ -315,12 +315,13 @@ class Metrics:
315315

316316
def __init__(self) -> None:
317317
self._counters: Dict[str, int] = {}
318-
self._timings: Dict[str, list] = {}
318+
self._timings: Dict[str, Deque[float]] = {}
319319
self._gauges: Dict[str, float] = {}
320320
# Domain-specific tracking (replaces PerformanceMetrics)
321321
self._indexing_times: deque = deque(maxlen=100)
322322
self._search_times: deque = deque(maxlen=100)
323323
self._total_searches: int = 0
324+
self._total_indexing_ops: int = 0
324325

325326
def increment(self, name: str, value: int = 1, **tags) -> None:
326327
"""Increment a counter"""
@@ -345,6 +346,7 @@ def record_indexing(self, repo_id: str, duration: float, function_count: int) ->
345346
"speed": function_count / duration if duration > 0 else 0,
346347
"timestamp": datetime.now(timezone.utc).isoformat(),
347348
})
349+
self._total_indexing_ops += 1
348350

349351
def record_search(self, duration: float, cached: bool) -> None:
350352
"""Record search performance for dashboard metrics."""
@@ -357,22 +359,21 @@ def record_search(self, duration: float, cached: bool) -> None:
357359
# cache hit/miss counting handled by cache.py via metrics.increment()
358360
# to avoid double counting now that we're a single Metrics instance
359361

360-
def get_metrics(self) -> Dict:
362+
def get_metrics(self) -> Dict[str, Any]:
361363
"""Dashboard-friendly performance summary (used by /health and /metrics)."""
362364
indexing_speeds = [m["speed"] for m in self._indexing_times]
363365
search_durations = [m["duration"] for m in self._search_times]
364366
avg_indexing_speed = (
365367
sum(indexing_speeds) / len(indexing_speeds) if indexing_speeds else 0
366368
)
367-
# Cache hits/misses come from _counters (set by cache.py)
368369
cache_hits = self._counters.get("cache_hits", 0)
369370
cache_misses = self._counters.get("cache_misses", 0)
370371
cache_total = cache_hits + cache_misses
371372
cache_hit_rate = (cache_hits / cache_total * 100) if cache_total > 0 else 0
372373

373374
return {
374375
"indexing": {
375-
"total_operations": len(self._indexing_times),
376+
"total_operations": self._total_indexing_ops,
376377
"avg_speed_functions_per_sec": avg_indexing_speed,
377378
"max_speed": max(indexing_speeds) if indexing_speeds else 0,
378379
"min_speed": min(indexing_speeds) if indexing_speeds else 0,
@@ -398,7 +399,7 @@ def get_metrics(self) -> Dict:
398399
},
399400
}
400401

401-
def get_stats(self) -> Dict:
402+
def get_stats(self) -> Dict[str, Any]:
402403
"""Raw counters, timings, and gauges for internal debugging."""
403404
stats = {
404405
"counters": self._counters.copy(),
@@ -423,6 +424,7 @@ def reset(self) -> None:
423424
self._indexing_times.clear()
424425
self._search_times.clear()
425426
self._total_searches = 0
427+
self._total_indexing_ops = 0
426428

427429

428430
# SENTRY INITIALIZATION (moved from services/sentry.py)
@@ -497,7 +499,11 @@ def set_user_context(user_id: Optional[str] = None, email: Optional[str] = None)
497499
"""Set Sentry user context for error attribution."""
498500
try:
499501
import sentry_sdk
500-
sentry_sdk.set_user({"id": user_id, "email": email})
502+
user_data: Dict[str, Any] = {"id": user_id}
503+
# Only include email if PII opt-in is enabled
504+
if email and os.getenv("SENTRY_SEND_PII", "false").lower() in ("true", "1"):
505+
user_data["email"] = email
506+
sentry_sdk.set_user(user_data)
501507
except ImportError:
502508
pass
503509

0 commit comments

Comments
 (0)