Skip to content

Commit 85f229f

Browse files
committed
fix(indexing): WebSocket race condition, NaN stats, file streaming
Fixes:
- Add delay before WS close to ensure client receives completion event
- Align stat field names between backend and frontend
- Add current_file to progress events for streaming display
- Preserve completed state when WebSocket disconnects

Root cause: Backend closed WebSocket immediately after sending completion event, causing race condition where frontend's onclose fired before onmessage could process the completion.

Closes #154
1 parent d2f4527 commit 85f229f

6 files changed

Lines changed: 50 additions & 28 deletions

File tree

backend/routes/ws_playground.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,9 @@ async def websocket_playground_index(websocket: WebSocket, job_id: str):
152152
job_id=job_id[:12],
153153
event_type=event_type
154154
)
155+
# Small delay to ensure client processes message before close
156+
# This prevents race condition where onclose fires before onmessage
157+
await asyncio.sleep(0.2)
155158
break
156159

157160
except json.JSONDecodeError:

backend/services/anonymous_indexer.py

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -43,9 +43,9 @@ def to_dict(self) -> dict:
4343
@dataclass
4444
class JobStats:
4545
"""Final stats for completed job."""
46-
files_indexed: int = 0
47-
functions_found: int = 0
48-
time_taken_seconds: float = 0
46+
files_processed: int = 0
47+
functions_indexed: int = 0
48+
indexing_time_seconds: float = 0
4949

5050
def to_dict(self) -> dict:
5151
return asdict(self)
@@ -345,12 +345,18 @@ async def run_indexing_job(
345345
job_manager.update_status(job_id, JobStatus.PROCESSING)
346346

347347
# Progress callback for real-time updates
348-
async def progress_callback(files_processed: int, functions_found: int, total: int):
348+
async def progress_callback(
349+
files_processed: int,
350+
functions_found: int,
351+
total: int,
352+
current_file: Optional[str] = None
353+
):
349354
job_manager.update_progress(
350355
job_id,
351356
files_processed=files_processed,
352357
functions_found=functions_found,
353-
files_total=total
358+
files_total=total,
359+
current_file=current_file
354360
)
355361

356362
# Run indexing with timeout
@@ -370,9 +376,9 @@ async def progress_callback(files_processed: int, functions_found: int, total: i
370376
# --- Step 3: Mark complete ---
371377
elapsed = time.time() - start_time
372378
stats = JobStats(
373-
files_indexed=file_count,
374-
functions_found=total_functions,
375-
time_taken_seconds=round(elapsed, 2)
379+
files_processed=file_count,
380+
functions_indexed=total_functions,
381+
indexing_time_seconds=round(elapsed, 2)
376382
)
377383

378384
job_manager.update_status(

backend/services/indexer_optimized.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -672,8 +672,11 @@ async def index_repository_with_progress(
672672

673673
files_processed = min(i + self.FILE_BATCH_SIZE, total_files)
674674

675-
# Send progress update
676-
await progress_callback(files_processed, len(all_functions_data), total_files)
675+
# Get the last file in this batch for display
676+
current_file = batch[-1].name if batch else None
677+
678+
# Send progress update with current file
679+
await progress_callback(files_processed, len(all_functions_data), total_files, current_file)
677680

678681
logger.debug("Processing files",
679682
processed=files_processed,

backend/tests/test_anonymous_indexing.py

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -199,9 +199,9 @@ def test_update_status_completed_with_stats(self, job_manager, mock_redis):
199199
})
200200

201201
stats = JobStats(
202-
files_indexed=100,
203-
functions_found=500,
204-
time_taken_seconds=45.5
202+
files_processed=100,
203+
functions_indexed=500,
204+
indexing_time_seconds=45.5
205205
)
206206

207207
result = job_manager.update_status(
@@ -259,13 +259,13 @@ def test_job_progress_none_excluded(self):
259259
def test_job_stats_to_dict(self):
260260
"""JobStats converts to dict correctly."""
261261
stats = JobStats(
262-
files_indexed=100,
263-
functions_found=500,
264-
time_taken_seconds=45.5
262+
files_processed=100,
263+
functions_indexed=500,
264+
indexing_time_seconds=45.5
265265
)
266266
d = stats.to_dict()
267-
assert d["files_indexed"] == 100
268-
assert d["time_taken_seconds"] == 45.5
267+
assert d["files_processed"] == 100
268+
assert d["indexing_time_seconds"] == 45.5
269269

270270

271271
# =============================================================================
@@ -608,9 +608,9 @@ def test_completed_job_returns_repo_id(self, mock_job_class, client):
608608
"created_at": "2024-01-01T00:00:00Z",
609609
"updated_at": "2024-01-01T00:01:00Z",
610610
"stats": {
611-
"files_indexed": 100,
612-
"functions_found": 500,
613-
"time_taken_seconds": 45.2
611+
"files_processed": 100,
612+
"functions_indexed": 500,
613+
"indexing_time_seconds": 45.2
614614
}
615615
}
616616
mock_job_class.return_value = mock_job_manager
@@ -621,7 +621,7 @@ def test_completed_job_returns_repo_id(self, mock_job_class, client):
621621
data = response.json()
622622
assert data["status"] == "completed"
623623
assert data["repo_id"] == "anon_idx_test123456"
624-
assert data["stats"]["files_indexed"] == 100
624+
assert data["stats"]["files_processed"] == 100
625625

626626
@patch('routes.playground.AnonymousIndexingJob')
627627
def test_failed_job_returns_error(self, mock_job_class, client):

backend/tests/test_ws_playground.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ def test_websocket_handles_already_completed_job(self):
6666
"job_id": "idx_test123",
6767
"status": "completed",
6868
"repo_id": "anon_test123",
69-
"stats": {"files_indexed": 100, "functions_found": 500}
69+
"stats": {"files_processed": 100, "functions_indexed": 500}
7070
})
7171

7272
with patch('routes.ws_playground.redis_client', mock_redis):
@@ -172,9 +172,9 @@ def test_completed_event_includes_stats(self):
172172
job_manager = AnonymousIndexingJob(mock_redis)
173173

174174
stats = JobStats(
175-
files_indexed=100,
176-
functions_found=500,
177-
time_taken_seconds=45.2
175+
files_processed=100,
176+
functions_indexed=500,
177+
indexing_time_seconds=45.2
178178
)
179179

180180
job_manager.update_status(
@@ -189,7 +189,7 @@ def test_completed_event_includes_stats(self):
189189

190190
assert event_data["type"] == "completed"
191191
assert event_data["repo_id"] == "anon_test123"
192-
assert event_data["stats"]["functions_found"] == 500
192+
assert event_data["stats"]["functions_indexed"] == 500
193193

194194
def test_processing_status_skips_duplicate_publish(self):
195195
"""PROCESSING status should not publish (handled by update_progress)."""

frontend/src/hooks/useIndexingWebSocket.ts

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -248,8 +248,18 @@ export function useIndexingWebSocket(
248248
if (jobId) {
249249
connect(jobId);
250250
} else {
251+
// Only cleanup connection, DON'T reset state!
252+
// This preserves completedStats when jobId becomes null after completion
251253
cleanup();
252-
setState(INITIAL_STATE);
254+
// Only reset if we were never completed (e.g., user navigated away during indexing)
255+
setState(prev => {
256+
if (prev.phase === 'completed') {
257+
// Keep completed state - just disconnect
258+
return { ...prev, connectionState: 'disconnected' };
259+
}
260+
// Reset if we were mid-indexing (user cancelled, navigated away, etc.)
261+
return INITIAL_STATE;
262+
});
253263
}
254264
return cleanup;
255265
}, [jobId, connect, cleanup]);

0 commit comments

Comments
 (0)