Skip to content

Commit a4c8e11

Browse files
committed
fix(progress): report embedding progress (was showing 100% instantly)
Root cause: progress was only reported during file extraction (~2s), while embedding generation takes ~35s with no progress updates.
Fix:
- Added functions_total to the progress model
- Percent is now weighted: 20% file extraction, 80% embedding
- Progress is now reported during the embedding phase, with the function count
- The UI will now show a gradual 0-100% over the full indexing duration
1 parent dfe614a commit a4c8e11

3 files changed

Lines changed: 31 additions & 8 deletions

File tree

backend/routes/repos.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -292,7 +292,8 @@ async def progress_callback(
292292
files_processed: int,
293293
functions_found: int,
294294
total_files: int,
295-
current_file: str = None
295+
current_file: str = None,
296+
functions_total: int = 0
296297
):
297298
nonlocal tracked_total_files
298299
tracked_total_files = total_files
@@ -301,15 +302,16 @@ async def progress_callback(
301302
"Publishing progress event",
302303
repo_id=repo_id,
303304
files=f"{files_processed}/{total_files}",
304-
functions=functions_found,
305+
functions=f"{functions_found}/{functions_total}" if functions_total else str(functions_found),
305306
file=current_file
306307
)
307308
publisher.publish_progress(
308309
repo_id,
309310
files_processed,
310311
total_files,
311312
functions_found,
312-
current_file
313+
current_file,
314+
functions_total
313315
)
314316

315317
total_functions = await indexer.index_repository_with_progress(

backend/services/indexer_optimized.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -768,12 +768,23 @@ async def index_repository_with_progress(
768768
]
769769

770770
all_embeddings = []
771+
total_to_embed = len(embedding_texts)
771772
for i in range(0, len(embedding_texts), self.EMBEDDING_BATCH_SIZE):
772773
batch_texts = embedding_texts[i:i + self.EMBEDDING_BATCH_SIZE]
773774
batch_embeddings = await self._create_embeddings_batch(batch_texts)
774775
all_embeddings.extend(batch_embeddings)
775776

776-
logger.debug("Embeddings generated", completed=len(all_embeddings), total=len(embedding_texts))
777+
# Report embedding progress - this is where most time is spent
778+
embedded_count = len(all_embeddings)
779+
await progress_callback(
780+
total_files, # Files done
781+
embedded_count, # Functions embedded so far
782+
total_files,
783+
f"Embedding functions ({embedded_count}/{total_to_embed})...",
784+
total_to_embed # Total functions to embed
785+
)
786+
787+
logger.debug("Embeddings generated", completed=embedded_count, total=total_to_embed)
777788

778789
# Prepare vectors for Pinecone
779790
logger.debug("Uploading to Pinecone")

backend/services/indexing_events.py

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -33,10 +33,18 @@ class IndexingProgress:
3333
functions_found: int
3434
current_file: Optional[str] = None
3535
percent: int = 0
36+
functions_total: int = 0 # Total functions to embed (set during embedding phase)
3637

3738
def __post_init__(self):
38-
if self.files_total > 0:
39-
self.percent = int((self.files_processed / self.files_total) * 100)
39+
# If we have functions_total, we're in embedding phase (slow) - weight it 80%
40+
# File extraction is fast, weight it 20%
41+
if self.functions_total > 0 and self.files_total > 0:
42+
file_progress = (self.files_processed / self.files_total) * 20 # 0-20%
43+
embed_progress = (self.functions_found / self.functions_total) * 80 # 0-80%
44+
self.percent = int(file_progress + embed_progress)
45+
elif self.files_total > 0:
46+
# Still in file extraction phase (0-20%)
47+
self.percent = int((self.files_processed / self.files_total) * 20)
4048

4149

4250
@dataclass
@@ -118,14 +126,16 @@ def publish_progress(
118126
files_processed: int,
119127
files_total: int,
120128
functions_found: int,
121-
current_file: Optional[str] = None
129+
current_file: Optional[str] = None,
130+
functions_total: int = 0
122131
) -> bool:
123132
"""Publish indexing progress update."""
124133
progress = IndexingProgress(
125134
files_processed=files_processed,
126135
files_total=files_total,
127136
functions_found=functions_found,
128-
current_file=current_file
137+
current_file=current_file,
138+
functions_total=functions_total
129139
)
130140

131141
return self._publish(entity_id, {

0 commit comments

Comments
 (0)