Reviewer note: line breaks, indentation and blank context lines in this patch
were reconstructed from a whitespace-collapsed copy, and the post-image blob
ids on the `index` lines predate the reviewer edits to extract_error_message,
upgrade and downgrade. Regenerate with `git diff` before applying.

diff --git a/backend/app/alembic/versions/5a59c6c29a82_add_error_message_column_in_collections_.py b/backend/app/alembic/versions/5a59c6c29a82_add_error_message_column_in_collections_.py
new file mode 100644
index 000000000..6be001ce0
--- /dev/null
+++ b/backend/app/alembic/versions/5a59c6c29a82_add_error_message_column_in_collections_.py
@@ -0,0 +1,30 @@
+"""add error message column in collections table
+
+Revision ID: 5a59c6c29a82
+Revises: e9dd35eff62c
+Create Date: 2025-08-11 15:40:40.127161
+
+"""
+from alembic import op
+import sqlalchemy as sa
+import sqlmodel.sql.sqltypes
+
+
+# revision identifiers, used by Alembic.
+revision = "5a59c6c29a82"
+down_revision = "e9dd35eff62c"
+branch_labels = None
+depends_on = None
+
+
+def upgrade():
+    """Add the nullable ``error_message`` column to the ``collection`` table."""
+    op.add_column(
+        "collection",
+        sa.Column("error_message", sqlmodel.sql.sqltypes.AutoString(), nullable=True),
+    )
+
+
+def downgrade():
+    """Drop the ``error_message`` column from the ``collection`` table."""
+    op.drop_column("collection", "error_message")
diff --git a/backend/app/api/routes/collections.py b/backend/app/api/routes/collections.py
index 8682cb69f..2a5b49fbd 100644
--- a/backend/app/api/routes/collections.py
+++ b/backend/app/api/routes/collections.py
@@ -1,6 +1,9 @@
 import inspect
 import logging
 import time
+import json
+import ast
+import re
 from uuid import UUID, uuid4
 from typing import Any, List, Optional
 from dataclasses import dataclass, field, fields, asdict, replace
@@ -13,6 +16,7 @@
 
 from app.api.deps import CurrentUser, SessionDep, CurrentUserOrgProject
 from app.core.cloud import AmazonCloudStorage
+from app.api.routes.responses import handle_openai_error
 from app.core.util import now, post_callback
 from app.crud import (
     DocumentCrud,
@@ -28,6 +32,38 @@
 router = APIRouter(prefix="/collections", tags=["collections"])
 
 
+def extract_error_message(err: Exception) -> str:
+    """Return a concise, human-readable message extracted from ``err``.
+
+    ``str(err)`` has any leading ``Error code: <n> -`` prefix removed and the
+    remainder is parsed as JSON, then as a Python literal. If either parse
+    yields a dict shaped like ``{"error": {"message": ...}}``, that message is
+    used; otherwise the prefix-stripped text itself is returned.
+
+    The result is always a ``str`` of at most 1000 characters: a non-string
+    ``message`` value is converted with ``str()`` instead of raising, so a
+    caller recording the failure is not interrupted by this helper.
+    """
+    body = re.sub(r"^Error code:\s*\d+\s*-\s*", "", str(err).strip())
+
+    message = None
+    # JSON first, then a Python literal (a dict repr uses single quotes/None).
+    for parse in (json.loads, ast.literal_eval):
+        try:
+            payload = parse(body)
+        except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
+            continue
+        error = payload.get("error") if isinstance(payload, dict) else None
+        if isinstance(error, dict) and error.get("message") is not None:
+            message = error["message"]
+            break
+
+    if not message:
+        message = body
+
+    return str(message).strip()[:1000]
+
+
 @dataclass
 class ResponsePayload:
     status: str
@@ -246,6 +282,9 @@ def do_create_collection(
             collection = collection_crud.read_one(UUID(payload.key))
             collection.status = CollectionStatus.failed
             collection.updated_at = now()
+            message = extract_error_message(err)
+            collection.error_message = message
+
             collection_crud._update(collection)
         except Exception as suberr:
             logger.warning(
@@ -283,7 +322,6 @@ def create_collection(
     collection_crud = CollectionCrud(session, current_user.id)
     collection_crud.create(collection)
 
-    # 2. Launch background task
     background_tasks.add_task(
         do_create_collection, session, current_user, request, payload, client
     )
diff --git a/backend/app/crud/document.py b/backend/app/crud/document.py
index f2f2e3a02..4d504c59a 100644
--- a/backend/app/crud/document.py
+++ b/backend/app/crud/document.py
@@ -81,13 +81,15 @@ def read_each(self, doc_ids: List[UUID]):
         )
         results = self.session.exec(statement).all()
 
-        (m, n) = map(len, (results, doc_ids))
-        if m != n:
+        (retrieved_count, requested_count) = map(len, (results, doc_ids))
+        if retrieved_count != requested_count:
             try:
-                raise ValueError(f"Requested {n} retrieved {m}")
+                raise ValueError(
+                    f"Requested atleast {requested_count} document retrieved {retrieved_count}"
+                )
             except ValueError as err:
                 logger.error(
-                    f"[DocumentCrud.read_each] Mismatch in retrieved documents | {{'owner_id': {self.owner_id}, 'requested_count': {n}, 'retrieved_count': {m}}}",
+                    f"[DocumentCrud.read_each] Mismatch in retrieved documents | {{'owner_id': {self.owner_id}, 'requested_count': {requested_count}, 'retrieved_count': {retrieved_count}}}",
                     exc_info=True,
                 )
                 raise
diff --git a/backend/app/models/collection.py b/backend/app/models/collection.py
index 965771a33..5b9119c6c 100644
--- a/backend/app/models/collection.py
+++ b/backend/app/models/collection.py
@@ -43,6 +43,7 @@ class Collection(SQLModel, table=True):
     llm_service_name: Optional[str] = Field(default=None, nullable=True)
 
     status: CollectionStatus = Field(default=CollectionStatus.processing)
+    error_message: Optional[str] = Field(default=None, nullable=True)
 
     created_at: datetime = Field(default_factory=now)
     updated_at: datetime = Field(default_factory=now)