From d0de1ce2867b88a3cfae686e2bfff841c89d3e76 Mon Sep 17 00:00:00 2001
From: AkhileshNegi
Date: Tue, 5 May 2026 11:34:06 +0530
Subject: [PATCH 1/8] v1.0 readiness database cleanups

---
 .../versions/055_v1_query_optimization.py     | 198 ++++++++++++++++++
 .../versions/056_drop_redundant_indexes.py    |  98 +++++++++
 backend/app/api/deps.py                       |   2 +-
 backend/app/core/security.py                  |   2 +-
 backend/app/crud/api_key.py                   |   5 +-
 backend/app/crud/assistants.py                |   5 +-
 backend/app/crud/auth.py                      |   2 +-
 .../crud/document/doc_transformation_job.py   |   4 +-
 backend/app/crud/document/document.py         |   7 +-
 backend/app/crud/fine_tuning.py               |  56 ++---
 backend/app/crud/model_evaluation.py          |   4 +-
 backend/app/crud/openai_conversation.py       |  11 +-
 backend/app/crud/thread_results.py            |  10 +-
 backend/app/models/__init__.py                |   4 +-
 backend/app/models/api_key.py                 |   6 -
 backend/app/models/assistants.py              |   6 -
 backend/app/models/document.py                |   4 -
 backend/app/models/fine_tuning.py             |  10 +-
 backend/app/models/model_evaluation.py        |   9 +-
 backend/app/models/openai_conversation.py     |   6 -
 backend/app/models/project.py                 |   4 +-
 backend/app/models/threads.py                 |   4 +-
 backend/app/seed_data/seed_data.json          |   2 -
 backend/app/seed_data/seed_data.py            |   2 -
 .../documents/test_route_document_remove.py   |   2 +-
 backend/app/tests/api/routes/test_fine_tuning.py |   4 +-
 backend/app/tests/api/routes/test_threads.py  |  14 +-
 backend/app/tests/core/test_security.py       |   3 +-
 .../collection/test_crud_collection_delete.py |   2 +-
 .../documents/test_crud_document_delete.py    |   2 +-
 .../documents/test_crud_document_read_many.py |   2 +-
 .../documents/test_crud_document_update.py    |   2 +-
 .../documents/test_doc_transformation_job.py  |   7 +-
 backend/app/tests/crud/test_api_key.py        |   4 +-
 backend/app/tests/crud/test_assistants.py     |   5 +-
 backend/app/tests/crud/test_fine_tuning.py    |   2 +-
 backend/app/tests/crud/test_onboarding.py     |   2 +-
 .../tests/crud/test_openai_conversation.py    |   4 +-
 backend/app/tests/seed_data/seed_data.json    |   2 -
 backend/app/tests/seed_data/seed_data.py      |   2 -
 backend/app/tests/utils/auth.py               |   2 +-
 backend/app/tests/utils/document.py           |   1 -
 backend/app/tests/utils/test_data.py          |   6 +-
 backend/app/tests/utils/utils.py              |   2 +-
 44 files changed, 390 insertions(+), 141 deletions(-)
 create mode 100644 backend/app/alembic/versions/055_v1_query_optimization.py
 create mode 100644 backend/app/alembic/versions/056_drop_redundant_indexes.py

diff --git a/backend/app/alembic/versions/055_v1_query_optimization.py b/backend/app/alembic/versions/055_v1_query_optimization.py
new file mode 100644
index 000000000..ce4821b70
--- /dev/null
+++ b/backend/app/alembic/versions/055_v1_query_optimization.py
@@ -0,0 +1,198 @@
+"""v1.0 query optimization: project_id + composite indexes, drop is_deleted
+
+Revision ID: 055
+Revises: 054
+Create Date: 2026-05-05 12:00:00.000000
+
+Bundles three coordinated changes for the v1.0 lock:
+
+1. Single-column `project_id` btree indexes on every table-mapped model
+   that filters by project_id (the dominant tenant filter).
+   organization_id-only access is rare and intentionally deferred.
+   Tables already covered by a leading-column index are skipped:
+     - openai_assistant: UNIQUE(project_id, assistant_id) leads with project_id
+     - batch_job: ix_batch_job_project_id (migration 036)
+
+2. Composite + partial indexes for hot list/pagination paths matching:
+       WHERE project_id = ? [AND deleted_at IS NULL] ORDER BY <timestamp> DESC
+
+3. Drop the redundant `is_deleted` boolean from every table that also
+   carries `deleted_at`.
`deleted_at IS NULL` becomes the single source + of truth for soft-delete: same query cost when paired with a partial + index, preserves audit timestamp, no dual-write drift. + Affected tables: openai_assistant, apikey, document, + openai_conversation, fine_tuning, model_evaluation. + +Execution model: + Phase A (transactional): backfill deleted_at where is_deleted was true + but deleted_at was never set, then drop the is_deleted columns. + Phase B (autocommit_block): CREATE INDEX CONCURRENTLY for every index + so no AccessExclusiveLock is taken on hot tables. +""" + +import sqlalchemy as sa +from alembic import op + + +revision = "055" +down_revision = "054" +branch_labels = None +depends_on = None + + +# Tables that currently carry both `is_deleted` and `deleted_at`. +IS_DELETED_TABLES = [ + "openai_assistant", + "apikey", + "document", + "openai_conversation", + "fine_tuning", + "model_evaluation", +] + + +# Single-column FK / multi-tenant filter indexes (P0). +# (table_name, column_name, index_name) +FK_INDEXES: list[tuple[str, str, str]] = [ + # project_id across tables that filter by tenant + ("apikey", "project_id", "ix_apikey_project_id"), + ("credential", "project_id", "ix_credential_project_id"), + ("collection", "project_id", "ix_collection_project_id"), + ("collection_jobs", "project_id", "ix_collection_jobs_project_id"), + ("document", "project_id", "ix_document_project_id"), + ("evaluation_dataset", "project_id", "ix_evaluation_dataset_project_id"), + ("evaluation_run", "project_id", "ix_evaluation_run_project_id"), + ("file", "project_id", "ix_file_project_id"), + ("fine_tuning", "project_id", "ix_fine_tuning_project_id"), + ("job", "project_id", "ix_job_project_id"), + ("llm_call", "project_id", "ix_llm_call_project_id"), + ("llm_chain", "project_id", "ix_llm_chain_project_id"), + ("model_evaluation", "project_id", "ix_model_evaluation_project_id"), + ("openai_conversation", "project_id", "ix_openai_conversation_project_id"), + ("stt_result", "project_id", "ix_stt_result_project_id"), + ("stt_sample", "project_id", "ix_stt_sample_project_id"), + ("tts_result", "project_id", "ix_tts_result_project_id"), + ("user_project", "project_id", "ix_user_project_project_id"), + # Other un-indexed FKs surfaced by the audit + ("apikey", "user_id", "ix_apikey_user_id"), + ("collection_jobs", "collection_id", "ix_collection_jobs_collection_id"), + ( + "doc_transformation_job", + "source_document_id", + "ix_doc_transformation_job_source_document_id", + ), + ( + "doc_transformation_job", + "transformed_document_id", + "ix_doc_transformation_job_transformed_document_id", + ), + ("evaluation_run", "dataset_id", "ix_evaluation_run_dataset_id"), +] + + +# Composite + partial indexes (P1). 
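Each composite below pairs the tenant filter with the soft-delete predicate and the sort key, so a hot list query can be satisfied by a single index scan. A minimal sketch for spot-checking that the planner actually picks one of these up, assuming a reachable `DATABASE_URL` environment variable and a database where this migration has already run (the query shape mirrors `DocumentCrud.read_many`):

```python
import os

from sqlalchemy import create_engine, text

# Hypothetical connection string; point it at any environment where
# migration 055 has been applied.
engine = create_engine(os.environ["DATABASE_URL"])

# Same shape as the hot document-list path: tenant filter, soft-delete
# predicate, newest-first pagination.
HOT_QUERY = text(
    "EXPLAIN (FORMAT TEXT) "
    "SELECT * FROM document "
    "WHERE project_id = :pid AND deleted_at IS NULL "
    "ORDER BY inserted_at DESC LIMIT 50"
)

with engine.connect() as conn:
    plan = "\n".join(conn.execute(HOT_QUERY, {"pid": 1}).scalars())
    print(plan)
    # On near-empty tables the planner may legitimately prefer a seq
    # scan, so treat a miss as a prompt to investigate, not a failure.
    if "ix_document_project_inserted_at_active" not in plan:
        print("warning: composite partial index not chosen")
```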
+# (index_name, body_after_INDEX_NAME)
+COMPOSITE_INDEXES: list[tuple[str, str]] = [
+    (
+        "ix_document_project_inserted_at_active",
+        'ON "document" ("project_id", "inserted_at" DESC) WHERE "deleted_at" IS NULL',
+    ),
+    (
+        "ix_openai_conversation_project_inserted_at_active",
+        'ON "openai_conversation" ("project_id", "inserted_at" DESC) WHERE "deleted_at" IS NULL',
+    ),
+    (
+        "ix_openai_conversation_ancestor_project_inserted_at_active",
+        'ON "openai_conversation" ("ancestor_response_id", "project_id", "inserted_at" DESC) WHERE "deleted_at" IS NULL',
+    ),
+    (
+        "ix_openai_conversation_response_project_active",
+        'ON "openai_conversation" ("response_id", "project_id") WHERE "deleted_at" IS NULL',
+    ),
+    (
+        "ix_collection_jobs_project_status_inserted_at",
+        'ON "collection_jobs" ("project_id", "status", "inserted_at" DESC)',
+    ),
+    (
+        "ix_evaluation_run_org_project_type_inserted_at",
+        'ON "evaluation_run" ("organization_id", "project_id", "type", "inserted_at" DESC)',
+    ),
+    (
+        "ix_evaluation_dataset_org_project_type_inserted_at",
+        'ON "evaluation_dataset" ("organization_id", "project_id", "type", "inserted_at" DESC)',
+    ),
+    (
+        "ix_llm_call_job_created_at_active",
+        'ON "llm_call" ("job_id", "created_at" DESC) WHERE "deleted_at" IS NULL',
+    ),
+    (
+        "ix_model_evaluation_document_project_updated_at",
+        'ON "model_evaluation" ("document_id", "project_id", "updated_at" DESC) WHERE "deleted_at" IS NULL',
+    ),
+    (
+        "ix_model_config_active_provider_name",
+        'ON "global"."model_config" ("is_active", "provider", "model_name")',
+    ),
+    (
+        "ix_collection_project_active",
+        'ON "collection" ("project_id") WHERE "deleted_at" IS NULL',
+    ),
+    # Composite FK indexes that match the actual query shape
+    (
+        "ix_fine_tuning_document_project",
+        'ON "fine_tuning" ("document_id", "project_id")',
+    ),
+    (
+        "ix_model_evaluation_fine_tuning_project",
+        'ON "model_evaluation" ("fine_tuning_id", "project_id")',
+    ),
+    # Partial index for active-key listing on apikey
+    (
+        "ix_apikey_project_active",
+        'ON "apikey" ("project_id") WHERE "deleted_at" IS NULL',
+    ),
+]
+
+
+def upgrade():
+    # Phase A (transactional): preserve audit timestamp, drop redundant column.
+    for table in IS_DELETED_TABLES:
+        op.execute(
+            f"UPDATE {table} "
+            f"SET deleted_at = NOW() "
+            f"WHERE is_deleted = TRUE AND deleted_at IS NULL"
+        )
+        op.drop_column(table, "is_deleted")
+
+    # Phase B (autocommit): concurrent index creation. Each statement
+    # runs in its own implicit transaction, as required by the
+    # CONCURRENTLY variant.
+    with op.get_context().autocommit_block():
+        for table, column, index in FK_INDEXES:
+            op.execute(
+                f'CREATE INDEX CONCURRENTLY IF NOT EXISTS "{index}" '
+                f'ON "{table}" ("{column}")'
+            )
+        for index, body in COMPOSITE_INDEXES:
+            op.execute(f'CREATE INDEX CONCURRENTLY IF NOT EXISTS "{index}" {body}')
+
+
+def downgrade():
+    with op.get_context().autocommit_block():
+        for index, body in COMPOSITE_INDEXES:
+            schema_qualified = '"global".' if '"global"."model_config"' in body else ""
+            op.execute(f'DROP INDEX CONCURRENTLY IF EXISTS {schema_qualified}"{index}"')
+        for _table, _column, index in FK_INDEXES:
+            op.execute(f'DROP INDEX CONCURRENTLY IF EXISTS "{index}"')
+
+    for table in IS_DELETED_TABLES:
+        op.add_column(
+            table,
+            sa.Column(
+                "is_deleted",
+                sa.Boolean(),
+                nullable=False,
+                server_default=sa.text("false"),
+                comment="Soft delete flag",
+            ),
+        )
+        op.execute(f"UPDATE {table} SET is_deleted = TRUE WHERE deleted_at IS NOT NULL")
diff --git a/backend/app/alembic/versions/056_drop_redundant_indexes.py b/backend/app/alembic/versions/056_drop_redundant_indexes.py
new file mode 100644
index 000000000..fbe76be1a
--- /dev/null
+++ b/backend/app/alembic/versions/056_drop_redundant_indexes.py
@@ -0,0 +1,98 @@
+"""drop redundant indexes superseded by 055 composites
+
+Revision ID: 056
+Revises: 055
+Create Date: 2026-05-05 14:00:00.000000
+
+Drops indexes that are now redundant after migration 055 added the
+real composite/partial indexes that match actual query shapes:
+
+    ix_project_name
+        Subsumed by uq_project_name_org_id (name is leading column).
+        No code path queries Project.name without organization_id.
+
+    ix_credential_provider
+        Subsumed by uq_credential_org_project_provider. All four CRUD
+        paths in crud/credentials.py filter (org, project, provider) — never
+        provider alone.
+
+    ix_openai_conversation_previous_response_id
+        Zero query consumers; previous_response_id is read but never
+        filtered on in any WHERE clause.
+
+    ix_openai_conversation_response_id
+        Superseded by ix_openai_conversation_response_project_active
+        (project-scoped partial), which exactly matches the CRUD predicates
+        in crud/openai_conversation.py:get_conversation_by_response_id.
+
+    ix_openai_conversation_ancestor_response_id
+        Superseded by
+        ix_openai_conversation_ancestor_project_inserted_at_active, which
+        matches the (ancestor_response_id, project_id) + ORDER BY shape
+        used in crud/openai_conversation.py:get_conversation_by_ancestor_id
+        and the /responses thread reconstruction path.
+
+    idx_file_type
+        Low cardinality (4 enum values), and the only consumer in
+        crud/file.py:147 always pairs file_type with (organization_id,
+        project_id). idx_file_org_project covers the query; an extra
+        in-memory filter on file_type is cheaper than a second index hit.
+
+    idx_eval_run_status_org / idx_eval_run_status_project
+        Both lead with low-cardinality status. Real CRUD queries lead with
+        (organization_id, project_id, type), now covered by
+        ix_evaluation_run_org_project_type_inserted_at.
+
+Uses DROP INDEX CONCURRENTLY so no AccessExclusiveLock is taken.
+Downgrade recreates the original indexes (also concurrently) so the
+schema can be restored bit-for-bit if needed.
+"""
+
+from alembic import op
+
+
+revision = "056"
+down_revision = "055"
+branch_labels = None
+depends_on = None
+
+
+# (index_name, recreate_sql_body)
+# recreate_sql_body is "ON \"<table>\" (<columns>)", used by downgrade only.
+INDEXES_TO_DROP: list[tuple[str, str]] = [ + ("ix_project_name", 'ON "project" ("name")'), + ("ix_credential_provider", 'ON "credential" ("provider")'), + ( + "ix_openai_conversation_previous_response_id", + 'ON "openai_conversation" ("previous_response_id")', + ), + ( + "ix_openai_conversation_response_id", + 'ON "openai_conversation" ("response_id")', + ), + ( + "ix_openai_conversation_ancestor_response_id", + 'ON "openai_conversation" ("ancestor_response_id")', + ), + ("idx_file_type", 'ON "file" ("file_type")'), + ( + "idx_eval_run_status_org", + 'ON "evaluation_run" ("status", "organization_id")', + ), + ( + "idx_eval_run_status_project", + 'ON "evaluation_run" ("status", "project_id")', + ), +] + + +def upgrade(): + with op.get_context().autocommit_block(): + for index_name, _body in INDEXES_TO_DROP: + op.execute(f'DROP INDEX CONCURRENTLY IF EXISTS "{index_name}"') + + +def downgrade(): + with op.get_context().autocommit_block(): + for index_name, body in INDEXES_TO_DROP: + op.execute(f'CREATE INDEX CONCURRENTLY IF NOT EXISTS "{index_name}" {body}') diff --git a/backend/app/api/deps.py b/backend/app/api/deps.py index 526c5877b..eb7dd9d9a 100644 --- a/backend/app/api/deps.py +++ b/backend/app/api/deps.py @@ -117,7 +117,7 @@ def _authenticate_with_jwt(session: Session, token: str) -> AuthContext: and_( APIKey.user_id == user.id, APIKey.project_id == project.id, - APIKey.is_deleted.is_(False), + APIKey.deleted_at.is_(None), ) ) .limit(1) diff --git a/backend/app/core/security.py b/backend/app/core/security.py index ef2db7396..27481a256 100644 --- a/backend/app/core/security.py +++ b/backend/app/core/security.py @@ -317,7 +317,7 @@ def verify(cls, session: Session, raw_key: str) -> AuthContext | None: .where( and_( APIKey.key_prefix == key_prefix, - APIKey.is_deleted.is_(False), + APIKey.deleted_at.is_(None), ) ) .join(User, User.id == APIKey.user_id) diff --git a/backend/app/crud/api_key.py b/backend/app/crud/api_key.py index 374b496e9..23c0f583e 100644 --- a/backend/app/crud/api_key.py +++ b/backend/app/crud/api_key.py @@ -30,7 +30,7 @@ def read_one(self, key_id: UUID) -> APIKey | None: and_( APIKey.id == key_id, APIKey.project_id == self.project_id, - APIKey.is_deleted.is_(False), + APIKey.deleted_at.is_(None), ) ) return self.session.exec(statement).one_or_none() @@ -44,7 +44,7 @@ def read_all(self, skip: int = 0, limit: int = 100) -> list[APIKey]: .where( and_( APIKey.project_id == self.project_id, - APIKey.is_deleted.is_(False), + APIKey.deleted_at.is_(None), ) ) .offset(skip) @@ -105,7 +105,6 @@ def delete(self, key_id: UUID) -> None: if not api_key: raise HTTPException(status_code=404, detail="API Key not found") - api_key.is_deleted = True api_key.deleted_at = now() api_key.updated_at = now() self.session.add(api_key) diff --git a/backend/app/crud/assistants.py b/backend/app/crud/assistants.py index c32f820da..950ef62b7 100644 --- a/backend/app/crud/assistants.py +++ b/backend/app/crud/assistants.py @@ -24,7 +24,7 @@ def get_assistant_by_id( and_( Assistant.assistant_id == assistant_id, Assistant.project_id == project_id, - Assistant.is_deleted == False, + Assistant.deleted_at.is_(None), ) ) return session.exec(statement).first() @@ -43,7 +43,7 @@ def get_assistants_by_project( select(Assistant) .where( Assistant.project_id == project_id, - Assistant.is_deleted == False, + Assistant.deleted_at.is_(None), ) .offset(skip) .limit(limit) @@ -272,7 +272,6 @@ def delete_assistant( ) raise HTTPException(status_code=404, detail="Assistant not found.") - existing_assistant.is_deleted = 
True existing_assistant.deleted_at = now() session.add(existing_assistant) session.commit() diff --git a/backend/app/crud/auth.py b/backend/app/crud/auth.py index 39147b86e..27e703fe9 100644 --- a/backend/app/crud/auth.py +++ b/backend/app/crud/auth.py @@ -41,7 +41,7 @@ def get_user_accessible_projects(*, session: Session, user_id: int) -> list[dict .where( and_( APIKey.user_id == user_id, - APIKey.is_deleted.is_(False), + APIKey.deleted_at.is_(None), Organization.is_active.is_(True), Project.is_active.is_(True), ) diff --git a/backend/app/crud/document/doc_transformation_job.py b/backend/app/crud/document/doc_transformation_job.py index 0fd278013..3e329df70 100644 --- a/backend/app/crud/document/doc_transformation_job.py +++ b/backend/app/crud/document/doc_transformation_job.py @@ -41,7 +41,7 @@ def read_one(self, job_id: UUID) -> DocTransformationJob: and_( DocTransformationJob.id == job_id, Document.project_id == self.project_id, - Document.is_deleted.is_(False), + Document.deleted_at.is_(None), ) ) ) @@ -62,7 +62,7 @@ def read_each(self, job_ids: set[UUID]) -> list[DocTransformationJob]: and_( DocTransformationJob.id.in_(list(job_ids)), Document.project_id == self.project_id, - Document.is_deleted.is_(False), + Document.deleted_at.is_(None), ) ) ) diff --git a/backend/app/crud/document/document.py b/backend/app/crud/document/document.py index 35e4d86fb..fbadea255 100644 --- a/backend/app/crud/document/document.py +++ b/backend/app/crud/document/document.py @@ -20,7 +20,7 @@ def read_one(self, doc_id: UUID) -> Document: and_( Document.id == doc_id, Document.project_id == self.project_id, - Document.is_deleted.is_(False), + Document.deleted_at.is_(None), ) ) @@ -39,7 +39,7 @@ def read_many( limit: int | None = None, ) -> tuple[list[Document], bool]: statement = select(Document).where( - and_(Document.project_id == self.project_id, Document.is_deleted.is_(False)) + and_(Document.project_id == self.project_id, Document.deleted_at.is_(None)) ) statement = statement.order_by(Document.inserted_at.desc()) @@ -81,7 +81,7 @@ def read_each(self, doc_ids: list[UUID]): and_( Document.project_id == self.project_id, Document.id.in_(doc_ids), - Document.is_deleted.is_(False), + Document.deleted_at.is_(None), ) ) results = self.session.exec(statement).all() @@ -130,7 +130,6 @@ def update(self, document: Document): def delete(self, doc_id: UUID): document = self.read_one(doc_id) - document.is_deleted = True document.deleted_at = now() document.updated_at = now() diff --git a/backend/app/crud/fine_tuning.py b/backend/app/crud/fine_tuning.py index 61a0ccc6a..8d9e5fed8 100644 --- a/backend/app/crud/fine_tuning.py +++ b/backend/app/crud/fine_tuning.py @@ -7,7 +7,7 @@ from app.core.util import now from app.models import ( - Fine_Tuning, + FineTuning, FineTuningJobCreate, FineTuningUpdate, FineTuningStatus, @@ -24,7 +24,7 @@ def create_fine_tuning_job( status: FineTuningStatus = FineTuningStatus.pending, project_id: int = None, organization_id: int = None, -) -> tuple[Fine_Tuning, bool]: +) -> tuple[FineTuning, bool]: active_jobs = fetch_active_jobs_by_document_id( session=session, document_id=request.document_id, @@ -53,7 +53,7 @@ def create_fine_tuning_job( "status": status, } - fine_tune = Fine_Tuning(**base_data) + fine_tune = FineTuning(**base_data) fine_tune.updated_at = now() session.add(fine_tune) @@ -68,11 +68,11 @@ def create_fine_tuning_job( def fetch_by_provider_job_id( session: Session, provider_job_id: str, project_id: int -) -> Fine_Tuning: +) -> FineTuning: job = session.exec( - 
select(Fine_Tuning).where( - Fine_Tuning.provider_job_id == provider_job_id, - Fine_Tuning.project_id == project_id, + select(FineTuning).where( + FineTuning.provider_job_id == provider_job_id, + FineTuning.project_id == project_id, ) ).one_or_none() @@ -85,10 +85,10 @@ def fetch_by_provider_job_id( return job -def fetch_by_id(session: Session, job_id: int, project_id: int) -> Fine_Tuning: +def fetch_by_id(session: Session, job_id: int, project_id: int) -> FineTuning: job = session.exec( - select(Fine_Tuning).where( - Fine_Tuning.id == job_id, Fine_Tuning.project_id == project_id + select(FineTuning).where( + FineTuning.id == job_id, FineTuning.project_id == project_id ) ).one_or_none() @@ -110,15 +110,15 @@ def fetch_by_document_id( project_id: int, split_ratio: float = None, base_model: Optional[str] = None, -) -> list[Fine_Tuning]: - query = select(Fine_Tuning).where( - Fine_Tuning.document_id == document_id, Fine_Tuning.project_id == project_id +) -> list[FineTuning]: + query = select(FineTuning).where( + FineTuning.document_id == document_id, FineTuning.project_id == project_id ) if split_ratio is not None: - query = query.where(Fine_Tuning.split_ratio == split_ratio) + query = query.where(FineTuning.split_ratio == split_ratio) if base_model is not None: - query = query.where(Fine_Tuning.base_model == base_model) + query = query.where(FineTuning.base_model == base_model) jobs = session.exec(query).all() logger.info( @@ -134,39 +134,39 @@ def fetch_active_jobs_by_document_id( split_ratio: Optional[float] = None, base_model: Optional[str] = None, exclude_job_id: Optional[int] = None, -) -> list["Fine_Tuning"]: +) -> list["FineTuning"]: """ Return all ACTIVE jobs for the given document & project. - Active = status != failed AND is_deleted is false. + Active = status != failed AND not soft-deleted. """ stmt = ( - select(Fine_Tuning) + select(FineTuning) .where( - Fine_Tuning.document_id == document_id, - Fine_Tuning.project_id == project_id, - Fine_Tuning.is_deleted.is_(False), - Fine_Tuning.status != FineTuningStatus.failed, + FineTuning.document_id == document_id, + FineTuning.project_id == project_id, + FineTuning.deleted_at.is_(None), + FineTuning.status != FineTuningStatus.failed, ) - .order_by(Fine_Tuning.inserted_at.desc()) + .order_by(FineTuning.inserted_at.desc()) ) if split_ratio is not None: - stmt = stmt.where(Fine_Tuning.split_ratio == split_ratio) + stmt = stmt.where(FineTuning.split_ratio == split_ratio) if base_model is not None: - stmt = stmt.where(Fine_Tuning.base_model == base_model) + stmt = stmt.where(FineTuning.base_model == base_model) if exclude_job_id is not None: - stmt = stmt.where(Fine_Tuning.id != exclude_job_id) + stmt = stmt.where(FineTuning.id != exclude_job_id) return session.exec(stmt).all() def update_finetune_job( session: Session, - job: Fine_Tuning, + job: FineTuning, update: FineTuningUpdate, -) -> Fine_Tuning: +) -> FineTuning: for key, value in update.model_dump(exclude_unset=True).items(): setattr(job, key, value) diff --git a/backend/app/crud/model_evaluation.py b/backend/app/crud/model_evaluation.py index 51fa7a486..10cfbe667 100644 --- a/backend/app/crud/model_evaluation.py +++ b/backend/app/crud/model_evaluation.py @@ -154,14 +154,14 @@ def fetch_active_model_evals( ) -> list["ModelEvaluation"]: """ Return all ACTIVE model evaluations for the given document & project. - Active = status != failed AND is_deleted is false. + Active = status != failed AND not soft-deleted. 
""" stmt = ( select(ModelEvaluation) .where( ModelEvaluation.fine_tuning_id == fine_tuning_id, ModelEvaluation.project_id == project_id, - ModelEvaluation.is_deleted.is_(False), + ModelEvaluation.deleted_at.is_(None), ModelEvaluation.status != "failed", ) .order_by(ModelEvaluation.inserted_at.desc()) diff --git a/backend/app/crud/openai_conversation.py b/backend/app/crud/openai_conversation.py index 7ef127b47..83f3316c4 100644 --- a/backend/app/crud/openai_conversation.py +++ b/backend/app/crud/openai_conversation.py @@ -17,7 +17,7 @@ def get_conversation_by_id( statement = select(OpenAIConversation).where( OpenAIConversation.id == conversation_id, OpenAIConversation.project_id == project_id, - OpenAIConversation.is_deleted == False, + OpenAIConversation.deleted_at.is_(None), ) result = session.exec(statement).first() return result @@ -32,7 +32,7 @@ def get_conversation_by_response_id( statement = select(OpenAIConversation).where( OpenAIConversation.response_id == response_id, OpenAIConversation.project_id == project_id, - OpenAIConversation.is_deleted == False, + OpenAIConversation.deleted_at.is_(None), ) result = session.exec(statement).first() return result @@ -49,7 +49,7 @@ def get_conversation_by_ancestor_id( .where( OpenAIConversation.ancestor_response_id == ancestor_response_id, OpenAIConversation.project_id == project_id, - OpenAIConversation.is_deleted == False, + OpenAIConversation.deleted_at.is_(None), ) .order_by(OpenAIConversation.inserted_at.desc()) .limit(1) @@ -108,7 +108,7 @@ def get_conversations_count_by_project( """ statement = select(func.count(OpenAIConversation.id)).where( OpenAIConversation.project_id == project_id, - OpenAIConversation.is_deleted == False, + OpenAIConversation.deleted_at.is_(None), ) result = session.exec(statement).one() return result @@ -127,7 +127,7 @@ def get_conversations_by_project( select(OpenAIConversation) .where( OpenAIConversation.project_id == project_id, - OpenAIConversation.is_deleted == False, + OpenAIConversation.deleted_at.is_(None), ) .order_by(OpenAIConversation.inserted_at.desc()) .offset(skip) @@ -175,7 +175,6 @@ def delete_conversation( if not db_conversation: return None - db_conversation.is_deleted = True db_conversation.deleted_at = now() session.add(db_conversation) session.commit() diff --git a/backend/app/crud/thread_results.py b/backend/app/crud/thread_results.py index 7a2691ff2..3f9c05c5a 100644 --- a/backend/app/crud/thread_results.py +++ b/backend/app/crud/thread_results.py @@ -1,14 +1,14 @@ import logging from sqlmodel import Session, select from datetime import datetime -from app.models import OpenAIThreadCreate, OpenAI_Thread +from app.models import OpenAIThreadCreate, OpenAIThread from app.utils import mask_string logger = logging.getLogger(__name__) def upsert_thread_result(session: Session, data: OpenAIThreadCreate): - statement = select(OpenAI_Thread).where(OpenAI_Thread.thread_id == data.thread_id) + statement = select(OpenAIThread).where(OpenAIThread.thread_id == data.thread_id) existing = session.exec(statement).first() if existing: @@ -21,7 +21,7 @@ def upsert_thread_result(session: Session, data: OpenAIThreadCreate): f"[upsert_thread_result] Updated existing thread result in the db with ID: {mask_string(data.thread_id)}" ) else: - new_thread = OpenAI_Thread(**data.dict()) + new_thread = OpenAIThread(**data.dict()) session.add(new_thread) logger.info( f"[upsert_thread_result] Created new thread result in the db with ID: {mask_string(new_thread.thread_id)}" @@ -29,6 +29,6 @@ def 
upsert_thread_result(session: Session, data: OpenAIThreadCreate): session.commit() -def get_thread_result(session: Session, thread_id: str) -> OpenAI_Thread | None: - statement = select(OpenAI_Thread).where(OpenAI_Thread.thread_id == thread_id) +def get_thread_result(session: Session, thread_id: str) -> OpenAIThread | None: + statement = select(OpenAIThread).where(OpenAIThread.thread_id == thread_id) return session.exec(statement).first() diff --git a/backend/app/models/__init__.py b/backend/app/models/__init__.py index 05f39032e..888c1c891 100644 --- a/backend/app/models/__init__.py +++ b/backend/app/models/__init__.py @@ -91,7 +91,7 @@ ) from .file import AudioUploadResponse, File, FilePublic, FileType from .fine_tuning import ( - Fine_Tuning, + FineTuning, FineTuningJobBase, FineTuningJobCreate, FineTuningJobPublic, @@ -161,7 +161,7 @@ ResponsesAPIRequest, ResponsesSyncAPIRequest, ) -from .threads import OpenAI_Thread, OpenAIThreadBase, OpenAIThreadCreate +from .threads import OpenAIThread, OpenAIThreadBase, OpenAIThreadCreate from .user import ( NewPassword, UpdatePassword, diff --git a/backend/app/models/api_key.py b/backend/app/models/api_key.py index e8bd6c1b0..5780a8029 100644 --- a/backend/app/models/api_key.py +++ b/backend/app/models/api_key.py @@ -73,12 +73,6 @@ class APIKey(APIKeyBase, table=True): nullable=False, sa_column_kwargs={"comment": "Bcrypt hash of the secret of the API key"}, ) - is_deleted: bool = Field( - default=False, - nullable=False, - sa_column_kwargs={"comment": "Soft delete flag"}, - ) - # Timestamps inserted_at: datetime = Field( default_factory=now, diff --git a/backend/app/models/assistants.py b/backend/app/models/assistants.py index bb9b3318b..bdd5c4293 100644 --- a/backend/app/models/assistants.py +++ b/backend/app/models/assistants.py @@ -74,12 +74,6 @@ class Assistant(AssistantBase, table=True): "comment": "Parameter that controls maximum number of results to return" }, ) - is_deleted: bool = Field( - default=False, - nullable=False, - sa_column_kwargs={"comment": "Soft delete flag"}, - ) - # Foreign keys project_id: int = Field( foreign_key="project.id", diff --git a/backend/app/models/document.py b/backend/app/models/document.py index 12843e72a..c0aa6c8b6 100644 --- a/backend/app/models/document.py +++ b/backend/app/models/document.py @@ -37,10 +37,6 @@ class Document(DocumentBase, table=True): object_store_url: str = Field( sa_column_kwargs={"comment": "Cloud storage URL for the document"}, ) - is_deleted: bool = Field( - default=False, - sa_column_kwargs={"comment": "Soft delete flag"}, - ) file_size_kb: float | None = Field( default=None, description="The size of the document in kilobytes", diff --git a/backend/app/models/fine_tuning.py b/backend/app/models/fine_tuning.py index d16576fdb..8283caef8 100644 --- a/backend/app/models/fine_tuning.py +++ b/backend/app/models/fine_tuning.py @@ -52,9 +52,11 @@ def check_prompt(cls, v): return v.strip() -class Fine_Tuning(FineTuningJobBase, table=True): +class FineTuning(FineTuningJobBase, table=True): """Database model for tracking fine-tuning jobs.""" + __tablename__ = "fine_tuning" + id: int = Field( primary_key=True, sa_column_kwargs={"comment": "Unique identifier for the fine-tuning job"}, @@ -107,12 +109,6 @@ class Fine_Tuning(FineTuningJobBase, table=True): description="Error message for when something failed", sa_column_kwargs={"comment": "Error message if the job failed"}, ) - is_deleted: bool = Field( - default=False, - nullable=False, - sa_column_kwargs={"comment": "Soft delete flag"}, - ) - # 
Foreign keys document_id: UUID = Field( foreign_key="document.id", diff --git a/backend/app/models/model_evaluation.py b/backend/app/models/model_evaluation.py index 3dbadb1b3..4ae80fc4c 100644 --- a/backend/app/models/model_evaluation.py +++ b/backend/app/models/model_evaluation.py @@ -89,13 +89,6 @@ class ModelEvaluation(ModelEvaluationBase, table=True): description="Error message if evaluation failed", sa_column_kwargs={"comment": "Error message if evaluation failed"}, ) - is_deleted: bool = Field( - default=False, - nullable=False, - description="Soft delete flag", - sa_column_kwargs={"comment": "Soft delete flag"}, - ) - # Foreign keys fine_tuning_id: int = Field( foreign_key="fine_tuning.id", @@ -140,7 +133,7 @@ class ModelEvaluation(ModelEvaluationBase, table=True): # Relationships project: Project = Relationship() - fine_tuning: "Fine_Tuning" = Relationship(back_populates="model_evaluation") + fine_tuning: "FineTuning" = Relationship(back_populates="model_evaluation") class ModelEvaluationUpdate(SQLModel): diff --git a/backend/app/models/openai_conversation.py b/backend/app/models/openai_conversation.py index c319f9de7..297c8fea0 100644 --- a/backend/app/models/openai_conversation.py +++ b/backend/app/models/openai_conversation.py @@ -91,12 +91,6 @@ class OpenAIConversation(OpenAIConversationBase, table=True): primary_key=True, sa_column_kwargs={"comment": "Unique identifier for the conversation record"}, ) - is_deleted: bool = Field( - default=False, - nullable=False, - sa_column_kwargs={"comment": "Soft delete flag"}, - ) - # Timestamps inserted_at: datetime = Field( default_factory=now, diff --git a/backend/app/models/project.py b/backend/app/models/project.py index 66111d0cd..03b8d0f46 100644 --- a/backend/app/models/project.py +++ b/backend/app/models/project.py @@ -10,7 +10,7 @@ from .assistants import Assistant from .collection import Collection from .credentials import Credential - from .fine_tuning import Fine_Tuning + from .fine_tuning import FineTuning from .openai_conversation import OpenAIConversation from .organization import Organization @@ -99,7 +99,7 @@ class Project(ProjectBase, table=True): collections: list["Collection"] = Relationship( back_populates="project", cascade_delete=True ) - fine_tuning: list["Fine_Tuning"] = Relationship( + fine_tuning: list["FineTuning"] = Relationship( back_populates="project", cascade_delete=True ) openai_conversations: list["OpenAIConversation"] = Relationship( diff --git a/backend/app/models/threads.py b/backend/app/models/threads.py index 2753f72cb..d4dff77e9 100644 --- a/backend/app/models/threads.py +++ b/backend/app/models/threads.py @@ -17,9 +17,11 @@ class OpenAIThreadCreate(OpenAIThreadBase): pass # Used for requests, no `id` or timestamps -class OpenAI_Thread(OpenAIThreadBase, table=True): +class OpenAIThread(OpenAIThreadBase, table=True): """Stores OpenAI thread interactions and their responses.""" + __tablename__ = "openai_thread" + id: int = Field( default=None, primary_key=True, diff --git a/backend/app/seed_data/seed_data.json b/backend/app/seed_data/seed_data.json index bfaa929f1..ef0cbcd4a 100644 --- a/backend/app/seed_data/seed_data.json +++ b/backend/app/seed_data/seed_data.json @@ -42,7 +42,6 @@ "user_email": "{{SUPERUSER_EMAIL}}", "project_name": "Glific", "api_key": "ApiKey No3x47A5qoIGhm0kVKjQ77dhCqEdWRIQZlEPzzzh7i8", - "is_deleted": false, "deleted_at": null }, { @@ -50,7 +49,6 @@ "user_email": "{{ADMIN_EMAIL}}", "project_name": "Dalgo", "api_key": "ApiKey Px8y47B6roJHin1lWLkR88eiDrFdXSJRZmFQazzai8j", - 
"is_deleted": false, "deleted_at": null } ]} diff --git a/backend/app/seed_data/seed_data.py b/backend/app/seed_data/seed_data.py index 8a50279e1..324cac795 100644 --- a/backend/app/seed_data/seed_data.py +++ b/backend/app/seed_data/seed_data.py @@ -42,7 +42,6 @@ class APIKeyData(BaseModel): project_name: str user_email: EmailStr api_key: str - is_deleted: bool deleted_at: Optional[str] = None @@ -167,7 +166,6 @@ def create_api_key(session: Session, api_key_data_raw: dict[str, Any]) -> APIKey user_id=user.id, key_prefix=key_prefix, key_hash=key_hash, - is_deleted=api_key_data.is_deleted, deleted_at=api_key_data.deleted_at, ) session.add(api_key) diff --git a/backend/app/tests/api/routes/documents/test_route_document_remove.py b/backend/app/tests/api/routes/documents/test_route_document_remove.py index 2519de0a0..b99bd5640 100644 --- a/backend/app/tests/api/routes/documents/test_route_document_remove.py +++ b/backend/app/tests/api/routes/documents/test_route_document_remove.py @@ -63,7 +63,7 @@ def test_item_is_soft_removed( statement = select(Document).where(Document.id == document.id) result = db.exec(statement).one() - assert result.is_deleted is True + assert result.deleted_at is not None @openai_responses.mock() @patch("app.api.routes.documents.get_openai_client") diff --git a/backend/app/tests/api/routes/test_fine_tuning.py b/backend/app/tests/api/routes/test_fine_tuning.py index 1696f4505..5654ce4f3 100644 --- a/backend/app/tests/api/routes/test_fine_tuning.py +++ b/backend/app/tests/api/routes/test_fine_tuning.py @@ -11,7 +11,7 @@ from app.tests.utils.test_data import create_test_fine_tuning_jobs from app.tests.utils.utils import get_document from app.models import ( - Fine_Tuning, + FineTuning, FineTuningStatus, ModelEvaluation, ModelEvaluationStatus, @@ -105,7 +105,7 @@ def test_finetune_from_csv_multiple_split_ratio( # Verify that the background task was called for each split ratio assert mock_process_job.call_count == 3 - jobs = db.query(Fine_Tuning).all() + jobs = db.query(FineTuning).all() assert len(jobs) == 3 for job in jobs: diff --git a/backend/app/tests/api/routes/test_threads.py b/backend/app/tests/api/routes/test_threads.py index 9a1f297a6..56284de44 100644 --- a/backend/app/tests/api/routes/test_threads.py +++ b/backend/app/tests/api/routes/test_threads.py @@ -14,7 +14,7 @@ handle_openai_error, poll_run_and_prepare_response, ) -from app.models import OpenAI_Thread +from app.models import OpenAIThread from app.crud import get_thread_result from app.core.langfuse.langfuse import LangfuseTracer @@ -457,8 +457,8 @@ def test_poll_run_and_prepare_response_openai_error_handling( poll_run_and_prepare_response(request, mock_client, db) # Since thread_id is not the primary key, use select query - statement = select(OpenAI_Thread).where( - OpenAI_Thread.thread_id == "test_openai_error" + statement = select(OpenAIThread).where( + OpenAIThread.thread_id == "test_openai_error" ) result = db.exec(statement).first() @@ -488,8 +488,8 @@ def test_poll_run_and_prepare_response_non_completed( poll_run_and_prepare_response(request, mock_client, db) # thread_id is not the primary key, so we query using SELECT - statement = select(OpenAI_Thread).where( - OpenAI_Thread.thread_id == "test_non_complete" + statement = select(OpenAIThread).where( + OpenAIThread.thread_id == "test_non_complete" ) result = db.exec(statement).first() @@ -537,7 +537,7 @@ def test_threads_result_endpoint_success(client, db, user_api_key_header): question = "Capital of France?" message = "Paris." 
- db.add(OpenAI_Thread(thread_id=thread_id, prompt=question, response=message)) + db.add(OpenAIThread(thread_id=thread_id, prompt=question, response=message)) db.commit() response = client.get( @@ -557,7 +557,7 @@ def test_threads_result_endpoint_processing(client, db, user_api_key_header): thread_id = f"test_processing_{uuid.uuid4()}" question = "What is Glific?" - db.add(OpenAI_Thread(thread_id=thread_id, prompt=question, response=None)) + db.add(OpenAIThread(thread_id=thread_id, prompt=question, response=None)) db.commit() response = client.get( diff --git a/backend/app/tests/core/test_security.py b/backend/app/tests/core/test_security.py index 438bf8b05..daf4bc0bc 100644 --- a/backend/app/tests/core/test_security.py +++ b/backend/app/tests/core/test_security.py @@ -11,6 +11,7 @@ get_encryption_key, APIKeyManager, ) +from app.core.util import now from app.models import APIKey, User, Organization, Project, AuthContext from app.tests.utils.test_data import create_test_api_key @@ -157,7 +158,7 @@ def test_verify_deleted_key(self, db: Session): raw_key = api_key_response.key api_key = db.get(APIKey, api_key_response.id) - api_key.is_deleted = True + api_key.deleted_at = now() db.commit() auth_context = APIKeyManager.verify(db, raw_key) diff --git a/backend/app/tests/crud/collections/collection/test_crud_collection_delete.py b/backend/app/tests/crud/collections/collection/test_crud_collection_delete.py index 5cf4643d6..c0751fe72 100644 --- a/backend/app/tests/crud/collections/collection/test_crud_collection_delete.py +++ b/backend/app/tests/crud/collections/collection/test_crud_collection_delete.py @@ -69,7 +69,7 @@ def test_delete_document_deletes_collections(self, db: Session) -> None: documents = store.fill(1) stmt = select(APIKey).where( - APIKey.project_id == project.id, APIKey.is_deleted == False + APIKey.project_id == project.id, APIKey.deleted_at.is_(None) ) api_key = db.exec(stmt).first() diff --git a/backend/app/tests/crud/documents/documents/test_crud_document_delete.py b/backend/app/tests/crud/documents/documents/test_crud_document_delete.py index 212c513ed..83da6e2b5 100644 --- a/backend/app/tests/crud/documents/documents/test_crud_document_delete.py +++ b/backend/app/tests/crud/documents/documents/test_crud_document_delete.py @@ -27,7 +27,7 @@ def test_delete_is_soft(self, document: Document) -> None: assert document is not None def test_delete_marks_deleted(self, document: Document) -> None: - assert document.is_deleted is True + assert document.deleted_at is not None def test_delete_follows_insert(self, document: Document) -> None: assert document.inserted_at <= document.deleted_at diff --git a/backend/app/tests/crud/documents/documents/test_crud_document_read_many.py b/backend/app/tests/crud/documents/documents/test_crud_document_read_many.py index a193f84bb..5b872a5d7 100644 --- a/backend/app/tests/crud/documents/documents/test_crud_document_read_many.py +++ b/backend/app/tests/crud/documents/documents/test_crud_document_read_many.py @@ -34,7 +34,7 @@ def test_deleted_docs_are_excluded( ) -> None: crud = DocumentCrud(db, store.project.id) docs, _ = crud.read_many() - assert all(x.is_deleted is False for x in docs) + assert all(x.deleted_at is None for x in docs) def test_skip_is_respected( self, diff --git a/backend/app/tests/crud/documents/documents/test_crud_document_update.py b/backend/app/tests/crud/documents/documents/test_crud_document_update.py index 1b63104d1..aa3a8b4f6 100644 --- a/backend/app/tests/crud/documents/documents/test_crud_document_update.py +++ 
b/backend/app/tests/crud/documents/documents/test_crud_document_update.py @@ -42,7 +42,7 @@ def test_insert_does_not_delete( crud = DocumentCrud(db, documents.project_id) document = crud.update(next(documents)) - assert document.is_deleted is False + assert document.deleted_at is None def test_update_sets_default_owner( self, diff --git a/backend/app/tests/crud/documents/test_doc_transformation_job.py b/backend/app/tests/crud/documents/test_doc_transformation_job.py index b13104b79..f5f5ba74d 100644 --- a/backend/app/tests/crud/documents/test_doc_transformation_job.py +++ b/backend/app/tests/crud/documents/test_doc_transformation_job.py @@ -9,6 +9,7 @@ DocTransformJobUpdate, ) from app.core.exception_handlers import HTTPException +from app.core.util import now from app.tests.utils.document import DocumentStore from app.tests.utils.utils import get_project, SequentialUuidGenerator from app.tests.utils.test_data import create_test_project @@ -58,12 +59,12 @@ def test_cannot_create_job_with_deleted_document( because read filters out deleted documents. """ document = store.put() - document.is_deleted = True + document.deleted_at = now() db.add(document) db.commit() job = crud.create(DocTransformJobCreate(source_document_id=document.id)) - # read_one should 404 due to is_deleted=True on joined document + # read_one should 404 due to soft-deleted joined document with pytest.raises(HTTPException) as exc_info: crud.read_one(job.id) assert exc_info.value.status_code == 404 @@ -100,7 +101,7 @@ def test_cannot_read_job_with_deleted_document( document = store.put() job = crud.create(DocTransformJobCreate(source_document_id=document.id)) - document.is_deleted = True + document.deleted_at = now() db.add(document) db.commit() diff --git a/backend/app/tests/crud/test_api_key.py b/backend/app/tests/crud/test_api_key.py index 028364172..e6ca9e76e 100644 --- a/backend/app/tests/crud/test_api_key.py +++ b/backend/app/tests/crud/test_api_key.py @@ -25,7 +25,6 @@ def test_create_api_key(db: Session) -> None: assert api_key.organization_id == project.organization_id assert api_key.key_prefix is not None assert api_key.key_hash is not None - assert api_key.is_deleted is False assert api_key.deleted_at is None assert raw_key is not None assert len(raw_key) > 0 @@ -216,7 +215,6 @@ def test_delete_api_key(db: Session) -> None: db_key = db.get(APIKey, api_key.id) assert db_key is not None - assert db_key.is_deleted is True assert db_key.deleted_at is not None retrieved_key = api_key_crud.read_one(key_id=api_key.id) @@ -251,7 +249,7 @@ def test_delete_api_key_from_wrong_project(db: Session) -> None: db_key = db.get(APIKey, api_key.id) assert db_key is not None - assert db_key.is_deleted is False + assert db_key.deleted_at is None def test_delete_already_deleted_api_key(db: Session) -> None: diff --git a/backend/app/tests/crud/test_assistants.py b/backend/app/tests/crud/test_assistants.py index 12aa49944..c05b9aa28 100644 --- a/backend/app/tests/crud/test_assistants.py +++ b/backend/app/tests/crud/test_assistants.py @@ -376,7 +376,6 @@ def test_delete_assistant_success(self, db: Session) -> None: result = delete_assistant(db, assistant.assistant_id, assistant.project_id) - assert result.is_deleted is True assert result.deleted_at is not None with pytest.raises(ValueError) as exc_info: get_assistant(db, name=assistant.name) @@ -402,7 +401,7 @@ def test_get_assistant_by_id_success(self, db: Session) -> None: assert result is not None assert result.assistant_id == assistant.assistant_id assert result.project_id == 
assistant.project_id - assert result.is_deleted is False + assert result.deleted_at is None def test_get_assistant_by_id_not_found(self, db: Session) -> None: """Returns None when assistant is not found""" @@ -464,7 +463,7 @@ def test_get_assistants_by_project_success( assert assistant2.assistant_id in assistant_ids for assistant in result: assert assistant.project_id == project.id - assert assistant.is_deleted is False + assert assistant.deleted_at is None def test_get_assistants_by_project_empty(self, db: Session) -> None: """Returns empty list when project has no assistants""" diff --git a/backend/app/tests/crud/test_fine_tuning.py b/backend/app/tests/crud/test_fine_tuning.py index b128a4fc5..5ac8d7548 100644 --- a/backend/app/tests/crud/test_fine_tuning.py +++ b/backend/app/tests/crud/test_fine_tuning.py @@ -135,4 +135,4 @@ def test_fetch_active_jobs_by_document_id(db: Session) -> None: assert len(result) == 1 assert result[0].id == active_job.id assert result[0].status == FineTuningStatus.running - assert result[0].is_deleted is False + assert result[0].deleted_at is None diff --git a/backend/app/tests/crud/test_onboarding.py b/backend/app/tests/crud/test_onboarding.py index b9514dbf0..13f652bfa 100644 --- a/backend/app/tests/crud/test_onboarding.py +++ b/backend/app/tests/crud/test_onboarding.py @@ -253,7 +253,7 @@ def test_onboard_project_api_key_generation(db: Session) -> None: APIKey.user_id == user.id, APIKey.project_id == project.id, APIKey.organization_id == org.id, - APIKey.is_deleted.is_(False), + APIKey.deleted_at.is_(None), ) ).first() assert api_key_record is not None diff --git a/backend/app/tests/crud/test_openai_conversation.py b/backend/app/tests/crud/test_openai_conversation.py index 314238bc3..908f7adee 100644 --- a/backend/app/tests/crud/test_openai_conversation.py +++ b/backend/app/tests/crud/test_openai_conversation.py @@ -187,7 +187,7 @@ def test_get_conversations_by_project_success(db: Session) -> None: assert len(conversations) >= 3 for conversation in conversations: assert conversation.project_id == project.id - assert conversation.is_deleted is False + assert conversation.deleted_at is None def test_get_conversations_by_project_with_pagination(db: Session) -> None: @@ -253,7 +253,6 @@ def test_delete_conversation_success(db: Session) -> None: assert deleted_conversation is not None assert deleted_conversation.id == conversation.id - assert deleted_conversation.is_deleted is True assert deleted_conversation.deleted_at is not None @@ -764,7 +763,6 @@ def test_response_id_validation_pattern(db: Session) -> None: assert conversation.assistant_id == conversation_data.assistant_id assert conversation.project_id == project.id assert conversation.organization_id == organization.id - assert conversation.is_deleted is False assert conversation.deleted_at is None diff --git a/backend/app/tests/seed_data/seed_data.json b/backend/app/tests/seed_data/seed_data.json index 9888cc846..20637e1f7 100644 --- a/backend/app/tests/seed_data/seed_data.json +++ b/backend/app/tests/seed_data/seed_data.json @@ -42,7 +42,6 @@ "user_email": "{{SUPERUSER_EMAIL}}", "project_name": "Glific", "api_key": "ApiKey No3x47A5qoIGhm0kVKjQ77dhCqEdWRIQZlEPzzzh7i8", - "is_deleted": false, "deleted_at": null }, { @@ -50,7 +49,6 @@ "user_email": "{{ADMIN_EMAIL}}", "project_name": "Dalgo", "api_key": "ApiKey Px8y47B6roJHin1lWLkR88eiDrFdXSJRZmFQazzai8j", - "is_deleted": false, "deleted_at": null } ], diff --git a/backend/app/tests/seed_data/seed_data.py b/backend/app/tests/seed_data/seed_data.py index 
2db5420c1..01b7e56cd 100644 --- a/backend/app/tests/seed_data/seed_data.py +++ b/backend/app/tests/seed_data/seed_data.py @@ -48,7 +48,6 @@ class APIKeyData(BaseModel): project_name: str user_email: EmailStr api_key: str - is_deleted: bool deleted_at: Optional[str] = None @@ -202,7 +201,6 @@ def create_api_key(session: Session, api_key_data_raw: dict[str, Any]) -> APIKey user_id=user.id, key_prefix=key_prefix, key_hash=key_hash, - is_deleted=api_key_data.is_deleted, deleted_at=api_key_data.deleted_at, ) session.add(api_key) diff --git a/backend/app/tests/utils/auth.py b/backend/app/tests/utils/auth.py index 922ae8a3d..c4ecef130 100644 --- a/backend/app/tests/utils/auth.py +++ b/backend/app/tests/utils/auth.py @@ -66,7 +66,7 @@ def get_test_auth_context( select(APIKey) .where(APIKey.user_id == user.id) .where(APIKey.project_id == project.id) - .where(APIKey.is_deleted == False) + .where(APIKey.deleted_at.is_(None)) ).first() if not api_key: raise ValueError( diff --git a/backend/app/tests/utils/document.py b/backend/app/tests/utils/document.py index bcb8b75fb..f868f3193 100644 --- a/backend/app/tests/utils/document.py +++ b/backend/app/tests/utils/document.py @@ -46,7 +46,6 @@ def __next__(self): project_id=self.project.id, fname=f"{doc_id}.xyz", object_store_url=object_store_url, - is_deleted=False, ) diff --git a/backend/app/tests/utils/test_data.py b/backend/app/tests/utils/test_data.py index 9b144c2f4..5b1bb0565 100644 --- a/backend/app/tests/utils/test_data.py +++ b/backend/app/tests/utils/test_data.py @@ -10,7 +10,7 @@ ConfigBlob, CredsCreate, FineTuningJobCreate, - Fine_Tuning, + FineTuning, ModelEvaluation, ModelEvaluationBase, ModelEvaluationStatus, @@ -166,7 +166,7 @@ def create_test_credential(db: Session) -> tuple[list[Credential], Project]: def create_test_fine_tuning_jobs( db: Session, ratios: list[float], -) -> tuple[list[Fine_Tuning], bool]: +) -> tuple[list[FineTuning], bool]: project = get_project(db, "Dalgo") document = get_document(db, "dalgo_sample.json") jobs = [] @@ -196,7 +196,7 @@ def create_test_fine_tuning_jobs( def create_test_finetuning_job_with_extra_fields( db: Session, ratios: list[float], -) -> tuple[list[Fine_Tuning], bool]: +) -> tuple[list[FineTuning], bool]: jobs, _ = create_test_fine_tuning_jobs(db, ratios) if jobs: diff --git a/backend/app/tests/utils/utils.py b/backend/app/tests/utils/utils.py index ffffd5185..8bf7b1971 100644 --- a/backend/app/tests/utils/utils.py +++ b/backend/app/tests/utils/utils.py @@ -79,7 +79,7 @@ def get_assistant( If a assistant name is provided, fetch the active assistant with that name. If no name is provided, fetch any random assistant. 
""" - filters = [Assistant.is_deleted == False] + filters = [Assistant.deleted_at.is_(None)] if project_id is not None: filters.append(Assistant.project_id == project_id) From 367f258d0decd1ad1ae25b321c8ba8f395cf0d1d Mon Sep 17 00:00:00 2001 From: AkhileshNegi Date: Wed, 6 May 2026 16:56:01 +0530 Subject: [PATCH 2/8] database cleanups next iteration --- ...reate_global_schema_and_languages_table.py | 2 +- .../057_add_updated_at_to_user_project.py | 34 +++++++++ .../versions/058_add_project_fk_to_job.py | 51 ++++++++++++++ .../059_rename_created_at_to_inserted_at.py | 27 +++++++ backend/app/api/routes/llm.py | 2 +- backend/app/api/routes/user_project.py | 64 ++++++++++++++--- backend/app/crud/llm.py | 2 +- backend/app/crud/model_evaluation.py | 25 +++---- backend/app/models/evaluation.py | 4 +- backend/app/models/job.py | 5 +- backend/app/models/llm/request.py | 2 +- backend/app/models/user_project.py | 8 +++ backend/app/tests/api/test_user_project.py | 70 ++++++++++++++++++- .../app/tests/crud/test_model_evaluation.py | 36 ++++++++++ 14 files changed, 299 insertions(+), 33 deletions(-) create mode 100644 backend/app/alembic/versions/057_add_updated_at_to_user_project.py create mode 100644 backend/app/alembic/versions/058_add_project_fk_to_job.py create mode 100644 backend/app/alembic/versions/059_rename_created_at_to_inserted_at.py diff --git a/backend/app/alembic/versions/043_create_global_schema_and_languages_table.py b/backend/app/alembic/versions/043_create_global_schema_and_languages_table.py index f25807380..6fee39066 100644 --- a/backend/app/alembic/versions/043_create_global_schema_and_languages_table.py +++ b/backend/app/alembic/versions/043_create_global_schema_and_languages_table.py @@ -25,7 +25,7 @@ def upgrade(): "languages", sa.Column( "id", - sa.BigInteger(), + sa.Integer(), sa.Identity(always=False), primary_key=True, comment="Unique identifier for the language", diff --git a/backend/app/alembic/versions/057_add_updated_at_to_user_project.py b/backend/app/alembic/versions/057_add_updated_at_to_user_project.py new file mode 100644 index 000000000..f2d172248 --- /dev/null +++ b/backend/app/alembic/versions/057_add_updated_at_to_user_project.py @@ -0,0 +1,34 @@ +"""add updated_at to user_project + +Revision ID: 057 +Revises: 056 +Create Date: 2026-05-06 12:00:00.000000 + +""" + +import sqlalchemy as sa +from alembic import op + +# revision identifiers, used by Alembic. +revision = "057" +down_revision = "056" +branch_labels = None +depends_on = None + + +def upgrade(): + op.add_column( + "user_project", + sa.Column( + "updated_at", + sa.DateTime(), + nullable=False, + server_default=sa.text("NOW()"), + comment="Timestamp when the mapping was last updated", + ), + ) + op.alter_column("user_project", "updated_at", server_default=None) + + +def downgrade(): + op.drop_column("user_project", "updated_at") diff --git a/backend/app/alembic/versions/058_add_project_fk_to_job.py b/backend/app/alembic/versions/058_add_project_fk_to_job.py new file mode 100644 index 000000000..2c2d622bd --- /dev/null +++ b/backend/app/alembic/versions/058_add_project_fk_to_job.py @@ -0,0 +1,51 @@ +"""add project_id foreign key to job table + +Revision ID: 058 +Revises: 057 +Create Date: 2026-05-06 13:00:00.000000 + +Migration 051 added job.project_id as a plain Integer with no foreign key +constraint, leaving the column without referential integrity. This migration: + + 1. Backfills orphan rows: any job.project_id that doesn't match a real + project.id is set to NULL (the column is nullable). 
This preserves + historical job records whose project was deleted before the FK existed. + Switch the cleanup to a DELETE if you'd rather discard orphans + retroactively under CASCADE semantics. + + 2. Adds the foreign key constraint with ON DELETE CASCADE, matching the + pattern used by every other project_id FK in the schema. + +The supporting index (ix_job_project_id) is created by migration 055. +""" + +from alembic import op + +# revision identifiers, used by Alembic. +revision = "058" +down_revision = "057" +branch_labels = None +depends_on = None + + +def upgrade(): + op.execute( + """ + UPDATE job + SET project_id = NULL + WHERE project_id IS NOT NULL + AND project_id NOT IN (SELECT id FROM project) + """ + ) + op.create_foreign_key( + "job_project_id_fkey", + "job", + "project", + ["project_id"], + ["id"], + ondelete="CASCADE", + ) + + +def downgrade(): + op.drop_constraint("job_project_id_fkey", "job", type_="foreignkey") diff --git a/backend/app/alembic/versions/059_rename_created_at_to_inserted_at.py b/backend/app/alembic/versions/059_rename_created_at_to_inserted_at.py new file mode 100644 index 000000000..5de7f2a9f --- /dev/null +++ b/backend/app/alembic/versions/059_rename_created_at_to_inserted_at.py @@ -0,0 +1,27 @@ +"""rename created_at to inserted_at on job and llm_call + +Revision ID: 059 +Revises: 058 +Create Date: 2026-05-06 14:00:00.000000 + +Aligns `job` and `llm_call` with the rest of the schema (51 other tables +use `inserted_at`). Pure rename — no type or default change. +""" + +from alembic import op + +# revision identifiers, used by Alembic. +revision = "059" +down_revision = "058" +branch_labels = None +depends_on = None + + +def upgrade(): + op.alter_column("job", "created_at", new_column_name="inserted_at") + op.alter_column("llm_call", "created_at", new_column_name="inserted_at") + + +def downgrade(): + op.alter_column("llm_call", "inserted_at", new_column_name="created_at") + op.alter_column("job", "inserted_at", new_column_name="created_at") diff --git a/backend/app/api/routes/llm.py b/backend/app/api/routes/llm.py index 62d4dbb04..d220bc244 100644 --- a/backend/app/api/routes/llm.py +++ b/backend/app/api/routes/llm.py @@ -101,7 +101,7 @@ def llm_call( job_id=job.id, status=job.status.value, message=message, - job_inserted_at=job.created_at, + job_inserted_at=job.inserted_at, job_updated_at=job.updated_at, ) diff --git a/backend/app/api/routes/user_project.py b/backend/app/api/routes/user_project.py index 3da8afdca..7904666ad 100644 --- a/backend/app/api/routes/user_project.py +++ b/backend/app/api/routes/user_project.py @@ -1,21 +1,25 @@ import logging +import secrets from typing import Any from fastapi import APIRouter, Depends, HTTPException, status +from sqlmodel import select from app.api.deps import AuthContextDep, SessionDep from app.api.permissions import Permission, require_permission from app.core.config import settings +from app.core.security import get_password_hash from app.crud.organization import get_organization_by_id, validate_organization from app.crud.project import get_project_by_id, validate_project from app.crud.user_project import ( - add_user_to_project, get_users_by_project, remove_user_from_project, ) from app.models import ( AddUsersToProjectRequest, Message, + User, + UserProject, UserProjectPublic, ) from app.services.auth import generate_invite_token @@ -62,21 +66,59 @@ def add_project_users( validate_organization(session=session, org_id=body.organization_id) validate_project(session=session, project_id=body.project_id) - 
same_project_emails = [] - different_project_emails = [] + emails = [str(entry.email) for entry in body.users] + + existing_users = session.exec(select(User).where(User.email.in_(emails))).all() + users_by_email: dict[str, User] = {u.email: u for u in existing_users} + + if existing_users: + existing_memberships = session.exec( + select(UserProject).where( + UserProject.user_id.in_([u.id for u in existing_users]) + ) + ).all() + else: + existing_memberships = [] + memberships_by_user: dict[int, UserProject] = { + m.user_id: m for m in existing_memberships + } + + same_project_emails: list[str] = [] + different_project_emails: list[str] = [] for entry in body.users: - _, add_status = add_user_to_project( - session=session, - email=str(entry.email), + email = str(entry.email) + user = users_by_email.get(email) + + if user is None: + user = User( + email=email, + full_name=entry.full_name, + is_active=False, + hashed_password=get_password_hash(secrets.token_urlsafe(16)), + ) + session.add(user) + session.flush() + users_by_email[email] = user + elif entry.full_name and not user.full_name: + user.full_name = entry.full_name + + membership = memberships_by_user.get(user.id) + if membership is not None: + if membership.project_id == body.project_id: + same_project_emails.append(email) + else: + different_project_emails.append(email) + continue + + new_membership = UserProject( + user_id=user.id, organization_id=body.organization_id, project_id=body.project_id, - full_name=entry.full_name, ) - if add_status == "same_project": - same_project_emails.append(str(entry.email)) - elif add_status == "different_project": - different_project_emails.append(str(entry.email)) + session.add(new_membership) + session.flush() + memberships_by_user[user.id] = new_membership if same_project_emails or different_project_emails: session.rollback() diff --git a/backend/app/crud/llm.py b/backend/app/crud/llm.py index c7f5b1aee..4aa14bf41 100644 --- a/backend/app/crud/llm.py +++ b/backend/app/crud/llm.py @@ -244,7 +244,7 @@ def get_llm_calls_by_job_id( LlmCall.project_id == project_id, LlmCall.deleted_at.is_(None), ) - .order_by(LlmCall.created_at.desc()) + .order_by(LlmCall.inserted_at.desc()) ) return list(session.exec(statement).all()) diff --git a/backend/app/crud/model_evaluation.py b/backend/app/crud/model_evaluation.py index 10cfbe667..ddc38ba7b 100644 --- a/backend/app/crud/model_evaluation.py +++ b/backend/app/crud/model_evaluation.py @@ -2,6 +2,7 @@ from uuid import UUID from fastapi import HTTPException +from sqlalchemy import Float, cast from sqlmodel import Session, select from app.crud import fetch_by_id @@ -112,28 +113,24 @@ def fetch_eval_by_doc_id( def fetch_top_model_by_doc_id( session: Session, document_id: UUID, project_id: int ) -> ModelEvaluation: - query = ( + mcc_expr = cast(ModelEvaluation.score["mcc_score"].astext, Float) + + stmt = ( select(ModelEvaluation) .where( ModelEvaluation.document_id == document_id, ModelEvaluation.project_id == project_id, + ModelEvaluation.deleted_at.is_(None), + ModelEvaluation.score.is_not(None), + mcc_expr.is_not(None), ) - .order_by(ModelEvaluation.updated_at.desc()) + .order_by(mcc_expr.desc()) + .limit(1) ) - model_evals = session.exec(query).all() - - top_model = None - highest_mcc = -float("inf") - - for model_eval in model_evals: - if model_eval.score is not None: - mcc = model_eval.score.get("mcc_score", None) - if mcc is not None and mcc > highest_mcc: - highest_mcc = mcc - top_model = model_eval + top_model = session.exec(stmt).first() - if not 
top_model: + if top_model is None: logger.error( f"[fetch_top_model_by_doc_id]No model evaluation found with populated score for document_id={document_id}, project_id={project_id}" ) diff --git a/backend/app/models/evaluation.py b/backend/app/models/evaluation.py index c9130d3c3..67834cc8d 100644 --- a/backend/app/models/evaluation.py +++ b/backend/app/models/evaluation.py @@ -348,13 +348,13 @@ class EvaluationRun(SQLModel, table=True): ) # Timestamps - inserted_at: datetime = Field( + inserted_at: datetime = SQLField( default_factory=now, nullable=False, description="The timestamp when the evaluation run was started", sa_column_kwargs={"comment": "Timestamp when the evaluation run was started"}, ) - updated_at: datetime = Field( + updated_at: datetime = SQLField( default_factory=now, nullable=False, description="The timestamp when the evaluation run was last updated", diff --git a/backend/app/models/job.py b/backend/app/models/job.py index f7f61277b..3ea4d8650 100644 --- a/backend/app/models/job.py +++ b/backend/app/models/job.py @@ -42,6 +42,9 @@ class Job(SQLModel, table=True): ) project_id: int | None = Field( default=None, + foreign_key="project.id", + ondelete="CASCADE", + index=True, description="Project ID of the project the job belongs to.", sa_column_kwargs={"comment": "Project ID of the job's project"}, ) @@ -65,7 +68,7 @@ class Job(SQLModel, table=True): ) # Timestamps - created_at: datetime = Field( + inserted_at: datetime = Field( default_factory=now, sa_column_kwargs={"comment": "Timestamp when the job was created"}, ) diff --git a/backend/app/models/llm/request.py b/backend/app/models/llm/request.py index a5c337a44..555d7c68c 100644 --- a/backend/app/models/llm/request.py +++ b/backend/app/models/llm/request.py @@ -597,7 +597,7 @@ class LlmCall(SQLModel, table=True): ) # Timestamps - created_at: datetime = Field( + inserted_at: datetime = Field( default_factory=now, nullable=False, sa_column_kwargs={"comment": "Timestamp when the LLM call was created"}, diff --git a/backend/app/models/user_project.py b/backend/app/models/user_project.py index c231c6d0f..2361cecfb 100644 --- a/backend/app/models/user_project.py +++ b/backend/app/models/user_project.py @@ -47,6 +47,14 @@ class UserProject(UserProjectBase, table=True): nullable=False, sa_column_kwargs={"comment": "Timestamp when the mapping was created"}, ) + updated_at: datetime = Field( + default_factory=now, + nullable=False, + sa_column_kwargs={ + "comment": "Timestamp when the mapping was last updated", + "onupdate": now, + }, + ) class UserEntry(SQLModel): diff --git a/backend/app/tests/api/test_user_project.py b/backend/app/tests/api/test_user_project.py index dd4f6cceb..94c0ae080 100644 --- a/backend/app/tests/api/test_user_project.py +++ b/backend/app/tests/api/test_user_project.py @@ -1,7 +1,7 @@ from unittest.mock import patch from fastapi.testclient import TestClient -from sqlmodel import Session +from sqlmodel import Session, select from app.core.config import settings from app.crud.user_project import add_user_to_project @@ -198,6 +198,74 @@ def test_add_user_different_project_returns_409( assert resp.status_code == 409 assert "Already assigned to another project" in resp.json()["error"] + def test_add_bulk_surfaces_all_same_project_conflicts( + self, + db: Session, + client: TestClient, + superuser_token_headers: dict[str, str], + ): + """All emails already on the project should appear in the 409 error.""" + project = create_test_project(db) + email_a = random_email() + email_b = random_email() + for email in 
(email_a, email_b): + add_user_to_project( + session=db, + email=email, + organization_id=project.organization_id, + project_id=project.id, + ) + db.commit() + + resp = client.post( + f"{USER_PROJECTS_URL}/", + json={ + "organization_id": project.organization_id, + "project_id": project.id, + "users": [{"email": email_a}, {"email": email_b}], + }, + headers=superuser_token_headers, + ) + assert resp.status_code == 409 + body = resp.json()["error"] + assert "Already added to this project" in body + assert email_a in body + assert email_b in body + + def test_add_duplicate_email_in_same_request_rolls_back( + self, + db: Session, + client: TestClient, + superuser_token_headers: dict[str, str], + ): + """Submitting the same email twice in one request rolls back the whole batch. + + Pins current behaviour: the second occurrence is detected as a + same-project conflict because the first occurrence was just added. + """ + project = create_test_project(db) + project_id = project.id + organization_id = project.organization_id + email = random_email() + + resp = client.post( + f"{USER_PROJECTS_URL}/", + json={ + "organization_id": organization_id, + "project_id": project_id, + "users": [{"email": email}, {"email": email}], + }, + headers=superuser_token_headers, + ) + assert resp.status_code == 409 + assert "Already added to this project" in resp.json()["error"] + + # Confirm rollback: no UserProject row was persisted. + rows = db.exec( + select(UserProject).where(UserProject.project_id == project_id) + ).all() + assert rows == [] + class TestDeleteProjectUser: """Test suite for DELETE /user-projects/{user_id}""" diff --git a/backend/app/tests/crud/test_model_evaluation.py b/backend/app/tests/crud/test_model_evaluation.py index 12d190fdd..d4f47e0fd 100644 --- a/backend/app/tests/crud/test_model_evaluation.py +++ b/backend/app/tests/crud/test_model_evaluation.py @@ -4,6 +4,7 @@ from sqlmodel import Session from fastapi import HTTPException +from app.core.util import now from app.tests.utils.utils import get_project, get_non_existent_id from app.tests.utils.test_data import ( create_test_model_evaluation, @@ -110,6 +111,41 @@ def test_fetch_top_model_by_doc_id_not_found(db: Session) -> None: assert exc.value.status_code == 404 +def test_fetch_top_model_by_doc_id_picks_highest_mcc(db: Session) -> None: + model_evals = create_test_model_evaluation(db) + assert len(model_evals) >= 2 + assert model_evals[0].document_id == model_evals[1].document_id + + model_evals[0].score = {"mcc_score": 0.5} + model_evals[1].score = {"mcc_score": 0.9} + db.flush() + + result = fetch_top_model_by_doc_id( + db, + document_id=model_evals[0].document_id, + project_id=model_evals[0].project_id, + ) + assert result.id == model_evals[1].id + + +def test_fetch_top_model_by_doc_id_excludes_soft_deleted(db: Session) -> None: + model_evals = create_test_model_evaluation(db) + assert len(model_evals) >= 2 + assert model_evals[0].document_id == model_evals[1].document_id + + model_evals[0].score = {"mcc_score": 0.5} + model_evals[1].score = {"mcc_score": 0.9} + model_evals[1].deleted_at = now() + db.flush() + + result = fetch_top_model_by_doc_id( + db, + document_id=model_evals[0].document_id, + project_id=model_evals[0].project_id, + ) + assert result.id == model_evals[0].id + + def test_fetch_active_model_evals(db: Session) -> None: model_evals = create_test_model_evaluation(db) active_evals = fetch_active_model_evals( From 65239106a4e933b4ceb07c3ec1eaa8b4bfe60d21 Mon Sep 17 00:00:00 2001 From: AkhileshNegi Date: Thu, 7 May 2026 
08:04:51 +0530 Subject: [PATCH 3/8] added unique constraint --- ...dd_unique_constraint_documentcollection.py | 44 +++++++++++++++++++ backend/app/models/document_collection.py | 6 ++- 2 files changed, 49 insertions(+), 1 deletion(-) create mode 100644 backend/app/alembic/versions/060_add_unique_constraint_documentcollection.py diff --git a/backend/app/alembic/versions/060_add_unique_constraint_documentcollection.py b/backend/app/alembic/versions/060_add_unique_constraint_documentcollection.py new file mode 100644 index 000000000..90c0d085b --- /dev/null +++ b/backend/app/alembic/versions/060_add_unique_constraint_documentcollection.py @@ -0,0 +1,44 @@ +"""add unique constraint to documentcollection + +Revision ID: 060 +Revises: 059 +Create Date: 2026-05-07 12:00:00.000000 + +The `documentcollection` junction table never had a uniqueness constraint +on (document_id, collection_id), so the same document could be linked to +the same collection multiple times. This migration: + + 1. Removes any existing duplicate rows, keeping the row with the lowest + `id` for each (document_id, collection_id) pair. + 2. Adds the unique constraint going forward. +""" + +from alembic import op + +# revision identifiers, used by Alembic. +revision = "060" +down_revision = "059" +branch_labels = None +depends_on = None + + +def upgrade(): + op.execute( + """ + DELETE FROM documentcollection + WHERE id NOT IN ( + SELECT MIN(id) + FROM documentcollection + GROUP BY document_id, collection_id + ) + """ + ) + op.create_unique_constraint( + "uq_document_collection", + "documentcollection", + ["document_id", "collection_id"], + ) + + +def downgrade(): + op.drop_constraint("uq_document_collection", "documentcollection", type_="unique") diff --git a/backend/app/models/document_collection.py b/backend/app/models/document_collection.py index 93db6df31..05329e563 100644 --- a/backend/app/models/document_collection.py +++ b/backend/app/models/document_collection.py @@ -1,11 +1,15 @@ from uuid import UUID -from sqlmodel import Field, SQLModel +from sqlmodel import Field, SQLModel, UniqueConstraint class DocumentCollection(SQLModel, table=True): """Junction table linking documents to collections.""" + __table_args__ = ( + UniqueConstraint("document_id", "collection_id", name="uq_document_collection"), + ) + id: int | None = Field( default=None, primary_key=True, From 47329e3f4a8644532bee990c16e62d3639b8ba46 Mon Sep 17 00:00:00 2001 From: AkhileshNegi Date: Thu, 7 May 2026 08:19:27 +0530 Subject: [PATCH 4/8] added migration --- .../061_align_languages_id_to_integer.py | 34 +++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 backend/app/alembic/versions/061_align_languages_id_to_integer.py diff --git a/backend/app/alembic/versions/061_align_languages_id_to_integer.py b/backend/app/alembic/versions/061_align_languages_id_to_integer.py new file mode 100644 index 000000000..84dddd713 --- /dev/null +++ b/backend/app/alembic/versions/061_align_languages_id_to_integer.py @@ -0,0 +1,34 @@ +"""align global.languages.id to INTEGER + +Revision ID: 061 +Revises: 060 +Create Date: 2026-05-07 13:00:00.000000 + +Migration 043 originally created `global.languages.id` as BIGINT, but every +FK column referencing it (evaluation_dataset, evaluation_run, stt_sample) is +INTEGER. Migration 043's source has been edited to use INTEGER for fresh +setups; this migration aligns already-deployed databases. + +The underlying IDENTITY sequence stays BIGINT (PostgreSQL doesn't change it +on ALTER COLUMN TYPE). 
This is harmless — values would have to exceed +2^31 - 1 to cause an INSERT failure, and the table holds ~13 seeded rows. + +Languages table is small (≤100 rows in practice), so the AccessExclusiveLock +taken by ALTER COLUMN TYPE is sub-second. +""" + +from alembic import op + +# revision identifiers, used by Alembic. +revision = "061" +down_revision = "060" +branch_labels = None +depends_on = None + + +def upgrade(): + op.execute("ALTER TABLE global.languages ALTER COLUMN id SET DATA TYPE INTEGER") + + +def downgrade(): + op.execute("ALTER TABLE global.languages ALTER COLUMN id SET DATA TYPE BIGINT") From bb2a56ab21073723cbd7b3d7c3b1127bcd1eba49 Mon Sep 17 00:00:00 2001 From: AkhileshNegi Date: Thu, 7 May 2026 09:55:10 +0530 Subject: [PATCH 5/8] migration cleanups --- .../057_add_updated_at_to_user_project.py | 34 ------ ...zation.py => 057_v1_query_optimization.py} | 8 +- .../versions/058_add_project_fk_to_job.py | 51 --------- ...dexes.py => 058_drop_redundant_indexes.py} | 12 +- .../059_rename_created_at_to_inserted_at.py | 27 ----- .../versions/059_v1_assorted_cleanups.py | 104 ++++++++++++++++++ ...dd_unique_constraint_documentcollection.py | 44 -------- .../061_align_languages_id_to_integer.py | 34 ------ 8 files changed, 114 insertions(+), 200 deletions(-) delete mode 100644 backend/app/alembic/versions/057_add_updated_at_to_user_project.py rename backend/app/alembic/versions/{055_v1_query_optimization.py => 057_v1_query_optimization.py} (99%) delete mode 100644 backend/app/alembic/versions/058_add_project_fk_to_job.py rename backend/app/alembic/versions/{056_drop_redundant_indexes.py => 058_drop_redundant_indexes.py} (94%) delete mode 100644 backend/app/alembic/versions/059_rename_created_at_to_inserted_at.py create mode 100644 backend/app/alembic/versions/059_v1_assorted_cleanups.py delete mode 100644 backend/app/alembic/versions/060_add_unique_constraint_documentcollection.py delete mode 100644 backend/app/alembic/versions/061_align_languages_id_to_integer.py diff --git a/backend/app/alembic/versions/057_add_updated_at_to_user_project.py b/backend/app/alembic/versions/057_add_updated_at_to_user_project.py deleted file mode 100644 index f2d172248..000000000 --- a/backend/app/alembic/versions/057_add_updated_at_to_user_project.py +++ /dev/null @@ -1,34 +0,0 @@ -"""add updated_at to user_project - -Revision ID: 057 -Revises: 056 -Create Date: 2026-05-06 12:00:00.000000 - -""" - -import sqlalchemy as sa -from alembic import op - -# revision identifiers, used by Alembic. 
-revision = "057" -down_revision = "056" -branch_labels = None -depends_on = None - - -def upgrade(): - op.add_column( - "user_project", - sa.Column( - "updated_at", - sa.DateTime(), - nullable=False, - server_default=sa.text("NOW()"), - comment="Timestamp when the mapping was last updated", - ), - ) - op.alter_column("user_project", "updated_at", server_default=None) - - -def downgrade(): - op.drop_column("user_project", "updated_at") diff --git a/backend/app/alembic/versions/055_v1_query_optimization.py b/backend/app/alembic/versions/057_v1_query_optimization.py similarity index 99% rename from backend/app/alembic/versions/055_v1_query_optimization.py rename to backend/app/alembic/versions/057_v1_query_optimization.py index ce4821b70..cce91f3de 100644 --- a/backend/app/alembic/versions/055_v1_query_optimization.py +++ b/backend/app/alembic/versions/057_v1_query_optimization.py @@ -1,7 +1,7 @@ """v1.0 query optimization: project_id + composite indexes, drop is_deleted -Revision ID: 055 -Revises: 054 +Revision ID: 057 +Revises: 056 Create Date: 2026-05-05 12:00:00.000000 Bundles three coordinated changes for v1.0 lock: @@ -34,8 +34,8 @@ from alembic import op -revision = "055" -down_revision = "054" +revision = "057" +down_revision = "056" branch_labels = None depends_on = None diff --git a/backend/app/alembic/versions/058_add_project_fk_to_job.py b/backend/app/alembic/versions/058_add_project_fk_to_job.py deleted file mode 100644 index 2c2d622bd..000000000 --- a/backend/app/alembic/versions/058_add_project_fk_to_job.py +++ /dev/null @@ -1,51 +0,0 @@ -"""add project_id foreign key to job table - -Revision ID: 058 -Revises: 057 -Create Date: 2026-05-06 13:00:00.000000 - -Migration 051 added job.project_id as a plain Integer with no foreign key -constraint, leaving the column without referential integrity. This migration: - - 1. Backfills orphan rows: any job.project_id that doesn't match a real - project.id is set to NULL (the column is nullable). This preserves - historical job records whose project was deleted before the FK existed. - Switch the cleanup to a DELETE if you'd rather discard orphans - retroactively under CASCADE semantics. - - 2. Adds the foreign key constraint with ON DELETE CASCADE, matching the - pattern used by every other project_id FK in the schema. - -The supporting index (ix_job_project_id) is created by migration 055. -""" - -from alembic import op - -# revision identifiers, used by Alembic. 
-revision = "058" -down_revision = "057" -branch_labels = None -depends_on = None - - -def upgrade(): - op.execute( - """ - UPDATE job - SET project_id = NULL - WHERE project_id IS NOT NULL - AND project_id NOT IN (SELECT id FROM project) - """ - ) - op.create_foreign_key( - "job_project_id_fkey", - "job", - "project", - ["project_id"], - ["id"], - ondelete="CASCADE", - ) - - -def downgrade(): - op.drop_constraint("job_project_id_fkey", "job", type_="foreignkey") diff --git a/backend/app/alembic/versions/056_drop_redundant_indexes.py b/backend/app/alembic/versions/058_drop_redundant_indexes.py similarity index 94% rename from backend/app/alembic/versions/056_drop_redundant_indexes.py rename to backend/app/alembic/versions/058_drop_redundant_indexes.py index fbe76be1a..19b0f1256 100644 --- a/backend/app/alembic/versions/056_drop_redundant_indexes.py +++ b/backend/app/alembic/versions/058_drop_redundant_indexes.py @@ -1,10 +1,10 @@ -"""drop redundant indexes superseded by 055 composites +"""drop redundant indexes superseded by 057 composites -Revision ID: 056 -Revises: 055 +Revision ID: 058 +Revises: 057 Create Date: 2026-05-05 14:00:00.000000 -Drops indexes that are now redundant after migration 055 added the +Drops indexes that are now redundant after migration 057 added the real composite/partial indexes that match actual query shapes: ix_project_name @@ -51,8 +51,8 @@ from alembic import op -revision = "056" -down_revision = "055" +revision = "058" +down_revision = "057" branch_labels = None depends_on = None diff --git a/backend/app/alembic/versions/059_rename_created_at_to_inserted_at.py b/backend/app/alembic/versions/059_rename_created_at_to_inserted_at.py deleted file mode 100644 index 5de7f2a9f..000000000 --- a/backend/app/alembic/versions/059_rename_created_at_to_inserted_at.py +++ /dev/null @@ -1,27 +0,0 @@ -"""rename created_at to inserted_at on job and llm_call - -Revision ID: 059 -Revises: 058 -Create Date: 2026-05-06 14:00:00.000000 - -Aligns `job` and `llm_call` with the rest of the schema (51 other tables -use `inserted_at`). Pure rename — no type or default change. -""" - -from alembic import op - -# revision identifiers, used by Alembic. -revision = "059" -down_revision = "058" -branch_labels = None -depends_on = None - - -def upgrade(): - op.alter_column("job", "created_at", new_column_name="inserted_at") - op.alter_column("llm_call", "created_at", new_column_name="inserted_at") - - -def downgrade(): - op.alter_column("llm_call", "inserted_at", new_column_name="created_at") - op.alter_column("job", "inserted_at", new_column_name="created_at") diff --git a/backend/app/alembic/versions/059_v1_assorted_cleanups.py b/backend/app/alembic/versions/059_v1_assorted_cleanups.py new file mode 100644 index 000000000..d2965b243 --- /dev/null +++ b/backend/app/alembic/versions/059_v1_assorted_cleanups.py @@ -0,0 +1,104 @@ +"""v1.0 assorted schema cleanups + +Revision ID: 059 +Revises: 058 +Create Date: 2026-05-07 14:00:00.000000 + +Bundles five small, mutually independent v1.0 cleanups in source order: + + 1. user_project: add `updated_at` column with NOW() server default for + backfill, then drop the default so future inserts use the model's + `default_factory=now`. + + 2. job: backfill orphan project_id rows to NULL, then add the missing + foreign key constraint with ON DELETE CASCADE. The supporting + ix_job_project_id index is created by migration 057. + + 3. 
job + llm_call: rename `created_at` → `inserted_at` to align with + the rest of the schema (every other table uses `inserted_at`). + + 4. documentcollection: dedupe any existing duplicate (document_id, + collection_id) pairs (keeps the lowest id), then add the missing + unique constraint. + + 5. global.languages: align id column type to INTEGER. Migration 043 + originally created it as BIGINT, but every FK column referencing it + is INTEGER. The IDENTITY sequence stays BIGINT (PG doesn't change + it on ALTER COLUMN TYPE) — harmless at this scale. +""" + +import sqlalchemy as sa +from alembic import op + +# revision identifiers, used by Alembic. +revision = "059" +down_revision = "058" +branch_labels = None +depends_on = None + + +def upgrade(): + # 1. user_project.updated_at + op.add_column( + "user_project", + sa.Column( + "updated_at", + sa.DateTime(), + nullable=False, + server_default=sa.text("NOW()"), + comment="Timestamp when the mapping was last updated", + ), + ) + op.alter_column("user_project", "updated_at", server_default=None) + + # 2. job.project_id foreign key (with orphan backfill) + op.execute( + """ + UPDATE job + SET project_id = NULL + WHERE project_id IS NOT NULL + AND project_id NOT IN (SELECT id FROM project) + """ + ) + op.create_foreign_key( + "job_project_id_fkey", + "job", + "project", + ["project_id"], + ["id"], + ondelete="CASCADE", + ) + + # 3. Rename created_at → inserted_at on job and llm_call + op.alter_column("job", "created_at", new_column_name="inserted_at") + op.alter_column("llm_call", "created_at", new_column_name="inserted_at") + + # 4. documentcollection unique constraint (with dedupe) + op.execute( + """ + DELETE FROM documentcollection + WHERE id NOT IN ( + SELECT MIN(id) + FROM documentcollection + GROUP BY document_id, collection_id + ) + """ + ) + op.create_unique_constraint( + "uq_document_collection", + "documentcollection", + ["document_id", "collection_id"], + ) + + # 5. Align global.languages.id to INTEGER + op.execute("ALTER TABLE global.languages ALTER COLUMN id SET DATA TYPE INTEGER") + + +def downgrade(): + # Reverse order of upgrade() + op.execute("ALTER TABLE global.languages ALTER COLUMN id SET DATA TYPE BIGINT") + op.drop_constraint("uq_document_collection", "documentcollection", type_="unique") + op.alter_column("llm_call", "inserted_at", new_column_name="created_at") + op.alter_column("job", "inserted_at", new_column_name="created_at") + op.drop_constraint("job_project_id_fkey", "job", type_="foreignkey") + op.drop_column("user_project", "updated_at") diff --git a/backend/app/alembic/versions/060_add_unique_constraint_documentcollection.py b/backend/app/alembic/versions/060_add_unique_constraint_documentcollection.py deleted file mode 100644 index 90c0d085b..000000000 --- a/backend/app/alembic/versions/060_add_unique_constraint_documentcollection.py +++ /dev/null @@ -1,44 +0,0 @@ -"""add unique constraint to documentcollection - -Revision ID: 060 -Revises: 059 -Create Date: 2026-05-07 12:00:00.000000 - -The `documentcollection` junction table never had a uniqueness constraint -on (document_id, collection_id), so the same document could be linked to -the same collection multiple times. This migration: - - 1. Removes any existing duplicate rows, keeping the row with the lowest - `id` for each (document_id, collection_id) pair. - 2. Adds the unique constraint going forward. -""" - -from alembic import op - -# revision identifiers, used by Alembic. 
-revision = "060" -down_revision = "059" -branch_labels = None -depends_on = None - - -def upgrade(): - op.execute( - """ - DELETE FROM documentcollection - WHERE id NOT IN ( - SELECT MIN(id) - FROM documentcollection - GROUP BY document_id, collection_id - ) - """ - ) - op.create_unique_constraint( - "uq_document_collection", - "documentcollection", - ["document_id", "collection_id"], - ) - - -def downgrade(): - op.drop_constraint("uq_document_collection", "documentcollection", type_="unique") diff --git a/backend/app/alembic/versions/061_align_languages_id_to_integer.py b/backend/app/alembic/versions/061_align_languages_id_to_integer.py deleted file mode 100644 index 84dddd713..000000000 --- a/backend/app/alembic/versions/061_align_languages_id_to_integer.py +++ /dev/null @@ -1,34 +0,0 @@ -"""align global.languages.id to INTEGER - -Revision ID: 061 -Revises: 060 -Create Date: 2026-05-07 13:00:00.000000 - -Migration 043 originally created `global.languages.id` as BIGINT, but every -FK column referencing it (evaluation_dataset, evaluation_run, stt_sample) is -INTEGER. Migration 043's source has been edited to use INTEGER for fresh -setups; this migration aligns already-deployed databases. - -The underlying IDENTITY sequence stays BIGINT (PostgreSQL doesn't change it -on ALTER COLUMN TYPE). This is harmless — values would have to exceed -2^31 - 1 to cause an INSERT failure, and the table holds ~13 seeded rows. - -Languages table is small (≤100 rows in practice), so the AccessExclusiveLock -taken by ALTER COLUMN TYPE is sub-second. -""" - -from alembic import op - -# revision identifiers, used by Alembic. -revision = "061" -down_revision = "060" -branch_labels = None -depends_on = None - - -def upgrade(): - op.execute("ALTER TABLE global.languages ALTER COLUMN id SET DATA TYPE INTEGER") - - -def downgrade(): - op.execute("ALTER TABLE global.languages ALTER COLUMN id SET DATA TYPE BIGINT") From fbc34525e46f693fce73706c073ea919d5987fc5 Mon Sep 17 00:00:00 2001 From: AkhileshNegi Date: Thu, 7 May 2026 11:52:16 +0530 Subject: [PATCH 6/8] migration cleanups --- ..._optimization.py => 058_v1_query_optimization.py} | 8 ++++---- ...dant_indexes.py => 059_drop_redundant_indexes.py} | 12 ++++++------ ...orted_cleanups.py => 060_v1_assorted_cleanups.py} | 10 +++++----- 3 files changed, 15 insertions(+), 15 deletions(-) rename backend/app/alembic/versions/{057_v1_query_optimization.py => 058_v1_query_optimization.py} (99%) rename backend/app/alembic/versions/{058_drop_redundant_indexes.py => 059_drop_redundant_indexes.py} (94%) rename backend/app/alembic/versions/{059_v1_assorted_cleanups.py => 060_v1_assorted_cleanups.py} (96%) diff --git a/backend/app/alembic/versions/057_v1_query_optimization.py b/backend/app/alembic/versions/058_v1_query_optimization.py similarity index 99% rename from backend/app/alembic/versions/057_v1_query_optimization.py rename to backend/app/alembic/versions/058_v1_query_optimization.py index cce91f3de..c951b30dc 100644 --- a/backend/app/alembic/versions/057_v1_query_optimization.py +++ b/backend/app/alembic/versions/058_v1_query_optimization.py @@ -1,7 +1,7 @@ """v1.0 query optimization: project_id + composite indexes, drop is_deleted -Revision ID: 057 -Revises: 056 +Revision ID: 058 +Revises: 057 Create Date: 2026-05-05 12:00:00.000000 Bundles three coordinated changes for v1.0 lock: @@ -34,8 +34,8 @@ from alembic import op -revision = "057" -down_revision = "056" +revision = "058" +down_revision = "057" branch_labels = None depends_on = None diff --git 
a/backend/app/alembic/versions/058_drop_redundant_indexes.py b/backend/app/alembic/versions/059_drop_redundant_indexes.py similarity index 94% rename from backend/app/alembic/versions/058_drop_redundant_indexes.py rename to backend/app/alembic/versions/059_drop_redundant_indexes.py index 19b0f1256..84bd2653e 100644 --- a/backend/app/alembic/versions/058_drop_redundant_indexes.py +++ b/backend/app/alembic/versions/059_drop_redundant_indexes.py @@ -1,10 +1,10 @@ -"""drop redundant indexes superseded by 057 composites +"""drop redundant indexes superseded by 058 composites -Revision ID: 058 -Revises: 057 +Revision ID: 059 +Revises: 058 Create Date: 2026-05-05 14:00:00.000000 -Drops indexes that are now redundant after migration 057 added the +Drops indexes that are now redundant after migration 058 added the real composite/partial indexes that match actual query shapes: ix_project_name @@ -51,8 +51,8 @@ from alembic import op -revision = "058" -down_revision = "057" +revision = "059" +down_revision = "058" branch_labels = None depends_on = None diff --git a/backend/app/alembic/versions/059_v1_assorted_cleanups.py b/backend/app/alembic/versions/060_v1_assorted_cleanups.py similarity index 96% rename from backend/app/alembic/versions/059_v1_assorted_cleanups.py rename to backend/app/alembic/versions/060_v1_assorted_cleanups.py index d2965b243..e55c0557b 100644 --- a/backend/app/alembic/versions/059_v1_assorted_cleanups.py +++ b/backend/app/alembic/versions/060_v1_assorted_cleanups.py @@ -1,7 +1,7 @@ """v1.0 assorted schema cleanups -Revision ID: 059 -Revises: 058 +Revision ID: 060 +Revises: 059 Create Date: 2026-05-07 14:00:00.000000 Bundles five small, mutually independent v1.0 cleanups in source order: @@ -12,7 +12,7 @@ 2. job: backfill orphan project_id rows to NULL, then add the missing foreign key constraint with ON DELETE CASCADE. The supporting - ix_job_project_id index is created by migration 057. + ix_job_project_id index is created by migration 058. 3. job + llm_call: rename `created_at` → `inserted_at` to align with the rest of the schema (every other table uses `inserted_at`). @@ -31,8 +31,8 @@ from alembic import op # revision identifiers, used by Alembic. -revision = "059" -down_revision = "058" +revision = "060" +down_revision = "059" branch_labels = None depends_on = None From 67e2e1428fef7681e9ecbc79881dfbe881b72be0 Mon Sep 17 00:00:00 2001 From: AkhileshNegi Date: Thu, 7 May 2026 14:29:29 +0530 Subject: [PATCH 7/8] migration cleanups --- .../versions/058_v1_query_optimization.py | 33 +++++++++++++------ .../versions/060_v1_assorted_cleanups.py | 18 +++++++++- 2 files changed, 40 insertions(+), 11 deletions(-) diff --git a/backend/app/alembic/versions/058_v1_query_optimization.py b/backend/app/alembic/versions/058_v1_query_optimization.py index c951b30dc..43a9ffd39 100644 --- a/backend/app/alembic/versions/058_v1_query_optimization.py +++ b/backend/app/alembic/versions/058_v1_query_optimization.py @@ -90,65 +90,78 @@ ] -# Composite + partial indexes (P1). (index_name, body_after_INDEX_NAME) -COMPOSITE_INDEXES: list[tuple[str, str]] = [ +# Composite + partial indexes (P1). (index_name, body_after_INDEX_NAME, schema) +# `schema` is the unquoted PG schema for downgrade DROP INDEX, or None for +# the default (public) schema. The upgrade body already names the schema +# inline in its ON clause; the field exists so downgrade doesn't have to +# string-sniff it back out. 
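+# Illustrative entry shapes only (hypothetical index and table names, not
+# part of this migration):
+#   ("ix_foo_project_active", 'ON "foo" ("project_id") WHERE "deleted_at" IS NULL', None)
+#   ("ix_bar_lookup", 'ON "global"."bar" ("col")', "global")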
+COMPOSITE_INDEXES: list[tuple[str, str, str | None]] = [ ( "ix_document_project_inserted_at_active", 'ON "document" ("project_id", "inserted_at" DESC) WHERE "deleted_at" IS NULL', + None, ), ( "ix_openai_conversation_project_inserted_at_active", 'ON "openai_conversation" ("project_id", "inserted_at" DESC) WHERE "deleted_at" IS NULL', + None, ), ( "ix_openai_conversation_ancestor_project_inserted_at_active", 'ON "openai_conversation" ("ancestor_response_id", "project_id", "inserted_at" DESC) WHERE "deleted_at" IS NULL', + None, ), ( "ix_openai_conversation_response_project_active", 'ON "openai_conversation" ("response_id", "project_id") WHERE "deleted_at" IS NULL', + None, ), ( "ix_collection_jobs_project_status_inserted_at", 'ON "collection_jobs" ("project_id", "status", "inserted_at" DESC)', + None, ), ( "ix_evaluation_run_org_project_type_inserted_at", 'ON "evaluation_run" ("organization_id", "project_id", "type", "inserted_at" DESC)', + None, ), ( "ix_evaluation_dataset_org_project_type_inserted_at", 'ON "evaluation_dataset" ("organization_id", "project_id", "type", "inserted_at" DESC)', - ), - ( - "ix_llm_call_job_created_at_active", - 'ON "llm_call" ("job_id", "created_at" DESC) WHERE "deleted_at" IS NULL', + None, ), ( "ix_model_evaluation_document_project_updated_at", 'ON "model_evaluation" ("document_id", "project_id", "updated_at" DESC) WHERE "deleted_at" IS NULL', + None, ), ( "ix_model_config_active_provider_name", 'ON "global"."model_config" ("is_active", "provider", "model_name")', + "global", ), ( "ix_collection_project_active", 'ON "collection" ("project_id") WHERE "deleted_at" IS NULL', + None, ), # Composite FK indexes that match the actual query shape ( "ix_fine_tuning_document_project", 'ON "fine_tuning" ("document_id", "project_id")', + None, ), ( "ix_model_evaluation_fine_tuning_project", 'ON "model_evaluation" ("fine_tuning_id", "project_id")', + None, ), # Partial index for active-key listing on apikey ( "ix_apikey_project_active", 'ON "apikey" ("project_id") WHERE "deleted_at" IS NULL', + None, ), ] @@ -172,15 +185,15 @@ def upgrade(): f'CREATE INDEX CONCURRENTLY IF NOT EXISTS "{index}" ' f'ON "{table}" ("{column}")' ) - for index, body in COMPOSITE_INDEXES: + for index, body, _schema in COMPOSITE_INDEXES: op.execute(f'CREATE INDEX CONCURRENTLY IF NOT EXISTS "{index}" {body}') def downgrade(): with op.get_context().autocommit_block(): - for index, body in COMPOSITE_INDEXES: - schema_qualified = '"global".' if '"global"."model_config"' in body else "" - op.execute(f'DROP INDEX CONCURRENTLY IF EXISTS {schema_qualified}"{index}"') + for index, _body, schema in COMPOSITE_INDEXES: + qualified = f'"{schema}"."{index}"' if schema else f'"{index}"' + op.execute(f"DROP INDEX CONCURRENTLY IF EXISTS {qualified}") for _table, _column, index in FK_INDEXES: op.execute(f'DROP INDEX CONCURRENTLY IF EXISTS "{index}"') diff --git a/backend/app/alembic/versions/060_v1_assorted_cleanups.py b/backend/app/alembic/versions/060_v1_assorted_cleanups.py index e55c0557b..264d56ae3 100644 --- a/backend/app/alembic/versions/060_v1_assorted_cleanups.py +++ b/backend/app/alembic/versions/060_v1_assorted_cleanups.py @@ -16,6 +16,9 @@ 3. job + llm_call: rename `created_at` → `inserted_at` to align with the rest of the schema (every other table uses `inserted_at`). + Also creates the partial index ix_llm_call_job_inserted_at_active + here (rather than in migration 058) so the index name reflects the + post-rename column. 4. 
documentcollection: dedupe any existing duplicate (document_id, collection_id) pairs (keeps the lowest id), then add the missing @@ -69,9 +72,18 @@ def upgrade(): ondelete="CASCADE", ) - # 3. Rename created_at → inserted_at on job and llm_call + # 3. Rename created_at → inserted_at on job and llm_call, then create + # the llm_call hot-path index using the new column name. Index + # creation is CONCURRENTLY and must run outside a transaction. op.alter_column("job", "created_at", new_column_name="inserted_at") op.alter_column("llm_call", "created_at", new_column_name="inserted_at") + with op.get_context().autocommit_block(): + op.execute( + "CREATE INDEX CONCURRENTLY IF NOT EXISTS " + '"ix_llm_call_job_inserted_at_active" ' + 'ON "llm_call" ("job_id", "inserted_at" DESC) ' + 'WHERE "deleted_at" IS NULL' + ) # 4. documentcollection unique constraint (with dedupe) op.execute( @@ -98,6 +110,10 @@ def downgrade(): # Reverse order of upgrade() op.execute("ALTER TABLE global.languages ALTER COLUMN id SET DATA TYPE BIGINT") op.drop_constraint("uq_document_collection", "documentcollection", type_="unique") + with op.get_context().autocommit_block(): + op.execute( + 'DROP INDEX CONCURRENTLY IF EXISTS "ix_llm_call_job_inserted_at_active"' + ) op.alter_column("llm_call", "inserted_at", new_column_name="created_at") op.alter_column("job", "inserted_at", new_column_name="created_at") op.drop_constraint("job_project_id_fkey", "job", type_="foreignkey") From 914ff2c437f2428d5a50f970fef9ac8a013e5076 Mon Sep 17 00:00:00 2001 From: AkhileshNegi Date: Fri, 8 May 2026 16:49:49 +0530 Subject: [PATCH 8/8] review resolves --- ...reate_global_schema_and_languages_table.py | 2 +- .../versions/058_v1_query_optimization.py | 13 ++++ .../versions/060_v1_assorted_cleanups.py | 62 ++++++++++++++++--- backend/app/models/user.py | 17 +++++ 4 files changed, 83 insertions(+), 11 deletions(-) diff --git a/backend/app/alembic/versions/043_create_global_schema_and_languages_table.py b/backend/app/alembic/versions/043_create_global_schema_and_languages_table.py index 6fee39066..f25807380 100644 --- a/backend/app/alembic/versions/043_create_global_schema_and_languages_table.py +++ b/backend/app/alembic/versions/043_create_global_schema_and_languages_table.py @@ -25,7 +25,7 @@ def upgrade(): "languages", sa.Column( "id", - sa.Integer(), + sa.BigInteger(), sa.Identity(always=False), primary_key=True, comment="Unique identifier for the language", diff --git a/backend/app/alembic/versions/058_v1_query_optimization.py b/backend/app/alembic/versions/058_v1_query_optimization.py index 43a9ffd39..1724c01bf 100644 --- a/backend/app/alembic/versions/058_v1_query_optimization.py +++ b/backend/app/alembic/versions/058_v1_query_optimization.py @@ -15,6 +15,9 @@ 2. Composite + partial indexes for hot list/pagination paths matching: WHERE project_id = ? [AND deleted_at IS NULL] ORDER BY DESC + Plus a small partial index `ix_evaluation_run_processing` for the + cron polling queries that filter by (type, status='processing') + without an organization_id predicate. 3. Drop the redundant `is_deleted` boolean from every table that also carries `deleted_at`. `deleted_at IS NULL` becomes the single source @@ -126,6 +129,16 @@ 'ON "evaluation_run" ("organization_id", "project_id", "type", "inserted_at" DESC)', None, ), + # Partial index for cron polling queries that filter by + # (type, status='processing') without an organization_id predicate + # (crud/evaluations/cron_utils.py and crud/evaluations/processing.py). 
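+    # Roughly the query shape served (illustrative, not the exact ORM
+    # statement):
+    #   SELECT * FROM evaluation_run
+    #   WHERE type = $1 AND status = 'processing';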
+ # The composite above leads with organization_id and does not serve + # these unscoped scans. + ( + "ix_evaluation_run_processing", + 'ON "evaluation_run" ("type", "batch_job_id") WHERE "status" = \'processing\'', + None, + ), ( "ix_evaluation_dataset_org_project_type_inserted_at", 'ON "evaluation_dataset" ("organization_id", "project_id", "type", "inserted_at" DESC)', diff --git a/backend/app/alembic/versions/060_v1_assorted_cleanups.py b/backend/app/alembic/versions/060_v1_assorted_cleanups.py index 264d56ae3..e851a02ed 100644 --- a/backend/app/alembic/versions/060_v1_assorted_cleanups.py +++ b/backend/app/alembic/versions/060_v1_assorted_cleanups.py @@ -6,9 +6,11 @@ Bundles five small, mutually independent v1.0 cleanups in source order: - 1. user_project: add `updated_at` column with NOW() server default for - backfill, then drop the default so future inserts use the model's - `default_factory=now`. + 1. user_project + user: add timestamp columns missing from the model. + `user_project` gets `updated_at`; `user` gets both `inserted_at` + and `updated_at`. Each new column is added with NOW() server + default so existing rows are backfilled, then the default is + dropped so future inserts use the model's `default_factory=now`. 2. job: backfill orphan project_id rows to NULL, then add the missing foreign key constraint with ON DELETE CASCADE. The supporting @@ -22,7 +24,9 @@ 4. documentcollection: dedupe any existing duplicate (document_id, collection_id) pairs (keeps the lowest id), then add the missing - unique constraint. + unique constraint. The unique index is built CONCURRENTLY and then + attached via ADD CONSTRAINT ... USING INDEX so the index build does + not take AccessExclusiveLock on the table. 5. global.languages: align id column type to INTEGER. Migration 043 originally created it as BIGINT, but every FK column referencing it @@ -41,7 +45,10 @@ def upgrade(): - # 1. user_project.updated_at + # 1. Backfill missing timestamp columns. Each column is created with + # a NOW() server default so existing rows are populated atomically; + # the default is then dropped so future inserts get their value + # from the model's `default_factory=now`. op.add_column( "user_project", sa.Column( @@ -54,6 +61,30 @@ def upgrade(): ) op.alter_column("user_project", "updated_at", server_default=None) + op.add_column( + "user", + sa.Column( + "inserted_at", + sa.DateTime(), + nullable=False, + server_default=sa.text("NOW()"), + comment="Timestamp when the user was created", + ), + ) + op.alter_column("user", "inserted_at", server_default=None) + + op.add_column( + "user", + sa.Column( + "updated_at", + sa.DateTime(), + nullable=False, + server_default=sa.text("NOW()"), + comment="Timestamp when the user was last updated", + ), + ) + op.alter_column("user", "updated_at", server_default=None) + # 2. job.project_id foreign key (with orphan backfill) op.execute( """ @@ -85,7 +116,10 @@ def upgrade(): 'WHERE "deleted_at" IS NULL' ) - # 4. documentcollection unique constraint (with dedupe) + # 4. documentcollection unique constraint (with dedupe). + # Build the underlying unique index CONCURRENTLY (no AccessExclusive + # on the table during the scan/build), then attach it as a constraint + # via ADD CONSTRAINT ... USING INDEX (catalog-only, brief lock). 
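+    #    One retry caveat (not handled here): a failed CONCURRENTLY build
+    #    leaves an INVALID index behind, and IF NOT EXISTS will then skip
+    #    the rebuild; drop the invalid index manually before re-running.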
op.execute(
         """
         DELETE FROM documentcollection
         WHERE id NOT IN (
             SELECT MIN(id)
             FROM documentcollection
             GROUP BY document_id, collection_id
         )
         """
     )
-    op.create_unique_constraint(
-        "uq_document_collection",
-        "documentcollection",
-        ["document_id", "collection_id"],
-    )
+    with op.get_context().autocommit_block():
+        op.execute(
+            "CREATE UNIQUE INDEX CONCURRENTLY IF NOT EXISTS "
+            '"uq_document_collection" '
+            'ON "documentcollection" ("document_id", "collection_id")'
+        )
+        op.execute(
+            'ALTER TABLE "documentcollection" '
+            'ADD CONSTRAINT "uq_document_collection" '
+            'UNIQUE USING INDEX "uq_document_collection"'
+        )
 
     # 5. Align global.languages.id to INTEGER
@@ -117,4 +157,6 @@ def downgrade():
     op.alter_column("llm_call", "inserted_at", new_column_name="created_at")
     op.alter_column("job", "inserted_at", new_column_name="created_at")
     op.drop_constraint("job_project_id_fkey", "job", type_="foreignkey")
+    op.drop_column("user", "updated_at")
+    op.drop_column("user", "inserted_at")
     op.drop_column("user_project", "updated_at")
diff --git a/backend/app/models/user.py b/backend/app/models/user.py
index 9596fb0e4..924a5c414 100644
--- a/backend/app/models/user.py
+++ b/backend/app/models/user.py
@@ -1,6 +1,10 @@
+from datetime import datetime
+
 from pydantic import EmailStr
 from sqlmodel import Field, SQLModel
 
+from app.core.util import now
+
 
 # Shared properties
 class UserBase(SQLModel):
@@ -73,6 +77,19 @@ class User(UserBase, table=True):
     hashed_password: str = Field(
         sa_column_kwargs={"comment": "Bcrypt hash of the user's password"},
     )
+    inserted_at: datetime = Field(
+        default_factory=now,
+        nullable=False,
+        sa_column_kwargs={"comment": "Timestamp when the user was created"},
+    )
+    updated_at: datetime = Field(
+        default_factory=now,
+        nullable=False,
+        sa_column_kwargs={
+            "comment": "Timestamp when the user was last updated",
+            "onupdate": now,
+        },
+    )
 
 
 # Properties to return via API, id is always required
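
Reviewer note: to spot-check that the hot-path partial indexes from
migration 058 are picked up, an illustrative EXPLAIN is below (index and
column names come from that migration; the literal 42 is a placeholder
project id):

    EXPLAIN (COSTS OFF)
    SELECT *
    FROM document
    WHERE project_id = 42
      AND deleted_at IS NULL
    ORDER BY inserted_at DESC
    LIMIT 20;

On a table with enough rows the plan should show an index scan on
ix_document_project_inserted_at_active; on a near-empty dev database the
planner may still prefer a sequential scan, which is expected.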