From d0de1ce2867b88a3cfae686e2bfff841c89d3e76 Mon Sep 17 00:00:00 2001
From: AkhileshNegi
Date: Tue, 5 May 2026 11:34:06 +0530
Subject: [PATCH 1/8] v1.0 readiness database cleanups

---
 .../versions/055_v1_query_optimization.py     | 198 ++++++++++++++++++
 .../versions/056_drop_redundant_indexes.py    |  98 +++++++++
 backend/app/api/deps.py                       |   2 +-
 backend/app/core/security.py                  |   2 +-
 backend/app/crud/api_key.py                   |   5 +-
 backend/app/crud/assistants.py                |   5 +-
 backend/app/crud/auth.py                      |   2 +-
 .../crud/document/doc_transformation_job.py   |   4 +-
 backend/app/crud/document/document.py         |   7 +-
 backend/app/crud/fine_tuning.py               |  56 ++---
 backend/app/crud/model_evaluation.py          |   4 +-
 backend/app/crud/openai_conversation.py       |  11 +-
 backend/app/crud/thread_results.py            |  10 +-
 backend/app/models/__init__.py                |   4 +-
 backend/app/models/api_key.py                 |   6 -
 backend/app/models/assistants.py              |   6 -
 backend/app/models/document.py                |   4 -
 backend/app/models/fine_tuning.py             |  10 +-
 backend/app/models/model_evaluation.py        |   9 +-
 backend/app/models/openai_conversation.py     |   6 -
 backend/app/models/project.py                 |   4 +-
 backend/app/models/threads.py                 |   4 +-
 backend/app/seed_data/seed_data.json          |   2 -
 backend/app/seed_data/seed_data.py            |   2 -
 .../documents/test_route_document_remove.py   |   2 +-
 backend/app/tests/api/routes/test_fine_tuning.py |   4 +-
 backend/app/tests/api/routes/test_threads.py  |  14 +-
 backend/app/tests/core/test_security.py       |   3 +-
 .../collection/test_crud_collection_delete.py |   2 +-
 .../documents/test_crud_document_delete.py    |   2 +-
 .../documents/test_crud_document_read_many.py |   2 +-
 .../documents/test_crud_document_update.py    |   2 +-
 .../documents/test_doc_transformation_job.py  |   7 +-
 backend/app/tests/crud/test_api_key.py        |   4 +-
 backend/app/tests/crud/test_assistants.py     |   5 +-
 backend/app/tests/crud/test_fine_tuning.py    |   2 +-
 backend/app/tests/crud/test_onboarding.py     |   2 +-
 .../tests/crud/test_openai_conversation.py    |   4 +-
 backend/app/tests/seed_data/seed_data.json    |   2 -
 backend/app/tests/seed_data/seed_data.py      |   2 -
 backend/app/tests/utils/auth.py               |   2 +-
 backend/app/tests/utils/document.py           |   1 -
 backend/app/tests/utils/test_data.py          |   6 +-
 backend/app/tests/utils/utils.py              |   2 +-
 44 files changed, 390 insertions(+), 141 deletions(-)
 create mode 100644 backend/app/alembic/versions/055_v1_query_optimization.py
 create mode 100644 backend/app/alembic/versions/056_drop_redundant_indexes.py

diff --git a/backend/app/alembic/versions/055_v1_query_optimization.py b/backend/app/alembic/versions/055_v1_query_optimization.py
new file mode 100644
index 000000000..ce4821b70
--- /dev/null
+++ b/backend/app/alembic/versions/055_v1_query_optimization.py
@@ -0,0 +1,198 @@
+"""v1.0 query optimization: project_id + composite indexes, drop is_deleted
+
+Revision ID: 055
+Revises: 054
+Create Date: 2026-05-05 12:00:00.000000
+
+Bundles three coordinated changes for the v1.0 lock:
+
+1. Single-column `project_id` btree indexes on every table-mapped model
+   that filters by project_id (the dominant tenant filter).
+   organization_id-only access is rare and intentionally deferred.
+   Tables already covered by a leading-column index are skipped:
+     - openai_assistant: UNIQUE(project_id, assistant_id) leads with project_id
+     - batch_job: ix_batch_job_project_id (migration 036)
+
+2. Composite + partial indexes for hot list/pagination paths matching:
+       WHERE project_id = ? [AND deleted_at IS NULL] ORDER BY <timestamp> DESC
+
+3. Drop the redundant `is_deleted` boolean from every table that also
+   carries `deleted_at`.
`deleted_at IS NULL` becomes the single source + of truth for soft-delete: same query cost when paired with a partial + index, preserves audit timestamp, no dual-write drift. + Affected tables: openai_assistant, apikey, document, + openai_conversation, fine_tuning, model_evaluation. + +Execution model: + Phase A (transactional): backfill deleted_at where is_deleted was true + but deleted_at was never set, then drop the is_deleted columns. + Phase B (autocommit_block): CREATE INDEX CONCURRENTLY for every index + so no AccessExclusiveLock is taken on hot tables. +""" + +import sqlalchemy as sa +from alembic import op + + +revision = "055" +down_revision = "054" +branch_labels = None +depends_on = None + + +# Tables that currently carry both `is_deleted` and `deleted_at`. +IS_DELETED_TABLES = [ + "openai_assistant", + "apikey", + "document", + "openai_conversation", + "fine_tuning", + "model_evaluation", +] + + +# Single-column FK / multi-tenant filter indexes (P0). +# (table_name, column_name, index_name) +FK_INDEXES: list[tuple[str, str, str]] = [ + # project_id across tables that filter by tenant + ("apikey", "project_id", "ix_apikey_project_id"), + ("credential", "project_id", "ix_credential_project_id"), + ("collection", "project_id", "ix_collection_project_id"), + ("collection_jobs", "project_id", "ix_collection_jobs_project_id"), + ("document", "project_id", "ix_document_project_id"), + ("evaluation_dataset", "project_id", "ix_evaluation_dataset_project_id"), + ("evaluation_run", "project_id", "ix_evaluation_run_project_id"), + ("file", "project_id", "ix_file_project_id"), + ("fine_tuning", "project_id", "ix_fine_tuning_project_id"), + ("job", "project_id", "ix_job_project_id"), + ("llm_call", "project_id", "ix_llm_call_project_id"), + ("llm_chain", "project_id", "ix_llm_chain_project_id"), + ("model_evaluation", "project_id", "ix_model_evaluation_project_id"), + ("openai_conversation", "project_id", "ix_openai_conversation_project_id"), + ("stt_result", "project_id", "ix_stt_result_project_id"), + ("stt_sample", "project_id", "ix_stt_sample_project_id"), + ("tts_result", "project_id", "ix_tts_result_project_id"), + ("user_project", "project_id", "ix_user_project_project_id"), + # Other un-indexed FKs surfaced by the audit + ("apikey", "user_id", "ix_apikey_user_id"), + ("collection_jobs", "collection_id", "ix_collection_jobs_collection_id"), + ( + "doc_transformation_job", + "source_document_id", + "ix_doc_transformation_job_source_document_id", + ), + ( + "doc_transformation_job", + "transformed_document_id", + "ix_doc_transformation_job_transformed_document_id", + ), + ("evaluation_run", "dataset_id", "ix_evaluation_run_dataset_id"), +] + + +# Composite + partial indexes (P1). 
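Each composite below pairs the tenant filter with the soft-delete predicate and the sort key, so a hot list query can be satisfied by a single index scan. A minimal sketch for spot-checking that the planner actually picks one of these up, assuming a reachable `DATABASE_URL` environment variable and a database where this migration has already run (the query shape mirrors `DocumentCrud.read_many`):

```python
import os

from sqlalchemy import create_engine, text

# Hypothetical connection string; point it at any environment where
# migration 055 has been applied.
engine = create_engine(os.environ["DATABASE_URL"])

# Same shape as the hot document-list path: tenant filter, soft-delete
# predicate, newest-first pagination.
HOT_QUERY = text(
    "EXPLAIN (FORMAT TEXT) "
    "SELECT * FROM document "
    "WHERE project_id = :pid AND deleted_at IS NULL "
    "ORDER BY inserted_at DESC LIMIT 50"
)

with engine.connect() as conn:
    plan = "\n".join(conn.execute(HOT_QUERY, {"pid": 1}).scalars())
    print(plan)
    # On near-empty tables the planner may legitimately prefer a seq
    # scan, so treat a miss as a prompt to investigate, not a failure.
    if "ix_document_project_inserted_at_active" not in plan:
        print("warning: composite partial index not chosen")
```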
+# (index_name, body_after_INDEX_NAME)
+COMPOSITE_INDEXES: list[tuple[str, str]] = [
+    (
+        "ix_document_project_inserted_at_active",
+        'ON "document" ("project_id", "inserted_at" DESC) WHERE "deleted_at" IS NULL',
+    ),
+    (
+        "ix_openai_conversation_project_inserted_at_active",
+        'ON "openai_conversation" ("project_id", "inserted_at" DESC) WHERE "deleted_at" IS NULL',
+    ),
+    (
+        "ix_openai_conversation_ancestor_project_inserted_at_active",
+        'ON "openai_conversation" ("ancestor_response_id", "project_id", "inserted_at" DESC) WHERE "deleted_at" IS NULL',
+    ),
+    (
+        "ix_openai_conversation_response_project_active",
+        'ON "openai_conversation" ("response_id", "project_id") WHERE "deleted_at" IS NULL',
+    ),
+    (
+        "ix_collection_jobs_project_status_inserted_at",
+        'ON "collection_jobs" ("project_id", "status", "inserted_at" DESC)',
+    ),
+    (
+        "ix_evaluation_run_org_project_type_inserted_at",
+        'ON "evaluation_run" ("organization_id", "project_id", "type", "inserted_at" DESC)',
+    ),
+    (
+        "ix_evaluation_dataset_org_project_type_inserted_at",
+        'ON "evaluation_dataset" ("organization_id", "project_id", "type", "inserted_at" DESC)',
+    ),
+    (
+        "ix_llm_call_job_created_at_active",
+        'ON "llm_call" ("job_id", "created_at" DESC) WHERE "deleted_at" IS NULL',
+    ),
+    (
+        "ix_model_evaluation_document_project_updated_at",
+        'ON "model_evaluation" ("document_id", "project_id", "updated_at" DESC) WHERE "deleted_at" IS NULL',
+    ),
+    (
+        "ix_model_config_active_provider_name",
+        'ON "global"."model_config" ("is_active", "provider", "model_name")',
+    ),
+    (
+        "ix_collection_project_active",
+        'ON "collection" ("project_id") WHERE "deleted_at" IS NULL',
+    ),
+    # Composite FK indexes that match the actual query shape
+    (
+        "ix_fine_tuning_document_project",
+        'ON "fine_tuning" ("document_id", "project_id")',
+    ),
+    (
+        "ix_model_evaluation_fine_tuning_project",
+        'ON "model_evaluation" ("fine_tuning_id", "project_id")',
+    ),
+    # Partial index for active-key listing on apikey
+    (
+        "ix_apikey_project_active",
+        'ON "apikey" ("project_id") WHERE "deleted_at" IS NULL',
+    ),
+]
+
+
+def upgrade():
+    # Phase A (transactional): preserve audit timestamp, drop redundant column.
+    for table in IS_DELETED_TABLES:
+        op.execute(
+            f"UPDATE {table} "
+            f"SET deleted_at = NOW() "
+            f"WHERE is_deleted = TRUE AND deleted_at IS NULL"
+        )
+        op.drop_column(table, "is_deleted")
+
+    # Phase B (autocommit): concurrent index creation. Each statement
+    # runs in its own implicit transaction, as required by the
+    # CONCURRENTLY variant.
+    with op.get_context().autocommit_block():
+        for table, column, index in FK_INDEXES:
+            op.execute(
+                f'CREATE INDEX CONCURRENTLY IF NOT EXISTS "{index}" '
+                f'ON "{table}" ("{column}")'
+            )
+        for index, body in COMPOSITE_INDEXES:
+            op.execute(f'CREATE INDEX CONCURRENTLY IF NOT EXISTS "{index}" {body}')
+
+
+def downgrade():
+    with op.get_context().autocommit_block():
+        for index, body in COMPOSITE_INDEXES:
+            schema_qualified = '"global".' if '"global"."model_config"' in body else ""
+            op.execute(f'DROP INDEX CONCURRENTLY IF EXISTS {schema_qualified}"{index}"')
+        for _table, _column, index in FK_INDEXES:
+            op.execute(f'DROP INDEX CONCURRENTLY IF EXISTS "{index}"')
+
+    for table in IS_DELETED_TABLES:
+        op.add_column(
+            table,
+            sa.Column(
+                "is_deleted",
+                sa.Boolean(),
+                nullable=False,
+                server_default=sa.text("false"),
+                comment="Soft delete flag",
+            ),
+        )
+        op.execute(f"UPDATE {table} SET is_deleted = TRUE WHERE deleted_at IS NOT NULL")
diff --git a/backend/app/alembic/versions/056_drop_redundant_indexes.py b/backend/app/alembic/versions/056_drop_redundant_indexes.py
new file mode 100644
index 000000000..fbe76be1a
--- /dev/null
+++ b/backend/app/alembic/versions/056_drop_redundant_indexes.py
@@ -0,0 +1,98 @@
+"""drop redundant indexes superseded by 055 composites
+
+Revision ID: 056
+Revises: 055
+Create Date: 2026-05-05 14:00:00.000000
+
+Drops indexes that are now redundant after migration 055 added the
+real composite/partial indexes that match actual query shapes:
+
+    ix_project_name
+        Subsumed by uq_project_name_org_id (name is leading column).
+        No code path queries Project.name without organization_id.
+
+    ix_credential_provider
+        Subsumed by uq_credential_org_project_provider. All four CRUD
+        paths in crud/credentials.py filter (org, project, provider) — never
+        provider alone.
+
+    ix_openai_conversation_previous_response_id
+        Zero query consumers; previous_response_id is read but never
+        filtered on in any WHERE clause.
+
+    ix_openai_conversation_response_id
+        Superseded by ix_openai_conversation_response_project_active
+        (project-scoped partial), which exactly matches the CRUD predicates
+        in crud/openai_conversation.py:get_conversation_by_response_id.
+
+    ix_openai_conversation_ancestor_response_id
+        Superseded by
+        ix_openai_conversation_ancestor_project_inserted_at_active, which
+        matches the (ancestor_response_id, project_id) + ORDER BY shape
+        used in crud/openai_conversation.py:get_conversation_by_ancestor_id
+        and the /responses thread reconstruction path.
+
+    idx_file_type
+        Low cardinality (4 enum values), and the only consumer in
+        crud/file.py:147 always pairs file_type with (organization_id,
+        project_id). idx_file_org_project covers the query; an extra
+        in-memory filter on file_type is cheaper than a second index hit.
+
+    idx_eval_run_status_org / idx_eval_run_status_project
+        Both lead with low-cardinality status. Real CRUD queries lead with
+        (organization_id, project_id, type), now covered by
+        ix_evaluation_run_org_project_type_inserted_at.
+
+Uses DROP INDEX CONCURRENTLY so no AccessExclusiveLock is taken.
+Downgrade recreates the original indexes (also concurrently) so the
+schema can be restored bit-for-bit if needed.
+"""
+
+from alembic import op
+
+
+revision = "056"
+down_revision = "055"
+branch_labels = None
+depends_on = None
+
+
+# (index_name, recreate_sql_body)
+# recreate_sql_body is "ON \"<table>\" (<columns>)", used by downgrade only.
+INDEXES_TO_DROP: list[tuple[str, str]] = [ + ("ix_project_name", 'ON "project" ("name")'), + ("ix_credential_provider", 'ON "credential" ("provider")'), + ( + "ix_openai_conversation_previous_response_id", + 'ON "openai_conversation" ("previous_response_id")', + ), + ( + "ix_openai_conversation_response_id", + 'ON "openai_conversation" ("response_id")', + ), + ( + "ix_openai_conversation_ancestor_response_id", + 'ON "openai_conversation" ("ancestor_response_id")', + ), + ("idx_file_type", 'ON "file" ("file_type")'), + ( + "idx_eval_run_status_org", + 'ON "evaluation_run" ("status", "organization_id")', + ), + ( + "idx_eval_run_status_project", + 'ON "evaluation_run" ("status", "project_id")', + ), +] + + +def upgrade(): + with op.get_context().autocommit_block(): + for index_name, _body in INDEXES_TO_DROP: + op.execute(f'DROP INDEX CONCURRENTLY IF EXISTS "{index_name}"') + + +def downgrade(): + with op.get_context().autocommit_block(): + for index_name, body in INDEXES_TO_DROP: + op.execute(f'CREATE INDEX CONCURRENTLY IF NOT EXISTS "{index_name}" {body}') diff --git a/backend/app/api/deps.py b/backend/app/api/deps.py index 526c5877b..eb7dd9d9a 100644 --- a/backend/app/api/deps.py +++ b/backend/app/api/deps.py @@ -117,7 +117,7 @@ def _authenticate_with_jwt(session: Session, token: str) -> AuthContext: and_( APIKey.user_id == user.id, APIKey.project_id == project.id, - APIKey.is_deleted.is_(False), + APIKey.deleted_at.is_(None), ) ) .limit(1) diff --git a/backend/app/core/security.py b/backend/app/core/security.py index ef2db7396..27481a256 100644 --- a/backend/app/core/security.py +++ b/backend/app/core/security.py @@ -317,7 +317,7 @@ def verify(cls, session: Session, raw_key: str) -> AuthContext | None: .where( and_( APIKey.key_prefix == key_prefix, - APIKey.is_deleted.is_(False), + APIKey.deleted_at.is_(None), ) ) .join(User, User.id == APIKey.user_id) diff --git a/backend/app/crud/api_key.py b/backend/app/crud/api_key.py index 374b496e9..23c0f583e 100644 --- a/backend/app/crud/api_key.py +++ b/backend/app/crud/api_key.py @@ -30,7 +30,7 @@ def read_one(self, key_id: UUID) -> APIKey | None: and_( APIKey.id == key_id, APIKey.project_id == self.project_id, - APIKey.is_deleted.is_(False), + APIKey.deleted_at.is_(None), ) ) return self.session.exec(statement).one_or_none() @@ -44,7 +44,7 @@ def read_all(self, skip: int = 0, limit: int = 100) -> list[APIKey]: .where( and_( APIKey.project_id == self.project_id, - APIKey.is_deleted.is_(False), + APIKey.deleted_at.is_(None), ) ) .offset(skip) @@ -105,7 +105,6 @@ def delete(self, key_id: UUID) -> None: if not api_key: raise HTTPException(status_code=404, detail="API Key not found") - api_key.is_deleted = True api_key.deleted_at = now() api_key.updated_at = now() self.session.add(api_key) diff --git a/backend/app/crud/assistants.py b/backend/app/crud/assistants.py index c32f820da..950ef62b7 100644 --- a/backend/app/crud/assistants.py +++ b/backend/app/crud/assistants.py @@ -24,7 +24,7 @@ def get_assistant_by_id( and_( Assistant.assistant_id == assistant_id, Assistant.project_id == project_id, - Assistant.is_deleted == False, + Assistant.deleted_at.is_(None), ) ) return session.exec(statement).first() @@ -43,7 +43,7 @@ def get_assistants_by_project( select(Assistant) .where( Assistant.project_id == project_id, - Assistant.is_deleted == False, + Assistant.deleted_at.is_(None), ) .offset(skip) .limit(limit) @@ -272,7 +272,6 @@ def delete_assistant( ) raise HTTPException(status_code=404, detail="Assistant not found.") - existing_assistant.is_deleted = 
True existing_assistant.deleted_at = now() session.add(existing_assistant) session.commit() diff --git a/backend/app/crud/auth.py b/backend/app/crud/auth.py index 39147b86e..27e703fe9 100644 --- a/backend/app/crud/auth.py +++ b/backend/app/crud/auth.py @@ -41,7 +41,7 @@ def get_user_accessible_projects(*, session: Session, user_id: int) -> list[dict .where( and_( APIKey.user_id == user_id, - APIKey.is_deleted.is_(False), + APIKey.deleted_at.is_(None), Organization.is_active.is_(True), Project.is_active.is_(True), ) diff --git a/backend/app/crud/document/doc_transformation_job.py b/backend/app/crud/document/doc_transformation_job.py index 0fd278013..3e329df70 100644 --- a/backend/app/crud/document/doc_transformation_job.py +++ b/backend/app/crud/document/doc_transformation_job.py @@ -41,7 +41,7 @@ def read_one(self, job_id: UUID) -> DocTransformationJob: and_( DocTransformationJob.id == job_id, Document.project_id == self.project_id, - Document.is_deleted.is_(False), + Document.deleted_at.is_(None), ) ) ) @@ -62,7 +62,7 @@ def read_each(self, job_ids: set[UUID]) -> list[DocTransformationJob]: and_( DocTransformationJob.id.in_(list(job_ids)), Document.project_id == self.project_id, - Document.is_deleted.is_(False), + Document.deleted_at.is_(None), ) ) ) diff --git a/backend/app/crud/document/document.py b/backend/app/crud/document/document.py index 35e4d86fb..fbadea255 100644 --- a/backend/app/crud/document/document.py +++ b/backend/app/crud/document/document.py @@ -20,7 +20,7 @@ def read_one(self, doc_id: UUID) -> Document: and_( Document.id == doc_id, Document.project_id == self.project_id, - Document.is_deleted.is_(False), + Document.deleted_at.is_(None), ) ) @@ -39,7 +39,7 @@ def read_many( limit: int | None = None, ) -> tuple[list[Document], bool]: statement = select(Document).where( - and_(Document.project_id == self.project_id, Document.is_deleted.is_(False)) + and_(Document.project_id == self.project_id, Document.deleted_at.is_(None)) ) statement = statement.order_by(Document.inserted_at.desc()) @@ -81,7 +81,7 @@ def read_each(self, doc_ids: list[UUID]): and_( Document.project_id == self.project_id, Document.id.in_(doc_ids), - Document.is_deleted.is_(False), + Document.deleted_at.is_(None), ) ) results = self.session.exec(statement).all() @@ -130,7 +130,6 @@ def update(self, document: Document): def delete(self, doc_id: UUID): document = self.read_one(doc_id) - document.is_deleted = True document.deleted_at = now() document.updated_at = now() diff --git a/backend/app/crud/fine_tuning.py b/backend/app/crud/fine_tuning.py index 61a0ccc6a..8d9e5fed8 100644 --- a/backend/app/crud/fine_tuning.py +++ b/backend/app/crud/fine_tuning.py @@ -7,7 +7,7 @@ from app.core.util import now from app.models import ( - Fine_Tuning, + FineTuning, FineTuningJobCreate, FineTuningUpdate, FineTuningStatus, @@ -24,7 +24,7 @@ def create_fine_tuning_job( status: FineTuningStatus = FineTuningStatus.pending, project_id: int = None, organization_id: int = None, -) -> tuple[Fine_Tuning, bool]: +) -> tuple[FineTuning, bool]: active_jobs = fetch_active_jobs_by_document_id( session=session, document_id=request.document_id, @@ -53,7 +53,7 @@ def create_fine_tuning_job( "status": status, } - fine_tune = Fine_Tuning(**base_data) + fine_tune = FineTuning(**base_data) fine_tune.updated_at = now() session.add(fine_tune) @@ -68,11 +68,11 @@ def create_fine_tuning_job( def fetch_by_provider_job_id( session: Session, provider_job_id: str, project_id: int -) -> Fine_Tuning: +) -> FineTuning: job = session.exec( - 
select(Fine_Tuning).where( - Fine_Tuning.provider_job_id == provider_job_id, - Fine_Tuning.project_id == project_id, + select(FineTuning).where( + FineTuning.provider_job_id == provider_job_id, + FineTuning.project_id == project_id, ) ).one_or_none() @@ -85,10 +85,10 @@ def fetch_by_provider_job_id( return job -def fetch_by_id(session: Session, job_id: int, project_id: int) -> Fine_Tuning: +def fetch_by_id(session: Session, job_id: int, project_id: int) -> FineTuning: job = session.exec( - select(Fine_Tuning).where( - Fine_Tuning.id == job_id, Fine_Tuning.project_id == project_id + select(FineTuning).where( + FineTuning.id == job_id, FineTuning.project_id == project_id ) ).one_or_none() @@ -110,15 +110,15 @@ def fetch_by_document_id( project_id: int, split_ratio: float = None, base_model: Optional[str] = None, -) -> list[Fine_Tuning]: - query = select(Fine_Tuning).where( - Fine_Tuning.document_id == document_id, Fine_Tuning.project_id == project_id +) -> list[FineTuning]: + query = select(FineTuning).where( + FineTuning.document_id == document_id, FineTuning.project_id == project_id ) if split_ratio is not None: - query = query.where(Fine_Tuning.split_ratio == split_ratio) + query = query.where(FineTuning.split_ratio == split_ratio) if base_model is not None: - query = query.where(Fine_Tuning.base_model == base_model) + query = query.where(FineTuning.base_model == base_model) jobs = session.exec(query).all() logger.info( @@ -134,39 +134,39 @@ def fetch_active_jobs_by_document_id( split_ratio: Optional[float] = None, base_model: Optional[str] = None, exclude_job_id: Optional[int] = None, -) -> list["Fine_Tuning"]: +) -> list["FineTuning"]: """ Return all ACTIVE jobs for the given document & project. - Active = status != failed AND is_deleted is false. + Active = status != failed AND not soft-deleted. """ stmt = ( - select(Fine_Tuning) + select(FineTuning) .where( - Fine_Tuning.document_id == document_id, - Fine_Tuning.project_id == project_id, - Fine_Tuning.is_deleted.is_(False), - Fine_Tuning.status != FineTuningStatus.failed, + FineTuning.document_id == document_id, + FineTuning.project_id == project_id, + FineTuning.deleted_at.is_(None), + FineTuning.status != FineTuningStatus.failed, ) - .order_by(Fine_Tuning.inserted_at.desc()) + .order_by(FineTuning.inserted_at.desc()) ) if split_ratio is not None: - stmt = stmt.where(Fine_Tuning.split_ratio == split_ratio) + stmt = stmt.where(FineTuning.split_ratio == split_ratio) if base_model is not None: - stmt = stmt.where(Fine_Tuning.base_model == base_model) + stmt = stmt.where(FineTuning.base_model == base_model) if exclude_job_id is not None: - stmt = stmt.where(Fine_Tuning.id != exclude_job_id) + stmt = stmt.where(FineTuning.id != exclude_job_id) return session.exec(stmt).all() def update_finetune_job( session: Session, - job: Fine_Tuning, + job: FineTuning, update: FineTuningUpdate, -) -> Fine_Tuning: +) -> FineTuning: for key, value in update.model_dump(exclude_unset=True).items(): setattr(job, key, value) diff --git a/backend/app/crud/model_evaluation.py b/backend/app/crud/model_evaluation.py index 51fa7a486..10cfbe667 100644 --- a/backend/app/crud/model_evaluation.py +++ b/backend/app/crud/model_evaluation.py @@ -154,14 +154,14 @@ def fetch_active_model_evals( ) -> list["ModelEvaluation"]: """ Return all ACTIVE model evaluations for the given document & project. - Active = status != failed AND is_deleted is false. + Active = status != failed AND not soft-deleted. 
""" stmt = ( select(ModelEvaluation) .where( ModelEvaluation.fine_tuning_id == fine_tuning_id, ModelEvaluation.project_id == project_id, - ModelEvaluation.is_deleted.is_(False), + ModelEvaluation.deleted_at.is_(None), ModelEvaluation.status != "failed", ) .order_by(ModelEvaluation.inserted_at.desc()) diff --git a/backend/app/crud/openai_conversation.py b/backend/app/crud/openai_conversation.py index 7ef127b47..83f3316c4 100644 --- a/backend/app/crud/openai_conversation.py +++ b/backend/app/crud/openai_conversation.py @@ -17,7 +17,7 @@ def get_conversation_by_id( statement = select(OpenAIConversation).where( OpenAIConversation.id == conversation_id, OpenAIConversation.project_id == project_id, - OpenAIConversation.is_deleted == False, + OpenAIConversation.deleted_at.is_(None), ) result = session.exec(statement).first() return result @@ -32,7 +32,7 @@ def get_conversation_by_response_id( statement = select(OpenAIConversation).where( OpenAIConversation.response_id == response_id, OpenAIConversation.project_id == project_id, - OpenAIConversation.is_deleted == False, + OpenAIConversation.deleted_at.is_(None), ) result = session.exec(statement).first() return result @@ -49,7 +49,7 @@ def get_conversation_by_ancestor_id( .where( OpenAIConversation.ancestor_response_id == ancestor_response_id, OpenAIConversation.project_id == project_id, - OpenAIConversation.is_deleted == False, + OpenAIConversation.deleted_at.is_(None), ) .order_by(OpenAIConversation.inserted_at.desc()) .limit(1) @@ -108,7 +108,7 @@ def get_conversations_count_by_project( """ statement = select(func.count(OpenAIConversation.id)).where( OpenAIConversation.project_id == project_id, - OpenAIConversation.is_deleted == False, + OpenAIConversation.deleted_at.is_(None), ) result = session.exec(statement).one() return result @@ -127,7 +127,7 @@ def get_conversations_by_project( select(OpenAIConversation) .where( OpenAIConversation.project_id == project_id, - OpenAIConversation.is_deleted == False, + OpenAIConversation.deleted_at.is_(None), ) .order_by(OpenAIConversation.inserted_at.desc()) .offset(skip) @@ -175,7 +175,6 @@ def delete_conversation( if not db_conversation: return None - db_conversation.is_deleted = True db_conversation.deleted_at = now() session.add(db_conversation) session.commit() diff --git a/backend/app/crud/thread_results.py b/backend/app/crud/thread_results.py index 7a2691ff2..3f9c05c5a 100644 --- a/backend/app/crud/thread_results.py +++ b/backend/app/crud/thread_results.py @@ -1,14 +1,14 @@ import logging from sqlmodel import Session, select from datetime import datetime -from app.models import OpenAIThreadCreate, OpenAI_Thread +from app.models import OpenAIThreadCreate, OpenAIThread from app.utils import mask_string logger = logging.getLogger(__name__) def upsert_thread_result(session: Session, data: OpenAIThreadCreate): - statement = select(OpenAI_Thread).where(OpenAI_Thread.thread_id == data.thread_id) + statement = select(OpenAIThread).where(OpenAIThread.thread_id == data.thread_id) existing = session.exec(statement).first() if existing: @@ -21,7 +21,7 @@ def upsert_thread_result(session: Session, data: OpenAIThreadCreate): f"[upsert_thread_result] Updated existing thread result in the db with ID: {mask_string(data.thread_id)}" ) else: - new_thread = OpenAI_Thread(**data.dict()) + new_thread = OpenAIThread(**data.dict()) session.add(new_thread) logger.info( f"[upsert_thread_result] Created new thread result in the db with ID: {mask_string(new_thread.thread_id)}" @@ -29,6 +29,6 @@ def 
upsert_thread_result(session: Session, data: OpenAIThreadCreate): session.commit() -def get_thread_result(session: Session, thread_id: str) -> OpenAI_Thread | None: - statement = select(OpenAI_Thread).where(OpenAI_Thread.thread_id == thread_id) +def get_thread_result(session: Session, thread_id: str) -> OpenAIThread | None: + statement = select(OpenAIThread).where(OpenAIThread.thread_id == thread_id) return session.exec(statement).first() diff --git a/backend/app/models/__init__.py b/backend/app/models/__init__.py index 05f39032e..888c1c891 100644 --- a/backend/app/models/__init__.py +++ b/backend/app/models/__init__.py @@ -91,7 +91,7 @@ ) from .file import AudioUploadResponse, File, FilePublic, FileType from .fine_tuning import ( - Fine_Tuning, + FineTuning, FineTuningJobBase, FineTuningJobCreate, FineTuningJobPublic, @@ -161,7 +161,7 @@ ResponsesAPIRequest, ResponsesSyncAPIRequest, ) -from .threads import OpenAI_Thread, OpenAIThreadBase, OpenAIThreadCreate +from .threads import OpenAIThread, OpenAIThreadBase, OpenAIThreadCreate from .user import ( NewPassword, UpdatePassword, diff --git a/backend/app/models/api_key.py b/backend/app/models/api_key.py index e8bd6c1b0..5780a8029 100644 --- a/backend/app/models/api_key.py +++ b/backend/app/models/api_key.py @@ -73,12 +73,6 @@ class APIKey(APIKeyBase, table=True): nullable=False, sa_column_kwargs={"comment": "Bcrypt hash of the secret of the API key"}, ) - is_deleted: bool = Field( - default=False, - nullable=False, - sa_column_kwargs={"comment": "Soft delete flag"}, - ) - # Timestamps inserted_at: datetime = Field( default_factory=now, diff --git a/backend/app/models/assistants.py b/backend/app/models/assistants.py index bb9b3318b..bdd5c4293 100644 --- a/backend/app/models/assistants.py +++ b/backend/app/models/assistants.py @@ -74,12 +74,6 @@ class Assistant(AssistantBase, table=True): "comment": "Parameter that controls maximum number of results to return" }, ) - is_deleted: bool = Field( - default=False, - nullable=False, - sa_column_kwargs={"comment": "Soft delete flag"}, - ) - # Foreign keys project_id: int = Field( foreign_key="project.id", diff --git a/backend/app/models/document.py b/backend/app/models/document.py index 12843e72a..c0aa6c8b6 100644 --- a/backend/app/models/document.py +++ b/backend/app/models/document.py @@ -37,10 +37,6 @@ class Document(DocumentBase, table=True): object_store_url: str = Field( sa_column_kwargs={"comment": "Cloud storage URL for the document"}, ) - is_deleted: bool = Field( - default=False, - sa_column_kwargs={"comment": "Soft delete flag"}, - ) file_size_kb: float | None = Field( default=None, description="The size of the document in kilobytes", diff --git a/backend/app/models/fine_tuning.py b/backend/app/models/fine_tuning.py index d16576fdb..8283caef8 100644 --- a/backend/app/models/fine_tuning.py +++ b/backend/app/models/fine_tuning.py @@ -52,9 +52,11 @@ def check_prompt(cls, v): return v.strip() -class Fine_Tuning(FineTuningJobBase, table=True): +class FineTuning(FineTuningJobBase, table=True): """Database model for tracking fine-tuning jobs.""" + __tablename__ = "fine_tuning" + id: int = Field( primary_key=True, sa_column_kwargs={"comment": "Unique identifier for the fine-tuning job"}, @@ -107,12 +109,6 @@ class Fine_Tuning(FineTuningJobBase, table=True): description="Error message for when something failed", sa_column_kwargs={"comment": "Error message if the job failed"}, ) - is_deleted: bool = Field( - default=False, - nullable=False, - sa_column_kwargs={"comment": "Soft delete flag"}, - ) - # 
Foreign keys document_id: UUID = Field( foreign_key="document.id", diff --git a/backend/app/models/model_evaluation.py b/backend/app/models/model_evaluation.py index 3dbadb1b3..4ae80fc4c 100644 --- a/backend/app/models/model_evaluation.py +++ b/backend/app/models/model_evaluation.py @@ -89,13 +89,6 @@ class ModelEvaluation(ModelEvaluationBase, table=True): description="Error message if evaluation failed", sa_column_kwargs={"comment": "Error message if evaluation failed"}, ) - is_deleted: bool = Field( - default=False, - nullable=False, - description="Soft delete flag", - sa_column_kwargs={"comment": "Soft delete flag"}, - ) - # Foreign keys fine_tuning_id: int = Field( foreign_key="fine_tuning.id", @@ -140,7 +133,7 @@ class ModelEvaluation(ModelEvaluationBase, table=True): # Relationships project: Project = Relationship() - fine_tuning: "Fine_Tuning" = Relationship(back_populates="model_evaluation") + fine_tuning: "FineTuning" = Relationship(back_populates="model_evaluation") class ModelEvaluationUpdate(SQLModel): diff --git a/backend/app/models/openai_conversation.py b/backend/app/models/openai_conversation.py index c319f9de7..297c8fea0 100644 --- a/backend/app/models/openai_conversation.py +++ b/backend/app/models/openai_conversation.py @@ -91,12 +91,6 @@ class OpenAIConversation(OpenAIConversationBase, table=True): primary_key=True, sa_column_kwargs={"comment": "Unique identifier for the conversation record"}, ) - is_deleted: bool = Field( - default=False, - nullable=False, - sa_column_kwargs={"comment": "Soft delete flag"}, - ) - # Timestamps inserted_at: datetime = Field( default_factory=now, diff --git a/backend/app/models/project.py b/backend/app/models/project.py index 66111d0cd..03b8d0f46 100644 --- a/backend/app/models/project.py +++ b/backend/app/models/project.py @@ -10,7 +10,7 @@ from .assistants import Assistant from .collection import Collection from .credentials import Credential - from .fine_tuning import Fine_Tuning + from .fine_tuning import FineTuning from .openai_conversation import OpenAIConversation from .organization import Organization @@ -99,7 +99,7 @@ class Project(ProjectBase, table=True): collections: list["Collection"] = Relationship( back_populates="project", cascade_delete=True ) - fine_tuning: list["Fine_Tuning"] = Relationship( + fine_tuning: list["FineTuning"] = Relationship( back_populates="project", cascade_delete=True ) openai_conversations: list["OpenAIConversation"] = Relationship( diff --git a/backend/app/models/threads.py b/backend/app/models/threads.py index 2753f72cb..d4dff77e9 100644 --- a/backend/app/models/threads.py +++ b/backend/app/models/threads.py @@ -17,9 +17,11 @@ class OpenAIThreadCreate(OpenAIThreadBase): pass # Used for requests, no `id` or timestamps -class OpenAI_Thread(OpenAIThreadBase, table=True): +class OpenAIThread(OpenAIThreadBase, table=True): """Stores OpenAI thread interactions and their responses.""" + __tablename__ = "openai_thread" + id: int = Field( default=None, primary_key=True, diff --git a/backend/app/seed_data/seed_data.json b/backend/app/seed_data/seed_data.json index bfaa929f1..ef0cbcd4a 100644 --- a/backend/app/seed_data/seed_data.json +++ b/backend/app/seed_data/seed_data.json @@ -42,7 +42,6 @@ "user_email": "{{SUPERUSER_EMAIL}}", "project_name": "Glific", "api_key": "ApiKey No3x47A5qoIGhm0kVKjQ77dhCqEdWRIQZlEPzzzh7i8", - "is_deleted": false, "deleted_at": null }, { @@ -50,7 +49,6 @@ "user_email": "{{ADMIN_EMAIL}}", "project_name": "Dalgo", "api_key": "ApiKey Px8y47B6roJHin1lWLkR88eiDrFdXSJRZmFQazzai8j", - 
"is_deleted": false, "deleted_at": null } ]} diff --git a/backend/app/seed_data/seed_data.py b/backend/app/seed_data/seed_data.py index 8a50279e1..324cac795 100644 --- a/backend/app/seed_data/seed_data.py +++ b/backend/app/seed_data/seed_data.py @@ -42,7 +42,6 @@ class APIKeyData(BaseModel): project_name: str user_email: EmailStr api_key: str - is_deleted: bool deleted_at: Optional[str] = None @@ -167,7 +166,6 @@ def create_api_key(session: Session, api_key_data_raw: dict[str, Any]) -> APIKey user_id=user.id, key_prefix=key_prefix, key_hash=key_hash, - is_deleted=api_key_data.is_deleted, deleted_at=api_key_data.deleted_at, ) session.add(api_key) diff --git a/backend/app/tests/api/routes/documents/test_route_document_remove.py b/backend/app/tests/api/routes/documents/test_route_document_remove.py index 2519de0a0..b99bd5640 100644 --- a/backend/app/tests/api/routes/documents/test_route_document_remove.py +++ b/backend/app/tests/api/routes/documents/test_route_document_remove.py @@ -63,7 +63,7 @@ def test_item_is_soft_removed( statement = select(Document).where(Document.id == document.id) result = db.exec(statement).one() - assert result.is_deleted is True + assert result.deleted_at is not None @openai_responses.mock() @patch("app.api.routes.documents.get_openai_client") diff --git a/backend/app/tests/api/routes/test_fine_tuning.py b/backend/app/tests/api/routes/test_fine_tuning.py index 1696f4505..5654ce4f3 100644 --- a/backend/app/tests/api/routes/test_fine_tuning.py +++ b/backend/app/tests/api/routes/test_fine_tuning.py @@ -11,7 +11,7 @@ from app.tests.utils.test_data import create_test_fine_tuning_jobs from app.tests.utils.utils import get_document from app.models import ( - Fine_Tuning, + FineTuning, FineTuningStatus, ModelEvaluation, ModelEvaluationStatus, @@ -105,7 +105,7 @@ def test_finetune_from_csv_multiple_split_ratio( # Verify that the background task was called for each split ratio assert mock_process_job.call_count == 3 - jobs = db.query(Fine_Tuning).all() + jobs = db.query(FineTuning).all() assert len(jobs) == 3 for job in jobs: diff --git a/backend/app/tests/api/routes/test_threads.py b/backend/app/tests/api/routes/test_threads.py index 9a1f297a6..56284de44 100644 --- a/backend/app/tests/api/routes/test_threads.py +++ b/backend/app/tests/api/routes/test_threads.py @@ -14,7 +14,7 @@ handle_openai_error, poll_run_and_prepare_response, ) -from app.models import OpenAI_Thread +from app.models import OpenAIThread from app.crud import get_thread_result from app.core.langfuse.langfuse import LangfuseTracer @@ -457,8 +457,8 @@ def test_poll_run_and_prepare_response_openai_error_handling( poll_run_and_prepare_response(request, mock_client, db) # Since thread_id is not the primary key, use select query - statement = select(OpenAI_Thread).where( - OpenAI_Thread.thread_id == "test_openai_error" + statement = select(OpenAIThread).where( + OpenAIThread.thread_id == "test_openai_error" ) result = db.exec(statement).first() @@ -488,8 +488,8 @@ def test_poll_run_and_prepare_response_non_completed( poll_run_and_prepare_response(request, mock_client, db) # thread_id is not the primary key, so we query using SELECT - statement = select(OpenAI_Thread).where( - OpenAI_Thread.thread_id == "test_non_complete" + statement = select(OpenAIThread).where( + OpenAIThread.thread_id == "test_non_complete" ) result = db.exec(statement).first() @@ -537,7 +537,7 @@ def test_threads_result_endpoint_success(client, db, user_api_key_header): question = "Capital of France?" message = "Paris." 
- db.add(OpenAI_Thread(thread_id=thread_id, prompt=question, response=message)) + db.add(OpenAIThread(thread_id=thread_id, prompt=question, response=message)) db.commit() response = client.get( @@ -557,7 +557,7 @@ def test_threads_result_endpoint_processing(client, db, user_api_key_header): thread_id = f"test_processing_{uuid.uuid4()}" question = "What is Glific?" - db.add(OpenAI_Thread(thread_id=thread_id, prompt=question, response=None)) + db.add(OpenAIThread(thread_id=thread_id, prompt=question, response=None)) db.commit() response = client.get( diff --git a/backend/app/tests/core/test_security.py b/backend/app/tests/core/test_security.py index 438bf8b05..daf4bc0bc 100644 --- a/backend/app/tests/core/test_security.py +++ b/backend/app/tests/core/test_security.py @@ -11,6 +11,7 @@ get_encryption_key, APIKeyManager, ) +from app.core.util import now from app.models import APIKey, User, Organization, Project, AuthContext from app.tests.utils.test_data import create_test_api_key @@ -157,7 +158,7 @@ def test_verify_deleted_key(self, db: Session): raw_key = api_key_response.key api_key = db.get(APIKey, api_key_response.id) - api_key.is_deleted = True + api_key.deleted_at = now() db.commit() auth_context = APIKeyManager.verify(db, raw_key) diff --git a/backend/app/tests/crud/collections/collection/test_crud_collection_delete.py b/backend/app/tests/crud/collections/collection/test_crud_collection_delete.py index 5cf4643d6..c0751fe72 100644 --- a/backend/app/tests/crud/collections/collection/test_crud_collection_delete.py +++ b/backend/app/tests/crud/collections/collection/test_crud_collection_delete.py @@ -69,7 +69,7 @@ def test_delete_document_deletes_collections(self, db: Session) -> None: documents = store.fill(1) stmt = select(APIKey).where( - APIKey.project_id == project.id, APIKey.is_deleted == False + APIKey.project_id == project.id, APIKey.deleted_at.is_(None) ) api_key = db.exec(stmt).first() diff --git a/backend/app/tests/crud/documents/documents/test_crud_document_delete.py b/backend/app/tests/crud/documents/documents/test_crud_document_delete.py index 212c513ed..83da6e2b5 100644 --- a/backend/app/tests/crud/documents/documents/test_crud_document_delete.py +++ b/backend/app/tests/crud/documents/documents/test_crud_document_delete.py @@ -27,7 +27,7 @@ def test_delete_is_soft(self, document: Document) -> None: assert document is not None def test_delete_marks_deleted(self, document: Document) -> None: - assert document.is_deleted is True + assert document.deleted_at is not None def test_delete_follows_insert(self, document: Document) -> None: assert document.inserted_at <= document.deleted_at diff --git a/backend/app/tests/crud/documents/documents/test_crud_document_read_many.py b/backend/app/tests/crud/documents/documents/test_crud_document_read_many.py index a193f84bb..5b872a5d7 100644 --- a/backend/app/tests/crud/documents/documents/test_crud_document_read_many.py +++ b/backend/app/tests/crud/documents/documents/test_crud_document_read_many.py @@ -34,7 +34,7 @@ def test_deleted_docs_are_excluded( ) -> None: crud = DocumentCrud(db, store.project.id) docs, _ = crud.read_many() - assert all(x.is_deleted is False for x in docs) + assert all(x.deleted_at is None for x in docs) def test_skip_is_respected( self, diff --git a/backend/app/tests/crud/documents/documents/test_crud_document_update.py b/backend/app/tests/crud/documents/documents/test_crud_document_update.py index 1b63104d1..aa3a8b4f6 100644 --- a/backend/app/tests/crud/documents/documents/test_crud_document_update.py +++ 
b/backend/app/tests/crud/documents/documents/test_crud_document_update.py @@ -42,7 +42,7 @@ def test_insert_does_not_delete( crud = DocumentCrud(db, documents.project_id) document = crud.update(next(documents)) - assert document.is_deleted is False + assert document.deleted_at is None def test_update_sets_default_owner( self, diff --git a/backend/app/tests/crud/documents/test_doc_transformation_job.py b/backend/app/tests/crud/documents/test_doc_transformation_job.py index b13104b79..f5f5ba74d 100644 --- a/backend/app/tests/crud/documents/test_doc_transformation_job.py +++ b/backend/app/tests/crud/documents/test_doc_transformation_job.py @@ -9,6 +9,7 @@ DocTransformJobUpdate, ) from app.core.exception_handlers import HTTPException +from app.core.util import now from app.tests.utils.document import DocumentStore from app.tests.utils.utils import get_project, SequentialUuidGenerator from app.tests.utils.test_data import create_test_project @@ -58,12 +59,12 @@ def test_cannot_create_job_with_deleted_document( because read filters out deleted documents. """ document = store.put() - document.is_deleted = True + document.deleted_at = now() db.add(document) db.commit() job = crud.create(DocTransformJobCreate(source_document_id=document.id)) - # read_one should 404 due to is_deleted=True on joined document + # read_one should 404 due to soft-deleted joined document with pytest.raises(HTTPException) as exc_info: crud.read_one(job.id) assert exc_info.value.status_code == 404 @@ -100,7 +101,7 @@ def test_cannot_read_job_with_deleted_document( document = store.put() job = crud.create(DocTransformJobCreate(source_document_id=document.id)) - document.is_deleted = True + document.deleted_at = now() db.add(document) db.commit() diff --git a/backend/app/tests/crud/test_api_key.py b/backend/app/tests/crud/test_api_key.py index 028364172..e6ca9e76e 100644 --- a/backend/app/tests/crud/test_api_key.py +++ b/backend/app/tests/crud/test_api_key.py @@ -25,7 +25,6 @@ def test_create_api_key(db: Session) -> None: assert api_key.organization_id == project.organization_id assert api_key.key_prefix is not None assert api_key.key_hash is not None - assert api_key.is_deleted is False assert api_key.deleted_at is None assert raw_key is not None assert len(raw_key) > 0 @@ -216,7 +215,6 @@ def test_delete_api_key(db: Session) -> None: db_key = db.get(APIKey, api_key.id) assert db_key is not None - assert db_key.is_deleted is True assert db_key.deleted_at is not None retrieved_key = api_key_crud.read_one(key_id=api_key.id) @@ -251,7 +249,7 @@ def test_delete_api_key_from_wrong_project(db: Session) -> None: db_key = db.get(APIKey, api_key.id) assert db_key is not None - assert db_key.is_deleted is False + assert db_key.deleted_at is None def test_delete_already_deleted_api_key(db: Session) -> None: diff --git a/backend/app/tests/crud/test_assistants.py b/backend/app/tests/crud/test_assistants.py index 12aa49944..c05b9aa28 100644 --- a/backend/app/tests/crud/test_assistants.py +++ b/backend/app/tests/crud/test_assistants.py @@ -376,7 +376,6 @@ def test_delete_assistant_success(self, db: Session) -> None: result = delete_assistant(db, assistant.assistant_id, assistant.project_id) - assert result.is_deleted is True assert result.deleted_at is not None with pytest.raises(ValueError) as exc_info: get_assistant(db, name=assistant.name) @@ -402,7 +401,7 @@ def test_get_assistant_by_id_success(self, db: Session) -> None: assert result is not None assert result.assistant_id == assistant.assistant_id assert result.project_id == 
assistant.project_id - assert result.is_deleted is False + assert result.deleted_at is None def test_get_assistant_by_id_not_found(self, db: Session) -> None: """Returns None when assistant is not found""" @@ -464,7 +463,7 @@ def test_get_assistants_by_project_success( assert assistant2.assistant_id in assistant_ids for assistant in result: assert assistant.project_id == project.id - assert assistant.is_deleted is False + assert assistant.deleted_at is None def test_get_assistants_by_project_empty(self, db: Session) -> None: """Returns empty list when project has no assistants""" diff --git a/backend/app/tests/crud/test_fine_tuning.py b/backend/app/tests/crud/test_fine_tuning.py index b128a4fc5..5ac8d7548 100644 --- a/backend/app/tests/crud/test_fine_tuning.py +++ b/backend/app/tests/crud/test_fine_tuning.py @@ -135,4 +135,4 @@ def test_fetch_active_jobs_by_document_id(db: Session) -> None: assert len(result) == 1 assert result[0].id == active_job.id assert result[0].status == FineTuningStatus.running - assert result[0].is_deleted is False + assert result[0].deleted_at is None diff --git a/backend/app/tests/crud/test_onboarding.py b/backend/app/tests/crud/test_onboarding.py index b9514dbf0..13f652bfa 100644 --- a/backend/app/tests/crud/test_onboarding.py +++ b/backend/app/tests/crud/test_onboarding.py @@ -253,7 +253,7 @@ def test_onboard_project_api_key_generation(db: Session) -> None: APIKey.user_id == user.id, APIKey.project_id == project.id, APIKey.organization_id == org.id, - APIKey.is_deleted.is_(False), + APIKey.deleted_at.is_(None), ) ).first() assert api_key_record is not None diff --git a/backend/app/tests/crud/test_openai_conversation.py b/backend/app/tests/crud/test_openai_conversation.py index 314238bc3..908f7adee 100644 --- a/backend/app/tests/crud/test_openai_conversation.py +++ b/backend/app/tests/crud/test_openai_conversation.py @@ -187,7 +187,7 @@ def test_get_conversations_by_project_success(db: Session) -> None: assert len(conversations) >= 3 for conversation in conversations: assert conversation.project_id == project.id - assert conversation.is_deleted is False + assert conversation.deleted_at is None def test_get_conversations_by_project_with_pagination(db: Session) -> None: @@ -253,7 +253,6 @@ def test_delete_conversation_success(db: Session) -> None: assert deleted_conversation is not None assert deleted_conversation.id == conversation.id - assert deleted_conversation.is_deleted is True assert deleted_conversation.deleted_at is not None @@ -764,7 +763,6 @@ def test_response_id_validation_pattern(db: Session) -> None: assert conversation.assistant_id == conversation_data.assistant_id assert conversation.project_id == project.id assert conversation.organization_id == organization.id - assert conversation.is_deleted is False assert conversation.deleted_at is None diff --git a/backend/app/tests/seed_data/seed_data.json b/backend/app/tests/seed_data/seed_data.json index 9888cc846..20637e1f7 100644 --- a/backend/app/tests/seed_data/seed_data.json +++ b/backend/app/tests/seed_data/seed_data.json @@ -42,7 +42,6 @@ "user_email": "{{SUPERUSER_EMAIL}}", "project_name": "Glific", "api_key": "ApiKey No3x47A5qoIGhm0kVKjQ77dhCqEdWRIQZlEPzzzh7i8", - "is_deleted": false, "deleted_at": null }, { @@ -50,7 +49,6 @@ "user_email": "{{ADMIN_EMAIL}}", "project_name": "Dalgo", "api_key": "ApiKey Px8y47B6roJHin1lWLkR88eiDrFdXSJRZmFQazzai8j", - "is_deleted": false, "deleted_at": null } ], diff --git a/backend/app/tests/seed_data/seed_data.py b/backend/app/tests/seed_data/seed_data.py index 
2db5420c1..01b7e56cd 100644 --- a/backend/app/tests/seed_data/seed_data.py +++ b/backend/app/tests/seed_data/seed_data.py @@ -48,7 +48,6 @@ class APIKeyData(BaseModel): project_name: str user_email: EmailStr api_key: str - is_deleted: bool deleted_at: Optional[str] = None @@ -202,7 +201,6 @@ def create_api_key(session: Session, api_key_data_raw: dict[str, Any]) -> APIKey user_id=user.id, key_prefix=key_prefix, key_hash=key_hash, - is_deleted=api_key_data.is_deleted, deleted_at=api_key_data.deleted_at, ) session.add(api_key) diff --git a/backend/app/tests/utils/auth.py b/backend/app/tests/utils/auth.py index 922ae8a3d..c4ecef130 100644 --- a/backend/app/tests/utils/auth.py +++ b/backend/app/tests/utils/auth.py @@ -66,7 +66,7 @@ def get_test_auth_context( select(APIKey) .where(APIKey.user_id == user.id) .where(APIKey.project_id == project.id) - .where(APIKey.is_deleted == False) + .where(APIKey.deleted_at.is_(None)) ).first() if not api_key: raise ValueError( diff --git a/backend/app/tests/utils/document.py b/backend/app/tests/utils/document.py index bcb8b75fb..f868f3193 100644 --- a/backend/app/tests/utils/document.py +++ b/backend/app/tests/utils/document.py @@ -46,7 +46,6 @@ def __next__(self): project_id=self.project.id, fname=f"{doc_id}.xyz", object_store_url=object_store_url, - is_deleted=False, ) diff --git a/backend/app/tests/utils/test_data.py b/backend/app/tests/utils/test_data.py index 9b144c2f4..5b1bb0565 100644 --- a/backend/app/tests/utils/test_data.py +++ b/backend/app/tests/utils/test_data.py @@ -10,7 +10,7 @@ ConfigBlob, CredsCreate, FineTuningJobCreate, - Fine_Tuning, + FineTuning, ModelEvaluation, ModelEvaluationBase, ModelEvaluationStatus, @@ -166,7 +166,7 @@ def create_test_credential(db: Session) -> tuple[list[Credential], Project]: def create_test_fine_tuning_jobs( db: Session, ratios: list[float], -) -> tuple[list[Fine_Tuning], bool]: +) -> tuple[list[FineTuning], bool]: project = get_project(db, "Dalgo") document = get_document(db, "dalgo_sample.json") jobs = [] @@ -196,7 +196,7 @@ def create_test_fine_tuning_jobs( def create_test_finetuning_job_with_extra_fields( db: Session, ratios: list[float], -) -> tuple[list[Fine_Tuning], bool]: +) -> tuple[list[FineTuning], bool]: jobs, _ = create_test_fine_tuning_jobs(db, ratios) if jobs: diff --git a/backend/app/tests/utils/utils.py b/backend/app/tests/utils/utils.py index ffffd5185..8bf7b1971 100644 --- a/backend/app/tests/utils/utils.py +++ b/backend/app/tests/utils/utils.py @@ -79,7 +79,7 @@ def get_assistant( If a assistant name is provided, fetch the active assistant with that name. If no name is provided, fetch any random assistant. 
""" - filters = [Assistant.is_deleted == False] + filters = [Assistant.deleted_at.is_(None)] if project_id is not None: filters.append(Assistant.project_id == project_id) From 367f258d0decd1ad1ae25b321c8ba8f395cf0d1d Mon Sep 17 00:00:00 2001 From: AkhileshNegi Date: Wed, 6 May 2026 16:56:01 +0530 Subject: [PATCH 2/8] database cleanups next iteration --- ...reate_global_schema_and_languages_table.py | 2 +- .../057_add_updated_at_to_user_project.py | 34 +++++++++ .../versions/058_add_project_fk_to_job.py | 51 ++++++++++++++ .../059_rename_created_at_to_inserted_at.py | 27 +++++++ backend/app/api/routes/llm.py | 2 +- backend/app/api/routes/user_project.py | 64 ++++++++++++++--- backend/app/crud/llm.py | 2 +- backend/app/crud/model_evaluation.py | 25 +++---- backend/app/models/evaluation.py | 4 +- backend/app/models/job.py | 5 +- backend/app/models/llm/request.py | 2 +- backend/app/models/user_project.py | 8 +++ backend/app/tests/api/test_user_project.py | 70 ++++++++++++++++++- .../app/tests/crud/test_model_evaluation.py | 36 ++++++++++ 14 files changed, 299 insertions(+), 33 deletions(-) create mode 100644 backend/app/alembic/versions/057_add_updated_at_to_user_project.py create mode 100644 backend/app/alembic/versions/058_add_project_fk_to_job.py create mode 100644 backend/app/alembic/versions/059_rename_created_at_to_inserted_at.py diff --git a/backend/app/alembic/versions/043_create_global_schema_and_languages_table.py b/backend/app/alembic/versions/043_create_global_schema_and_languages_table.py index f25807380..6fee39066 100644 --- a/backend/app/alembic/versions/043_create_global_schema_and_languages_table.py +++ b/backend/app/alembic/versions/043_create_global_schema_and_languages_table.py @@ -25,7 +25,7 @@ def upgrade(): "languages", sa.Column( "id", - sa.BigInteger(), + sa.Integer(), sa.Identity(always=False), primary_key=True, comment="Unique identifier for the language", diff --git a/backend/app/alembic/versions/057_add_updated_at_to_user_project.py b/backend/app/alembic/versions/057_add_updated_at_to_user_project.py new file mode 100644 index 000000000..f2d172248 --- /dev/null +++ b/backend/app/alembic/versions/057_add_updated_at_to_user_project.py @@ -0,0 +1,34 @@ +"""add updated_at to user_project + +Revision ID: 057 +Revises: 056 +Create Date: 2026-05-06 12:00:00.000000 + +""" + +import sqlalchemy as sa +from alembic import op + +# revision identifiers, used by Alembic. +revision = "057" +down_revision = "056" +branch_labels = None +depends_on = None + + +def upgrade(): + op.add_column( + "user_project", + sa.Column( + "updated_at", + sa.DateTime(), + nullable=False, + server_default=sa.text("NOW()"), + comment="Timestamp when the mapping was last updated", + ), + ) + op.alter_column("user_project", "updated_at", server_default=None) + + +def downgrade(): + op.drop_column("user_project", "updated_at") diff --git a/backend/app/alembic/versions/058_add_project_fk_to_job.py b/backend/app/alembic/versions/058_add_project_fk_to_job.py new file mode 100644 index 000000000..2c2d622bd --- /dev/null +++ b/backend/app/alembic/versions/058_add_project_fk_to_job.py @@ -0,0 +1,51 @@ +"""add project_id foreign key to job table + +Revision ID: 058 +Revises: 057 +Create Date: 2026-05-06 13:00:00.000000 + +Migration 051 added job.project_id as a plain Integer with no foreign key +constraint, leaving the column without referential integrity. This migration: + + 1. Backfills orphan rows: any job.project_id that doesn't match a real + project.id is set to NULL (the column is nullable). 
This preserves + historical job records whose project was deleted before the FK existed. + Switch the cleanup to a DELETE if you'd rather discard orphans + retroactively under CASCADE semantics. + + 2. Adds the foreign key constraint with ON DELETE CASCADE, matching the + pattern used by every other project_id FK in the schema. + +The supporting index (ix_job_project_id) is created by migration 055. +""" + +from alembic import op + +# revision identifiers, used by Alembic. +revision = "058" +down_revision = "057" +branch_labels = None +depends_on = None + + +def upgrade(): + op.execute( + """ + UPDATE job + SET project_id = NULL + WHERE project_id IS NOT NULL + AND project_id NOT IN (SELECT id FROM project) + """ + ) + op.create_foreign_key( + "job_project_id_fkey", + "job", + "project", + ["project_id"], + ["id"], + ondelete="CASCADE", + ) + + +def downgrade(): + op.drop_constraint("job_project_id_fkey", "job", type_="foreignkey") diff --git a/backend/app/alembic/versions/059_rename_created_at_to_inserted_at.py b/backend/app/alembic/versions/059_rename_created_at_to_inserted_at.py new file mode 100644 index 000000000..5de7f2a9f --- /dev/null +++ b/backend/app/alembic/versions/059_rename_created_at_to_inserted_at.py @@ -0,0 +1,27 @@ +"""rename created_at to inserted_at on job and llm_call + +Revision ID: 059 +Revises: 058 +Create Date: 2026-05-06 14:00:00.000000 + +Aligns `job` and `llm_call` with the rest of the schema (51 other tables +use `inserted_at`). Pure rename — no type or default change. +""" + +from alembic import op + +# revision identifiers, used by Alembic. +revision = "059" +down_revision = "058" +branch_labels = None +depends_on = None + + +def upgrade(): + op.alter_column("job", "created_at", new_column_name="inserted_at") + op.alter_column("llm_call", "created_at", new_column_name="inserted_at") + + +def downgrade(): + op.alter_column("llm_call", "inserted_at", new_column_name="created_at") + op.alter_column("job", "inserted_at", new_column_name="created_at") diff --git a/backend/app/api/routes/llm.py b/backend/app/api/routes/llm.py index 62d4dbb04..d220bc244 100644 --- a/backend/app/api/routes/llm.py +++ b/backend/app/api/routes/llm.py @@ -101,7 +101,7 @@ def llm_call( job_id=job.id, status=job.status.value, message=message, - job_inserted_at=job.created_at, + job_inserted_at=job.inserted_at, job_updated_at=job.updated_at, ) diff --git a/backend/app/api/routes/user_project.py b/backend/app/api/routes/user_project.py index 3da8afdca..7904666ad 100644 --- a/backend/app/api/routes/user_project.py +++ b/backend/app/api/routes/user_project.py @@ -1,21 +1,25 @@ import logging +import secrets from typing import Any from fastapi import APIRouter, Depends, HTTPException, status +from sqlmodel import select from app.api.deps import AuthContextDep, SessionDep from app.api.permissions import Permission, require_permission from app.core.config import settings +from app.core.security import get_password_hash from app.crud.organization import get_organization_by_id, validate_organization from app.crud.project import get_project_by_id, validate_project from app.crud.user_project import ( - add_user_to_project, get_users_by_project, remove_user_from_project, ) from app.models import ( AddUsersToProjectRequest, Message, + User, + UserProject, UserProjectPublic, ) from app.services.auth import generate_invite_token @@ -62,21 +66,59 @@ def add_project_users( validate_organization(session=session, org_id=body.organization_id) validate_project(session=session, project_id=body.project_id) - 
same_project_emails = [] - different_project_emails = [] + emails = [str(entry.email) for entry in body.users] + + existing_users = session.exec(select(User).where(User.email.in_(emails))).all() + users_by_email: dict[str, User] = {u.email: u for u in existing_users} + + if existing_users: + existing_memberships = session.exec( + select(UserProject).where( + UserProject.user_id.in_([u.id for u in existing_users]) + ) + ).all() + else: + existing_memberships = [] + memberships_by_user: dict[int, UserProject] = { + m.user_id: m for m in existing_memberships + } + + same_project_emails: list[str] = [] + different_project_emails: list[str] = [] for entry in body.users: - _, add_status = add_user_to_project( - session=session, - email=str(entry.email), + email = str(entry.email) + user = users_by_email.get(email) + + if user is None: + user = User( + email=email, + full_name=entry.full_name, + is_active=False, + hashed_password=get_password_hash(secrets.token_urlsafe(16)), + ) + session.add(user) + session.flush() + users_by_email[email] = user + elif entry.full_name and not user.full_name: + user.full_name = entry.full_name + + membership = memberships_by_user.get(user.id) + if membership is not None: + if membership.project_id == body.project_id: + same_project_emails.append(email) + else: + different_project_emails.append(email) + continue + + new_membership = UserProject( + user_id=user.id, organization_id=body.organization_id, project_id=body.project_id, - full_name=entry.full_name, ) - if add_status == "same_project": - same_project_emails.append(str(entry.email)) - elif add_status == "different_project": - different_project_emails.append(str(entry.email)) + session.add(new_membership) + session.flush() + memberships_by_user[user.id] = new_membership if same_project_emails or different_project_emails: session.rollback() diff --git a/backend/app/crud/llm.py b/backend/app/crud/llm.py index c7f5b1aee..4aa14bf41 100644 --- a/backend/app/crud/llm.py +++ b/backend/app/crud/llm.py @@ -244,7 +244,7 @@ def get_llm_calls_by_job_id( LlmCall.project_id == project_id, LlmCall.deleted_at.is_(None), ) - .order_by(LlmCall.created_at.desc()) + .order_by(LlmCall.inserted_at.desc()) ) return list(session.exec(statement).all()) diff --git a/backend/app/crud/model_evaluation.py b/backend/app/crud/model_evaluation.py index 10cfbe667..ddc38ba7b 100644 --- a/backend/app/crud/model_evaluation.py +++ b/backend/app/crud/model_evaluation.py @@ -2,6 +2,7 @@ from uuid import UUID from fastapi import HTTPException +from sqlalchemy import Float, cast from sqlmodel import Session, select from app.crud import fetch_by_id @@ -112,28 +113,24 @@ def fetch_eval_by_doc_id( def fetch_top_model_by_doc_id( session: Session, document_id: UUID, project_id: int ) -> ModelEvaluation: - query = ( + mcc_expr = cast(ModelEvaluation.score["mcc_score"].astext, Float) + + stmt = ( select(ModelEvaluation) .where( ModelEvaluation.document_id == document_id, ModelEvaluation.project_id == project_id, + ModelEvaluation.deleted_at.is_(None), + ModelEvaluation.score.is_not(None), + mcc_expr.is_not(None), ) - .order_by(ModelEvaluation.updated_at.desc()) + .order_by(mcc_expr.desc()) + .limit(1) ) - model_evals = session.exec(query).all() - - top_model = None - highest_mcc = -float("inf") - - for model_eval in model_evals: - if model_eval.score is not None: - mcc = model_eval.score.get("mcc_score", None) - if mcc is not None and mcc > highest_mcc: - highest_mcc = mcc - top_model = model_eval + top_model = session.exec(stmt).first() - if not 
top_model: + if top_model is None: logger.error( f"[fetch_top_model_by_doc_id]No model evaluation found with populated score for document_id={document_id}, project_id={project_id}" ) diff --git a/backend/app/models/evaluation.py b/backend/app/models/evaluation.py index c9130d3c3..67834cc8d 100644 --- a/backend/app/models/evaluation.py +++ b/backend/app/models/evaluation.py @@ -348,13 +348,13 @@ class EvaluationRun(SQLModel, table=True): ) # Timestamps - inserted_at: datetime = Field( + inserted_at: datetime = SQLField( default_factory=now, nullable=False, description="The timestamp when the evaluation run was started", sa_column_kwargs={"comment": "Timestamp when the evaluation run was started"}, ) - updated_at: datetime = Field( + updated_at: datetime = SQLField( default_factory=now, nullable=False, description="The timestamp when the evaluation run was last updated", diff --git a/backend/app/models/job.py b/backend/app/models/job.py index f7f61277b..3ea4d8650 100644 --- a/backend/app/models/job.py +++ b/backend/app/models/job.py @@ -42,6 +42,9 @@ class Job(SQLModel, table=True): ) project_id: int | None = Field( default=None, + foreign_key="project.id", + ondelete="CASCADE", + index=True, description="Project ID of the project the job belongs to.", sa_column_kwargs={"comment": "Project ID of the job's project"}, ) @@ -65,7 +68,7 @@ class Job(SQLModel, table=True): ) # Timestamps - created_at: datetime = Field( + inserted_at: datetime = Field( default_factory=now, sa_column_kwargs={"comment": "Timestamp when the job was created"}, ) diff --git a/backend/app/models/llm/request.py b/backend/app/models/llm/request.py index a5c337a44..555d7c68c 100644 --- a/backend/app/models/llm/request.py +++ b/backend/app/models/llm/request.py @@ -597,7 +597,7 @@ class LlmCall(SQLModel, table=True): ) # Timestamps - created_at: datetime = Field( + inserted_at: datetime = Field( default_factory=now, nullable=False, sa_column_kwargs={"comment": "Timestamp when the LLM call was created"}, diff --git a/backend/app/models/user_project.py b/backend/app/models/user_project.py index c231c6d0f..2361cecfb 100644 --- a/backend/app/models/user_project.py +++ b/backend/app/models/user_project.py @@ -47,6 +47,14 @@ class UserProject(UserProjectBase, table=True): nullable=False, sa_column_kwargs={"comment": "Timestamp when the mapping was created"}, ) + updated_at: datetime = Field( + default_factory=now, + nullable=False, + sa_column_kwargs={ + "comment": "Timestamp when the mapping was last updated", + "onupdate": now, + }, + ) class UserEntry(SQLModel): diff --git a/backend/app/tests/api/test_user_project.py b/backend/app/tests/api/test_user_project.py index dd4f6cceb..94c0ae080 100644 --- a/backend/app/tests/api/test_user_project.py +++ b/backend/app/tests/api/test_user_project.py @@ -1,7 +1,7 @@ from unittest.mock import patch from fastapi.testclient import TestClient -from sqlmodel import Session +from sqlmodel import Session, select from app.core.config import settings from app.crud.user_project import add_user_to_project @@ -198,6 +198,74 @@ def test_add_user_different_project_returns_409( assert resp.status_code == 409 assert "Already assigned to another project" in resp.json()["error"] + def test_add_bulk_surfaces_all_same_project_conflicts( + self, + db: Session, + client: TestClient, + superuser_token_headers: dict[str, str], + ): + """All emails already on the project should appear in the 409 error.""" + project = create_test_project(db) + email_a = random_email() + email_b = random_email() + for email in 
(email_a, email_b): + add_user_to_project( + session=db, + email=email, + organization_id=project.organization_id, + project_id=project.id, + ) + db.commit() + + resp = client.post( + f"{USER_PROJECTS_URL}/", + json={ + "organization_id": project.organization_id, + "project_id": project.id, + "users": [{"email": email_a}, {"email": email_b}], + }, + headers=superuser_token_headers, + ) + assert resp.status_code == 409 + body = resp.json()["error"] + assert "Already added to this project" in body + assert email_a in body + assert email_b in body + + def test_add_duplicate_email_in_same_request_rolls_back( + self, + db: Session, + client: TestClient, + superuser_token_headers: dict[str, str], + ): + """Submitting the same email twice in one request rolls back the whole batch. + + Pins current behaviour: the second occurrence is detected as a + same-project conflict because the first occurrence was just added. + """ + project = create_test_project(db) + project_id = project.id + organization_id = project.organization_id + email = random_email() + + resp = client.post( + f"{USER_PROJECTS_URL}/", + json={ + "organization_id": organization_id, + "project_id": project_id, + "users": [{"email": email}, {"email": email}], + }, + headers=superuser_token_headers, + ) + assert resp.status_code == 409 + assert "Already added to this project" in resp.json()["error"] + + # Confirm rollback: no UserProject row was persisted. + rows = db.exec( + select(UserProject).where(UserProject.project_id == project_id) + ).all() + assert rows == [] + class TestDeleteProjectUser: """Test suite for DELETE /user-projects/{user_id}""" diff --git a/backend/app/tests/crud/test_model_evaluation.py b/backend/app/tests/crud/test_model_evaluation.py index 12d190fdd..d4f47e0fd 100644 --- a/backend/app/tests/crud/test_model_evaluation.py +++ b/backend/app/tests/crud/test_model_evaluation.py @@ -4,6 +4,7 @@ from sqlmodel import Session from fastapi import HTTPException +from app.core.util import now from app.tests.utils.utils import get_project, get_non_existent_id from app.tests.utils.test_data import ( create_test_model_evaluation, @@ -110,6 +111,41 @@ def test_fetch_top_model_by_doc_id_not_found(db: Session) -> None: assert exc.value.status_code == 404 +def test_fetch_top_model_by_doc_id_picks_highest_mcc(db: Session) -> None: + model_evals = create_test_model_evaluation(db) + assert len(model_evals) >= 2 + assert model_evals[0].document_id == model_evals[1].document_id + + model_evals[0].score = {"mcc_score": 0.5} + model_evals[1].score = {"mcc_score": 0.9} + db.flush() + + result = fetch_top_model_by_doc_id( + db, + document_id=model_evals[0].document_id, + project_id=model_evals[0].project_id, + ) + assert result.id == model_evals[1].id + + +def test_fetch_top_model_by_doc_id_excludes_soft_deleted(db: Session) -> None: + model_evals = create_test_model_evaluation(db) + assert len(model_evals) >= 2 + assert model_evals[0].document_id == model_evals[1].document_id + + model_evals[0].score = {"mcc_score": 0.5} + model_evals[1].score = {"mcc_score": 0.9} + model_evals[1].deleted_at = now() + db.flush() + + result = fetch_top_model_by_doc_id( + db, + document_id=model_evals[0].document_id, + project_id=model_evals[0].project_id, + ) + assert result.id == model_evals[0].id + + def test_fetch_active_model_evals(db: Session) -> None: model_evals = create_test_model_evaluation(db) active_evals = fetch_active_model_evals( From 65239106a4e933b4ceb07c3ec1eaa8b4bfe60d21 Mon Sep 17 00:00:00 2001 From: AkhileshNegi Date: Thu, 7 May 2026 
08:04:51 +0530 Subject: [PATCH 3/8] added unique constraint --- ...dd_unique_constraint_documentcollection.py | 44 +++++++++++++++++++ backend/app/models/document_collection.py | 6 ++- 2 files changed, 49 insertions(+), 1 deletion(-) create mode 100644 backend/app/alembic/versions/060_add_unique_constraint_documentcollection.py diff --git a/backend/app/alembic/versions/060_add_unique_constraint_documentcollection.py b/backend/app/alembic/versions/060_add_unique_constraint_documentcollection.py new file mode 100644 index 000000000..90c0d085b --- /dev/null +++ b/backend/app/alembic/versions/060_add_unique_constraint_documentcollection.py @@ -0,0 +1,44 @@ +"""add unique constraint to documentcollection + +Revision ID: 060 +Revises: 059 +Create Date: 2026-05-07 12:00:00.000000 + +The `documentcollection` junction table never had a uniqueness constraint +on (document_id, collection_id), so the same document could be linked to +the same collection multiple times. This migration: + + 1. Removes any existing duplicate rows, keeping the row with the lowest + `id` for each (document_id, collection_id) pair. + 2. Adds the unique constraint going forward. +""" + +from alembic import op + +# revision identifiers, used by Alembic. +revision = "060" +down_revision = "059" +branch_labels = None +depends_on = None + + +def upgrade(): + op.execute( + """ + DELETE FROM documentcollection + WHERE id NOT IN ( + SELECT MIN(id) + FROM documentcollection + GROUP BY document_id, collection_id + ) + """ + ) + op.create_unique_constraint( + "uq_document_collection", + "documentcollection", + ["document_id", "collection_id"], + ) + + +def downgrade(): + op.drop_constraint("uq_document_collection", "documentcollection", type_="unique") diff --git a/backend/app/models/document_collection.py b/backend/app/models/document_collection.py index 93db6df31..05329e563 100644 --- a/backend/app/models/document_collection.py +++ b/backend/app/models/document_collection.py @@ -1,11 +1,15 @@ from uuid import UUID -from sqlmodel import Field, SQLModel +from sqlmodel import Field, SQLModel, UniqueConstraint class DocumentCollection(SQLModel, table=True): """Junction table linking documents to collections.""" + __table_args__ = ( + UniqueConstraint("document_id", "collection_id", name="uq_document_collection"), + ) + id: int | None = Field( default=None, primary_key=True, From 47329e3f4a8644532bee990c16e62d3639b8ba46 Mon Sep 17 00:00:00 2001 From: AkhileshNegi Date: Thu, 7 May 2026 08:19:27 +0530 Subject: [PATCH 4/8] added migration --- .../061_align_languages_id_to_integer.py | 34 +++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 backend/app/alembic/versions/061_align_languages_id_to_integer.py diff --git a/backend/app/alembic/versions/061_align_languages_id_to_integer.py b/backend/app/alembic/versions/061_align_languages_id_to_integer.py new file mode 100644 index 000000000..84dddd713 --- /dev/null +++ b/backend/app/alembic/versions/061_align_languages_id_to_integer.py @@ -0,0 +1,34 @@ +"""align global.languages.id to INTEGER + +Revision ID: 061 +Revises: 060 +Create Date: 2026-05-07 13:00:00.000000 + +Migration 043 originally created `global.languages.id` as BIGINT, but every +FK column referencing it (evaluation_dataset, evaluation_run, stt_sample) is +INTEGER. Migration 043's source has been edited to use INTEGER for fresh +setups; this migration aligns already-deployed databases. + +The underlying IDENTITY sequence stays BIGINT (PostgreSQL doesn't change it +on ALTER COLUMN TYPE). 
This is harmless — values would have to exceed +2^31 - 1 to cause an INSERT failure, and the table holds ~13 seeded rows. + +Languages table is small (≤100 rows in practice), so the AccessExclusiveLock +taken by ALTER COLUMN TYPE is sub-second. +""" + +from alembic import op + +# revision identifiers, used by Alembic. +revision = "061" +down_revision = "060" +branch_labels = None +depends_on = None + + +def upgrade(): + op.execute("ALTER TABLE global.languages ALTER COLUMN id SET DATA TYPE INTEGER") + + +def downgrade(): + op.execute("ALTER TABLE global.languages ALTER COLUMN id SET DATA TYPE BIGINT") From bb2a56ab21073723cbd7b3d7c3b1127bcd1eba49 Mon Sep 17 00:00:00 2001 From: AkhileshNegi Date: Thu, 7 May 2026 09:55:10 +0530 Subject: [PATCH 5/8] migration cleanups --- .../057_add_updated_at_to_user_project.py | 34 ------ ...zation.py => 057_v1_query_optimization.py} | 8 +- .../versions/058_add_project_fk_to_job.py | 51 --------- ...dexes.py => 058_drop_redundant_indexes.py} | 12 +- .../059_rename_created_at_to_inserted_at.py | 27 ----- .../versions/059_v1_assorted_cleanups.py | 104 ++++++++++++++++++ ...dd_unique_constraint_documentcollection.py | 44 -------- .../061_align_languages_id_to_integer.py | 34 ------ 8 files changed, 114 insertions(+), 200 deletions(-) delete mode 100644 backend/app/alembic/versions/057_add_updated_at_to_user_project.py rename backend/app/alembic/versions/{055_v1_query_optimization.py => 057_v1_query_optimization.py} (99%) delete mode 100644 backend/app/alembic/versions/058_add_project_fk_to_job.py rename backend/app/alembic/versions/{056_drop_redundant_indexes.py => 058_drop_redundant_indexes.py} (94%) delete mode 100644 backend/app/alembic/versions/059_rename_created_at_to_inserted_at.py create mode 100644 backend/app/alembic/versions/059_v1_assorted_cleanups.py delete mode 100644 backend/app/alembic/versions/060_add_unique_constraint_documentcollection.py delete mode 100644 backend/app/alembic/versions/061_align_languages_id_to_integer.py diff --git a/backend/app/alembic/versions/057_add_updated_at_to_user_project.py b/backend/app/alembic/versions/057_add_updated_at_to_user_project.py deleted file mode 100644 index f2d172248..000000000 --- a/backend/app/alembic/versions/057_add_updated_at_to_user_project.py +++ /dev/null @@ -1,34 +0,0 @@ -"""add updated_at to user_project - -Revision ID: 057 -Revises: 056 -Create Date: 2026-05-06 12:00:00.000000 - -""" - -import sqlalchemy as sa -from alembic import op - -# revision identifiers, used by Alembic. 
-revision = "057" -down_revision = "056" -branch_labels = None -depends_on = None - - -def upgrade(): - op.add_column( - "user_project", - sa.Column( - "updated_at", - sa.DateTime(), - nullable=False, - server_default=sa.text("NOW()"), - comment="Timestamp when the mapping was last updated", - ), - ) - op.alter_column("user_project", "updated_at", server_default=None) - - -def downgrade(): - op.drop_column("user_project", "updated_at") diff --git a/backend/app/alembic/versions/055_v1_query_optimization.py b/backend/app/alembic/versions/057_v1_query_optimization.py similarity index 99% rename from backend/app/alembic/versions/055_v1_query_optimization.py rename to backend/app/alembic/versions/057_v1_query_optimization.py index ce4821b70..cce91f3de 100644 --- a/backend/app/alembic/versions/055_v1_query_optimization.py +++ b/backend/app/alembic/versions/057_v1_query_optimization.py @@ -1,7 +1,7 @@ """v1.0 query optimization: project_id + composite indexes, drop is_deleted -Revision ID: 055 -Revises: 054 +Revision ID: 057 +Revises: 056 Create Date: 2026-05-05 12:00:00.000000 Bundles three coordinated changes for v1.0 lock: @@ -34,8 +34,8 @@ from alembic import op -revision = "055" -down_revision = "054" +revision = "057" +down_revision = "056" branch_labels = None depends_on = None diff --git a/backend/app/alembic/versions/058_add_project_fk_to_job.py b/backend/app/alembic/versions/058_add_project_fk_to_job.py deleted file mode 100644 index 2c2d622bd..000000000 --- a/backend/app/alembic/versions/058_add_project_fk_to_job.py +++ /dev/null @@ -1,51 +0,0 @@ -"""add project_id foreign key to job table - -Revision ID: 058 -Revises: 057 -Create Date: 2026-05-06 13:00:00.000000 - -Migration 051 added job.project_id as a plain Integer with no foreign key -constraint, leaving the column without referential integrity. This migration: - - 1. Backfills orphan rows: any job.project_id that doesn't match a real - project.id is set to NULL (the column is nullable). This preserves - historical job records whose project was deleted before the FK existed. - Switch the cleanup to a DELETE if you'd rather discard orphans - retroactively under CASCADE semantics. - - 2. Adds the foreign key constraint with ON DELETE CASCADE, matching the - pattern used by every other project_id FK in the schema. - -The supporting index (ix_job_project_id) is created by migration 055. -""" - -from alembic import op - -# revision identifiers, used by Alembic. 
-revision = "058" -down_revision = "057" -branch_labels = None -depends_on = None - - -def upgrade(): - op.execute( - """ - UPDATE job - SET project_id = NULL - WHERE project_id IS NOT NULL - AND project_id NOT IN (SELECT id FROM project) - """ - ) - op.create_foreign_key( - "job_project_id_fkey", - "job", - "project", - ["project_id"], - ["id"], - ondelete="CASCADE", - ) - - -def downgrade(): - op.drop_constraint("job_project_id_fkey", "job", type_="foreignkey") diff --git a/backend/app/alembic/versions/056_drop_redundant_indexes.py b/backend/app/alembic/versions/058_drop_redundant_indexes.py similarity index 94% rename from backend/app/alembic/versions/056_drop_redundant_indexes.py rename to backend/app/alembic/versions/058_drop_redundant_indexes.py index fbe76be1a..19b0f1256 100644 --- a/backend/app/alembic/versions/056_drop_redundant_indexes.py +++ b/backend/app/alembic/versions/058_drop_redundant_indexes.py @@ -1,10 +1,10 @@ -"""drop redundant indexes superseded by 055 composites +"""drop redundant indexes superseded by 057 composites -Revision ID: 056 -Revises: 055 +Revision ID: 058 +Revises: 057 Create Date: 2026-05-05 14:00:00.000000 -Drops indexes that are now redundant after migration 055 added the +Drops indexes that are now redundant after migration 057 added the real composite/partial indexes that match actual query shapes: ix_project_name @@ -51,8 +51,8 @@ from alembic import op -revision = "056" -down_revision = "055" +revision = "058" +down_revision = "057" branch_labels = None depends_on = None diff --git a/backend/app/alembic/versions/059_rename_created_at_to_inserted_at.py b/backend/app/alembic/versions/059_rename_created_at_to_inserted_at.py deleted file mode 100644 index 5de7f2a9f..000000000 --- a/backend/app/alembic/versions/059_rename_created_at_to_inserted_at.py +++ /dev/null @@ -1,27 +0,0 @@ -"""rename created_at to inserted_at on job and llm_call - -Revision ID: 059 -Revises: 058 -Create Date: 2026-05-06 14:00:00.000000 - -Aligns `job` and `llm_call` with the rest of the schema (51 other tables -use `inserted_at`). Pure rename — no type or default change. -""" - -from alembic import op - -# revision identifiers, used by Alembic. -revision = "059" -down_revision = "058" -branch_labels = None -depends_on = None - - -def upgrade(): - op.alter_column("job", "created_at", new_column_name="inserted_at") - op.alter_column("llm_call", "created_at", new_column_name="inserted_at") - - -def downgrade(): - op.alter_column("llm_call", "inserted_at", new_column_name="created_at") - op.alter_column("job", "inserted_at", new_column_name="created_at") diff --git a/backend/app/alembic/versions/059_v1_assorted_cleanups.py b/backend/app/alembic/versions/059_v1_assorted_cleanups.py new file mode 100644 index 000000000..d2965b243 --- /dev/null +++ b/backend/app/alembic/versions/059_v1_assorted_cleanups.py @@ -0,0 +1,104 @@ +"""v1.0 assorted schema cleanups + +Revision ID: 059 +Revises: 058 +Create Date: 2026-05-07 14:00:00.000000 + +Bundles five small, mutually independent v1.0 cleanups in source order: + + 1. user_project: add `updated_at` column with NOW() server default for + backfill, then drop the default so future inserts use the model's + `default_factory=now`. + + 2. job: backfill orphan project_id rows to NULL, then add the missing + foreign key constraint with ON DELETE CASCADE. The supporting + ix_job_project_id index is created by migration 057. + + 3. 
job + llm_call: rename `created_at` → `inserted_at` to align with + the rest of the schema (every other table uses `inserted_at`). + + 4. documentcollection: dedupe any existing duplicate (document_id, + collection_id) pairs (keeps the lowest id), then add the missing + unique constraint. + + 5. global.languages: align id column type to INTEGER. Migration 043 + originally created it as BIGINT, but every FK column referencing it + is INTEGER. The IDENTITY sequence stays BIGINT (PG doesn't change + it on ALTER COLUMN TYPE) — harmless at this scale. +""" + +import sqlalchemy as sa +from alembic import op + +# revision identifiers, used by Alembic. +revision = "059" +down_revision = "058" +branch_labels = None +depends_on = None + + +def upgrade(): + # 1. user_project.updated_at + op.add_column( + "user_project", + sa.Column( + "updated_at", + sa.DateTime(), + nullable=False, + server_default=sa.text("NOW()"), + comment="Timestamp when the mapping was last updated", + ), + ) + op.alter_column("user_project", "updated_at", server_default=None) + + # 2. job.project_id foreign key (with orphan backfill) + op.execute( + """ + UPDATE job + SET project_id = NULL + WHERE project_id IS NOT NULL + AND project_id NOT IN (SELECT id FROM project) + """ + ) + op.create_foreign_key( + "job_project_id_fkey", + "job", + "project", + ["project_id"], + ["id"], + ondelete="CASCADE", + ) + + # 3. Rename created_at → inserted_at on job and llm_call + op.alter_column("job", "created_at", new_column_name="inserted_at") + op.alter_column("llm_call", "created_at", new_column_name="inserted_at") + + # 4. documentcollection unique constraint (with dedupe) + op.execute( + """ + DELETE FROM documentcollection + WHERE id NOT IN ( + SELECT MIN(id) + FROM documentcollection + GROUP BY document_id, collection_id + ) + """ + ) + op.create_unique_constraint( + "uq_document_collection", + "documentcollection", + ["document_id", "collection_id"], + ) + + # 5. Align global.languages.id to INTEGER + op.execute("ALTER TABLE global.languages ALTER COLUMN id SET DATA TYPE INTEGER") + + +def downgrade(): + # Reverse order of upgrade() + op.execute("ALTER TABLE global.languages ALTER COLUMN id SET DATA TYPE BIGINT") + op.drop_constraint("uq_document_collection", "documentcollection", type_="unique") + op.alter_column("llm_call", "inserted_at", new_column_name="created_at") + op.alter_column("job", "inserted_at", new_column_name="created_at") + op.drop_constraint("job_project_id_fkey", "job", type_="foreignkey") + op.drop_column("user_project", "updated_at") diff --git a/backend/app/alembic/versions/060_add_unique_constraint_documentcollection.py b/backend/app/alembic/versions/060_add_unique_constraint_documentcollection.py deleted file mode 100644 index 90c0d085b..000000000 --- a/backend/app/alembic/versions/060_add_unique_constraint_documentcollection.py +++ /dev/null @@ -1,44 +0,0 @@ -"""add unique constraint to documentcollection - -Revision ID: 060 -Revises: 059 -Create Date: 2026-05-07 12:00:00.000000 - -The `documentcollection` junction table never had a uniqueness constraint -on (document_id, collection_id), so the same document could be linked to -the same collection multiple times. This migration: - - 1. Removes any existing duplicate rows, keeping the row with the lowest - `id` for each (document_id, collection_id) pair. - 2. Adds the unique constraint going forward. -""" - -from alembic import op - -# revision identifiers, used by Alembic. 
-revision = "060" -down_revision = "059" -branch_labels = None -depends_on = None - - -def upgrade(): - op.execute( - """ - DELETE FROM documentcollection - WHERE id NOT IN ( - SELECT MIN(id) - FROM documentcollection - GROUP BY document_id, collection_id - ) - """ - ) - op.create_unique_constraint( - "uq_document_collection", - "documentcollection", - ["document_id", "collection_id"], - ) - - -def downgrade(): - op.drop_constraint("uq_document_collection", "documentcollection", type_="unique") diff --git a/backend/app/alembic/versions/061_align_languages_id_to_integer.py b/backend/app/alembic/versions/061_align_languages_id_to_integer.py deleted file mode 100644 index 84dddd713..000000000 --- a/backend/app/alembic/versions/061_align_languages_id_to_integer.py +++ /dev/null @@ -1,34 +0,0 @@ -"""align global.languages.id to INTEGER - -Revision ID: 061 -Revises: 060 -Create Date: 2026-05-07 13:00:00.000000 - -Migration 043 originally created `global.languages.id` as BIGINT, but every -FK column referencing it (evaluation_dataset, evaluation_run, stt_sample) is -INTEGER. Migration 043's source has been edited to use INTEGER for fresh -setups; this migration aligns already-deployed databases. - -The underlying IDENTITY sequence stays BIGINT (PostgreSQL doesn't change it -on ALTER COLUMN TYPE). This is harmless — values would have to exceed -2^31 - 1 to cause an INSERT failure, and the table holds ~13 seeded rows. - -Languages table is small (≤100 rows in practice), so the AccessExclusiveLock -taken by ALTER COLUMN TYPE is sub-second. -""" - -from alembic import op - -# revision identifiers, used by Alembic. -revision = "061" -down_revision = "060" -branch_labels = None -depends_on = None - - -def upgrade(): - op.execute("ALTER TABLE global.languages ALTER COLUMN id SET DATA TYPE INTEGER") - - -def downgrade(): - op.execute("ALTER TABLE global.languages ALTER COLUMN id SET DATA TYPE BIGINT") From fbc34525e46f693fce73706c073ea919d5987fc5 Mon Sep 17 00:00:00 2001 From: AkhileshNegi Date: Thu, 7 May 2026 11:52:16 +0530 Subject: [PATCH 6/8] migration cleanups --- ..._optimization.py => 058_v1_query_optimization.py} | 8 ++++---- ...dant_indexes.py => 059_drop_redundant_indexes.py} | 12 ++++++------ ...orted_cleanups.py => 060_v1_assorted_cleanups.py} | 10 +++++----- 3 files changed, 15 insertions(+), 15 deletions(-) rename backend/app/alembic/versions/{057_v1_query_optimization.py => 058_v1_query_optimization.py} (99%) rename backend/app/alembic/versions/{058_drop_redundant_indexes.py => 059_drop_redundant_indexes.py} (94%) rename backend/app/alembic/versions/{059_v1_assorted_cleanups.py => 060_v1_assorted_cleanups.py} (96%) diff --git a/backend/app/alembic/versions/057_v1_query_optimization.py b/backend/app/alembic/versions/058_v1_query_optimization.py similarity index 99% rename from backend/app/alembic/versions/057_v1_query_optimization.py rename to backend/app/alembic/versions/058_v1_query_optimization.py index cce91f3de..c951b30dc 100644 --- a/backend/app/alembic/versions/057_v1_query_optimization.py +++ b/backend/app/alembic/versions/058_v1_query_optimization.py @@ -1,7 +1,7 @@ """v1.0 query optimization: project_id + composite indexes, drop is_deleted -Revision ID: 057 -Revises: 056 +Revision ID: 058 +Revises: 057 Create Date: 2026-05-05 12:00:00.000000 Bundles three coordinated changes for v1.0 lock: @@ -34,8 +34,8 @@ from alembic import op -revision = "057" -down_revision = "056" +revision = "058" +down_revision = "057" branch_labels = None depends_on = None diff --git 
a/backend/app/alembic/versions/058_drop_redundant_indexes.py b/backend/app/alembic/versions/059_drop_redundant_indexes.py similarity index 94% rename from backend/app/alembic/versions/058_drop_redundant_indexes.py rename to backend/app/alembic/versions/059_drop_redundant_indexes.py index 19b0f1256..84bd2653e 100644 --- a/backend/app/alembic/versions/058_drop_redundant_indexes.py +++ b/backend/app/alembic/versions/059_drop_redundant_indexes.py @@ -1,10 +1,10 @@ -"""drop redundant indexes superseded by 057 composites +"""drop redundant indexes superseded by 058 composites -Revision ID: 058 -Revises: 057 +Revision ID: 059 +Revises: 058 Create Date: 2026-05-05 14:00:00.000000 -Drops indexes that are now redundant after migration 057 added the +Drops indexes that are now redundant after migration 058 added the real composite/partial indexes that match actual query shapes: ix_project_name @@ -51,8 +51,8 @@ from alembic import op -revision = "058" -down_revision = "057" +revision = "059" +down_revision = "058" branch_labels = None depends_on = None diff --git a/backend/app/alembic/versions/059_v1_assorted_cleanups.py b/backend/app/alembic/versions/060_v1_assorted_cleanups.py similarity index 96% rename from backend/app/alembic/versions/059_v1_assorted_cleanups.py rename to backend/app/alembic/versions/060_v1_assorted_cleanups.py index d2965b243..e55c0557b 100644 --- a/backend/app/alembic/versions/059_v1_assorted_cleanups.py +++ b/backend/app/alembic/versions/060_v1_assorted_cleanups.py @@ -1,7 +1,7 @@ """v1.0 assorted schema cleanups -Revision ID: 059 -Revises: 058 +Revision ID: 060 +Revises: 059 Create Date: 2026-05-07 14:00:00.000000 Bundles five small, mutually independent v1.0 cleanups in source order: @@ -12,7 +12,7 @@ 2. job: backfill orphan project_id rows to NULL, then add the missing foreign key constraint with ON DELETE CASCADE. The supporting - ix_job_project_id index is created by migration 057. + ix_job_project_id index is created by migration 058. 3. job + llm_call: rename `created_at` → `inserted_at` to align with the rest of the schema (every other table uses `inserted_at`). @@ -31,8 +31,8 @@ from alembic import op # revision identifiers, used by Alembic. -revision = "059" -down_revision = "058" +revision = "060" +down_revision = "059" branch_labels = None depends_on = None From 67e2e1428fef7681e9ecbc79881dfbe881b72be0 Mon Sep 17 00:00:00 2001 From: AkhileshNegi Date: Thu, 7 May 2026 14:29:29 +0530 Subject: [PATCH 7/8] migration cleanups --- .../versions/058_v1_query_optimization.py | 33 +++++++++++++------ .../versions/060_v1_assorted_cleanups.py | 18 +++++++++- 2 files changed, 40 insertions(+), 11 deletions(-) diff --git a/backend/app/alembic/versions/058_v1_query_optimization.py b/backend/app/alembic/versions/058_v1_query_optimization.py index c951b30dc..43a9ffd39 100644 --- a/backend/app/alembic/versions/058_v1_query_optimization.py +++ b/backend/app/alembic/versions/058_v1_query_optimization.py @@ -90,65 +90,78 @@ ] -# Composite + partial indexes (P1). (index_name, body_after_INDEX_NAME) -COMPOSITE_INDEXES: list[tuple[str, str]] = [ +# Composite + partial indexes (P1). (index_name, body_after_INDEX_NAME, schema) +# `schema` is the unquoted PG schema for downgrade DROP INDEX, or None for +# the default (public) schema. The upgrade body already names the schema +# inline in its ON clause; the field exists so downgrade doesn't have to +# string-sniff it back out. 
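+# Illustrative entry shapes only (hypothetical index and table names, not
+# part of this migration):
+#   ("ix_foo_project_active", 'ON "foo" ("project_id") WHERE "deleted_at" IS NULL', None)
+#   ("ix_bar_lookup", 'ON "global"."bar" ("col")', "global")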
+COMPOSITE_INDEXES: list[tuple[str, str, str | None]] = [ ( "ix_document_project_inserted_at_active", 'ON "document" ("project_id", "inserted_at" DESC) WHERE "deleted_at" IS NULL', + None, ), ( "ix_openai_conversation_project_inserted_at_active", 'ON "openai_conversation" ("project_id", "inserted_at" DESC) WHERE "deleted_at" IS NULL', + None, ), ( "ix_openai_conversation_ancestor_project_inserted_at_active", 'ON "openai_conversation" ("ancestor_response_id", "project_id", "inserted_at" DESC) WHERE "deleted_at" IS NULL', + None, ), ( "ix_openai_conversation_response_project_active", 'ON "openai_conversation" ("response_id", "project_id") WHERE "deleted_at" IS NULL', + None, ), ( "ix_collection_jobs_project_status_inserted_at", 'ON "collection_jobs" ("project_id", "status", "inserted_at" DESC)', + None, ), ( "ix_evaluation_run_org_project_type_inserted_at", 'ON "evaluation_run" ("organization_id", "project_id", "type", "inserted_at" DESC)', + None, ), ( "ix_evaluation_dataset_org_project_type_inserted_at", 'ON "evaluation_dataset" ("organization_id", "project_id", "type", "inserted_at" DESC)', - ), - ( - "ix_llm_call_job_created_at_active", - 'ON "llm_call" ("job_id", "created_at" DESC) WHERE "deleted_at" IS NULL', + None, ), ( "ix_model_evaluation_document_project_updated_at", 'ON "model_evaluation" ("document_id", "project_id", "updated_at" DESC) WHERE "deleted_at" IS NULL', + None, ), ( "ix_model_config_active_provider_name", 'ON "global"."model_config" ("is_active", "provider", "model_name")', + "global", ), ( "ix_collection_project_active", 'ON "collection" ("project_id") WHERE "deleted_at" IS NULL', + None, ), # Composite FK indexes that match the actual query shape ( "ix_fine_tuning_document_project", 'ON "fine_tuning" ("document_id", "project_id")', + None, ), ( "ix_model_evaluation_fine_tuning_project", 'ON "model_evaluation" ("fine_tuning_id", "project_id")', + None, ), # Partial index for active-key listing on apikey ( "ix_apikey_project_active", 'ON "apikey" ("project_id") WHERE "deleted_at" IS NULL', + None, ), ] @@ -172,15 +185,15 @@ def upgrade(): f'CREATE INDEX CONCURRENTLY IF NOT EXISTS "{index}" ' f'ON "{table}" ("{column}")' ) - for index, body in COMPOSITE_INDEXES: + for index, body, _schema in COMPOSITE_INDEXES: op.execute(f'CREATE INDEX CONCURRENTLY IF NOT EXISTS "{index}" {body}') def downgrade(): with op.get_context().autocommit_block(): - for index, body in COMPOSITE_INDEXES: - schema_qualified = '"global".' if '"global"."model_config"' in body else "" - op.execute(f'DROP INDEX CONCURRENTLY IF EXISTS {schema_qualified}"{index}"') + for index, _body, schema in COMPOSITE_INDEXES: + qualified = f'"{schema}"."{index}"' if schema else f'"{index}"' + op.execute(f"DROP INDEX CONCURRENTLY IF EXISTS {qualified}") for _table, _column, index in FK_INDEXES: op.execute(f'DROP INDEX CONCURRENTLY IF EXISTS "{index}"') diff --git a/backend/app/alembic/versions/060_v1_assorted_cleanups.py b/backend/app/alembic/versions/060_v1_assorted_cleanups.py index e55c0557b..264d56ae3 100644 --- a/backend/app/alembic/versions/060_v1_assorted_cleanups.py +++ b/backend/app/alembic/versions/060_v1_assorted_cleanups.py @@ -16,6 +16,9 @@ 3. job + llm_call: rename `created_at` → `inserted_at` to align with the rest of the schema (every other table uses `inserted_at`). + Also creates the partial index ix_llm_call_job_inserted_at_active + here (rather than in migration 058) so the index name reflects the + post-rename column. 4. 
documentcollection: dedupe any existing duplicate (document_id, collection_id) pairs (keeps the lowest id), then add the missing @@ -69,9 +72,18 @@ def upgrade(): ondelete="CASCADE", ) - # 3. Rename created_at → inserted_at on job and llm_call + # 3. Rename created_at → inserted_at on job and llm_call, then create + # the llm_call hot-path index using the new column name. Index + # creation is CONCURRENTLY and must run outside a transaction. op.alter_column("job", "created_at", new_column_name="inserted_at") op.alter_column("llm_call", "created_at", new_column_name="inserted_at") + with op.get_context().autocommit_block(): + op.execute( + "CREATE INDEX CONCURRENTLY IF NOT EXISTS " + '"ix_llm_call_job_inserted_at_active" ' + 'ON "llm_call" ("job_id", "inserted_at" DESC) ' + 'WHERE "deleted_at" IS NULL' + ) # 4. documentcollection unique constraint (with dedupe) op.execute( @@ -98,6 +110,10 @@ def downgrade(): # Reverse order of upgrade() op.execute("ALTER TABLE global.languages ALTER COLUMN id SET DATA TYPE BIGINT") op.drop_constraint("uq_document_collection", "documentcollection", type_="unique") + with op.get_context().autocommit_block(): + op.execute( + 'DROP INDEX CONCURRENTLY IF EXISTS "ix_llm_call_job_inserted_at_active"' + ) op.alter_column("llm_call", "inserted_at", new_column_name="created_at") op.alter_column("job", "inserted_at", new_column_name="created_at") op.drop_constraint("job_project_id_fkey", "job", type_="foreignkey") From 914ff2c437f2428d5a50f970fef9ac8a013e5076 Mon Sep 17 00:00:00 2001 From: AkhileshNegi Date: Fri, 8 May 2026 16:49:49 +0530 Subject: [PATCH 8/8] review resolves --- ...reate_global_schema_and_languages_table.py | 2 +- .../versions/058_v1_query_optimization.py | 13 ++++ .../versions/060_v1_assorted_cleanups.py | 62 ++++++++++++++++--- backend/app/models/user.py | 17 +++++ 4 files changed, 83 insertions(+), 11 deletions(-) diff --git a/backend/app/alembic/versions/043_create_global_schema_and_languages_table.py b/backend/app/alembic/versions/043_create_global_schema_and_languages_table.py index 6fee39066..f25807380 100644 --- a/backend/app/alembic/versions/043_create_global_schema_and_languages_table.py +++ b/backend/app/alembic/versions/043_create_global_schema_and_languages_table.py @@ -25,7 +25,7 @@ def upgrade(): "languages", sa.Column( "id", - sa.Integer(), + sa.BigInteger(), sa.Identity(always=False), primary_key=True, comment="Unique identifier for the language", diff --git a/backend/app/alembic/versions/058_v1_query_optimization.py b/backend/app/alembic/versions/058_v1_query_optimization.py index 43a9ffd39..1724c01bf 100644 --- a/backend/app/alembic/versions/058_v1_query_optimization.py +++ b/backend/app/alembic/versions/058_v1_query_optimization.py @@ -15,6 +15,9 @@ 2. Composite + partial indexes for hot list/pagination paths matching: WHERE project_id = ? [AND deleted_at IS NULL] ORDER BY DESC + Plus a small partial index `ix_evaluation_run_processing` for the + cron polling queries that filter by (type, status='processing') + without an organization_id predicate. 3. Drop the redundant `is_deleted` boolean from every table that also carries `deleted_at`. `deleted_at IS NULL` becomes the single source @@ -126,6 +129,16 @@ 'ON "evaluation_run" ("organization_id", "project_id", "type", "inserted_at" DESC)', None, ), + # Partial index for cron polling queries that filter by + # (type, status='processing') without an organization_id predicate + # (crud/evaluations/cron_utils.py and crud/evaluations/processing.py). 
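+    # Roughly the query shape served (illustrative, not the exact ORM
+    # statement):
+    #   SELECT * FROM evaluation_run
+    #   WHERE type = $1 AND status = 'processing';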
+ # The composite above leads with organization_id and does not serve + # these unscoped scans. + ( + "ix_evaluation_run_processing", + 'ON "evaluation_run" ("type", "batch_job_id") WHERE "status" = \'processing\'', + None, + ), ( "ix_evaluation_dataset_org_project_type_inserted_at", 'ON "evaluation_dataset" ("organization_id", "project_id", "type", "inserted_at" DESC)', diff --git a/backend/app/alembic/versions/060_v1_assorted_cleanups.py b/backend/app/alembic/versions/060_v1_assorted_cleanups.py index 264d56ae3..e851a02ed 100644 --- a/backend/app/alembic/versions/060_v1_assorted_cleanups.py +++ b/backend/app/alembic/versions/060_v1_assorted_cleanups.py @@ -6,9 +6,11 @@ Bundles five small, mutually independent v1.0 cleanups in source order: - 1. user_project: add `updated_at` column with NOW() server default for - backfill, then drop the default so future inserts use the model's - `default_factory=now`. + 1. user_project + user: add timestamp columns missing from the model. + `user_project` gets `updated_at`; `user` gets both `inserted_at` + and `updated_at`. Each new column is added with NOW() server + default so existing rows are backfilled, then the default is + dropped so future inserts use the model's `default_factory=now`. 2. job: backfill orphan project_id rows to NULL, then add the missing foreign key constraint with ON DELETE CASCADE. The supporting @@ -22,7 +24,9 @@ 4. documentcollection: dedupe any existing duplicate (document_id, collection_id) pairs (keeps the lowest id), then add the missing - unique constraint. + unique constraint. The unique index is built CONCURRENTLY and then + attached via ADD CONSTRAINT ... USING INDEX so the index build does + not take AccessExclusiveLock on the table. 5. global.languages: align id column type to INTEGER. Migration 043 originally created it as BIGINT, but every FK column referencing it @@ -41,7 +45,10 @@ def upgrade(): - # 1. user_project.updated_at + # 1. Backfill missing timestamp columns. Each column is created with + # a NOW() server default so existing rows are populated atomically; + # the default is then dropped so future inserts get their value + # from the model's `default_factory=now`. op.add_column( "user_project", sa.Column( @@ -54,6 +61,30 @@ def upgrade(): ) op.alter_column("user_project", "updated_at", server_default=None) + op.add_column( + "user", + sa.Column( + "inserted_at", + sa.DateTime(), + nullable=False, + server_default=sa.text("NOW()"), + comment="Timestamp when the user was created", + ), + ) + op.alter_column("user", "inserted_at", server_default=None) + + op.add_column( + "user", + sa.Column( + "updated_at", + sa.DateTime(), + nullable=False, + server_default=sa.text("NOW()"), + comment="Timestamp when the user was last updated", + ), + ) + op.alter_column("user", "updated_at", server_default=None) + # 2. job.project_id foreign key (with orphan backfill) op.execute( """ @@ -85,7 +116,10 @@ def upgrade(): 'WHERE "deleted_at" IS NULL' ) - # 4. documentcollection unique constraint (with dedupe) + # 4. documentcollection unique constraint (with dedupe). + # Build the underlying unique index CONCURRENTLY (no AccessExclusive + # on the table during the scan/build), then attach it as a constraint + # via ADD CONSTRAINT ... USING INDEX (catalog-only, brief lock). 
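+    #    One retry caveat (not handled here): a failed CONCURRENTLY build
+    #    leaves an INVALID index behind, and IF NOT EXISTS will then skip
+    #    the rebuild; drop the invalid index manually before re-running.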
op.execute(
         """
         DELETE FROM documentcollection
         WHERE id NOT IN (
             SELECT MIN(id)
             FROM documentcollection
             GROUP BY document_id, collection_id
         )
         """
     )
-    op.create_unique_constraint(
-        "uq_document_collection",
-        "documentcollection",
-        ["document_id", "collection_id"],
-    )
+    with op.get_context().autocommit_block():
+        op.execute(
+            "CREATE UNIQUE INDEX CONCURRENTLY IF NOT EXISTS "
+            '"uq_document_collection" '
+            'ON "documentcollection" ("document_id", "collection_id")'
+        )
+        op.execute(
+            'ALTER TABLE "documentcollection" '
+            'ADD CONSTRAINT "uq_document_collection" '
+            'UNIQUE USING INDEX "uq_document_collection"'
+        )
 
     # 5. Align global.languages.id to INTEGER
@@ -117,4 +157,6 @@ def downgrade():
     op.alter_column("llm_call", "inserted_at", new_column_name="created_at")
     op.alter_column("job", "inserted_at", new_column_name="created_at")
     op.drop_constraint("job_project_id_fkey", "job", type_="foreignkey")
+    op.drop_column("user", "updated_at")
+    op.drop_column("user", "inserted_at")
     op.drop_column("user_project", "updated_at")
diff --git a/backend/app/models/user.py b/backend/app/models/user.py
index 9596fb0e4..924a5c414 100644
--- a/backend/app/models/user.py
+++ b/backend/app/models/user.py
@@ -1,6 +1,10 @@
+from datetime import datetime
+
 from pydantic import EmailStr
 from sqlmodel import Field, SQLModel
 
+from app.core.util import now
+
 
 # Shared properties
 class UserBase(SQLModel):
@@ -73,6 +77,19 @@ class User(UserBase, table=True):
     hashed_password: str = Field(
         sa_column_kwargs={"comment": "Bcrypt hash of the user's password"},
     )
+    inserted_at: datetime = Field(
+        default_factory=now,
+        nullable=False,
+        sa_column_kwargs={"comment": "Timestamp when the user was created"},
+    )
+    updated_at: datetime = Field(
+        default_factory=now,
+        nullable=False,
+        sa_column_kwargs={
+            "comment": "Timestamp when the user was last updated",
+            "onupdate": now,
+        },
+    )
 
 
 # Properties to return via API, id is always required
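
Reviewer note: to spot-check that the hot-path partial indexes from
migration 058 are picked up, an illustrative EXPLAIN is below (index and
column names come from that migration; the literal 42 is a placeholder
project id):

    EXPLAIN (COSTS OFF)
    SELECT *
    FROM document
    WHERE project_id = 42
      AND deleted_at IS NULL
    ORDER BY inserted_at DESC
    LIMIT 20;

On a table with enough rows the plan should show an index scan on
ix_document_project_inserted_at_active; on a near-empty dev database the
planner may still prefer a sequential scan, which is expected.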