From 4a1bf150ee820bae700dfb83e7623dbf03de6c62 Mon Sep 17 00:00:00 2001 From: AkhileshNegi Date: Tue, 9 Dec 2025 15:11:29 +0530 Subject: [PATCH 01/13] added database comments --- backend/app/alembic/versions/6d3cfa2cf283_.py | 149 ++++++++++++++++++ backend/app/models/credentials.py | 63 ++++++-- 2 files changed, 203 insertions(+), 9 deletions(-) create mode 100644 backend/app/alembic/versions/6d3cfa2cf283_.py diff --git a/backend/app/alembic/versions/6d3cfa2cf283_.py b/backend/app/alembic/versions/6d3cfa2cf283_.py new file mode 100644 index 000000000..fa3a3116a --- /dev/null +++ b/backend/app/alembic/versions/6d3cfa2cf283_.py @@ -0,0 +1,149 @@ +"""empty message + +Revision ID: 6d3cfa2cf283 +Revises: eed36ae3c79a +Create Date: 2025-12-09 15:11:09.763758 + +""" +from alembic import op +import sqlalchemy as sa +import sqlmodel.sql.sqltypes +from sqlalchemy.dialects import postgresql + +# revision identifiers, used by Alembic. +revision = "6d3cfa2cf283" +down_revision = "eed36ae3c79a" +branch_labels = None +depends_on = None + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.alter_column( + "credential", + "organization_id", + existing_type=sa.INTEGER(), + comment="Reference to the organization", + existing_nullable=False, + ) + op.alter_column( + "credential", + "project_id", + existing_type=sa.INTEGER(), + comment="Reference to the project", + existing_nullable=False, + ) + op.alter_column( + "credential", + "is_active", + existing_type=sa.BOOLEAN(), + comment="Flag indicating if this credential is currently active and usable", + existing_nullable=False, + ) + op.alter_column( + "credential", + "id", + existing_type=sa.INTEGER(), + comment="Unique ID for the credential", + existing_nullable=False, + autoincrement=True, + ) + op.alter_column( + "credential", + "provider", + existing_type=sa.VARCHAR(), + comment="Provider name like 'openai', 'gemini'", + existing_nullable=False, + ) + op.alter_column( + "credential", + "credential", + existing_type=sa.VARCHAR(), + comment="Encrypted JSON string containing provider-specific API credentials", + existing_nullable=False, + ) + op.alter_column( + "credential", + "inserted_at", + existing_type=postgresql.TIMESTAMP(), + comment="Timestamp when the credential was created", + existing_nullable=False, + ) + op.alter_column( + "credential", + "updated_at", + existing_type=postgresql.TIMESTAMP(), + comment="Timestamp when the credential was last updated", + existing_nullable=False, + ) + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.alter_column( + "credential", + "updated_at", + existing_type=postgresql.TIMESTAMP(), + comment=None, + existing_comment="Timestamp when the credential was last updated", + existing_nullable=False, + ) + op.alter_column( + "credential", + "inserted_at", + existing_type=postgresql.TIMESTAMP(), + comment=None, + existing_comment="Timestamp when the credential was created", + existing_nullable=False, + ) + op.alter_column( + "credential", + "credential", + existing_type=sa.VARCHAR(), + comment=None, + existing_comment="Encrypted JSON string containing provider-specific API credentials", + existing_nullable=False, + ) + op.alter_column( + "credential", + "provider", + existing_type=sa.VARCHAR(), + comment=None, + existing_comment="Provider name like 'openai', 'gemini'", + existing_nullable=False, + ) + op.alter_column( + "credential", + "id", + existing_type=sa.INTEGER(), + comment=None, + existing_comment="Unique ID for the credential", + existing_nullable=False, + autoincrement=True, + ) + op.alter_column( + "credential", + "is_active", + existing_type=sa.BOOLEAN(), + comment=None, + existing_comment="Flag indicating if this credential is currently active and usable", + existing_nullable=False, + ) + op.alter_column( + "credential", + "project_id", + existing_type=sa.INTEGER(), + comment=None, + existing_comment="Reference to the project", + existing_nullable=False, + ) + op.alter_column( + "credential", + "organization_id", + existing_type=sa.INTEGER(), + comment=None, + existing_comment="Reference to the organization", + existing_nullable=False, + ) + # ### end Alembic commands ### diff --git a/backend/app/models/credentials.py b/backend/app/models/credentials.py index 03230b7b9..621905429 100644 --- a/backend/app/models/credentials.py +++ b/backend/app/models/credentials.py @@ -8,12 +8,30 @@ class CredsBase(SQLModel): organization_id: int = Field( - foreign_key="organization.id", nullable=False, ondelete="CASCADE" + sa_column=sa.Column( + sa.Integer, + sa.ForeignKey("organization.id", ondelete="CASCADE"), + nullable=False, + comment="Reference to the organization", + ) ) project_id: int = Field( - default=None, foreign_key="project.id", nullable=False, ondelete="CASCADE" + sa_column=sa.Column( + sa.Integer, + sa.ForeignKey("project.id", ondelete="CASCADE"), + nullable=False, + comment="Reference to the project", + ) + ) + is_active: bool = Field( + default=True, + sa_column=sa.Column( + sa.Boolean, + default=True, + nullable=False, + comment="Flag indicating if this credential is currently active and usable", + ), ) - is_active: bool = True class CredsCreate(SQLModel): @@ -59,21 +77,48 @@ class Credential(CredsBase, table=True): ), ) - id: int = Field(default=None, primary_key=True) + id: int = Field( + default=None, + sa_column=sa.Column( + sa.Integer, + primary_key=True, + comment="Unique ID for the credential", + ), + ) provider: str = Field( - index=True, description="Provider name like 'openai', 'gemini'" + sa_column=sa.Column( + sa.String, + index=True, + nullable=False, + comment="Provider name like 'openai', 'gemini'", + ), + description="Provider name like 'openai', 'gemini'", ) credential: str = Field( - sa_column=sa.Column(sa.String, nullable=False), - description="Encrypted provider-specific credentials", + sa_column=sa.Column( + sa.String, + nullable=False, + comment="Encrypted JSON string containing provider-specific API credentials", + ), + description="Encrypted JSON string containing provider-specific API credentials", ) inserted_at: datetime = Field( default_factory=now, - sa_column=sa.Column(sa.DateTime, default=datetime.utcnow, nullable=False), + sa_column=sa.Column( + sa.DateTime, + default=datetime.utcnow, + nullable=False, + comment="Timestamp when the credential was created", + ), ) updated_at: datetime = Field( default_factory=now, - sa_column=sa.Column(sa.DateTime, onupdate=datetime.utcnow, nullable=False), + sa_column=sa.Column( + sa.DateTime, + onupdate=datetime.utcnow, + nullable=False, + comment="Timestamp when the credential was last updated", + ), ) organization: Optional["Organization"] = Relationship(back_populates="creds") From 0a5d088a9275d41ec1f848dff5f24619106f242f Mon Sep 17 00:00:00 2001 From: AkhileshNegi Date: Wed, 10 Dec 2025 13:32:49 +0530 Subject: [PATCH 02/13] sticking to pydantic --- backend/app/models/credentials.py | 73 +++++++++++-------------------- 1 file changed, 26 insertions(+), 47 deletions(-) diff --git a/backend/app/models/credentials.py b/backend/app/models/credentials.py index 621905429..3e9d28928 100644 --- a/backend/app/models/credentials.py +++ b/backend/app/models/credentials.py @@ -8,29 +8,23 @@ class CredsBase(SQLModel): organization_id: int = Field( - sa_column=sa.Column( - sa.Integer, - sa.ForeignKey("organization.id", ondelete="CASCADE"), - nullable=False, - comment="Reference to the organization", - ) + foreign_key="organization.id", + nullable=False, + ondelete="CASCADE", + sa_column_kwargs={"comment": "Reference to the organization"}, ) project_id: int = Field( - sa_column=sa.Column( - sa.Integer, - sa.ForeignKey("project.id", ondelete="CASCADE"), - nullable=False, - comment="Reference to the project", - ) + foreign_key="project.id", + nullable=False, + ondelete="CASCADE", + sa_column_kwargs={"comment": "Reference to the project"}, ) is_active: bool = Field( default=True, - sa_column=sa.Column( - sa.Boolean, - default=True, - nullable=False, - comment="Flag indicating if this credential is currently active and usable", - ), + nullable=False, + sa_column_kwargs={ + "comment": "Flag indicating if this credential is currently active and usable" + }, ) @@ -77,48 +71,33 @@ class Credential(CredsBase, table=True): ), ) - id: int = Field( + id: int | None = Field( default=None, - sa_column=sa.Column( - sa.Integer, - primary_key=True, - comment="Unique ID for the credential", - ), + primary_key=True, + sa_column_kwargs={"comment": "Unique ID for the credential"}, ) provider: str = Field( - sa_column=sa.Column( - sa.String, - index=True, - nullable=False, - comment="Provider name like 'openai', 'gemini'", - ), + index=True, + nullable=False, description="Provider name like 'openai', 'gemini'", + sa_column_kwargs={"comment": "Provider name like 'openai', 'gemini'"}, ) credential: str = Field( - sa_column=sa.Column( - sa.String, - nullable=False, - comment="Encrypted JSON string containing provider-specific API credentials", - ), + nullable=False, description="Encrypted JSON string containing provider-specific API credentials", + sa_column_kwargs={ + "comment": "Encrypted JSON string containing provider-specific API credentials" + }, ) inserted_at: datetime = Field( default_factory=now, - sa_column=sa.Column( - sa.DateTime, - default=datetime.utcnow, - nullable=False, - comment="Timestamp when the credential was created", - ), + nullable=False, + sa_column_kwargs={"comment": "Timestamp when the credential was created"}, ) updated_at: datetime = Field( default_factory=now, - sa_column=sa.Column( - sa.DateTime, - onupdate=datetime.utcnow, - nullable=False, - comment="Timestamp when the credential was last updated", - ), + nullable=False, + sa_column_kwargs={"comment": "Timestamp when the credential was last updated"}, ) organization: Optional["Organization"] = Relationship(back_populates="creds") From 1499d704a18458a9b4bff91c931c83c7e8f5cfae Mon Sep 17 00:00:00 2001 From: AkhileshNegi Date: Wed, 10 Dec 2025 13:34:41 +0530 Subject: [PATCH 03/13] updating migration --- .../{6d3cfa2cf283_.py => 8d3c3e8ce7b8_add_db_comments.py} | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) rename backend/app/alembic/versions/{6d3cfa2cf283_.py => 8d3c3e8ce7b8_add_db_comments.py} (97%) diff --git a/backend/app/alembic/versions/6d3cfa2cf283_.py b/backend/app/alembic/versions/8d3c3e8ce7b8_add_db_comments.py similarity index 97% rename from backend/app/alembic/versions/6d3cfa2cf283_.py rename to backend/app/alembic/versions/8d3c3e8ce7b8_add_db_comments.py index fa3a3116a..a38446787 100644 --- a/backend/app/alembic/versions/6d3cfa2cf283_.py +++ b/backend/app/alembic/versions/8d3c3e8ce7b8_add_db_comments.py @@ -1,8 +1,8 @@ -"""empty message +"""add_db_comments -Revision ID: 6d3cfa2cf283 +Revision ID: 8d3c3e8ce7b8 Revises: eed36ae3c79a -Create Date: 2025-12-09 15:11:09.763758 +Create Date: 2025-12-10 13:33:44.172685 """ from alembic import op @@ -11,7 +11,7 @@ from sqlalchemy.dialects import postgresql # revision identifiers, used by Alembic. -revision = "6d3cfa2cf283" +revision = "8d3c3e8ce7b8" down_revision = "eed36ae3c79a" branch_labels = None depends_on = None From 50795530687253b777eb07bd5eebd8a27bcc051a Mon Sep 17 00:00:00 2001 From: AkhileshNegi Date: Thu, 11 Dec 2025 13:10:56 +0530 Subject: [PATCH 04/13] first stab at comments --- backend/app/models/api_key.py | 73 +++++++++--- backend/app/models/assistants.py | 85 +++++++++++-- backend/app/models/batch_job.py | 48 ++++++-- backend/app/models/collection.py | 50 ++++++-- backend/app/models/collection_job.py | 49 ++++++-- backend/app/models/credentials.py | 23 ++-- backend/app/models/doc_transformation_job.py | 55 +++++++-- backend/app/models/document.py | 34 ++++-- backend/app/models/document_collection.py | 12 +- backend/app/models/evaluation.py | 118 ++++++++++++++++--- backend/app/models/fine_tuning.py | 114 +++++++++++++----- backend/app/models/job.py | 39 ++++-- backend/app/models/model_evaluation.py | 107 ++++++++++++----- backend/app/models/openai_conversation.py | 94 +++++++++++++-- backend/app/models/organization.py | 32 ++++- backend/app/models/project.py | 48 ++++++-- backend/app/models/threads.py | 50 ++++++-- backend/app/models/user.py | 39 ++++-- 18 files changed, 852 insertions(+), 218 deletions(-) diff --git a/backend/app/models/api_key.py b/backend/app/models/api_key.py index 1da563828..d06ad1fbf 100644 --- a/backend/app/models/api_key.py +++ b/backend/app/models/api_key.py @@ -1,21 +1,32 @@ -from uuid import UUID, uuid4 -import secrets -import base64 from datetime import datetime -from typing import Optional, List -from sqlmodel import SQLModel, Field, Relationship +from uuid import UUID, uuid4 + +from sqlmodel import Field, SQLModel from app.core.util import now class APIKeyBase(SQLModel): organization_id: int = Field( - foreign_key="organization.id", nullable=False, ondelete="CASCADE" + foreign_key="organization.id", + nullable=False, + ondelete="CASCADE", + sa_column_kwargs={"comment": "Reference to the organization"}, ) project_id: int = Field( - foreign_key="project.id", nullable=False, ondelete="CASCADE" + foreign_key="project.id", + nullable=False, + ondelete="CASCADE", + sa_column_kwargs={"comment": "Reference to the project"}, + ) + user_id: int = Field( + foreign_key="user.id", + nullable=False, + ondelete="CASCADE", + sa_column_kwargs={ + "comment": "Reference to the user for whom the API key was created" + }, ) - user_id: int = Field(foreign_key="user.id", nullable=False, ondelete="CASCADE") class APIKeyPublic(APIKeyBase): @@ -32,14 +43,42 @@ class APIKeyCreateResponse(APIKeyPublic): class APIKey(APIKeyBase, table=True): - id: UUID = Field(default_factory=uuid4, primary_key=True) + id: UUID = Field( + default_factory=uuid4, + primary_key=True, + sa_column_kwargs={"comment": "Unique identifier for the API key"}, + ) key_prefix: str = Field( - unique=True, index=True, nullable=False - ) # Unique identifier from the key - key_hash: str = Field(nullable=False) # bcrypt hash of the secret portion - - inserted_at: datetime = Field(default_factory=now, nullable=False) - updated_at: datetime = Field(default_factory=now, nullable=False) - is_deleted: bool = Field(default=False, nullable=False) - deleted_at: Optional[datetime] = Field(default=None, nullable=True) + unique=True, + index=True, + nullable=False, + sa_column_kwargs={ + "comment": "Unique prefix portion of the API key for identification" + }, + ) + key_hash: str = Field( + nullable=False, + sa_column_kwargs={"comment": "Bcrypt hash of the secret of the API key"}, + ) + + inserted_at: datetime = Field( + default_factory=now, + nullable=False, + sa_column_kwargs={"comment": "Timestamp when the API key was created"}, + ) + updated_at: datetime = Field( + default_factory=now, + nullable=False, + sa_column_kwargs={"comment": "Timestamp when the API key was last updated"}, + ) + is_deleted: bool = Field( + default=False, + nullable=False, + sa_column_kwargs={"comment": "Soft delete flag"}, + ) + deleted_at: datetime | None = Field( + default=None, + nullable=True, + sa_column_kwargs={"comment": "Timestamp when the API key was deleted"}, + ) diff --git a/backend/app/models/assistants.py b/backend/app/models/assistants.py index 7e7070686..6d40fa896 100644 --- a/backend/app/models/assistants.py +++ b/backend/app/models/assistants.py @@ -1,5 +1,5 @@ from datetime import datetime -from typing import List, Optional +from typing import TYPE_CHECKING from sqlalchemy import Column, String, Text from sqlalchemy.dialects.postgresql import ARRAY @@ -7,6 +7,10 @@ from app.core.util import now +if TYPE_CHECKING: + from .organization import Organization + from .project import Project + class AssistantBase(SQLModel): __table_args__ = ( @@ -17,7 +21,7 @@ class AssistantBase(SQLModel): name: str instructions: str = Field(sa_column=Column(Text, nullable=False)) model: str - vector_store_ids: List[str] = Field( + vector_store_ids: list[str] = Field( default_factory=list, sa_column=Column(ARRAY(String)) ) temperature: float = 0.1 @@ -31,13 +35,80 @@ class AssistantBase(SQLModel): class Assistant(AssistantBase, table=True): + """OpenAI assistant configuration and metadata.""" + __tablename__ = "openai_assistant" - id: int = Field(default=None, primary_key=True) - inserted_at: datetime = Field(default_factory=now, nullable=False) - updated_at: datetime = Field(default_factory=now, nullable=False) - is_deleted: bool = Field(default=False, nullable=False) - deleted_at: Optional[datetime] = Field(default=None, nullable=True) + id: int = Field( + default=None, + primary_key=True, + sa_column_kwargs={"comment": "Unique identifier for the assistant"}, + ) + assistant_id: str = Field( + index=True, + sa_column_kwargs={"comment": "Unique identifier for the assistant at OpenAI"}, + ) + name: str = Field( + sa_column_kwargs={"comment": "Name of the assistant"}, + ) + instructions: str = Field( + sa_column=Column( + Text, nullable=False, comment="System instructions for the assistant" + ) + ) + model: str = Field( + sa_column_kwargs={"comment": "OpenAI model used by the assistant"}, + ) + vector_store_ids: list[str] = Field( + default_factory=list, + sa_column=Column( + ARRAY(String), comment="List of OpenAI vector store IDs attached" + ), + ) + temperature: float = Field( + default=0.1, + sa_column_kwargs={ + "comment": "Parameter that controls the creativity or randomness of the text generated by model" + }, + ) + max_num_results: int = Field( + default=20, + sa_column_kwargs={ + "comment": "Parameter that controls maximum number of results to return" + }, + ) + project_id: int = Field( + foreign_key="project.id", + nullable=False, + ondelete="CASCADE", + sa_column_kwargs={"comment": "Reference to the project"}, + ) + organization_id: int = Field( + foreign_key="organization.id", + nullable=False, + ondelete="CASCADE", + sa_column_kwargs={"comment": "Reference to the organization"}, + ) + inserted_at: datetime = Field( + default_factory=now, + nullable=False, + sa_column_kwargs={"comment": "Timestamp when the assistant was created"}, + ) + updated_at: datetime = Field( + default_factory=now, + nullable=False, + sa_column_kwargs={"comment": "Timestamp when the assistant was last updated"}, + ) + is_deleted: bool = Field( + default=False, + nullable=False, + sa_column_kwargs={"comment": "Soft delete flag"}, + ) + deleted_at: datetime | None = Field( + default=None, + nullable=True, + sa_column_kwargs={"comment": "Timestamp when the assistant was deleted"}, + ) # Relationships project: "Project" = Relationship(back_populates="assistants") diff --git a/backend/app/models/batch_job.py b/backend/app/models/batch_job.py index 68b79762a..d34c040fd 100644 --- a/backend/app/models/batch_job.py +++ b/backend/app/models/batch_job.py @@ -13,7 +13,7 @@ class BatchJob(SQLModel, table=True): - """Batch job table for tracking async LLM batch operations.""" + """Database model for BatchJob operations.""" __tablename__ = "batch_job" __table_args__ = ( @@ -21,26 +21,37 @@ class BatchJob(SQLModel, table=True): Index("idx_batch_job_status_project", "provider_status", "project_id"), ) - id: int | None = Field(default=None, primary_key=True) + id: int | None = Field( + default=None, + primary_key=True, + sa_column_kwargs={"comment": "Unique identifier for the batch job"}, + ) # Provider and job type provider: str = Field( description="LLM provider name (e.g., 'openai', 'anthropic')", + sa_column_kwargs={"comment": "LLM provider name (e.g., openai, anthropic)"}, ) job_type: str = Field( index=True, description=( "Type of batch job (e.g., 'evaluation', 'classification', 'embedding')" ), + sa_column_kwargs={ + "comment": "Type of batch job (e.g., evaluation, classification, embedding)" + }, ) # Batch configuration - stores all provider-specific config config: dict[str, Any] = Field( default_factory=dict, - sa_column=Column(JSONB, nullable=False), + sa_column=Column( + JSONB, + nullable=False, + comment="Complete batch configuration including model, temperature, instructions, tools, etc.", + ), description=( - "Complete batch configuration including model, temperature, " - "instructions, tools, etc." + "Complete batch configuration including model, temperature, instructions, tools, etc." ), ) @@ -48,14 +59,17 @@ class BatchJob(SQLModel, table=True): provider_batch_id: str | None = Field( default=None, description="Provider's batch job ID (e.g., OpenAI batch_id)", + sa_column_kwargs={"comment": "Provider's batch job ID (e.g., OpenAI batch_id)"}, ) provider_file_id: str | None = Field( default=None, description="Provider's input file ID", + sa_column_kwargs={"comment": "Provider's input file ID"}, ) provider_output_file_id: str | None = Field( default=None, description="Provider's output file ID", + sa_column_kwargs={"comment": "Provider's output file ID"}, ) # Provider status tracking @@ -65,40 +79,56 @@ class BatchJob(SQLModel, table=True): "Provider-specific status (e.g., OpenAI: validating, in_progress, " "finalizing, completed, failed, expired, cancelling, cancelled)" ), + sa_column_kwargs={ + "comment": "Provider-specific status (e.g., validating, in_progress, completed, failed)" + }, ) # Raw results (before parent-specific processing) raw_output_url: str | None = Field( default=None, description="S3 URL of raw batch output file", + sa_column_kwargs={"comment": "S3 URL of raw batch output file"}, ) total_items: int = Field( default=0, description="Total number of items in the batch", + sa_column_kwargs={"comment": "Total number of items in the batch"}, ) # Error handling error_message: str | None = Field( default=None, - sa_column=Column(Text, nullable=True), + sa_column=Column(Text, nullable=True, comment="Error message if batch failed"), description="Error message if batch failed", ) # Foreign keys organization_id: int = Field( - foreign_key="organization.id", nullable=False, ondelete="CASCADE", index=True + foreign_key="organization.id", + nullable=False, + ondelete="CASCADE", + index=True, + sa_column_kwargs={"comment": "Reference to the organization"}, ) project_id: int = Field( - foreign_key="project.id", nullable=False, ondelete="CASCADE", index=True + foreign_key="project.id", + nullable=False, + ondelete="CASCADE", + index=True, + sa_column_kwargs={"comment": "Reference to the project"}, ) # Timestamps inserted_at: datetime = Field( - default_factory=now, description="The timestamp when the batch job was started" + default_factory=now, + description="The timestamp when the batch job was started", + sa_column_kwargs={"comment": "Timestamp when the batch job was started"}, ) updated_at: datetime = Field( default_factory=now, description="The timestamp when the batch job was last updated", + sa_column_kwargs={"comment": "Timestamp when the batch job was last updated"}, ) # Relationships diff --git a/backend/app/models/collection.py b/backend/app/models/collection.py index e09f56226..e063d0f9b 100644 --- a/backend/app/models/collection.py +++ b/backend/app/models/collection.py @@ -1,37 +1,63 @@ -from uuid import UUID, uuid4 from datetime import datetime -from typing import Any, Optional +from typing import Any +from uuid import UUID, uuid4 -from sqlmodel import Field, Relationship, SQLModel from pydantic import HttpUrl, model_validator +from sqlmodel import Field, Relationship, SQLModel from app.core.util import now from app.models.document import DocumentPublic + from .organization import Organization from .project import Project class Collection(SQLModel, table=True): - id: UUID = Field(default_factory=uuid4, primary_key=True) + """Database model for Collection operations.""" + + id: UUID = Field( + default_factory=uuid4, + primary_key=True, + sa_column_kwargs={"comment": "Unique identifier for the collection"}, + ) organization_id: int = Field( foreign_key="organization.id", nullable=False, ondelete="CASCADE", + sa_column_kwargs={"comment": "Reference to the organization"}, ) project_id: int = Field( foreign_key="project.id", nullable=False, ondelete="CASCADE", + sa_column_kwargs={"comment": "Reference to the project"}, ) - llm_service_id: str = Field(nullable=False) - llm_service_name: str = Field(nullable=False) + llm_service_id: str = Field( + nullable=False, + sa_column_kwargs={ + "comment": "External LLM service identifier (e.g., OpenAI vector store ID)" + }, + ) + llm_service_name: str = Field( + nullable=False, + sa_column_kwargs={"comment": "Name of the LLM service provider"}, + ) - inserted_at: datetime = Field(default_factory=now) - updated_at: datetime = Field(default_factory=now) - deleted_at: Optional[datetime] = None + inserted_at: datetime = Field( + default_factory=now, + sa_column_kwargs={"comment": "Timestamp when the collection was created"}, + ) + updated_at: datetime = Field( + default_factory=now, + sa_column_kwargs={"comment": "Timestamp when the collection was last updated"}, + ) + deleted_at: datetime | None = Field( + default=None, + sa_column_kwargs={"comment": "Timestamp when the collection was deleted"}, + ) organization: Organization = Relationship(back_populates="collections") project: Project = Relationship(back_populates="collections") @@ -59,7 +85,7 @@ class AssistantOptions(SQLModel): # Fields to be passed along to OpenAI. They must be a subset of # parameters accepted by the OpenAI.clien.beta.assistants.create # API. - model: Optional[str] = Field( + model: str | None = Field( default=None, description=( "**[To Be Deprecated]** " @@ -69,7 +95,7 @@ class AssistantOptions(SQLModel): ), ) - instructions: Optional[str] = Field( + instructions: str | None = Field( default=None, description=( "**[To Be Deprecated]** " @@ -112,7 +138,7 @@ def norm(x: Any) -> Any: class CallbackRequest(SQLModel): - callback_url: Optional[HttpUrl] = Field( + callback_url: HttpUrl | None = Field( default=None, description="URL to call to report endpoint status", ) diff --git a/backend/app/models/collection_job.py b/backend/app/models/collection_job.py index 4739b16c0..f81d68d04 100644 --- a/backend/app/models/collection_job.py +++ b/backend/app/models/collection_job.py @@ -1,12 +1,11 @@ +from datetime import datetime from enum import Enum from uuid import UUID, uuid4 -from datetime import datetime -from sqlmodel import Field, SQLModel, Column, Text -from pydantic import ConfigDict +from sqlmodel import Column, Field, SQLModel, Text from app.core.util import now -from app.models.collection import CollectionPublic, CollectionIDPublic +from app.models.collection import CollectionIDPublic, CollectionPublic class CollectionJobStatus(str, Enum): @@ -22,41 +21,67 @@ class CollectionActionType(str, Enum): class CollectionJob(SQLModel, table=True): - """Database model for tracking collection operations.""" + """Database model for CollectionJob operations.""" __tablename__ = "collection_jobs" - id: UUID = Field(default_factory=uuid4, primary_key=True) + id: UUID = Field( + default_factory=uuid4, + primary_key=True, + sa_column_kwargs={"comment": "Unique identifier for the collection job"}, + ) status: CollectionJobStatus = Field( default=CollectionJobStatus.PENDING, nullable=False, description="Current job status", + sa_column_kwargs={ + "comment": "Current job status (PENDING, PROCESSING, SUCCESSFUL, FAILED)" + }, ) action_type: CollectionActionType = Field( - nullable=False, description="Type of operation" + nullable=False, + description="Type of operation", + sa_column_kwargs={"comment": "Type of operation (CREATE, DELETE)"}, ) collection_id: UUID | None = Field( - foreign_key="collection.id", nullable=True, ondelete="CASCADE" + foreign_key="collection.id", + nullable=True, + ondelete="CASCADE", + sa_column_kwargs={"comment": "Reference to the collection"}, ) project_id: int = Field( - foreign_key="project.id", nullable=False, ondelete="CASCADE" + foreign_key="project.id", + nullable=False, + ondelete="CASCADE", + sa_column_kwargs={"comment": "Reference to the project"}, + ) + task_id: str = Field( + nullable=True, + sa_column_kwargs={"comment": "Celery task ID for async processing"}, ) - task_id: str = Field(nullable=True) trace_id: str | None = Field( - default=None, description="Tracing ID for correlating logs and traces." + default=None, + description="Tracing ID for correlating logs and traces.", + sa_column_kwargs={"comment": "Tracing ID for correlating logs and traces"}, ) - error_message: str | None = Field(sa_column=Column(Text, nullable=True)) + error_message: str | None = Field( + sa_column=Column( + Text, nullable=True, comment="Error message if the job failed" + ), + ) inserted_at: datetime = Field( default_factory=now, nullable=False, description="When the job record was created", + sa_column_kwargs={"comment": "Timestamp when the job was created"}, ) updated_at: datetime = Field( default_factory=now, nullable=False, description="Last time the job record was updated", + sa_column_kwargs={"comment": "Timestamp when the job was last updated"}, ) @property diff --git a/backend/app/models/credentials.py b/backend/app/models/credentials.py index 3e9d28928..150797c5e 100644 --- a/backend/app/models/credentials.py +++ b/backend/app/models/credentials.py @@ -1,12 +1,19 @@ -from typing import Dict, Any, Optional +from datetime import datetime +from typing import TYPE_CHECKING, Any + import sqlalchemy as sa from sqlmodel import Field, Relationship, SQLModel -from datetime import datetime from app.core.util import now +if TYPE_CHECKING: + from .organization import Organization + from .project import Project + class CredsBase(SQLModel): + """Database model for CredsBase operations.""" + organization_id: int = Field( foreign_key="organization.id", nullable=False, @@ -35,7 +42,7 @@ class CredsCreate(SQLModel): """ is_active: bool = True - credential: Dict[str, Any] = Field( + credential: dict[str, Any] = Field( default=None, description="Dictionary mapping provider names to their credentials", ) @@ -49,10 +56,10 @@ class CredsUpdate(SQLModel): provider: str = Field( description="Name of the provider to update/add credentials for" ) - credential: Dict[str, Any] = Field( + credential: dict[str, Any] = Field( description="Credentials for the specified provider", ) - is_active: Optional[bool] = Field( + is_active: bool | None = Field( default=None, description="Whether the credentials are active" ) @@ -100,8 +107,8 @@ class Credential(CredsBase, table=True): sa_column_kwargs={"comment": "Timestamp when the credential was last updated"}, ) - organization: Optional["Organization"] = Relationship(back_populates="creds") - project: Optional["Project"] = Relationship(back_populates="creds") + organization: "Organization | None" = Relationship(back_populates="creds") + project: "Project | None" = Relationship(back_populates="creds") def to_public(self) -> "CredsPublic": """Convert the database model to a public model with decrypted credentials.""" @@ -126,6 +133,6 @@ class CredsPublic(CredsBase): id: int provider: str - credential: Optional[Dict[str, Any]] = None + credential: dict[str, Any] | None = None inserted_at: datetime updated_at: datetime diff --git a/backend/app/models/doc_transformation_job.py b/backend/app/models/doc_transformation_job.py index 139825ee0..a80745373 100644 --- a/backend/app/models/doc_transformation_job.py +++ b/backend/app/models/doc_transformation_job.py @@ -1,9 +1,9 @@ import enum -from uuid import UUID, uuid4 from datetime import datetime +from uuid import UUID, uuid4 -from sqlmodel import SQLModel, Field from pydantic import ConfigDict +from sqlmodel import Field, SQLModel from app.core.util import now @@ -16,21 +16,54 @@ class TransformationStatus(str, enum.Enum): class DocTransformationJob(SQLModel, table=True): + """Database model for DocTransformationJob operations.""" + __tablename__ = "doc_transformation_job" - id: UUID = Field(default_factory=uuid4, primary_key=True) - source_document_id: UUID = Field(foreign_key="document.id") + id: UUID = Field( + default_factory=uuid4, + primary_key=True, + sa_column_kwargs={"comment": "Unique identifier for the transformation job"}, + ) + source_document_id: UUID = Field( + foreign_key="document.id", + sa_column_kwargs={ + "comment": "Reference to the source document being transformed" + }, + ) transformed_document_id: UUID | None = Field( - default=None, foreign_key="document.id" + default=None, + foreign_key="document.id", + sa_column_kwargs={"comment": "Reference to the resulting transformed document"}, + ) + status: TransformationStatus = Field( + default=TransformationStatus.PENDING, + sa_column_kwargs={ + "comment": "Current status (PENDING, PROCESSING, COMPLETED, FAILED)" + }, + ) + task_id: str | None = Field( + default=None, + nullable=True, + sa_column_kwargs={"comment": "Celery task ID for async processing"}, ) - status: TransformationStatus = Field(default=TransformationStatus.PENDING) - task_id: str | None = Field(default=None, nullable=True) trace_id: str | None = Field( - default=None, description="Tracing ID for correlating logs and traces." + default=None, + description="Tracing ID for correlating logs and traces.", + sa_column_kwargs={"comment": "Tracing ID for correlating logs and traces"}, + ) + error_message: str | None = Field( + default=None, + sa_column_kwargs={"comment": "Error message if transformation failed"}, + ) + inserted_at: datetime = Field( + default_factory=now, + sa_column_kwargs={"comment": "Timestamp when the job was created"}, + ) + updated_at: datetime = Field( + default_factory=now, + sa_column_kwargs={"comment": "Timestamp when the job was last updated"}, ) - error_message: str | None = Field(default=None) - inserted_at: datetime = Field(default_factory=now) - updated_at: datetime = Field(default_factory=now) @property def job_id(self) -> UUID: diff --git a/backend/app/models/document.py b/backend/app/models/document.py index 60d281423..126f64e31 100644 --- a/backend/app/models/document.py +++ b/backend/app/models/document.py @@ -1,6 +1,5 @@ -from uuid import UUID, uuid4 from datetime import datetime -from typing import Optional +from uuid import UUID, uuid4 from sqlmodel import Field, SQLModel @@ -14,8 +13,12 @@ class DocumentBase(SQLModel): foreign_key="project.id", nullable=False, ondelete="CASCADE", + sa_column_kwargs={"comment": "Reference to the project"}, + ) + fname: str = Field( + description="The original filename of the document", + sa_column_kwargs={"comment": "Original filename of the document"}, ) - fname: str = Field(description="The original filename of the document") class Document(DocumentBase, table=True): @@ -23,21 +26,36 @@ class Document(DocumentBase, table=True): default_factory=uuid4, primary_key=True, description="The unique identifier of the document", + sa_column_kwargs={"comment": "Unique identifier for the document"}, + ) + object_store_url: str = Field( + sa_column_kwargs={"comment": "Cloud storage URL for the document"}, ) - object_store_url: str inserted_at: datetime = Field( - default_factory=now, description="The timestamp when the document was inserted" + default_factory=now, + description="The timestamp when the document was inserted", + sa_column_kwargs={"comment": "Timestamp when the document was uploaded"}, ) updated_at: datetime = Field( default_factory=now, description="The timestamp when the document was last updated", + sa_column_kwargs={"comment": "Timestamp when the document was last updated"}, ) - is_deleted: bool = Field(default=False) - deleted_at: datetime | None - source_document_id: Optional[UUID] = Field( + is_deleted: bool = Field( + default=False, + sa_column_kwargs={"comment": "Soft delete flag"}, + ) + deleted_at: datetime | None = Field( + default=None, + sa_column_kwargs={"comment": "Timestamp when the document was deleted"}, + ) + source_document_id: UUID | None = Field( default=None, foreign_key="document.id", nullable=True, + sa_column_kwargs={ + "comment": "Reference to source document if this is a transformation" + }, ) diff --git a/backend/app/models/document_collection.py b/backend/app/models/document_collection.py index 0e43259e0..93db6df31 100644 --- a/backend/app/models/document_collection.py +++ b/backend/app/models/document_collection.py @@ -1,23 +1,27 @@ from uuid import UUID -from typing import Optional from sqlmodel import Field, SQLModel -from app.core.util import now - class DocumentCollection(SQLModel, table=True): - id: Optional[int] = Field( + """Junction table linking documents to collections.""" + + id: int | None = Field( default=None, primary_key=True, + sa_column_kwargs={ + "comment": "Unique identifier for the document-collection link" + }, ) document_id: UUID = Field( foreign_key="document.id", nullable=False, ondelete="CASCADE", + sa_column_kwargs={"comment": "Reference to the document"}, ) collection_id: UUID = Field( foreign_key="collection.id", nullable=False, ondelete="CASCADE", + sa_column_kwargs={"comment": "Reference to the collection"}, ) diff --git a/backend/app/models/evaluation.py b/backend/app/models/evaluation.py index d86db8929..cac6a4582 100644 --- a/backend/app/models/evaluation.py +++ b/backend/app/models/evaluation.py @@ -83,18 +83,32 @@ class EvaluationDataset(SQLModel, table=True): ), ) - id: int = SQLField(default=None, primary_key=True) + id: int = SQLField( + default=None, + primary_key=True, + sa_column_kwargs={"comment": "Unique identifier for the dataset"}, + ) # Dataset information - name: str = SQLField(index=True, description="Name of the dataset") + name: str = SQLField( + index=True, + description="Name of the dataset", + sa_column_kwargs={"comment": "Name of the evaluation dataset"}, + ) description: str | None = SQLField( - default=None, description="Optional description of the dataset" + default=None, + description="Optional description of the dataset", + sa_column_kwargs={"comment": "Description of the dataset"}, ) # Dataset metadata stored as JSONB dataset_metadata: dict[str, Any] = SQLField( default_factory=dict, - sa_column=Column(JSONB, nullable=False), + sa_column=Column( + JSONB, + nullable=False, + comment="Dataset metadata (item counts, duplication factor, etc.)", + ), description=( "Dataset metadata (original_items_count, total_items_count, " "duplication_factor)" @@ -103,23 +117,47 @@ class EvaluationDataset(SQLModel, table=True): # Storage references object_store_url: str | None = SQLField( - default=None, description="Object store URL where CSV is stored" + default=None, + description="Object store URL where CSV is stored", + sa_column_kwargs={"comment": "S3 URL where the dataset CSV is stored"}, ) langfuse_dataset_id: str | None = SQLField( - default=None, description="Langfuse dataset ID for reference" + default=None, + description="Langfuse dataset ID for reference", + sa_column_kwargs={ + "comment": "Langfuse dataset ID for observability integration" + }, ) # Foreign keys organization_id: int = SQLField( - foreign_key="organization.id", nullable=False, ondelete="CASCADE" + foreign_key="organization.id", + nullable=False, + ondelete="CASCADE", + sa_column_kwargs={"comment": "Reference to the organization"}, ) project_id: int = SQLField( - foreign_key="project.id", nullable=False, ondelete="CASCADE" + foreign_key="project.id", + nullable=False, + ondelete="CASCADE", + sa_column_kwargs={"comment": "Reference to the project"}, ) # Timestamps - inserted_at: datetime = SQLField(default_factory=now, nullable=False) - updated_at: datetime = SQLField(default_factory=now, nullable=False) + inserted_at: datetime = SQLField( + default_factory=now, + nullable=False, + sa_column_kwargs={ + "comment": "Timestamp when the evaluation dataset was created" + }, + ) + updated_at: datetime = SQLField( + default_factory=now, + nullable=False, + sa_column_kwargs={ + "comment": "Timestamp when the evaluation dataset was last updated" + }, + ) # Relationships project: "Project" = Relationship() @@ -138,16 +176,31 @@ class EvaluationRun(SQLModel, table=True): Index("idx_eval_run_status_project", "status", "project_id"), ) - id: int = SQLField(default=None, primary_key=True) + id: int = SQLField( + default=None, + primary_key=True, + sa_column_kwargs={"comment": "Unique identifier for the evaluation run"}, + ) # Input fields (provided by user) - run_name: str = SQLField(index=True, description="Name of the evaluation run") - dataset_name: str = SQLField(description="Name of the Langfuse dataset") + run_name: str = SQLField( + index=True, + description="Name of the evaluation run", + sa_column_kwargs={"comment": "Name of the evaluation run"}, + ) + dataset_name: str = SQLField( + description="Name of the Langfuse dataset", + sa_column_kwargs={"comment": "Name of the Langfuse dataset used"}, + ) # Config field - dict requires sa_column config: dict[str, Any] = SQLField( default_factory=dict, - sa_column=Column(JSONB, nullable=False), + sa_column=Column( + JSONB, + nullable=False, + comment="Evaluation configuration (model, instructions, etc.)", + ), description="Evaluation configuration", ) @@ -157,6 +210,7 @@ class EvaluationRun(SQLModel, table=True): nullable=False, ondelete="CASCADE", description="Reference to the evaluation_dataset used for this run", + sa_column_kwargs={"comment": "Reference to the evaluation dataset"}, ) # Batch job references @@ -167,6 +221,7 @@ class EvaluationRun(SQLModel, table=True): description=( "Reference to the batch_job that processes this evaluation (responses)" ), + sa_column_kwargs={"comment": "Reference to the batch job for responses"}, ) embedding_batch_job_id: int | None = SQLField( default=None, @@ -174,51 +229,76 @@ class EvaluationRun(SQLModel, table=True): nullable=True, ondelete="SET NULL", description="Reference to the batch_job for embedding-based similarity scoring", + sa_column_kwargs={ + "comment": "Reference to the batch job for embedding similarity scoring" + }, ) # Output/Status fields (updated by system during processing) status: str = SQLField( default="pending", description="Overall evaluation status: pending, processing, completed, failed", + sa_column_kwargs={ + "comment": "Evaluation status (pending, processing, completed, failed)" + }, ) object_store_url: str | None = SQLField( default=None, description="Object store URL of processed evaluation results for future reference", + sa_column_kwargs={"comment": "S3 URL of processed evaluation results"}, ) total_items: int = SQLField( - default=0, description="Total number of items evaluated (set during processing)" + default=0, + description="Total number of items evaluated (set during processing)", + sa_column_kwargs={"comment": "Total number of items evaluated"}, ) # Score field - dict requires sa_column score: dict[str, Any] | None = SQLField( default=None, - sa_column=Column(JSONB, nullable=True), + sa_column=Column( + JSONB, + nullable=True, + comment="Evaluation scores (correctness, cosine_similarity, etc.)", + ), description="Evaluation scores (e.g., correctness, cosine_similarity, etc.)", ) # Error message field error_message: str | None = SQLField( default=None, - sa_column=Column(Text, nullable=True), + sa_column=Column( + Text, nullable=True, comment="Error message if evaluation failed" + ), description="Error message if failed", ) # Foreign keys organization_id: int = SQLField( - foreign_key="organization.id", nullable=False, ondelete="CASCADE" + foreign_key="organization.id", + nullable=False, + ondelete="CASCADE", + sa_column_kwargs={"comment": "Reference to the organization"}, ) project_id: int = SQLField( - foreign_key="project.id", nullable=False, ondelete="CASCADE" + foreign_key="project.id", + nullable=False, + ondelete="CASCADE", + sa_column_kwargs={"comment": "Reference to the project"}, ) # Timestamps inserted_at: datetime = Field( default_factory=now, description="The timestamp when the evaluation run was started", + sa_column_kwargs={"comment": "Timestamp when the evaluation run was started"}, ) updated_at: datetime = Field( default_factory=now, description="The timestamp when the evaluation run was last updated", + sa_column_kwargs={ + "comment": "Timestamp when the evaluation run was last updated" + }, ) # Relationships diff --git a/backend/app/models/fine_tuning.py b/backend/app/models/fine_tuning.py index 4e326ee52..e2d3dfa3c 100644 --- a/backend/app/models/fine_tuning.py +++ b/backend/app/models/fine_tuning.py @@ -1,14 +1,18 @@ -from typing import Optional -from uuid import UUID -from enum import Enum from datetime import datetime +from enum import Enum +from typing import TYPE_CHECKING +from uuid import UUID -from sqlalchemy import Column, Text from pydantic import field_validator -from sqlmodel import SQLModel, Field, Relationship +from sqlalchemy import Column, Text +from sqlmodel import Field, Relationship, SQLModel from app.core.util import now +if TYPE_CHECKING: + from .model_evaluation import ModelEvaluation + from .project import Project + class FineTuningStatus(str, Enum): pending = "pending" @@ -22,7 +26,7 @@ class FineTuningJobBase(SQLModel): base_model: str = Field(nullable=False, description="Base model for fine-tuning") split_ratio: float = Field(nullable=False) document_id: UUID = Field(foreign_key="document.id", nullable=False) - training_file_id: Optional[str] = Field(default=None) + training_file_id: str | None = Field(default=None) system_prompt: str = Field(sa_column=Column(Text, nullable=False)) @@ -55,50 +59,102 @@ def check_prompt(cls, v): class Fine_Tuning(FineTuningJobBase, table=True): """Database model for tracking fine-tuning jobs.""" - id: int = Field(primary_key=True) + id: int = Field( + primary_key=True, + sa_column_kwargs={"comment": "Unique identifier for the fine-tuning job"}, + ) + base_model: str = Field( + nullable=False, + sa_column_kwargs={"comment": "Base model used for fine-tuning"}, + ) + split_ratio: float = Field( + nullable=False, + sa_column_kwargs={"comment": "Train/test split ratio for the dataset"}, + ) + document_id: UUID = Field( + foreign_key="document.id", + nullable=False, + sa_column_kwargs={"comment": "Reference to the training document"}, + ) + training_file_id: str | None = Field( + default=None, + sa_column_kwargs={"comment": "OpenAI training file identifier"}, + ) + system_prompt: str = Field( + sa_column=Column( + Text, nullable=False, comment="System prompt used during fine-tuning" + ) + ) provider_job_id: str | None = Field( - default=None, description="Fine tuning Job ID returned by OpenAI" + default=None, + sa_column_kwargs={"comment": "Fine-tuning job ID returned by the provider"}, ) - status: FineTuningStatus = ( - Field(default=FineTuningStatus.pending, description="Fine tuning status"), + status: FineTuningStatus = Field( + default=FineTuningStatus.pending, + sa_column_kwargs={"comment": "Current status of the fine-tuning job"}, ) fine_tuned_model: str | None = Field( - default=None, description="Final fine tuned model name from OpenAI" + default=None, + sa_column_kwargs={"comment": "Name of the resulting fine-tuned model"}, ) train_data_s3_object: str | None = Field( - default=None, description="S3 URI of the training data stored ins S3" + default=None, + sa_column_kwargs={"comment": "S3 URI of the training data"}, ) test_data_s3_object: str | None = Field( - default=None, description="S3 URI of the testing data stored ins S3" + default=None, + sa_column_kwargs={"comment": "S3 URI of the testing data"}, ) error_message: str | None = Field( - default=None, description="error message for when something failed" + default=None, + sa_column_kwargs={"comment": "Error message if the job failed"}, ) project_id: int = Field( - foreign_key="project.id", nullable=False, ondelete="CASCADE" + foreign_key="project.id", + nullable=False, + ondelete="CASCADE", + sa_column_kwargs={"comment": "Reference to the project"}, ) organization_id: int = Field( - foreign_key="organization.id", nullable=False, ondelete="CASCADE" + foreign_key="organization.id", + nullable=False, + ondelete="CASCADE", + sa_column_kwargs={"comment": "Reference to the organization"}, + ) + is_deleted: bool = Field( + default=False, + nullable=False, + sa_column_kwargs={"comment": "Soft delete flag"}, + ) + inserted_at: datetime = Field( + default_factory=now, + nullable=False, + sa_column_kwargs={"comment": "Timestamp when the job was created"}, + ) + updated_at: datetime = Field( + default_factory=now, + nullable=False, + sa_column_kwargs={"comment": "Timestamp when the job was last updated"}, + ) + deleted_at: datetime | None = Field( + default=None, + nullable=True, + sa_column_kwargs={"comment": "Timestamp when the job was deleted"}, ) - is_deleted: bool = Field(default=False, nullable=False) - - inserted_at: datetime = Field(default_factory=now, nullable=False) - updated_at: datetime = Field(default_factory=now, nullable=False) - deleted_at: datetime | None = Field(default=None, nullable=True) project: "Project" = Relationship(back_populates="fine_tuning") model_evaluation: "ModelEvaluation" = Relationship(back_populates="fine_tuning") class FineTuningUpdate(SQLModel): - training_file_id: Optional[str] = None - train_data_s3_object: Optional[str] = None - test_data_s3_object: Optional[str] = None - split_ratio: Optional[float] = None - provider_job_id: Optional[str] = None - fine_tuned_model: Optional[str] = None - status: Optional[str] = None - error_message: Optional[str] = None + training_file_id: str | None = None + train_data_s3_object: str | None = None + test_data_s3_object: str | None = None + split_ratio: float | None = None + provider_job_id: str | None = None + fine_tuned_model: str | None = None + status: str | None = None + error_message: str | None = None class FineTuningJobPublic(SQLModel): diff --git a/backend/app/models/job.py b/backend/app/models/job.py index 62851f5fd..e31941816 100644 --- a/backend/app/models/job.py +++ b/backend/app/models/job.py @@ -1,8 +1,9 @@ from datetime import datetime from enum import Enum -from uuid import uuid4, UUID +from uuid import UUID, uuid4 + +from sqlmodel import Field, SQLModel -from sqlmodel import SQLModel, Field from app.core.util import now @@ -24,24 +25,44 @@ class Job(SQLModel, table=True): id: UUID = Field( default_factory=uuid4, primary_key=True, + sa_column_kwargs={"comment": "Unique identifier for the job"}, ) task_id: str | None = Field( - nullable=True, description="Celery task ID returned when job is queued." + nullable=True, + description="Celery task ID returned when job is queued.", + sa_column_kwargs={"comment": "Celery task ID returned when job is queued"}, ) trace_id: str | None = Field( - default=None, description="Tracing ID for correlating logs and traces." + default=None, + description="Tracing ID for correlating logs and traces.", + sa_column_kwargs={"comment": "Tracing ID for correlating logs and traces"}, ) error_message: str | None = Field( - default=None, description="Error details if the job fails." + default=None, + description="Error details if the job fails.", + sa_column_kwargs={"comment": "Error details if the job fails"}, ) status: JobStatus = Field( - default=JobStatus.PENDING, description="Current state of the job." + default=JobStatus.PENDING, + description="Current state of the job.", + sa_column_kwargs={ + "comment": "Current state of the job (PENDING, PROCESSING, SUCCESS, FAILED)" + }, ) job_type: JobType = Field( - description="Type of job being executed (e.g., response, ingestion)." + description="Type of job being executed (e.g., response, ingestion).", + sa_column_kwargs={ + "comment": "Type of job being executed (e.g., RESPONSE, LLM_API)" + }, + ) + created_at: datetime = Field( + default_factory=now, + sa_column_kwargs={"comment": "Timestamp when the job was created"}, + ) + updated_at: datetime = Field( + default_factory=now, + sa_column_kwargs={"comment": "Timestamp when the job was last updated"}, ) - created_at: datetime = Field(default_factory=now) - updated_at: datetime = Field(default_factory=now) class JobUpdate(SQLModel): diff --git a/backend/app/models/model_evaluation.py b/backend/app/models/model_evaluation.py index 900b57b6c..b4c9f447d 100644 --- a/backend/app/models/model_evaluation.py +++ b/backend/app/models/model_evaluation.py @@ -1,15 +1,19 @@ -from typing import Optional -from uuid import UUID -from enum import Enum from datetime import datetime +from enum import Enum +from typing import TYPE_CHECKING +from uuid import UUID -from sqlmodel import SQLModel, Field, Relationship +from pydantic import field_validator from sqlalchemy import Column, Text from sqlalchemy.dialects.postgresql import JSON -from pydantic import field_validator +from sqlmodel import Field, Relationship, SQLModel from app.core.util import now +if TYPE_CHECKING: + from .fine_tuning import Fine_Tuning + from .project import Project + class ModelEvaluationStatus(str, Enum): pending = "pending" @@ -36,60 +40,103 @@ def dedupe_ids(cls, v: list[int]) -> list[int]: class ModelEvaluation(ModelEvaluationBase, table=True): - """Database model for keeping a record of model evaluation""" + """Database model for keeping a record of model evaluation.""" __tablename__ = "model_evaluation" - id: int = Field(primary_key=True) - + id: int = Field( + primary_key=True, + sa_column_kwargs={"comment": "Unique identifier for the evaluation"}, + ) + fine_tuning_id: int = Field( + foreign_key="fine_tuning.id", + nullable=False, + ondelete="CASCADE", + sa_column_kwargs={"comment": "Reference to the fine-tuning job"}, + ) document_id: UUID = Field( foreign_key="document.id", nullable=False, + sa_column_kwargs={"comment": "Reference to the evaluation document"}, + ) + fine_tuned_model: str = Field( + sa_column_kwargs={"comment": "Name of the fine-tuned model being evaluated"}, ) - fine_tuned_model: str = Field(description="fine tuned model name from OpenAI") test_data_s3_object: str = Field( - description="S3 URI of the testing data stored in S3" + sa_column_kwargs={"comment": "S3 URI of the testing data"}, + ) + base_model: str = Field( + nullable=False, + sa_column_kwargs={"comment": "Base model used for fine-tuning"}, ) - base_model: str = Field(nullable=False, description="Base model for fine-tuning") split_ratio: float = Field( - nullable=False, description="the ratio the dataset was divided in" + nullable=False, + sa_column_kwargs={"comment": "Train/test split ratio used"}, + ) + system_prompt: str = Field( + sa_column=Column( + Text, nullable=False, comment="System prompt used during evaluation" + ) ) - system_prompt: str = Field(sa_column=Column(Text, nullable=False)) score: dict[str, float] | None = Field( - sa_column=Column(JSON, nullable=True), - description="Evaluation scores per metric (e.g., {'mcc': 0.85})", + sa_column=Column( + JSON, nullable=True, comment="Evaluation scores per metric (e.g., MCC)" + ), ) prediction_data_s3_object: str | None = Field( default=None, - description="S3 URL where the prediction data generated by the fine-tuned model is stored", + sa_column_kwargs={"comment": "S3 URL where the prediction data is stored"}, ) - status: ModelEvaluationStatus = ( - Field(default=ModelEvaluationStatus.pending, description="Evaluation status"), + status: ModelEvaluationStatus = Field( + default=ModelEvaluationStatus.pending, + sa_column_kwargs={"comment": "Current status of the evaluation"}, ) error_message: str | None = Field( - default=None, description="error message for when something failed" + default=None, + sa_column_kwargs={"comment": "Error message if evaluation failed"}, ) project_id: int = Field( - foreign_key="project.id", nullable=False, ondelete="CASCADE" + foreign_key="project.id", + nullable=False, + ondelete="CASCADE", + sa_column_kwargs={"comment": "Reference to the project"}, ) organization_id: int = Field( - foreign_key="organization.id", nullable=False, ondelete="CASCADE" + foreign_key="organization.id", + nullable=False, + ondelete="CASCADE", + sa_column_kwargs={"comment": "Reference to the organization"}, + ) + is_deleted: bool = Field( + default=False, + nullable=False, + sa_column_kwargs={"comment": "Soft delete flag"}, + ) + inserted_at: datetime = Field( + default_factory=now, + nullable=False, + sa_column_kwargs={"comment": "Timestamp when the evaluation was created"}, + ) + updated_at: datetime = Field( + default_factory=now, + nullable=False, + sa_column_kwargs={"comment": "Timestamp when the evaluation was last updated"}, + ) + deleted_at: datetime | None = Field( + default=None, + nullable=True, + sa_column_kwargs={"comment": "Timestamp when the evaluation was deleted"}, ) - is_deleted: bool = Field(default=False, nullable=False) - - inserted_at: datetime = Field(default_factory=now, nullable=False) - updated_at: datetime = Field(default_factory=now, nullable=False) - deleted_at: datetime | None = Field(default=None, nullable=True) project: "Project" = Relationship() fine_tuning: "Fine_Tuning" = Relationship(back_populates="model_evaluation") class ModelEvaluationUpdate(SQLModel): - score: Optional[dict[str, float]] = None - status: Optional[ModelEvaluationStatus] = None - error_message: Optional[str] = None - prediction_data_s3_object: Optional[str] = None + score: dict[str, float] | None = None + status: ModelEvaluationStatus | None = None + error_message: str | None = None + prediction_data_s3_object: str | None = None class ModelEvaluationPublic(ModelEvaluationBase): diff --git a/backend/app/models/openai_conversation.py b/backend/app/models/openai_conversation.py index 6003c7205..2bc63f836 100644 --- a/backend/app/models/openai_conversation.py +++ b/backend/app/models/openai_conversation.py @@ -1,12 +1,16 @@ import re - from datetime import datetime -from typing import Optional +from typing import TYPE_CHECKING + from pydantic import field_validator from sqlmodel import Field, Relationship, SQLModel from app.core.util import now +if TYPE_CHECKING: + from .organization import Organization + from .project import Project + def validate_response_id_pattern(v: str) -> str: """Shared validation function for response ID patterns""" @@ -26,17 +30,17 @@ class OpenAIConversationBase(SQLModel): index=True, description="Ancestor response ID for conversation threading", ) - previous_response_id: Optional[str] = Field( + previous_response_id: str | None = Field( default=None, index=True, description="Previous response ID in the conversation" ) user_question: str = Field(description="User's question/input") - response: Optional[str] = Field(default=None, description="AI response") + response: str | None = Field(default=None, description="AI response") # there are models with small name like o1 and usually fine tuned models have long names model: str = Field( description="The model used for the response", min_length=1, max_length=150 ) # usually follow the pattern of asst_WD9bumYqTtpSvxxxxx - assistant_id: Optional[str] = Field( + assistant_id: str | None = Field( default=None, description="The assistant ID used", min_length=10, @@ -56,13 +60,77 @@ def validate_response_ids(cls, v): class OpenAIConversation(OpenAIConversationBase, table=True): + """Stores OpenAI conversation history and responses.""" + __tablename__ = "openai_conversation" - id: int = Field(default=None, primary_key=True) - inserted_at: datetime = Field(default_factory=now, nullable=False) - updated_at: datetime = Field(default_factory=now, nullable=False) - is_deleted: bool = Field(default=False, nullable=False) - deleted_at: Optional[datetime] = Field(default=None, nullable=True) + id: int = Field( + default=None, + primary_key=True, + sa_column_kwargs={"comment": "Unique identifier for the conversation record"}, + ) + response_id: str = Field( + index=True, + min_length=10, + sa_column_kwargs={"comment": "OpenAI response identifier"}, + ) + ancestor_response_id: str = Field( + index=True, + sa_column_kwargs={"comment": "Root response ID for conversation threading"}, + ) + previous_response_id: str | None = Field( + default=None, + index=True, + sa_column_kwargs={"comment": "Previous response ID in the conversation chain"}, + ) + user_question: str = Field( + sa_column_kwargs={"comment": "User's question or input text"}, + ) + response: str | None = Field( + default=None, + sa_column_kwargs={"comment": "Response generated by OpenAI"}, + ) + model: str = Field( + sa_column_kwargs={"comment": "Model used to generate the response"}, + ) + assistant_id: str | None = Field( + default=None, + sa_column_kwargs={"comment": "OpenAI assistant identifier if used"}, + ) + project_id: int = Field( + foreign_key="project.id", + nullable=False, + ondelete="CASCADE", + sa_column_kwargs={"comment": "Reference to the project"}, + ) + organization_id: int = Field( + foreign_key="organization.id", + nullable=False, + ondelete="CASCADE", + sa_column_kwargs={"comment": "Reference to the organization"}, + ) + inserted_at: datetime = Field( + default_factory=now, + nullable=False, + sa_column_kwargs={"comment": "Timestamp when the conversation was created"}, + ) + updated_at: datetime = Field( + default_factory=now, + nullable=False, + sa_column_kwargs={ + "comment": "Timestamp when the conversation was last updated" + }, + ) + is_deleted: bool = Field( + default=False, + nullable=False, + sa_column_kwargs={"comment": "Soft delete flag"}, + ) + deleted_at: datetime | None = Field( + default=None, + nullable=True, + sa_column_kwargs={"comment": "Timestamp when the conversation was deleted"}, + ) # Relationships project: "Project" = Relationship(back_populates="openai_conversations") @@ -75,17 +143,17 @@ class OpenAIConversationCreate(SQLModel): ancestor_response_id: str = Field( description="Ancestor response ID for conversation threading" ) - previous_response_id: Optional[str] = Field( + previous_response_id: str | None = Field( default=None, description="Previous response ID in the conversation" ) user_question: str = Field(description="User's question/input", min_length=1) - response: Optional[str] = Field(default=None, description="AI response") + response: str | None = Field(default=None, description="AI response") # there are models with small name like o1 and usually fine tuned models have long names model: str = Field( description="The model used for the response", min_length=1, max_length=150 ) # usually follow the pattern of asst_WD9bumYqTtpSvxxxxx - assistant_id: Optional[str] = Field( + assistant_id: str | None = Field( default=None, description="The assistant ID used", min_length=10, diff --git a/backend/app/models/organization.py b/backend/app/models/organization.py index db660891a..83bef9f54 100644 --- a/backend/app/models/organization.py +++ b/backend/app/models/organization.py @@ -15,8 +15,16 @@ # Shared properties for an Organization class OrganizationBase(SQLModel): - name: str = Field(unique=True, index=True, max_length=255) - is_active: bool = True + name: str = Field( + unique=True, + index=True, + max_length=255, + sa_column_kwargs={"comment": "Organization name (unique identifier)"}, + ) + is_active: bool = Field( + default=True, + sa_column_kwargs={"comment": "Flag indicating if the organization is active"}, + ) # Properties to receive via API on creation @@ -32,9 +40,23 @@ class OrganizationUpdate(SQLModel): # Database model for Organization class Organization(OrganizationBase, table=True): - id: int = Field(default=None, primary_key=True) - inserted_at: datetime = Field(default_factory=now, nullable=False) - updated_at: datetime = Field(default_factory=now, nullable=False) + id: int = Field( + default=None, + primary_key=True, + sa_column_kwargs={"comment": "Unique identifier for the organization"}, + ) + inserted_at: datetime = Field( + default_factory=now, + nullable=False, + sa_column_kwargs={"comment": "Timestamp when the organization was created"}, + ) + updated_at: datetime = Field( + default_factory=now, + nullable=False, + sa_column_kwargs={ + "comment": "Timestamp when the organization was last updated" + }, + ) # Relationship back to Creds creds: list["Credential"] = Relationship( diff --git a/backend/app/models/project.py b/backend/app/models/project.py index c0d8a87ac..6357970e2 100644 --- a/backend/app/models/project.py +++ b/backend/app/models/project.py @@ -17,9 +17,20 @@ # Shared properties for a Project class ProjectBase(SQLModel): - name: str = Field(index=True, max_length=255) - description: str | None = Field(default=None, max_length=500) - is_active: bool = True + name: str = Field( + index=True, + max_length=255, + sa_column_kwargs={"comment": "Project name"}, + ) + description: str | None = Field( + default=None, + max_length=500, + sa_column_kwargs={"comment": "Project description"}, + ) + is_active: bool = Field( + default=True, + sa_column_kwargs={"comment": "Flag indicating if the project is active"}, + ) # Properties to receive via API on creation @@ -40,13 +51,34 @@ class Project(ProjectBase, table=True): UniqueConstraint("name", "organization_id", name="uq_project_name_org_id"), ) - id: int = Field(default=None, primary_key=True) + id: int = Field( + default=None, + primary_key=True, + sa_column_kwargs={"comment": "Unique identifier for the project"}, + ) organization_id: int = Field( - foreign_key="organization.id", index=True, nullable=False, ondelete="CASCADE" + foreign_key="organization.id", + index=True, + nullable=False, + ondelete="CASCADE", + sa_column_kwargs={"comment": "Reference to the organization"}, + ) + storage_path: UUID = Field( + default_factory=uuid4, + nullable=False, + unique=True, + sa_column_kwargs={"comment": "Unique UUID used for cloud storage path"}, + ) + inserted_at: datetime = Field( + default_factory=now, + nullable=False, + sa_column_kwargs={"comment": "Timestamp when the project was created"}, + ) + updated_at: datetime = Field( + default_factory=now, + nullable=False, + sa_column_kwargs={"comment": "Timestamp when the project was last updated"}, ) - storage_path: UUID = Field(default_factory=uuid4, nullable=False, unique=True) - inserted_at: datetime = Field(default_factory=now, nullable=False) - updated_at: datetime = Field(default_factory=now, nullable=False) creds: list["Credential"] = Relationship( back_populates="project", cascade_delete=True diff --git a/backend/app/models/threads.py b/backend/app/models/threads.py index e353c6760..5394cc73a 100644 --- a/backend/app/models/threads.py +++ b/backend/app/models/threads.py @@ -1,14 +1,16 @@ -from sqlmodel import SQLModel, Field -from typing import Optional from datetime import datetime +from sqlmodel import Field, SQLModel + +from app.core.util import now + class OpenAIThreadBase(SQLModel): thread_id: str = Field(index=True, unique=True) prompt: str - response: Optional[str] = None - status: Optional[str] = None - error: Optional[str] = None + response: str | None = None + status: str | None = None + error: str | None = None class OpenAIThreadCreate(OpenAIThreadBase): @@ -16,6 +18,38 @@ class OpenAIThreadCreate(OpenAIThreadBase): class OpenAI_Thread(OpenAIThreadBase, table=True): - id: int = Field(default=None, primary_key=True) - inserted_at: datetime = Field(default_factory=datetime.utcnow) - updated_at: datetime = Field(default_factory=datetime.utcnow) + """Stores OpenAI thread interactions and their responses.""" + + id: int = Field( + default=None, + primary_key=True, + sa_column_kwargs={"comment": "Unique identifier for the thread record"}, + ) + thread_id: str = Field( + index=True, + unique=True, + sa_column_kwargs={"comment": "OpenAI thread identifier"}, + ) + prompt: str = Field( + sa_column_kwargs={"comment": "User prompt sent to the thread"}, + ) + response: str | None = Field( + default=None, + sa_column_kwargs={"comment": "Response received from OpenAI"}, + ) + status: str | None = Field( + default=None, + sa_column_kwargs={"comment": "Current status of the thread interaction"}, + ) + error: str | None = Field( + default=None, + sa_column_kwargs={"comment": "Error message if the interaction failed"}, + ) + inserted_at: datetime = Field( + default_factory=now, + sa_column_kwargs={"comment": "Timestamp when the record was created"}, + ) + updated_at: datetime = Field( + default_factory=now, + sa_column_kwargs={"comment": "Timestamp when the record was last updated"}, + ) diff --git a/backend/app/models/user.py b/backend/app/models/user.py index 82a982624..1fbac459b 100644 --- a/backend/app/models/user.py +++ b/backend/app/models/user.py @@ -1,15 +1,30 @@ -import uuid - from pydantic import EmailStr -from sqlmodel import Field, Relationship, SQLModel +from sqlmodel import Field, SQLModel # Shared properties class UserBase(SQLModel): - email: EmailStr = Field(unique=True, index=True, max_length=255) - is_active: bool = True - is_superuser: bool = False - full_name: str | None = Field(default=None, max_length=255) + email: EmailStr = Field( + unique=True, + index=True, + max_length=255, + sa_column_kwargs={"comment": "User's email address (unique identifier)"}, + ) + is_active: bool = Field( + default=True, + sa_column_kwargs={"comment": "Flag indicating if the user account is active"}, + ) + is_superuser: bool = Field( + default=False, + sa_column_kwargs={ + "comment": "Flag indicating if user has superuser privileges" + }, + ) + full_name: str | None = Field( + default=None, + max_length=255, + sa_column_kwargs={"comment": "User's full name"}, + ) # Properties to receive via API on creation @@ -46,8 +61,14 @@ class UpdatePassword(SQLModel): # Database model, database table inferred from class name class User(UserBase, table=True): - id: int = Field(default=None, primary_key=True) - hashed_password: str + id: int = Field( + default=None, + primary_key=True, + sa_column_kwargs={"comment": "Unique identifier for the user"}, + ) + hashed_password: str = Field( + sa_column_kwargs={"comment": "Bcrypt hash of the user's password"}, + ) class UserOrganization(UserBase): From c75c5fe2b92f5cb85d062e949f86c86d33665285 Mon Sep 17 00:00:00 2001 From: AkhileshNegi Date: Thu, 11 Dec 2025 13:32:57 +0530 Subject: [PATCH 05/13] cleanups --- backend/app/models/api_key.py | 17 +++++--- backend/app/models/assistants.py | 14 ++++--- backend/app/models/collection.py | 25 ++++++------ backend/app/models/collection_job.py | 30 +++++++------- backend/app/models/credentials.py | 21 ++++++---- backend/app/models/doc_transformation_job.py | 26 +++++++----- backend/app/models/document.py | 42 ++++++++++++-------- backend/app/models/fine_tuning.py | 25 +++++++----- backend/app/models/job.py | 4 ++ backend/app/models/model_evaluation.py | 37 +++++++++-------- backend/app/models/openai_conversation.py | 14 ++++--- backend/app/models/organization.py | 8 +++- backend/app/models/project.py | 21 +++++++--- backend/app/models/threads.py | 2 + backend/app/models/user.py | 4 ++ 15 files changed, 180 insertions(+), 110 deletions(-) diff --git a/backend/app/models/api_key.py b/backend/app/models/api_key.py index d06ad1fbf..516073f2d 100644 --- a/backend/app/models/api_key.py +++ b/backend/app/models/api_key.py @@ -7,6 +7,9 @@ class APIKeyBase(SQLModel): + """Base model for API keys with foreign key fields.""" + + # Foreign keys organization_id: int = Field( foreign_key="organization.id", nullable=False, @@ -43,12 +46,13 @@ class APIKeyCreateResponse(APIKeyPublic): class APIKey(APIKeyBase, table=True): + """Database model for API keys.""" + id: UUID = Field( default_factory=uuid4, primary_key=True, sa_column_kwargs={"comment": "Unique identifier for the API key"}, ) - key_prefix: str = Field( unique=True, index=True, @@ -61,7 +65,13 @@ class APIKey(APIKeyBase, table=True): nullable=False, sa_column_kwargs={"comment": "Bcrypt hash of the secret of the API key"}, ) + is_deleted: bool = Field( + default=False, + nullable=False, + sa_column_kwargs={"comment": "Soft delete flag"}, + ) + # Timestamps inserted_at: datetime = Field( default_factory=now, nullable=False, @@ -72,11 +82,6 @@ class APIKey(APIKeyBase, table=True): nullable=False, sa_column_kwargs={"comment": "Timestamp when the API key was last updated"}, ) - is_deleted: bool = Field( - default=False, - nullable=False, - sa_column_kwargs={"comment": "Soft delete flag"}, - ) deleted_at: datetime | None = Field( default=None, nullable=True, diff --git a/backend/app/models/assistants.py b/backend/app/models/assistants.py index 6d40fa896..42d7c99a6 100644 --- a/backend/app/models/assistants.py +++ b/backend/app/models/assistants.py @@ -77,6 +77,13 @@ class Assistant(AssistantBase, table=True): "comment": "Parameter that controls maximum number of results to return" }, ) + is_deleted: bool = Field( + default=False, + nullable=False, + sa_column_kwargs={"comment": "Soft delete flag"}, + ) + + # Foreign keys project_id: int = Field( foreign_key="project.id", nullable=False, @@ -89,6 +96,8 @@ class Assistant(AssistantBase, table=True): ondelete="CASCADE", sa_column_kwargs={"comment": "Reference to the organization"}, ) + + # Timestamps inserted_at: datetime = Field( default_factory=now, nullable=False, @@ -99,11 +108,6 @@ class Assistant(AssistantBase, table=True): nullable=False, sa_column_kwargs={"comment": "Timestamp when the assistant was last updated"}, ) - is_deleted: bool = Field( - default=False, - nullable=False, - sa_column_kwargs={"comment": "Soft delete flag"}, - ) deleted_at: datetime | None = Field( default=None, nullable=True, diff --git a/backend/app/models/collection.py b/backend/app/models/collection.py index e063d0f9b..353deef00 100644 --- a/backend/app/models/collection.py +++ b/backend/app/models/collection.py @@ -20,14 +20,24 @@ class Collection(SQLModel, table=True): primary_key=True, sa_column_kwargs={"comment": "Unique identifier for the collection"}, ) + llm_service_id: str = Field( + nullable=False, + sa_column_kwargs={ + "comment": "External LLM service identifier (e.g., OpenAI vector store ID)" + }, + ) + llm_service_name: str = Field( + nullable=False, + sa_column_kwargs={"comment": "Name of the LLM service provider"}, + ) + # Foreign keys organization_id: int = Field( foreign_key="organization.id", nullable=False, ondelete="CASCADE", sa_column_kwargs={"comment": "Reference to the organization"}, ) - project_id: int = Field( foreign_key="project.id", nullable=False, @@ -35,17 +45,7 @@ class Collection(SQLModel, table=True): sa_column_kwargs={"comment": "Reference to the project"}, ) - llm_service_id: str = Field( - nullable=False, - sa_column_kwargs={ - "comment": "External LLM service identifier (e.g., OpenAI vector store ID)" - }, - ) - llm_service_name: str = Field( - nullable=False, - sa_column_kwargs={"comment": "Name of the LLM service provider"}, - ) - + # Timestamps inserted_at: datetime = Field( default_factory=now, sa_column_kwargs={"comment": "Timestamp when the collection was created"}, @@ -59,6 +59,7 @@ class Collection(SQLModel, table=True): sa_column_kwargs={"comment": "Timestamp when the collection was deleted"}, ) + # Relationships organization: Organization = Relationship(back_populates="collections") project: Project = Relationship(back_populates="collections") diff --git a/backend/app/models/collection_job.py b/backend/app/models/collection_job.py index f81d68d04..d60817a7f 100644 --- a/backend/app/models/collection_job.py +++ b/backend/app/models/collection_job.py @@ -43,18 +43,6 @@ class CollectionJob(SQLModel, table=True): description="Type of operation", sa_column_kwargs={"comment": "Type of operation (CREATE, DELETE)"}, ) - collection_id: UUID | None = Field( - foreign_key="collection.id", - nullable=True, - ondelete="CASCADE", - sa_column_kwargs={"comment": "Reference to the collection"}, - ) - project_id: int = Field( - foreign_key="project.id", - nullable=False, - ondelete="CASCADE", - sa_column_kwargs={"comment": "Reference to the project"}, - ) task_id: str = Field( nullable=True, sa_column_kwargs={"comment": "Celery task ID for async processing"}, @@ -64,19 +52,33 @@ class CollectionJob(SQLModel, table=True): description="Tracing ID for correlating logs and traces.", sa_column_kwargs={"comment": "Tracing ID for correlating logs and traces"}, ) - error_message: str | None = Field( sa_column=Column( Text, nullable=True, comment="Error message if the job failed" ), ) + + # Foreign keys + collection_id: UUID | None = Field( + foreign_key="collection.id", + nullable=True, + ondelete="CASCADE", + sa_column_kwargs={"comment": "Reference to the collection"}, + ) + project_id: int = Field( + foreign_key="project.id", + nullable=False, + ondelete="CASCADE", + sa_column_kwargs={"comment": "Reference to the project"}, + ) + + # Timestamps inserted_at: datetime = Field( default_factory=now, nullable=False, description="When the job record was created", sa_column_kwargs={"comment": "Timestamp when the job was created"}, ) - updated_at: datetime = Field( default_factory=now, nullable=False, diff --git a/backend/app/models/credentials.py b/backend/app/models/credentials.py index 150797c5e..31c85303a 100644 --- a/backend/app/models/credentials.py +++ b/backend/app/models/credentials.py @@ -12,8 +12,17 @@ class CredsBase(SQLModel): - """Database model for CredsBase operations.""" + """Base model for credentials with foreign keys and common fields.""" + is_active: bool = Field( + default=True, + nullable=False, + sa_column_kwargs={ + "comment": "Flag indicating if this credential is currently active and usable" + }, + ) + + # Foreign keys organization_id: int = Field( foreign_key="organization.id", nullable=False, @@ -26,13 +35,6 @@ class CredsBase(SQLModel): ondelete="CASCADE", sa_column_kwargs={"comment": "Reference to the project"}, ) - is_active: bool = Field( - default=True, - nullable=False, - sa_column_kwargs={ - "comment": "Flag indicating if this credential is currently active and usable" - }, - ) class CredsCreate(SQLModel): @@ -96,6 +98,8 @@ class Credential(CredsBase, table=True): "comment": "Encrypted JSON string containing provider-specific API credentials" }, ) + + # Timestamps inserted_at: datetime = Field( default_factory=now, nullable=False, @@ -107,6 +111,7 @@ class Credential(CredsBase, table=True): sa_column_kwargs={"comment": "Timestamp when the credential was last updated"}, ) + # Relationships organization: "Organization | None" = Relationship(back_populates="creds") project: "Project | None" = Relationship(back_populates="creds") diff --git a/backend/app/models/doc_transformation_job.py b/backend/app/models/doc_transformation_job.py index a80745373..e91d14eaa 100644 --- a/backend/app/models/doc_transformation_job.py +++ b/backend/app/models/doc_transformation_job.py @@ -25,17 +25,6 @@ class DocTransformationJob(SQLModel, table=True): primary_key=True, sa_column_kwargs={"comment": "Unique identifier for the transformation job"}, ) - source_document_id: UUID = Field( - foreign_key="document.id", - sa_column_kwargs={ - "comment": "Reference to the source document being transformed" - }, - ) - transformed_document_id: UUID | None = Field( - default=None, - foreign_key="document.id", - sa_column_kwargs={"comment": "Reference to the resulting transformed document"}, - ) status: TransformationStatus = Field( default=TransformationStatus.PENDING, sa_column_kwargs={ @@ -56,6 +45,21 @@ class DocTransformationJob(SQLModel, table=True): default=None, sa_column_kwargs={"comment": "Error message if transformation failed"}, ) + + # Foreign keys + source_document_id: UUID = Field( + foreign_key="document.id", + sa_column_kwargs={ + "comment": "Reference to the source document being transformed" + }, + ) + transformed_document_id: UUID | None = Field( + default=None, + foreign_key="document.id", + sa_column_kwargs={"comment": "Reference to the resulting transformed document"}, + ) + + # Timestamps inserted_at: datetime = Field( default_factory=now, sa_column_kwargs={"comment": "Timestamp when the job was created"}, diff --git a/backend/app/models/document.py b/backend/app/models/document.py index 126f64e31..bffa7b39c 100644 --- a/backend/app/models/document.py +++ b/backend/app/models/document.py @@ -8,6 +8,14 @@ class DocumentBase(SQLModel): + """Base model for documents with common fields.""" + + fname: str = Field( + description="The original filename of the document", + sa_column_kwargs={"comment": "Original filename of the document"}, + ) + + # Foreign keys project_id: int = Field( description="The ID of the project to which the document belongs", foreign_key="project.id", @@ -15,13 +23,11 @@ class DocumentBase(SQLModel): ondelete="CASCADE", sa_column_kwargs={"comment": "Reference to the project"}, ) - fname: str = Field( - description="The original filename of the document", - sa_column_kwargs={"comment": "Original filename of the document"}, - ) class Document(DocumentBase, table=True): + """Database model for documents.""" + id: UUID = Field( default_factory=uuid4, primary_key=True, @@ -31,6 +37,22 @@ class Document(DocumentBase, table=True): object_store_url: str = Field( sa_column_kwargs={"comment": "Cloud storage URL for the document"}, ) + is_deleted: bool = Field( + default=False, + sa_column_kwargs={"comment": "Soft delete flag"}, + ) + + # Foreign keys + source_document_id: UUID | None = Field( + default=None, + foreign_key="document.id", + nullable=True, + sa_column_kwargs={ + "comment": "Reference to source document if this is a transformation" + }, + ) + + # Timestamps inserted_at: datetime = Field( default_factory=now, description="The timestamp when the document was inserted", @@ -41,22 +63,10 @@ class Document(DocumentBase, table=True): description="The timestamp when the document was last updated", sa_column_kwargs={"comment": "Timestamp when the document was last updated"}, ) - is_deleted: bool = Field( - default=False, - sa_column_kwargs={"comment": "Soft delete flag"}, - ) deleted_at: datetime | None = Field( default=None, sa_column_kwargs={"comment": "Timestamp when the document was deleted"}, ) - source_document_id: UUID | None = Field( - default=None, - foreign_key="document.id", - nullable=True, - sa_column_kwargs={ - "comment": "Reference to source document if this is a transformation" - }, - ) class DocumentPublic(DocumentBase): diff --git a/backend/app/models/fine_tuning.py b/backend/app/models/fine_tuning.py index e2d3dfa3c..5cad32378 100644 --- a/backend/app/models/fine_tuning.py +++ b/backend/app/models/fine_tuning.py @@ -71,11 +71,6 @@ class Fine_Tuning(FineTuningJobBase, table=True): nullable=False, sa_column_kwargs={"comment": "Train/test split ratio for the dataset"}, ) - document_id: UUID = Field( - foreign_key="document.id", - nullable=False, - sa_column_kwargs={"comment": "Reference to the training document"}, - ) training_file_id: str | None = Field( default=None, sa_column_kwargs={"comment": "OpenAI training file identifier"}, @@ -109,6 +104,18 @@ class Fine_Tuning(FineTuningJobBase, table=True): default=None, sa_column_kwargs={"comment": "Error message if the job failed"}, ) + is_deleted: bool = Field( + default=False, + nullable=False, + sa_column_kwargs={"comment": "Soft delete flag"}, + ) + + # Foreign keys + document_id: UUID = Field( + foreign_key="document.id", + nullable=False, + sa_column_kwargs={"comment": "Reference to the training document"}, + ) project_id: int = Field( foreign_key="project.id", nullable=False, @@ -121,11 +128,8 @@ class Fine_Tuning(FineTuningJobBase, table=True): ondelete="CASCADE", sa_column_kwargs={"comment": "Reference to the organization"}, ) - is_deleted: bool = Field( - default=False, - nullable=False, - sa_column_kwargs={"comment": "Soft delete flag"}, - ) + + # Timestamps inserted_at: datetime = Field( default_factory=now, nullable=False, @@ -142,6 +146,7 @@ class Fine_Tuning(FineTuningJobBase, table=True): sa_column_kwargs={"comment": "Timestamp when the job was deleted"}, ) + # Relationships project: "Project" = Relationship(back_populates="fine_tuning") model_evaluation: "ModelEvaluation" = Relationship(back_populates="fine_tuning") diff --git a/backend/app/models/job.py b/backend/app/models/job.py index e31941816..b6a1a5ae7 100644 --- a/backend/app/models/job.py +++ b/backend/app/models/job.py @@ -20,6 +20,8 @@ class JobType(str, Enum): class Job(SQLModel, table=True): + """Database model for tracking async jobs.""" + __tablename__ = "job" id: UUID = Field( @@ -55,6 +57,8 @@ class Job(SQLModel, table=True): "comment": "Type of job being executed (e.g., RESPONSE, LLM_API)" }, ) + + # Timestamps created_at: datetime = Field( default_factory=now, sa_column_kwargs={"comment": "Timestamp when the job was created"}, diff --git a/backend/app/models/model_evaluation.py b/backend/app/models/model_evaluation.py index b4c9f447d..72bbdaf0f 100644 --- a/backend/app/models/model_evaluation.py +++ b/backend/app/models/model_evaluation.py @@ -48,17 +48,6 @@ class ModelEvaluation(ModelEvaluationBase, table=True): primary_key=True, sa_column_kwargs={"comment": "Unique identifier for the evaluation"}, ) - fine_tuning_id: int = Field( - foreign_key="fine_tuning.id", - nullable=False, - ondelete="CASCADE", - sa_column_kwargs={"comment": "Reference to the fine-tuning job"}, - ) - document_id: UUID = Field( - foreign_key="document.id", - nullable=False, - sa_column_kwargs={"comment": "Reference to the evaluation document"}, - ) fine_tuned_model: str = Field( sa_column_kwargs={"comment": "Name of the fine-tuned model being evaluated"}, ) @@ -95,6 +84,24 @@ class ModelEvaluation(ModelEvaluationBase, table=True): default=None, sa_column_kwargs={"comment": "Error message if evaluation failed"}, ) + is_deleted: bool = Field( + default=False, + nullable=False, + sa_column_kwargs={"comment": "Soft delete flag"}, + ) + + # Foreign keys + fine_tuning_id: int = Field( + foreign_key="fine_tuning.id", + nullable=False, + ondelete="CASCADE", + sa_column_kwargs={"comment": "Reference to the fine-tuning job"}, + ) + document_id: UUID = Field( + foreign_key="document.id", + nullable=False, + sa_column_kwargs={"comment": "Reference to the evaluation document"}, + ) project_id: int = Field( foreign_key="project.id", nullable=False, @@ -107,11 +114,8 @@ class ModelEvaluation(ModelEvaluationBase, table=True): ondelete="CASCADE", sa_column_kwargs={"comment": "Reference to the organization"}, ) - is_deleted: bool = Field( - default=False, - nullable=False, - sa_column_kwargs={"comment": "Soft delete flag"}, - ) + + # Timestamps inserted_at: datetime = Field( default_factory=now, nullable=False, @@ -128,6 +132,7 @@ class ModelEvaluation(ModelEvaluationBase, table=True): sa_column_kwargs={"comment": "Timestamp when the evaluation was deleted"}, ) + # Relationships project: "Project" = Relationship() fine_tuning: "Fine_Tuning" = Relationship(back_populates="model_evaluation") diff --git a/backend/app/models/openai_conversation.py b/backend/app/models/openai_conversation.py index 2bc63f836..7305aec4b 100644 --- a/backend/app/models/openai_conversation.py +++ b/backend/app/models/openai_conversation.py @@ -97,6 +97,13 @@ class OpenAIConversation(OpenAIConversationBase, table=True): default=None, sa_column_kwargs={"comment": "OpenAI assistant identifier if used"}, ) + is_deleted: bool = Field( + default=False, + nullable=False, + sa_column_kwargs={"comment": "Soft delete flag"}, + ) + + # Foreign keys project_id: int = Field( foreign_key="project.id", nullable=False, @@ -109,6 +116,8 @@ class OpenAIConversation(OpenAIConversationBase, table=True): ondelete="CASCADE", sa_column_kwargs={"comment": "Reference to the organization"}, ) + + # Timestamps inserted_at: datetime = Field( default_factory=now, nullable=False, @@ -121,11 +130,6 @@ class OpenAIConversation(OpenAIConversationBase, table=True): "comment": "Timestamp when the conversation was last updated" }, ) - is_deleted: bool = Field( - default=False, - nullable=False, - sa_column_kwargs={"comment": "Soft delete flag"}, - ) deleted_at: datetime | None = Field( default=None, nullable=True, diff --git a/backend/app/models/organization.py b/backend/app/models/organization.py index 83bef9f54..0f936607e 100644 --- a/backend/app/models/organization.py +++ b/backend/app/models/organization.py @@ -15,6 +15,8 @@ # Shared properties for an Organization class OrganizationBase(SQLModel): + """Base model for organizations with common data fields.""" + name: str = Field( unique=True, index=True, @@ -40,11 +42,15 @@ class OrganizationUpdate(SQLModel): # Database model for Organization class Organization(OrganizationBase, table=True): + """Database model for organizations.""" + id: int = Field( default=None, primary_key=True, sa_column_kwargs={"comment": "Unique identifier for the organization"}, ) + + # Timestamps inserted_at: datetime = Field( default_factory=now, nullable=False, @@ -58,7 +64,7 @@ class Organization(OrganizationBase, table=True): }, ) - # Relationship back to Creds + # Relationships creds: list["Credential"] = Relationship( back_populates="organization", cascade_delete=True ) diff --git a/backend/app/models/project.py b/backend/app/models/project.py index 6357970e2..66111d0cd 100644 --- a/backend/app/models/project.py +++ b/backend/app/models/project.py @@ -17,6 +17,8 @@ # Shared properties for a Project class ProjectBase(SQLModel): + """Base model for projects with common data fields.""" + name: str = Field( index=True, max_length=255, @@ -47,6 +49,8 @@ class ProjectUpdate(SQLModel): # Database model for Project class Project(ProjectBase, table=True): + """Database model for projects.""" + __table_args__ = ( UniqueConstraint("name", "organization_id", name="uq_project_name_org_id"), ) @@ -56,6 +60,14 @@ class Project(ProjectBase, table=True): primary_key=True, sa_column_kwargs={"comment": "Unique identifier for the project"}, ) + storage_path: UUID = Field( + default_factory=uuid4, + nullable=False, + unique=True, + sa_column_kwargs={"comment": "Unique UUID used for cloud storage path"}, + ) + + # Foreign keys organization_id: int = Field( foreign_key="organization.id", index=True, @@ -63,12 +75,8 @@ class Project(ProjectBase, table=True): ondelete="CASCADE", sa_column_kwargs={"comment": "Reference to the organization"}, ) - storage_path: UUID = Field( - default_factory=uuid4, - nullable=False, - unique=True, - sa_column_kwargs={"comment": "Unique UUID used for cloud storage path"}, - ) + + # Timestamps inserted_at: datetime = Field( default_factory=now, nullable=False, @@ -80,6 +88,7 @@ class Project(ProjectBase, table=True): sa_column_kwargs={"comment": "Timestamp when the project was last updated"}, ) + # Relationships creds: list["Credential"] = Relationship( back_populates="project", cascade_delete=True ) diff --git a/backend/app/models/threads.py b/backend/app/models/threads.py index 5394cc73a..37d605087 100644 --- a/backend/app/models/threads.py +++ b/backend/app/models/threads.py @@ -45,6 +45,8 @@ class OpenAI_Thread(OpenAIThreadBase, table=True): default=None, sa_column_kwargs={"comment": "Error message if the interaction failed"}, ) + + # Timestamps inserted_at: datetime = Field( default_factory=now, sa_column_kwargs={"comment": "Timestamp when the record was created"}, diff --git a/backend/app/models/user.py b/backend/app/models/user.py index 1fbac459b..413a1ab96 100644 --- a/backend/app/models/user.py +++ b/backend/app/models/user.py @@ -4,6 +4,8 @@ # Shared properties class UserBase(SQLModel): + """Base model for users with common data fields.""" + email: EmailStr = Field( unique=True, index=True, @@ -61,6 +63,8 @@ class UpdatePassword(SQLModel): # Database model, database table inferred from class name class User(UserBase, table=True): + """Database model for users.""" + id: int = Field( default=None, primary_key=True, From d5bce318aaa1b06ddfaad6b85b47c6ae0b2c7a88 Mon Sep 17 00:00:00 2001 From: AkhileshNegi Date: Thu, 11 Dec 2025 13:55:22 +0530 Subject: [PATCH 06/13] updated migration --- .../versions/707b8035b64c_add_db_comments.py | 3026 +++++++++++++++++ .../versions/8d3c3e8ce7b8_add_db_comments.py | 149 - 2 files changed, 3026 insertions(+), 149 deletions(-) create mode 100644 backend/app/alembic/versions/707b8035b64c_add_db_comments.py delete mode 100644 backend/app/alembic/versions/8d3c3e8ce7b8_add_db_comments.py diff --git a/backend/app/alembic/versions/707b8035b64c_add_db_comments.py b/backend/app/alembic/versions/707b8035b64c_add_db_comments.py new file mode 100644 index 000000000..265c1d55e --- /dev/null +++ b/backend/app/alembic/versions/707b8035b64c_add_db_comments.py @@ -0,0 +1,3026 @@ +"""add_db_comments + +Revision ID: 707b8035b64c +Revises: eed36ae3c79a +Create Date: 2025-12-11 13:55:04.965381 + +""" +from alembic import op +import sqlalchemy as sa +import sqlmodel.sql.sqltypes +from sqlalchemy.dialects import postgresql + +# revision identifiers, used by Alembic. +revision = "707b8035b64c" +down_revision = "eed36ae3c79a" +branch_labels = None +depends_on = None + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.alter_column( + "apikey", + "organization_id", + existing_type=sa.INTEGER(), + comment="Reference to the organization", + existing_nullable=False, + ) + op.alter_column( + "apikey", + "project_id", + existing_type=sa.INTEGER(), + comment="Reference to the project", + existing_nullable=False, + ) + op.alter_column( + "apikey", + "user_id", + existing_type=sa.INTEGER(), + comment="Reference to the user for whom the API key was created", + existing_nullable=False, + ) + op.alter_column( + "apikey", + "id", + existing_type=sa.UUID(), + comment="Unique identifier for the API key", + existing_nullable=False, + ) + op.alter_column( + "apikey", + "key_prefix", + existing_type=sa.VARCHAR(), + comment="Unique prefix portion of the API key for identification", + existing_nullable=False, + ) + op.alter_column( + "apikey", + "key_hash", + existing_type=sa.VARCHAR(), + comment="Bcrypt hash of the secret of the API key", + existing_nullable=False, + ) + op.alter_column( + "apikey", + "is_deleted", + existing_type=sa.BOOLEAN(), + comment="Soft delete flag", + existing_nullable=False, + ) + op.alter_column( + "apikey", + "inserted_at", + existing_type=postgresql.TIMESTAMP(), + comment="Timestamp when the API key was created", + existing_nullable=False, + ) + op.alter_column( + "apikey", + "updated_at", + existing_type=postgresql.TIMESTAMP(), + comment="Timestamp when the API key was last updated", + existing_nullable=False, + ) + op.alter_column( + "apikey", + "deleted_at", + existing_type=postgresql.TIMESTAMP(), + comment="Timestamp when the API key was deleted", + existing_nullable=True, + ) + op.alter_column( + "batch_job", + "id", + existing_type=sa.INTEGER(), + comment="Unique identifier for the batch job", + existing_nullable=False, + autoincrement=True, + ) + op.alter_column( + "batch_job", + "provider", + existing_type=sa.VARCHAR(), + comment="LLM provider name (e.g., openai, anthropic)", + existing_nullable=False, + ) + op.alter_column( + "batch_job", + "job_type", + existing_type=sa.VARCHAR(), + comment="Type of batch job (e.g., evaluation, classification, embedding)", + existing_nullable=False, + ) + op.alter_column( + "batch_job", + "config", + existing_type=postgresql.JSONB(astext_type=sa.Text()), + comment="Complete batch configuration including model, temperature, instructions, tools, etc.", + existing_nullable=False, + existing_server_default=sa.text("'{}'::jsonb"), + ) + op.alter_column( + "batch_job", + "provider_batch_id", + existing_type=sa.VARCHAR(), + comment="Provider's batch job ID (e.g., OpenAI batch_id)", + existing_nullable=True, + ) + op.alter_column( + "batch_job", + "provider_file_id", + existing_type=sa.VARCHAR(), + comment="Provider's input file ID", + existing_nullable=True, + ) + op.alter_column( + "batch_job", + "provider_output_file_id", + existing_type=sa.VARCHAR(), + comment="Provider's output file ID", + existing_nullable=True, + ) + op.alter_column( + "batch_job", + "provider_status", + existing_type=sa.VARCHAR(), + comment="Provider-specific status (e.g., validating, in_progress, completed, failed)", + existing_nullable=True, + ) + op.alter_column( + "batch_job", + "raw_output_url", + existing_type=sa.VARCHAR(), + comment="S3 URL of raw batch output file", + existing_nullable=True, + ) + op.alter_column( + "batch_job", + "total_items", + existing_type=sa.INTEGER(), + comment="Total number of items in the batch", + existing_nullable=False, + existing_server_default=sa.text("0"), + ) + op.alter_column( + "batch_job", + "error_message", + existing_type=sa.TEXT(), + comment="Error message if batch failed", + existing_nullable=True, + ) + op.alter_column( + "batch_job", + "organization_id", + existing_type=sa.INTEGER(), + comment="Reference to the organization", + existing_nullable=False, + ) + op.alter_column( + "batch_job", + "project_id", + existing_type=sa.INTEGER(), + comment="Reference to the project", + existing_nullable=False, + ) + op.alter_column( + "batch_job", + "inserted_at", + existing_type=postgresql.TIMESTAMP(), + comment="Timestamp when the batch job was started", + existing_nullable=False, + ) + op.alter_column( + "batch_job", + "updated_at", + existing_type=postgresql.TIMESTAMP(), + comment="Timestamp when the batch job was last updated", + existing_nullable=False, + ) + op.alter_column( + "collection", + "id", + existing_type=sa.UUID(), + comment="Unique identifier for the collection", + existing_nullable=False, + ) + op.alter_column( + "collection", + "llm_service_id", + existing_type=sa.VARCHAR(), + comment="External LLM service identifier (e.g., OpenAI vector store ID)", + existing_nullable=False, + ) + op.alter_column( + "collection", + "llm_service_name", + existing_type=sa.VARCHAR(), + comment="Name of the LLM service provider", + existing_nullable=False, + ) + op.alter_column( + "collection", + "organization_id", + existing_type=sa.INTEGER(), + comment="Reference to the organization", + existing_nullable=False, + ) + op.alter_column( + "collection", + "project_id", + existing_type=sa.INTEGER(), + comment="Reference to the project", + existing_nullable=False, + ) + op.alter_column( + "collection", + "inserted_at", + existing_type=postgresql.TIMESTAMP(), + comment="Timestamp when the collection was created", + existing_nullable=False, + ) + op.alter_column( + "collection", + "updated_at", + existing_type=postgresql.TIMESTAMP(), + comment="Timestamp when the collection was last updated", + existing_nullable=False, + ) + op.alter_column( + "collection", + "deleted_at", + existing_type=postgresql.TIMESTAMP(), + comment="Timestamp when the collection was deleted", + existing_nullable=True, + ) + op.alter_column( + "collection_jobs", + "id", + existing_type=sa.UUID(), + comment="Unique identifier for the collection job", + existing_nullable=False, + ) + op.alter_column( + "collection_jobs", + "status", + existing_type=postgresql.ENUM( + "PENDING", "PROCESSING", "SUCCESSFUL", "FAILED", name="collectionjobstatus" + ), + comment="Current job status (PENDING, PROCESSING, SUCCESSFUL, FAILED)", + existing_nullable=False, + ) + op.alter_column( + "collection_jobs", + "action_type", + existing_type=postgresql.ENUM("CREATE", "DELETE", name="collectionactiontype"), + comment="Type of operation (CREATE, DELETE)", + existing_nullable=False, + ) + op.alter_column( + "collection_jobs", + "task_id", + existing_type=sa.VARCHAR(), + comment="Celery task ID for async processing", + existing_nullable=True, + ) + op.alter_column( + "collection_jobs", + "trace_id", + existing_type=sa.VARCHAR(), + comment="Tracing ID for correlating logs and traces", + existing_nullable=True, + ) + op.alter_column( + "collection_jobs", + "error_message", + existing_type=sa.TEXT(), + comment="Error message if the job failed", + existing_nullable=True, + ) + op.alter_column( + "collection_jobs", + "collection_id", + existing_type=sa.UUID(), + comment="Reference to the collection", + existing_nullable=True, + ) + op.alter_column( + "collection_jobs", + "project_id", + existing_type=sa.INTEGER(), + comment="Reference to the project", + existing_nullable=False, + ) + op.alter_column( + "collection_jobs", + "inserted_at", + existing_type=postgresql.TIMESTAMP(), + comment="Timestamp when the job was created", + existing_nullable=False, + ) + op.alter_column( + "collection_jobs", + "updated_at", + existing_type=postgresql.TIMESTAMP(), + comment="Timestamp when the job was last updated", + existing_nullable=False, + ) + op.alter_column( + "credential", + "is_active", + existing_type=sa.BOOLEAN(), + comment="Flag indicating if this credential is currently active and usable", + existing_nullable=False, + ) + op.alter_column( + "credential", + "organization_id", + existing_type=sa.INTEGER(), + comment="Reference to the organization", + existing_nullable=False, + ) + op.alter_column( + "credential", + "project_id", + existing_type=sa.INTEGER(), + comment="Reference to the project", + existing_nullable=False, + ) + op.alter_column( + "credential", + "id", + existing_type=sa.INTEGER(), + comment="Unique ID for the credential", + existing_nullable=False, + autoincrement=True, + ) + op.alter_column( + "credential", + "provider", + existing_type=sa.VARCHAR(), + comment="Provider name like 'openai', 'gemini'", + existing_nullable=False, + ) + op.alter_column( + "credential", + "credential", + existing_type=sa.VARCHAR(), + comment="Encrypted JSON string containing provider-specific API credentials", + existing_nullable=False, + ) + op.alter_column( + "credential", + "inserted_at", + existing_type=postgresql.TIMESTAMP(), + comment="Timestamp when the credential was created", + existing_nullable=False, + ) + op.alter_column( + "credential", + "updated_at", + existing_type=postgresql.TIMESTAMP(), + comment="Timestamp when the credential was last updated", + existing_nullable=False, + ) + op.alter_column( + "doc_transformation_job", + "id", + existing_type=sa.UUID(), + comment="Unique identifier for the transformation job", + existing_nullable=False, + ) + op.alter_column( + "doc_transformation_job", + "status", + existing_type=postgresql.ENUM( + "PENDING", "PROCESSING", "COMPLETED", "FAILED", name="transformationstatus" + ), + comment="Current status (PENDING, PROCESSING, COMPLETED, FAILED)", + existing_nullable=False, + ) + op.alter_column( + "doc_transformation_job", + "task_id", + existing_type=sa.VARCHAR(), + comment="Celery task ID for async processing", + existing_nullable=True, + ) + op.alter_column( + "doc_transformation_job", + "trace_id", + existing_type=sa.VARCHAR(), + comment="Tracing ID for correlating logs and traces", + existing_nullable=True, + ) + op.alter_column( + "doc_transformation_job", + "error_message", + existing_type=sa.VARCHAR(), + comment="Error message if transformation failed", + existing_nullable=True, + ) + op.alter_column( + "doc_transformation_job", + "source_document_id", + existing_type=sa.UUID(), + comment="Reference to the source document being transformed", + existing_nullable=False, + ) + op.alter_column( + "doc_transformation_job", + "transformed_document_id", + existing_type=sa.UUID(), + comment="Reference to the resulting transformed document", + existing_nullable=True, + ) + op.alter_column( + "doc_transformation_job", + "inserted_at", + existing_type=postgresql.TIMESTAMP(), + comment="Timestamp when the job was created", + existing_nullable=False, + ) + op.alter_column( + "doc_transformation_job", + "updated_at", + existing_type=postgresql.TIMESTAMP(), + comment="Timestamp when the job was last updated", + existing_nullable=False, + ) + op.alter_column( + "document", + "fname", + existing_type=sa.VARCHAR(), + comment="Original filename of the document", + existing_nullable=False, + ) + op.alter_column( + "document", + "project_id", + existing_type=sa.INTEGER(), + comment="Reference to the project", + existing_nullable=False, + ) + op.alter_column( + "document", + "id", + existing_type=sa.UUID(), + comment="Unique identifier for the document", + existing_nullable=False, + ) + op.alter_column( + "document", + "object_store_url", + existing_type=sa.VARCHAR(), + comment="Cloud storage URL for the document", + existing_nullable=False, + ) + op.alter_column( + "document", + "is_deleted", + existing_type=sa.BOOLEAN(), + comment="Soft delete flag", + existing_nullable=False, + ) + op.alter_column( + "document", + "source_document_id", + existing_type=sa.UUID(), + comment="Reference to source document if this is a transformation", + existing_nullable=True, + ) + op.alter_column( + "document", + "inserted_at", + existing_type=postgresql.TIMESTAMP(), + comment="Timestamp when the document was uploaded", + existing_nullable=False, + ) + op.alter_column( + "document", + "updated_at", + existing_type=postgresql.TIMESTAMP(), + comment="Timestamp when the document was last updated", + existing_nullable=False, + ) + op.alter_column( + "document", + "deleted_at", + existing_type=postgresql.TIMESTAMP(), + comment="Timestamp when the document was deleted", + existing_nullable=True, + ) + op.alter_column( + "documentcollection", + "id", + existing_type=sa.INTEGER(), + comment="Unique identifier for the document-collection link", + existing_nullable=False, + autoincrement=True, + ) + op.alter_column( + "documentcollection", + "document_id", + existing_type=sa.UUID(), + comment="Reference to the document", + existing_nullable=False, + ) + op.alter_column( + "documentcollection", + "collection_id", + existing_type=sa.UUID(), + comment="Reference to the collection", + existing_nullable=False, + ) + op.alter_column( + "evaluation_dataset", + "id", + existing_type=sa.INTEGER(), + comment="Unique identifier for the dataset", + existing_nullable=False, + autoincrement=True, + existing_server_default=sa.text( + "nextval('evaluation_dataset_id_seq'::regclass)" + ), + ) + op.alter_column( + "evaluation_dataset", + "name", + existing_type=sa.VARCHAR(), + comment="Name of the evaluation dataset", + existing_nullable=False, + ) + op.alter_column( + "evaluation_dataset", + "description", + existing_type=sa.VARCHAR(), + comment="Description of the dataset", + existing_nullable=True, + ) + op.alter_column( + "evaluation_dataset", + "dataset_metadata", + existing_type=postgresql.JSONB(astext_type=sa.Text()), + comment="Dataset metadata (item counts, duplication factor, etc.)", + existing_nullable=False, + existing_server_default=sa.text("'{}'::jsonb"), + ) + op.alter_column( + "evaluation_dataset", + "object_store_url", + existing_type=sa.VARCHAR(), + comment="S3 URL where the dataset CSV is stored", + existing_nullable=True, + ) + op.alter_column( + "evaluation_dataset", + "langfuse_dataset_id", + existing_type=sa.VARCHAR(), + comment="Langfuse dataset ID for observability integration", + existing_nullable=True, + ) + op.alter_column( + "evaluation_dataset", + "organization_id", + existing_type=sa.INTEGER(), + comment="Reference to the organization", + existing_nullable=False, + ) + op.alter_column( + "evaluation_dataset", + "project_id", + existing_type=sa.INTEGER(), + comment="Reference to the project", + existing_nullable=False, + ) + op.alter_column( + "evaluation_dataset", + "inserted_at", + existing_type=postgresql.TIMESTAMP(), + comment="Timestamp when the evaluation dataset was created", + existing_nullable=False, + ) + op.alter_column( + "evaluation_dataset", + "updated_at", + existing_type=postgresql.TIMESTAMP(), + comment="Timestamp when the evaluation dataset was last updated", + existing_nullable=False, + ) + op.alter_column( + "evaluation_run", + "id", + existing_type=sa.INTEGER(), + comment="Unique identifier for the evaluation run", + existing_nullable=False, + autoincrement=True, + ) + op.alter_column( + "evaluation_run", + "run_name", + existing_type=sa.VARCHAR(), + comment="Name of the evaluation run", + existing_nullable=False, + ) + op.alter_column( + "evaluation_run", + "dataset_name", + existing_type=sa.VARCHAR(), + comment="Name of the Langfuse dataset used", + existing_nullable=False, + ) + op.alter_column( + "evaluation_run", + "config", + existing_type=postgresql.JSONB(astext_type=sa.Text()), + comment="Evaluation configuration (model, instructions, etc.)", + existing_nullable=False, + ) + op.alter_column( + "evaluation_run", + "dataset_id", + existing_type=sa.INTEGER(), + comment="Reference to the evaluation dataset", + existing_nullable=False, + ) + op.alter_column( + "evaluation_run", + "batch_job_id", + existing_type=sa.INTEGER(), + comment="Reference to the batch job for responses", + existing_nullable=True, + ) + op.alter_column( + "evaluation_run", + "embedding_batch_job_id", + existing_type=sa.INTEGER(), + comment="Reference to the batch job for embedding similarity scoring", + existing_nullable=True, + ) + op.alter_column( + "evaluation_run", + "status", + existing_type=sa.VARCHAR(), + comment="Evaluation status (pending, processing, completed, failed)", + existing_nullable=False, + ) + op.alter_column( + "evaluation_run", + "object_store_url", + existing_type=sa.VARCHAR(), + comment="S3 URL of processed evaluation results", + existing_nullable=True, + ) + op.alter_column( + "evaluation_run", + "total_items", + existing_type=sa.INTEGER(), + comment="Total number of items evaluated", + existing_nullable=False, + ) + op.alter_column( + "evaluation_run", + "score", + existing_type=postgresql.JSONB(astext_type=sa.Text()), + comment="Evaluation scores (correctness, cosine_similarity, etc.)", + existing_nullable=True, + ) + op.alter_column( + "evaluation_run", + "error_message", + existing_type=sa.TEXT(), + comment="Error message if evaluation failed", + existing_nullable=True, + ) + op.alter_column( + "evaluation_run", + "organization_id", + existing_type=sa.INTEGER(), + comment="Reference to the organization", + existing_nullable=False, + ) + op.alter_column( + "evaluation_run", + "project_id", + existing_type=sa.INTEGER(), + comment="Reference to the project", + existing_nullable=False, + ) + op.alter_column( + "fine_tuning", + "base_model", + existing_type=sa.VARCHAR(), + comment="Base model used for fine-tuning", + existing_nullable=False, + ) + op.alter_column( + "fine_tuning", + "split_ratio", + existing_type=sa.DOUBLE_PRECISION(precision=53), + comment="Train/test split ratio for the dataset", + existing_nullable=False, + ) + op.alter_column( + "fine_tuning", + "training_file_id", + existing_type=sa.VARCHAR(), + comment="OpenAI training file identifier", + existing_nullable=True, + ) + op.alter_column( + "fine_tuning", + "system_prompt", + existing_type=sa.TEXT(), + comment="System prompt used during fine-tuning", + existing_nullable=False, + ) + op.alter_column( + "fine_tuning", + "id", + existing_type=sa.INTEGER(), + comment="Unique identifier for the fine-tuning job", + existing_nullable=False, + autoincrement=True, + ) + op.alter_column( + "fine_tuning", + "provider_job_id", + existing_type=sa.VARCHAR(), + comment="Fine-tuning job ID returned by the provider", + existing_nullable=True, + ) + op.alter_column( + "fine_tuning", + "status", + existing_type=postgresql.ENUM( + "pending", "running", "completed", "failed", name="finetuningstatus" + ), + comment="Current status of the fine-tuning job", + existing_nullable=False, + existing_server_default=sa.text("'pending'::finetuningstatus"), + ) + op.alter_column( + "fine_tuning", + "fine_tuned_model", + existing_type=sa.VARCHAR(), + comment="Name of the resulting fine-tuned model", + existing_nullable=True, + ) + op.alter_column( + "fine_tuning", + "train_data_s3_object", + existing_type=sa.VARCHAR(), + comment="S3 URI of the training data", + existing_nullable=True, + ) + op.alter_column( + "fine_tuning", + "test_data_s3_object", + existing_type=sa.VARCHAR(), + comment="S3 URI of the testing data", + existing_nullable=True, + ) + op.alter_column( + "fine_tuning", + "error_message", + existing_type=sa.VARCHAR(), + comment="Error message if the job failed", + existing_nullable=True, + ) + op.alter_column( + "fine_tuning", + "is_deleted", + existing_type=sa.BOOLEAN(), + comment="Soft delete flag", + existing_nullable=False, + ) + op.alter_column( + "fine_tuning", + "document_id", + existing_type=sa.UUID(), + comment="Reference to the training document", + existing_nullable=False, + ) + op.alter_column( + "fine_tuning", + "project_id", + existing_type=sa.INTEGER(), + comment="Reference to the project", + existing_nullable=False, + ) + op.alter_column( + "fine_tuning", + "organization_id", + existing_type=sa.INTEGER(), + comment="Reference to the organization", + existing_nullable=False, + ) + op.alter_column( + "fine_tuning", + "inserted_at", + existing_type=postgresql.TIMESTAMP(), + comment="Timestamp when the job was created", + existing_nullable=False, + ) + op.alter_column( + "fine_tuning", + "updated_at", + existing_type=postgresql.TIMESTAMP(), + comment="Timestamp when the job was last updated", + existing_nullable=False, + ) + op.alter_column( + "fine_tuning", + "deleted_at", + existing_type=postgresql.TIMESTAMP(), + comment="Timestamp when the job was deleted", + existing_nullable=True, + ) + op.alter_column( + "job", + "id", + existing_type=sa.UUID(), + comment="Unique identifier for the job", + existing_nullable=False, + ) + op.alter_column( + "job", + "task_id", + existing_type=sa.VARCHAR(), + comment="Celery task ID returned when job is queued", + existing_nullable=True, + ) + op.alter_column( + "job", + "trace_id", + existing_type=sa.VARCHAR(), + comment="Tracing ID for correlating logs and traces", + existing_nullable=True, + ) + op.alter_column( + "job", + "error_message", + existing_type=sa.VARCHAR(), + comment="Error details if the job fails", + existing_nullable=True, + ) + op.alter_column( + "job", + "status", + existing_type=postgresql.ENUM( + "PENDING", "PROCESSING", "SUCCESS", "FAILED", name="jobstatus" + ), + comment="Current state of the job (PENDING, PROCESSING, SUCCESS, FAILED)", + existing_nullable=False, + ) + op.alter_column( + "job", + "job_type", + existing_type=postgresql.ENUM("RESPONSE", "LLM_API", name="jobtype"), + comment="Type of job being executed (e.g., RESPONSE, LLM_API)", + existing_nullable=False, + ) + op.alter_column( + "job", + "created_at", + existing_type=postgresql.TIMESTAMP(), + comment="Timestamp when the job was created", + existing_nullable=False, + ) + op.alter_column( + "job", + "updated_at", + existing_type=postgresql.TIMESTAMP(), + comment="Timestamp when the job was last updated", + existing_nullable=False, + ) + op.alter_column( + "model_evaluation", + "id", + existing_type=sa.INTEGER(), + comment="Unique identifier for the evaluation", + existing_nullable=False, + autoincrement=True, + ) + op.alter_column( + "model_evaluation", + "fine_tuned_model", + existing_type=sa.VARCHAR(), + comment="Name of the fine-tuned model being evaluated", + existing_nullable=False, + ) + op.alter_column( + "model_evaluation", + "test_data_s3_object", + existing_type=sa.VARCHAR(), + comment="S3 URI of the testing data", + existing_nullable=False, + ) + op.alter_column( + "model_evaluation", + "base_model", + existing_type=sa.VARCHAR(), + comment="Base model used for fine-tuning", + existing_nullable=False, + ) + op.alter_column( + "model_evaluation", + "split_ratio", + existing_type=sa.DOUBLE_PRECISION(precision=53), + comment="Train/test split ratio used", + existing_nullable=False, + ) + op.alter_column( + "model_evaluation", + "system_prompt", + existing_type=sa.TEXT(), + comment="System prompt used during evaluation", + existing_nullable=False, + ) + op.alter_column( + "model_evaluation", + "score", + existing_type=postgresql.JSON(astext_type=sa.Text()), + comment="Evaluation scores per metric (e.g., MCC)", + existing_nullable=True, + ) + op.alter_column( + "model_evaluation", + "prediction_data_s3_object", + existing_type=sa.VARCHAR(), + comment="S3 URL where the prediction data is stored", + existing_nullable=True, + ) + op.alter_column( + "model_evaluation", + "status", + existing_type=postgresql.ENUM( + "pending", "running", "completed", "failed", name="modelevaluationstatus" + ), + comment="Current status of the evaluation", + existing_nullable=False, + existing_server_default=sa.text("'pending'::modelevaluationstatus"), + ) + op.alter_column( + "model_evaluation", + "error_message", + existing_type=sa.VARCHAR(), + comment="Error message if evaluation failed", + existing_nullable=True, + ) + op.alter_column( + "model_evaluation", + "is_deleted", + existing_type=sa.BOOLEAN(), + comment="Soft delete flag", + existing_nullable=False, + ) + op.alter_column( + "model_evaluation", + "fine_tuning_id", + existing_type=sa.INTEGER(), + comment="Reference to the fine-tuning job", + existing_nullable=False, + ) + op.alter_column( + "model_evaluation", + "document_id", + existing_type=sa.UUID(), + comment="Reference to the evaluation document", + existing_nullable=False, + ) + op.alter_column( + "model_evaluation", + "project_id", + existing_type=sa.INTEGER(), + comment="Reference to the project", + existing_nullable=False, + ) + op.alter_column( + "model_evaluation", + "organization_id", + existing_type=sa.INTEGER(), + comment="Reference to the organization", + existing_nullable=False, + ) + op.alter_column( + "model_evaluation", + "inserted_at", + existing_type=postgresql.TIMESTAMP(), + comment="Timestamp when the evaluation was created", + existing_nullable=False, + ) + op.alter_column( + "model_evaluation", + "updated_at", + existing_type=postgresql.TIMESTAMP(), + comment="Timestamp when the evaluation was last updated", + existing_nullable=False, + ) + op.alter_column( + "model_evaluation", + "deleted_at", + existing_type=postgresql.TIMESTAMP(), + comment="Timestamp when the evaluation was deleted", + existing_nullable=True, + ) + op.alter_column( + "openai_assistant", + "assistant_id", + existing_type=sa.VARCHAR(length=255), + comment="Unique identifier for the assistant at OpenAI", + existing_nullable=False, + ) + op.alter_column( + "openai_assistant", + "name", + existing_type=sa.VARCHAR(length=255), + comment="Name of the assistant", + existing_nullable=False, + ) + op.alter_column( + "openai_assistant", + "instructions", + existing_type=sa.TEXT(), + comment="System instructions for the assistant", + existing_nullable=False, + ) + op.alter_column( + "openai_assistant", + "model", + existing_type=sa.VARCHAR(), + comment="OpenAI model used by the assistant", + existing_nullable=False, + ) + op.alter_column( + "openai_assistant", + "vector_store_ids", + existing_type=postgresql.ARRAY(sa.VARCHAR()), + comment="List of OpenAI vector store IDs attached", + existing_nullable=True, + ) + op.alter_column( + "openai_assistant", + "temperature", + existing_type=sa.DOUBLE_PRECISION(precision=53), + comment="Parameter that controls the creativity or randomness of the text generated by model", + existing_nullable=False, + ) + op.alter_column( + "openai_assistant", + "max_num_results", + existing_type=sa.INTEGER(), + comment="Parameter that controls maximum number of results to return", + existing_nullable=False, + ) + op.alter_column( + "openai_assistant", + "project_id", + existing_type=sa.INTEGER(), + comment="Reference to the project", + existing_nullable=False, + ) + op.alter_column( + "openai_assistant", + "organization_id", + existing_type=sa.INTEGER(), + comment="Reference to the organization", + existing_nullable=False, + ) + op.alter_column( + "openai_assistant", + "id", + existing_type=sa.INTEGER(), + comment="Unique identifier for the assistant", + existing_nullable=False, + autoincrement=True, + ) + op.alter_column( + "openai_assistant", + "is_deleted", + existing_type=sa.BOOLEAN(), + comment="Soft delete flag", + existing_nullable=False, + existing_server_default=sa.text("false"), + ) + op.alter_column( + "openai_assistant", + "inserted_at", + existing_type=postgresql.TIMESTAMP(), + comment="Timestamp when the assistant was created", + existing_nullable=False, + ) + op.alter_column( + "openai_assistant", + "updated_at", + existing_type=postgresql.TIMESTAMP(), + comment="Timestamp when the assistant was last updated", + existing_nullable=False, + ) + op.alter_column( + "openai_assistant", + "deleted_at", + existing_type=postgresql.TIMESTAMP(), + comment="Timestamp when the assistant was deleted", + existing_nullable=True, + ) + op.alter_column( + "openai_conversation", + "response_id", + existing_type=sa.VARCHAR(), + comment="OpenAI response identifier", + existing_nullable=False, + ) + op.alter_column( + "openai_conversation", + "ancestor_response_id", + existing_type=sa.VARCHAR(), + comment="Root response ID for conversation threading", + existing_nullable=False, + ) + op.alter_column( + "openai_conversation", + "previous_response_id", + existing_type=sa.VARCHAR(), + comment="Previous response ID in the conversation chain", + existing_nullable=True, + ) + op.alter_column( + "openai_conversation", + "user_question", + existing_type=sa.VARCHAR(), + comment="User's question or input text", + existing_nullable=False, + ) + op.alter_column( + "openai_conversation", + "response", + existing_type=sa.VARCHAR(), + comment="Response generated by OpenAI", + existing_nullable=True, + ) + op.alter_column( + "openai_conversation", + "model", + existing_type=sa.VARCHAR(), + comment="Model used to generate the response", + existing_nullable=False, + ) + op.alter_column( + "openai_conversation", + "assistant_id", + existing_type=sa.VARCHAR(), + comment="OpenAI assistant identifier if used", + existing_nullable=True, + ) + op.alter_column( + "openai_conversation", + "project_id", + existing_type=sa.INTEGER(), + comment="Reference to the project", + existing_nullable=False, + ) + op.alter_column( + "openai_conversation", + "organization_id", + existing_type=sa.INTEGER(), + comment="Reference to the organization", + existing_nullable=False, + ) + op.alter_column( + "openai_conversation", + "id", + existing_type=sa.INTEGER(), + comment="Unique identifier for the conversation record", + existing_nullable=False, + autoincrement=True, + ) + op.alter_column( + "openai_conversation", + "is_deleted", + existing_type=sa.BOOLEAN(), + comment="Soft delete flag", + existing_nullable=False, + ) + op.alter_column( + "openai_conversation", + "inserted_at", + existing_type=postgresql.TIMESTAMP(), + comment="Timestamp when the conversation was created", + existing_nullable=False, + ) + op.alter_column( + "openai_conversation", + "updated_at", + existing_type=postgresql.TIMESTAMP(), + comment="Timestamp when the conversation was last updated", + existing_nullable=False, + ) + op.alter_column( + "openai_conversation", + "deleted_at", + existing_type=postgresql.TIMESTAMP(), + comment="Timestamp when the conversation was deleted", + existing_nullable=True, + ) + op.alter_column( + "openai_thread", + "thread_id", + existing_type=sa.VARCHAR(), + comment="OpenAI thread identifier", + existing_nullable=False, + ) + op.alter_column( + "openai_thread", + "prompt", + existing_type=sa.VARCHAR(), + comment="User prompt sent to the thread", + existing_nullable=False, + ) + op.alter_column( + "openai_thread", + "response", + existing_type=sa.VARCHAR(), + comment="Response received from OpenAI", + existing_nullable=True, + ) + op.alter_column( + "openai_thread", + "status", + existing_type=sa.VARCHAR(), + comment="Current status of the thread interaction", + existing_nullable=True, + ) + op.alter_column( + "openai_thread", + "error", + existing_type=sa.VARCHAR(), + comment="Error message if the interaction failed", + existing_nullable=True, + ) + op.alter_column( + "openai_thread", + "id", + existing_type=sa.INTEGER(), + comment="Unique identifier for the thread record", + existing_nullable=False, + autoincrement=True, + ) + op.alter_column( + "openai_thread", + "inserted_at", + existing_type=postgresql.TIMESTAMP(), + comment="Timestamp when the record was created", + existing_nullable=False, + ) + op.alter_column( + "openai_thread", + "updated_at", + existing_type=postgresql.TIMESTAMP(), + comment="Timestamp when the record was last updated", + existing_nullable=False, + ) + op.alter_column( + "organization", + "name", + existing_type=sa.VARCHAR(length=255), + comment="Organization name (unique identifier)", + existing_nullable=False, + ) + op.alter_column( + "organization", + "is_active", + existing_type=sa.BOOLEAN(), + comment="Flag indicating if the organization is active", + existing_nullable=False, + ) + op.alter_column( + "organization", + "id", + existing_type=sa.INTEGER(), + comment="Unique identifier for the organization", + existing_nullable=False, + autoincrement=True, + existing_server_default=sa.text("nextval('organization_id_seq'::regclass)"), + ) + op.alter_column( + "organization", + "inserted_at", + existing_type=postgresql.TIMESTAMP(), + comment="Timestamp when the organization was created", + existing_nullable=False, + existing_server_default=sa.text("now()"), + ) + op.alter_column( + "organization", + "updated_at", + existing_type=postgresql.TIMESTAMP(), + comment="Timestamp when the organization was last updated", + existing_nullable=False, + ) + op.alter_column( + "project", + "name", + existing_type=sa.VARCHAR(length=255), + comment="Project name", + existing_nullable=False, + ) + op.alter_column( + "project", + "description", + existing_type=sa.VARCHAR(length=500), + comment="Project description", + existing_nullable=True, + ) + op.alter_column( + "project", + "is_active", + existing_type=sa.BOOLEAN(), + comment="Flag indicating if the project is active", + existing_nullable=False, + ) + op.alter_column( + "project", + "id", + existing_type=sa.INTEGER(), + comment="Unique identifier for the project", + existing_nullable=False, + autoincrement=True, + existing_server_default=sa.text("nextval('project_id_seq'::regclass)"), + ) + op.alter_column( + "project", + "storage_path", + existing_type=sa.UUID(), + comment="Unique UUID used for cloud storage path", + existing_nullable=False, + ) + op.alter_column( + "project", + "organization_id", + existing_type=sa.INTEGER(), + comment="Reference to the organization", + existing_nullable=False, + ) + op.alter_column( + "project", + "inserted_at", + existing_type=postgresql.TIMESTAMP(), + comment="Timestamp when the project was created", + existing_nullable=False, + existing_server_default=sa.text("now()"), + ) + op.alter_column( + "project", + "updated_at", + existing_type=postgresql.TIMESTAMP(), + comment="Timestamp when the project was last updated", + existing_nullable=False, + ) + op.alter_column( + "user", + "email", + existing_type=sa.VARCHAR(length=255), + comment="User's email address (unique identifier)", + existing_nullable=False, + ) + op.alter_column( + "user", + "is_active", + existing_type=sa.BOOLEAN(), + comment="Flag indicating if the user account is active", + existing_nullable=False, + ) + op.alter_column( + "user", + "is_superuser", + existing_type=sa.BOOLEAN(), + comment="Flag indicating if user has superuser privileges", + existing_nullable=False, + ) + op.alter_column( + "user", + "full_name", + existing_type=sa.VARCHAR(length=255), + comment="User's full name", + existing_nullable=True, + ) + op.alter_column( + "user", + "id", + existing_type=sa.INTEGER(), + comment="Unique identifier for the user", + existing_nullable=False, + autoincrement=True, + existing_server_default=sa.text("nextval('user_id_seq'::regclass)"), + ) + op.alter_column( + "user", + "hashed_password", + existing_type=sa.VARCHAR(), + comment="Bcrypt hash of the user's password", + existing_nullable=False, + ) + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.alter_column( + "user", + "hashed_password", + existing_type=sa.VARCHAR(), + comment=None, + existing_comment="Bcrypt hash of the user's password", + existing_nullable=False, + ) + op.alter_column( + "user", + "id", + existing_type=sa.INTEGER(), + comment=None, + existing_comment="Unique identifier for the user", + existing_nullable=False, + autoincrement=True, + existing_server_default=sa.text("nextval('user_id_seq'::regclass)"), + ) + op.alter_column( + "user", + "full_name", + existing_type=sa.VARCHAR(length=255), + comment=None, + existing_comment="User's full name", + existing_nullable=True, + ) + op.alter_column( + "user", + "is_superuser", + existing_type=sa.BOOLEAN(), + comment=None, + existing_comment="Flag indicating if user has superuser privileges", + existing_nullable=False, + ) + op.alter_column( + "user", + "is_active", + existing_type=sa.BOOLEAN(), + comment=None, + existing_comment="Flag indicating if the user account is active", + existing_nullable=False, + ) + op.alter_column( + "user", + "email", + existing_type=sa.VARCHAR(length=255), + comment=None, + existing_comment="User's email address (unique identifier)", + existing_nullable=False, + ) + op.alter_column( + "project", + "updated_at", + existing_type=postgresql.TIMESTAMP(), + comment=None, + existing_comment="Timestamp when the project was last updated", + existing_nullable=False, + ) + op.alter_column( + "project", + "inserted_at", + existing_type=postgresql.TIMESTAMP(), + comment=None, + existing_comment="Timestamp when the project was created", + existing_nullable=False, + existing_server_default=sa.text("now()"), + ) + op.alter_column( + "project", + "organization_id", + existing_type=sa.INTEGER(), + comment=None, + existing_comment="Reference to the organization", + existing_nullable=False, + ) + op.alter_column( + "project", + "storage_path", + existing_type=sa.UUID(), + comment=None, + existing_comment="Unique UUID used for cloud storage path", + existing_nullable=False, + ) + op.alter_column( + "project", + "id", + existing_type=sa.INTEGER(), + comment=None, + existing_comment="Unique identifier for the project", + existing_nullable=False, + autoincrement=True, + existing_server_default=sa.text("nextval('project_id_seq'::regclass)"), + ) + op.alter_column( + "project", + "is_active", + existing_type=sa.BOOLEAN(), + comment=None, + existing_comment="Flag indicating if the project is active", + existing_nullable=False, + ) + op.alter_column( + "project", + "description", + existing_type=sa.VARCHAR(length=500), + comment=None, + existing_comment="Project description", + existing_nullable=True, + ) + op.alter_column( + "project", + "name", + existing_type=sa.VARCHAR(length=255), + comment=None, + existing_comment="Project name", + existing_nullable=False, + ) + op.alter_column( + "organization", + "updated_at", + existing_type=postgresql.TIMESTAMP(), + comment=None, + existing_comment="Timestamp when the organization was last updated", + existing_nullable=False, + ) + op.alter_column( + "organization", + "inserted_at", + existing_type=postgresql.TIMESTAMP(), + comment=None, + existing_comment="Timestamp when the organization was created", + existing_nullable=False, + existing_server_default=sa.text("now()"), + ) + op.alter_column( + "organization", + "id", + existing_type=sa.INTEGER(), + comment=None, + existing_comment="Unique identifier for the organization", + existing_nullable=False, + autoincrement=True, + existing_server_default=sa.text("nextval('organization_id_seq'::regclass)"), + ) + op.alter_column( + "organization", + "is_active", + existing_type=sa.BOOLEAN(), + comment=None, + existing_comment="Flag indicating if the organization is active", + existing_nullable=False, + ) + op.alter_column( + "organization", + "name", + existing_type=sa.VARCHAR(length=255), + comment=None, + existing_comment="Organization name (unique identifier)", + existing_nullable=False, + ) + op.alter_column( + "openai_thread", + "updated_at", + existing_type=postgresql.TIMESTAMP(), + comment=None, + existing_comment="Timestamp when the record was last updated", + existing_nullable=False, + ) + op.alter_column( + "openai_thread", + "inserted_at", + existing_type=postgresql.TIMESTAMP(), + comment=None, + existing_comment="Timestamp when the record was created", + existing_nullable=False, + ) + op.alter_column( + "openai_thread", + "id", + existing_type=sa.INTEGER(), + comment=None, + existing_comment="Unique identifier for the thread record", + existing_nullable=False, + autoincrement=True, + ) + op.alter_column( + "openai_thread", + "error", + existing_type=sa.VARCHAR(), + comment=None, + existing_comment="Error message if the interaction failed", + existing_nullable=True, + ) + op.alter_column( + "openai_thread", + "status", + existing_type=sa.VARCHAR(), + comment=None, + existing_comment="Current status of the thread interaction", + existing_nullable=True, + ) + op.alter_column( + "openai_thread", + "response", + existing_type=sa.VARCHAR(), + comment=None, + existing_comment="Response received from OpenAI", + existing_nullable=True, + ) + op.alter_column( + "openai_thread", + "prompt", + existing_type=sa.VARCHAR(), + comment=None, + existing_comment="User prompt sent to the thread", + existing_nullable=False, + ) + op.alter_column( + "openai_thread", + "thread_id", + existing_type=sa.VARCHAR(), + comment=None, + existing_comment="OpenAI thread identifier", + existing_nullable=False, + ) + op.alter_column( + "openai_conversation", + "deleted_at", + existing_type=postgresql.TIMESTAMP(), + comment=None, + existing_comment="Timestamp when the conversation was deleted", + existing_nullable=True, + ) + op.alter_column( + "openai_conversation", + "updated_at", + existing_type=postgresql.TIMESTAMP(), + comment=None, + existing_comment="Timestamp when the conversation was last updated", + existing_nullable=False, + ) + op.alter_column( + "openai_conversation", + "inserted_at", + existing_type=postgresql.TIMESTAMP(), + comment=None, + existing_comment="Timestamp when the conversation was created", + existing_nullable=False, + ) + op.alter_column( + "openai_conversation", + "is_deleted", + existing_type=sa.BOOLEAN(), + comment=None, + existing_comment="Soft delete flag", + existing_nullable=False, + ) + op.alter_column( + "openai_conversation", + "id", + existing_type=sa.INTEGER(), + comment=None, + existing_comment="Unique identifier for the conversation record", + existing_nullable=False, + autoincrement=True, + ) + op.alter_column( + "openai_conversation", + "organization_id", + existing_type=sa.INTEGER(), + comment=None, + existing_comment="Reference to the organization", + existing_nullable=False, + ) + op.alter_column( + "openai_conversation", + "project_id", + existing_type=sa.INTEGER(), + comment=None, + existing_comment="Reference to the project", + existing_nullable=False, + ) + op.alter_column( + "openai_conversation", + "assistant_id", + existing_type=sa.VARCHAR(), + comment=None, + existing_comment="OpenAI assistant identifier if used", + existing_nullable=True, + ) + op.alter_column( + "openai_conversation", + "model", + existing_type=sa.VARCHAR(), + comment=None, + existing_comment="Model used to generate the response", + existing_nullable=False, + ) + op.alter_column( + "openai_conversation", + "response", + existing_type=sa.VARCHAR(), + comment=None, + existing_comment="Response generated by OpenAI", + existing_nullable=True, + ) + op.alter_column( + "openai_conversation", + "user_question", + existing_type=sa.VARCHAR(), + comment=None, + existing_comment="User's question or input text", + existing_nullable=False, + ) + op.alter_column( + "openai_conversation", + "previous_response_id", + existing_type=sa.VARCHAR(), + comment=None, + existing_comment="Previous response ID in the conversation chain", + existing_nullable=True, + ) + op.alter_column( + "openai_conversation", + "ancestor_response_id", + existing_type=sa.VARCHAR(), + comment=None, + existing_comment="Root response ID for conversation threading", + existing_nullable=False, + ) + op.alter_column( + "openai_conversation", + "response_id", + existing_type=sa.VARCHAR(), + comment=None, + existing_comment="OpenAI response identifier", + existing_nullable=False, + ) + op.alter_column( + "openai_assistant", + "deleted_at", + existing_type=postgresql.TIMESTAMP(), + comment=None, + existing_comment="Timestamp when the assistant was deleted", + existing_nullable=True, + ) + op.alter_column( + "openai_assistant", + "updated_at", + existing_type=postgresql.TIMESTAMP(), + comment=None, + existing_comment="Timestamp when the assistant was last updated", + existing_nullable=False, + ) + op.alter_column( + "openai_assistant", + "inserted_at", + existing_type=postgresql.TIMESTAMP(), + comment=None, + existing_comment="Timestamp when the assistant was created", + existing_nullable=False, + ) + op.alter_column( + "openai_assistant", + "is_deleted", + existing_type=sa.BOOLEAN(), + comment=None, + existing_comment="Soft delete flag", + existing_nullable=False, + existing_server_default=sa.text("false"), + ) + op.alter_column( + "openai_assistant", + "id", + existing_type=sa.INTEGER(), + comment=None, + existing_comment="Unique identifier for the assistant", + existing_nullable=False, + autoincrement=True, + ) + op.alter_column( + "openai_assistant", + "organization_id", + existing_type=sa.INTEGER(), + comment=None, + existing_comment="Reference to the organization", + existing_nullable=False, + ) + op.alter_column( + "openai_assistant", + "project_id", + existing_type=sa.INTEGER(), + comment=None, + existing_comment="Reference to the project", + existing_nullable=False, + ) + op.alter_column( + "openai_assistant", + "max_num_results", + existing_type=sa.INTEGER(), + comment=None, + existing_comment="Parameter that controls maximum number of results to return", + existing_nullable=False, + ) + op.alter_column( + "openai_assistant", + "temperature", + existing_type=sa.DOUBLE_PRECISION(precision=53), + comment=None, + existing_comment="Parameter that controls the creativity or randomness of the text generated by model", + existing_nullable=False, + ) + op.alter_column( + "openai_assistant", + "vector_store_ids", + existing_type=postgresql.ARRAY(sa.VARCHAR()), + comment=None, + existing_comment="List of OpenAI vector store IDs attached", + existing_nullable=True, + ) + op.alter_column( + "openai_assistant", + "model", + existing_type=sa.VARCHAR(), + comment=None, + existing_comment="OpenAI model used by the assistant", + existing_nullable=False, + ) + op.alter_column( + "openai_assistant", + "instructions", + existing_type=sa.TEXT(), + comment=None, + existing_comment="System instructions for the assistant", + existing_nullable=False, + ) + op.alter_column( + "openai_assistant", + "name", + existing_type=sa.VARCHAR(length=255), + comment=None, + existing_comment="Name of the assistant", + existing_nullable=False, + ) + op.alter_column( + "openai_assistant", + "assistant_id", + existing_type=sa.VARCHAR(length=255), + comment=None, + existing_comment="Unique identifier for the assistant at OpenAI", + existing_nullable=False, + ) + op.alter_column( + "model_evaluation", + "deleted_at", + existing_type=postgresql.TIMESTAMP(), + comment=None, + existing_comment="Timestamp when the evaluation was deleted", + existing_nullable=True, + ) + op.alter_column( + "model_evaluation", + "updated_at", + existing_type=postgresql.TIMESTAMP(), + comment=None, + existing_comment="Timestamp when the evaluation was last updated", + existing_nullable=False, + ) + op.alter_column( + "model_evaluation", + "inserted_at", + existing_type=postgresql.TIMESTAMP(), + comment=None, + existing_comment="Timestamp when the evaluation was created", + existing_nullable=False, + ) + op.alter_column( + "model_evaluation", + "organization_id", + existing_type=sa.INTEGER(), + comment=None, + existing_comment="Reference to the organization", + existing_nullable=False, + ) + op.alter_column( + "model_evaluation", + "project_id", + existing_type=sa.INTEGER(), + comment=None, + existing_comment="Reference to the project", + existing_nullable=False, + ) + op.alter_column( + "model_evaluation", + "document_id", + existing_type=sa.UUID(), + comment=None, + existing_comment="Reference to the evaluation document", + existing_nullable=False, + ) + op.alter_column( + "model_evaluation", + "fine_tuning_id", + existing_type=sa.INTEGER(), + comment=None, + existing_comment="Reference to the fine-tuning job", + existing_nullable=False, + ) + op.alter_column( + "model_evaluation", + "is_deleted", + existing_type=sa.BOOLEAN(), + comment=None, + existing_comment="Soft delete flag", + existing_nullable=False, + ) + op.alter_column( + "model_evaluation", + "error_message", + existing_type=sa.VARCHAR(), + comment=None, + existing_comment="Error message if evaluation failed", + existing_nullable=True, + ) + op.alter_column( + "model_evaluation", + "status", + existing_type=postgresql.ENUM( + "pending", "running", "completed", "failed", name="modelevaluationstatus" + ), + comment=None, + existing_comment="Current status of the evaluation", + existing_nullable=False, + existing_server_default=sa.text("'pending'::modelevaluationstatus"), + ) + op.alter_column( + "model_evaluation", + "prediction_data_s3_object", + existing_type=sa.VARCHAR(), + comment=None, + existing_comment="S3 URL where the prediction data is stored", + existing_nullable=True, + ) + op.alter_column( + "model_evaluation", + "score", + existing_type=postgresql.JSON(astext_type=sa.Text()), + comment=None, + existing_comment="Evaluation scores per metric (e.g., MCC)", + existing_nullable=True, + ) + op.alter_column( + "model_evaluation", + "system_prompt", + existing_type=sa.TEXT(), + comment=None, + existing_comment="System prompt used during evaluation", + existing_nullable=False, + ) + op.alter_column( + "model_evaluation", + "split_ratio", + existing_type=sa.DOUBLE_PRECISION(precision=53), + comment=None, + existing_comment="Train/test split ratio used", + existing_nullable=False, + ) + op.alter_column( + "model_evaluation", + "base_model", + existing_type=sa.VARCHAR(), + comment=None, + existing_comment="Base model used for fine-tuning", + existing_nullable=False, + ) + op.alter_column( + "model_evaluation", + "test_data_s3_object", + existing_type=sa.VARCHAR(), + comment=None, + existing_comment="S3 URI of the testing data", + existing_nullable=False, + ) + op.alter_column( + "model_evaluation", + "fine_tuned_model", + existing_type=sa.VARCHAR(), + comment=None, + existing_comment="Name of the fine-tuned model being evaluated", + existing_nullable=False, + ) + op.alter_column( + "model_evaluation", + "id", + existing_type=sa.INTEGER(), + comment=None, + existing_comment="Unique identifier for the evaluation", + existing_nullable=False, + autoincrement=True, + ) + op.alter_column( + "job", + "updated_at", + existing_type=postgresql.TIMESTAMP(), + comment=None, + existing_comment="Timestamp when the job was last updated", + existing_nullable=False, + ) + op.alter_column( + "job", + "created_at", + existing_type=postgresql.TIMESTAMP(), + comment=None, + existing_comment="Timestamp when the job was created", + existing_nullable=False, + ) + op.alter_column( + "job", + "job_type", + existing_type=postgresql.ENUM("RESPONSE", "LLM_API", name="jobtype"), + comment=None, + existing_comment="Type of job being executed (e.g., RESPONSE, LLM_API)", + existing_nullable=False, + ) + op.alter_column( + "job", + "status", + existing_type=postgresql.ENUM( + "PENDING", "PROCESSING", "SUCCESS", "FAILED", name="jobstatus" + ), + comment=None, + existing_comment="Current state of the job (PENDING, PROCESSING, SUCCESS, FAILED)", + existing_nullable=False, + ) + op.alter_column( + "job", + "error_message", + existing_type=sa.VARCHAR(), + comment=None, + existing_comment="Error details if the job fails", + existing_nullable=True, + ) + op.alter_column( + "job", + "trace_id", + existing_type=sa.VARCHAR(), + comment=None, + existing_comment="Tracing ID for correlating logs and traces", + existing_nullable=True, + ) + op.alter_column( + "job", + "task_id", + existing_type=sa.VARCHAR(), + comment=None, + existing_comment="Celery task ID returned when job is queued", + existing_nullable=True, + ) + op.alter_column( + "job", + "id", + existing_type=sa.UUID(), + comment=None, + existing_comment="Unique identifier for the job", + existing_nullable=False, + ) + op.alter_column( + "fine_tuning", + "deleted_at", + existing_type=postgresql.TIMESTAMP(), + comment=None, + existing_comment="Timestamp when the job was deleted", + existing_nullable=True, + ) + op.alter_column( + "fine_tuning", + "updated_at", + existing_type=postgresql.TIMESTAMP(), + comment=None, + existing_comment="Timestamp when the job was last updated", + existing_nullable=False, + ) + op.alter_column( + "fine_tuning", + "inserted_at", + existing_type=postgresql.TIMESTAMP(), + comment=None, + existing_comment="Timestamp when the job was created", + existing_nullable=False, + ) + op.alter_column( + "fine_tuning", + "organization_id", + existing_type=sa.INTEGER(), + comment=None, + existing_comment="Reference to the organization", + existing_nullable=False, + ) + op.alter_column( + "fine_tuning", + "project_id", + existing_type=sa.INTEGER(), + comment=None, + existing_comment="Reference to the project", + existing_nullable=False, + ) + op.alter_column( + "fine_tuning", + "document_id", + existing_type=sa.UUID(), + comment=None, + existing_comment="Reference to the training document", + existing_nullable=False, + ) + op.alter_column( + "fine_tuning", + "is_deleted", + existing_type=sa.BOOLEAN(), + comment=None, + existing_comment="Soft delete flag", + existing_nullable=False, + ) + op.alter_column( + "fine_tuning", + "error_message", + existing_type=sa.VARCHAR(), + comment=None, + existing_comment="Error message if the job failed", + existing_nullable=True, + ) + op.alter_column( + "fine_tuning", + "test_data_s3_object", + existing_type=sa.VARCHAR(), + comment=None, + existing_comment="S3 URI of the testing data", + existing_nullable=True, + ) + op.alter_column( + "fine_tuning", + "train_data_s3_object", + existing_type=sa.VARCHAR(), + comment=None, + existing_comment="S3 URI of the training data", + existing_nullable=True, + ) + op.alter_column( + "fine_tuning", + "fine_tuned_model", + existing_type=sa.VARCHAR(), + comment=None, + existing_comment="Name of the resulting fine-tuned model", + existing_nullable=True, + ) + op.alter_column( + "fine_tuning", + "status", + existing_type=postgresql.ENUM( + "pending", "running", "completed", "failed", name="finetuningstatus" + ), + comment=None, + existing_comment="Current status of the fine-tuning job", + existing_nullable=False, + existing_server_default=sa.text("'pending'::finetuningstatus"), + ) + op.alter_column( + "fine_tuning", + "provider_job_id", + existing_type=sa.VARCHAR(), + comment=None, + existing_comment="Fine-tuning job ID returned by the provider", + existing_nullable=True, + ) + op.alter_column( + "fine_tuning", + "id", + existing_type=sa.INTEGER(), + comment=None, + existing_comment="Unique identifier for the fine-tuning job", + existing_nullable=False, + autoincrement=True, + ) + op.alter_column( + "fine_tuning", + "system_prompt", + existing_type=sa.TEXT(), + comment=None, + existing_comment="System prompt used during fine-tuning", + existing_nullable=False, + ) + op.alter_column( + "fine_tuning", + "training_file_id", + existing_type=sa.VARCHAR(), + comment=None, + existing_comment="OpenAI training file identifier", + existing_nullable=True, + ) + op.alter_column( + "fine_tuning", + "split_ratio", + existing_type=sa.DOUBLE_PRECISION(precision=53), + comment=None, + existing_comment="Train/test split ratio for the dataset", + existing_nullable=False, + ) + op.alter_column( + "fine_tuning", + "base_model", + existing_type=sa.VARCHAR(), + comment=None, + existing_comment="Base model used for fine-tuning", + existing_nullable=False, + ) + op.alter_column( + "evaluation_run", + "project_id", + existing_type=sa.INTEGER(), + comment=None, + existing_comment="Reference to the project", + existing_nullable=False, + ) + op.alter_column( + "evaluation_run", + "organization_id", + existing_type=sa.INTEGER(), + comment=None, + existing_comment="Reference to the organization", + existing_nullable=False, + ) + op.alter_column( + "evaluation_run", + "error_message", + existing_type=sa.TEXT(), + comment=None, + existing_comment="Error message if evaluation failed", + existing_nullable=True, + ) + op.alter_column( + "evaluation_run", + "score", + existing_type=postgresql.JSONB(astext_type=sa.Text()), + comment=None, + existing_comment="Evaluation scores (correctness, cosine_similarity, etc.)", + existing_nullable=True, + ) + op.alter_column( + "evaluation_run", + "total_items", + existing_type=sa.INTEGER(), + comment=None, + existing_comment="Total number of items evaluated", + existing_nullable=False, + ) + op.alter_column( + "evaluation_run", + "object_store_url", + existing_type=sa.VARCHAR(), + comment=None, + existing_comment="S3 URL of processed evaluation results", + existing_nullable=True, + ) + op.alter_column( + "evaluation_run", + "status", + existing_type=sa.VARCHAR(), + comment=None, + existing_comment="Evaluation status (pending, processing, completed, failed)", + existing_nullable=False, + ) + op.alter_column( + "evaluation_run", + "embedding_batch_job_id", + existing_type=sa.INTEGER(), + comment=None, + existing_comment="Reference to the batch job for embedding similarity scoring", + existing_nullable=True, + ) + op.alter_column( + "evaluation_run", + "batch_job_id", + existing_type=sa.INTEGER(), + comment=None, + existing_comment="Reference to the batch job for responses", + existing_nullable=True, + ) + op.alter_column( + "evaluation_run", + "dataset_id", + existing_type=sa.INTEGER(), + comment=None, + existing_comment="Reference to the evaluation dataset", + existing_nullable=False, + ) + op.alter_column( + "evaluation_run", + "config", + existing_type=postgresql.JSONB(astext_type=sa.Text()), + comment=None, + existing_comment="Evaluation configuration (model, instructions, etc.)", + existing_nullable=False, + ) + op.alter_column( + "evaluation_run", + "dataset_name", + existing_type=sa.VARCHAR(), + comment=None, + existing_comment="Name of the Langfuse dataset used", + existing_nullable=False, + ) + op.alter_column( + "evaluation_run", + "run_name", + existing_type=sa.VARCHAR(), + comment=None, + existing_comment="Name of the evaluation run", + existing_nullable=False, + ) + op.alter_column( + "evaluation_run", + "id", + existing_type=sa.INTEGER(), + comment=None, + existing_comment="Unique identifier for the evaluation run", + existing_nullable=False, + autoincrement=True, + ) + op.alter_column( + "evaluation_dataset", + "updated_at", + existing_type=postgresql.TIMESTAMP(), + comment=None, + existing_comment="Timestamp when the evaluation dataset was last updated", + existing_nullable=False, + ) + op.alter_column( + "evaluation_dataset", + "inserted_at", + existing_type=postgresql.TIMESTAMP(), + comment=None, + existing_comment="Timestamp when the evaluation dataset was created", + existing_nullable=False, + ) + op.alter_column( + "evaluation_dataset", + "project_id", + existing_type=sa.INTEGER(), + comment=None, + existing_comment="Reference to the project", + existing_nullable=False, + ) + op.alter_column( + "evaluation_dataset", + "organization_id", + existing_type=sa.INTEGER(), + comment=None, + existing_comment="Reference to the organization", + existing_nullable=False, + ) + op.alter_column( + "evaluation_dataset", + "langfuse_dataset_id", + existing_type=sa.VARCHAR(), + comment=None, + existing_comment="Langfuse dataset ID for observability integration", + existing_nullable=True, + ) + op.alter_column( + "evaluation_dataset", + "object_store_url", + existing_type=sa.VARCHAR(), + comment=None, + existing_comment="S3 URL where the dataset CSV is stored", + existing_nullable=True, + ) + op.alter_column( + "evaluation_dataset", + "dataset_metadata", + existing_type=postgresql.JSONB(astext_type=sa.Text()), + comment=None, + existing_comment="Dataset metadata (item counts, duplication factor, etc.)", + existing_nullable=False, + existing_server_default=sa.text("'{}'::jsonb"), + ) + op.alter_column( + "evaluation_dataset", + "description", + existing_type=sa.VARCHAR(), + comment=None, + existing_comment="Description of the dataset", + existing_nullable=True, + ) + op.alter_column( + "evaluation_dataset", + "name", + existing_type=sa.VARCHAR(), + comment=None, + existing_comment="Name of the evaluation dataset", + existing_nullable=False, + ) + op.alter_column( + "evaluation_dataset", + "id", + existing_type=sa.INTEGER(), + comment=None, + existing_comment="Unique identifier for the dataset", + existing_nullable=False, + autoincrement=True, + existing_server_default=sa.text( + "nextval('evaluation_dataset_id_seq'::regclass)" + ), + ) + op.alter_column( + "documentcollection", + "collection_id", + existing_type=sa.UUID(), + comment=None, + existing_comment="Reference to the collection", + existing_nullable=False, + ) + op.alter_column( + "documentcollection", + "document_id", + existing_type=sa.UUID(), + comment=None, + existing_comment="Reference to the document", + existing_nullable=False, + ) + op.alter_column( + "documentcollection", + "id", + existing_type=sa.INTEGER(), + comment=None, + existing_comment="Unique identifier for the document-collection link", + existing_nullable=False, + autoincrement=True, + ) + op.alter_column( + "document", + "deleted_at", + existing_type=postgresql.TIMESTAMP(), + comment=None, + existing_comment="Timestamp when the document was deleted", + existing_nullable=True, + ) + op.alter_column( + "document", + "updated_at", + existing_type=postgresql.TIMESTAMP(), + comment=None, + existing_comment="Timestamp when the document was last updated", + existing_nullable=False, + ) + op.alter_column( + "document", + "inserted_at", + existing_type=postgresql.TIMESTAMP(), + comment=None, + existing_comment="Timestamp when the document was uploaded", + existing_nullable=False, + ) + op.alter_column( + "document", + "source_document_id", + existing_type=sa.UUID(), + comment=None, + existing_comment="Reference to source document if this is a transformation", + existing_nullable=True, + ) + op.alter_column( + "document", + "is_deleted", + existing_type=sa.BOOLEAN(), + comment=None, + existing_comment="Soft delete flag", + existing_nullable=False, + ) + op.alter_column( + "document", + "object_store_url", + existing_type=sa.VARCHAR(), + comment=None, + existing_comment="Cloud storage URL for the document", + existing_nullable=False, + ) + op.alter_column( + "document", + "id", + existing_type=sa.UUID(), + comment=None, + existing_comment="Unique identifier for the document", + existing_nullable=False, + ) + op.alter_column( + "document", + "project_id", + existing_type=sa.INTEGER(), + comment=None, + existing_comment="Reference to the project", + existing_nullable=False, + ) + op.alter_column( + "document", + "fname", + existing_type=sa.VARCHAR(), + comment=None, + existing_comment="Original filename of the document", + existing_nullable=False, + ) + op.alter_column( + "doc_transformation_job", + "updated_at", + existing_type=postgresql.TIMESTAMP(), + comment=None, + existing_comment="Timestamp when the job was last updated", + existing_nullable=False, + ) + op.alter_column( + "doc_transformation_job", + "inserted_at", + existing_type=postgresql.TIMESTAMP(), + comment=None, + existing_comment="Timestamp when the job was created", + existing_nullable=False, + ) + op.alter_column( + "doc_transformation_job", + "transformed_document_id", + existing_type=sa.UUID(), + comment=None, + existing_comment="Reference to the resulting transformed document", + existing_nullable=True, + ) + op.alter_column( + "doc_transformation_job", + "source_document_id", + existing_type=sa.UUID(), + comment=None, + existing_comment="Reference to the source document being transformed", + existing_nullable=False, + ) + op.alter_column( + "doc_transformation_job", + "error_message", + existing_type=sa.VARCHAR(), + comment=None, + existing_comment="Error message if transformation failed", + existing_nullable=True, + ) + op.alter_column( + "doc_transformation_job", + "trace_id", + existing_type=sa.VARCHAR(), + comment=None, + existing_comment="Tracing ID for correlating logs and traces", + existing_nullable=True, + ) + op.alter_column( + "doc_transformation_job", + "task_id", + existing_type=sa.VARCHAR(), + comment=None, + existing_comment="Celery task ID for async processing", + existing_nullable=True, + ) + op.alter_column( + "doc_transformation_job", + "status", + existing_type=postgresql.ENUM( + "PENDING", "PROCESSING", "COMPLETED", "FAILED", name="transformationstatus" + ), + comment=None, + existing_comment="Current status (PENDING, PROCESSING, COMPLETED, FAILED)", + existing_nullable=False, + ) + op.alter_column( + "doc_transformation_job", + "id", + existing_type=sa.UUID(), + comment=None, + existing_comment="Unique identifier for the transformation job", + existing_nullable=False, + ) + op.alter_column( + "credential", + "updated_at", + existing_type=postgresql.TIMESTAMP(), + comment=None, + existing_comment="Timestamp when the credential was last updated", + existing_nullable=False, + ) + op.alter_column( + "credential", + "inserted_at", + existing_type=postgresql.TIMESTAMP(), + comment=None, + existing_comment="Timestamp when the credential was created", + existing_nullable=False, + ) + op.alter_column( + "credential", + "credential", + existing_type=sa.VARCHAR(), + comment=None, + existing_comment="Encrypted JSON string containing provider-specific API credentials", + existing_nullable=False, + ) + op.alter_column( + "credential", + "provider", + existing_type=sa.VARCHAR(), + comment=None, + existing_comment="Provider name like 'openai', 'gemini'", + existing_nullable=False, + ) + op.alter_column( + "credential", + "id", + existing_type=sa.INTEGER(), + comment=None, + existing_comment="Unique ID for the credential", + existing_nullable=False, + autoincrement=True, + ) + op.alter_column( + "credential", + "project_id", + existing_type=sa.INTEGER(), + comment=None, + existing_comment="Reference to the project", + existing_nullable=False, + ) + op.alter_column( + "credential", + "organization_id", + existing_type=sa.INTEGER(), + comment=None, + existing_comment="Reference to the organization", + existing_nullable=False, + ) + op.alter_column( + "credential", + "is_active", + existing_type=sa.BOOLEAN(), + comment=None, + existing_comment="Flag indicating if this credential is currently active and usable", + existing_nullable=False, + ) + op.alter_column( + "collection_jobs", + "updated_at", + existing_type=postgresql.TIMESTAMP(), + comment=None, + existing_comment="Timestamp when the job was last updated", + existing_nullable=False, + ) + op.alter_column( + "collection_jobs", + "inserted_at", + existing_type=postgresql.TIMESTAMP(), + comment=None, + existing_comment="Timestamp when the job was created", + existing_nullable=False, + ) + op.alter_column( + "collection_jobs", + "project_id", + existing_type=sa.INTEGER(), + comment=None, + existing_comment="Reference to the project", + existing_nullable=False, + ) + op.alter_column( + "collection_jobs", + "collection_id", + existing_type=sa.UUID(), + comment=None, + existing_comment="Reference to the collection", + existing_nullable=True, + ) + op.alter_column( + "collection_jobs", + "error_message", + existing_type=sa.TEXT(), + comment=None, + existing_comment="Error message if the job failed", + existing_nullable=True, + ) + op.alter_column( + "collection_jobs", + "trace_id", + existing_type=sa.VARCHAR(), + comment=None, + existing_comment="Tracing ID for correlating logs and traces", + existing_nullable=True, + ) + op.alter_column( + "collection_jobs", + "task_id", + existing_type=sa.VARCHAR(), + comment=None, + existing_comment="Celery task ID for async processing", + existing_nullable=True, + ) + op.alter_column( + "collection_jobs", + "action_type", + existing_type=postgresql.ENUM("CREATE", "DELETE", name="collectionactiontype"), + comment=None, + existing_comment="Type of operation (CREATE, DELETE)", + existing_nullable=False, + ) + op.alter_column( + "collection_jobs", + "status", + existing_type=postgresql.ENUM( + "PENDING", "PROCESSING", "SUCCESSFUL", "FAILED", name="collectionjobstatus" + ), + comment=None, + existing_comment="Current job status (PENDING, PROCESSING, SUCCESSFUL, FAILED)", + existing_nullable=False, + ) + op.alter_column( + "collection_jobs", + "id", + existing_type=sa.UUID(), + comment=None, + existing_comment="Unique identifier for the collection job", + existing_nullable=False, + ) + op.alter_column( + "collection", + "deleted_at", + existing_type=postgresql.TIMESTAMP(), + comment=None, + existing_comment="Timestamp when the collection was deleted", + existing_nullable=True, + ) + op.alter_column( + "collection", + "updated_at", + existing_type=postgresql.TIMESTAMP(), + comment=None, + existing_comment="Timestamp when the collection was last updated", + existing_nullable=False, + ) + op.alter_column( + "collection", + "inserted_at", + existing_type=postgresql.TIMESTAMP(), + comment=None, + existing_comment="Timestamp when the collection was created", + existing_nullable=False, + ) + op.alter_column( + "collection", + "project_id", + existing_type=sa.INTEGER(), + comment=None, + existing_comment="Reference to the project", + existing_nullable=False, + ) + op.alter_column( + "collection", + "organization_id", + existing_type=sa.INTEGER(), + comment=None, + existing_comment="Reference to the organization", + existing_nullable=False, + ) + op.alter_column( + "collection", + "llm_service_name", + existing_type=sa.VARCHAR(), + comment=None, + existing_comment="Name of the LLM service provider", + existing_nullable=False, + ) + op.alter_column( + "collection", + "llm_service_id", + existing_type=sa.VARCHAR(), + comment=None, + existing_comment="External LLM service identifier (e.g., OpenAI vector store ID)", + existing_nullable=False, + ) + op.alter_column( + "collection", + "id", + existing_type=sa.UUID(), + comment=None, + existing_comment="Unique identifier for the collection", + existing_nullable=False, + ) + op.alter_column( + "batch_job", + "updated_at", + existing_type=postgresql.TIMESTAMP(), + comment=None, + existing_comment="Timestamp when the batch job was last updated", + existing_nullable=False, + ) + op.alter_column( + "batch_job", + "inserted_at", + existing_type=postgresql.TIMESTAMP(), + comment=None, + existing_comment="Timestamp when the batch job was started", + existing_nullable=False, + ) + op.alter_column( + "batch_job", + "project_id", + existing_type=sa.INTEGER(), + comment=None, + existing_comment="Reference to the project", + existing_nullable=False, + ) + op.alter_column( + "batch_job", + "organization_id", + existing_type=sa.INTEGER(), + comment=None, + existing_comment="Reference to the organization", + existing_nullable=False, + ) + op.alter_column( + "batch_job", + "error_message", + existing_type=sa.TEXT(), + comment=None, + existing_comment="Error message if batch failed", + existing_nullable=True, + ) + op.alter_column( + "batch_job", + "total_items", + existing_type=sa.INTEGER(), + comment=None, + existing_comment="Total number of items in the batch", + existing_nullable=False, + existing_server_default=sa.text("0"), + ) + op.alter_column( + "batch_job", + "raw_output_url", + existing_type=sa.VARCHAR(), + comment=None, + existing_comment="S3 URL of raw batch output file", + existing_nullable=True, + ) + op.alter_column( + "batch_job", + "provider_status", + existing_type=sa.VARCHAR(), + comment=None, + existing_comment="Provider-specific status (e.g., validating, in_progress, completed, failed)", + existing_nullable=True, + ) + op.alter_column( + "batch_job", + "provider_output_file_id", + existing_type=sa.VARCHAR(), + comment=None, + existing_comment="Provider's output file ID", + existing_nullable=True, + ) + op.alter_column( + "batch_job", + "provider_file_id", + existing_type=sa.VARCHAR(), + comment=None, + existing_comment="Provider's input file ID", + existing_nullable=True, + ) + op.alter_column( + "batch_job", + "provider_batch_id", + existing_type=sa.VARCHAR(), + comment=None, + existing_comment="Provider's batch job ID (e.g., OpenAI batch_id)", + existing_nullable=True, + ) + op.alter_column( + "batch_job", + "config", + existing_type=postgresql.JSONB(astext_type=sa.Text()), + comment=None, + existing_comment="Complete batch configuration including model, temperature, instructions, tools, etc.", + existing_nullable=False, + existing_server_default=sa.text("'{}'::jsonb"), + ) + op.alter_column( + "batch_job", + "job_type", + existing_type=sa.VARCHAR(), + comment=None, + existing_comment="Type of batch job (e.g., evaluation, classification, embedding)", + existing_nullable=False, + ) + op.alter_column( + "batch_job", + "provider", + existing_type=sa.VARCHAR(), + comment=None, + existing_comment="LLM provider name (e.g., openai, anthropic)", + existing_nullable=False, + ) + op.alter_column( + "batch_job", + "id", + existing_type=sa.INTEGER(), + comment=None, + existing_comment="Unique identifier for the batch job", + existing_nullable=False, + autoincrement=True, + ) + op.alter_column( + "apikey", + "deleted_at", + existing_type=postgresql.TIMESTAMP(), + comment=None, + existing_comment="Timestamp when the API key was deleted", + existing_nullable=True, + ) + op.alter_column( + "apikey", + "updated_at", + existing_type=postgresql.TIMESTAMP(), + comment=None, + existing_comment="Timestamp when the API key was last updated", + existing_nullable=False, + ) + op.alter_column( + "apikey", + "inserted_at", + existing_type=postgresql.TIMESTAMP(), + comment=None, + existing_comment="Timestamp when the API key was created", + existing_nullable=False, + ) + op.alter_column( + "apikey", + "is_deleted", + existing_type=sa.BOOLEAN(), + comment=None, + existing_comment="Soft delete flag", + existing_nullable=False, + ) + op.alter_column( + "apikey", + "key_hash", + existing_type=sa.VARCHAR(), + comment=None, + existing_comment="Bcrypt hash of the secret of the API key", + existing_nullable=False, + ) + op.alter_column( + "apikey", + "key_prefix", + existing_type=sa.VARCHAR(), + comment=None, + existing_comment="Unique prefix portion of the API key for identification", + existing_nullable=False, + ) + op.alter_column( + "apikey", + "id", + existing_type=sa.UUID(), + comment=None, + existing_comment="Unique identifier for the API key", + existing_nullable=False, + ) + op.alter_column( + "apikey", + "user_id", + existing_type=sa.INTEGER(), + comment=None, + existing_comment="Reference to the user for whom the API key was created", + existing_nullable=False, + ) + op.alter_column( + "apikey", + "project_id", + existing_type=sa.INTEGER(), + comment=None, + existing_comment="Reference to the project", + existing_nullable=False, + ) + op.alter_column( + "apikey", + "organization_id", + existing_type=sa.INTEGER(), + comment=None, + existing_comment="Reference to the organization", + existing_nullable=False, + ) + # ### end Alembic commands ### diff --git a/backend/app/alembic/versions/8d3c3e8ce7b8_add_db_comments.py b/backend/app/alembic/versions/8d3c3e8ce7b8_add_db_comments.py deleted file mode 100644 index a38446787..000000000 --- a/backend/app/alembic/versions/8d3c3e8ce7b8_add_db_comments.py +++ /dev/null @@ -1,149 +0,0 @@ -"""add_db_comments - -Revision ID: 8d3c3e8ce7b8 -Revises: eed36ae3c79a -Create Date: 2025-12-10 13:33:44.172685 - -""" -from alembic import op -import sqlalchemy as sa -import sqlmodel.sql.sqltypes -from sqlalchemy.dialects import postgresql - -# revision identifiers, used by Alembic. -revision = "8d3c3e8ce7b8" -down_revision = "eed36ae3c79a" -branch_labels = None -depends_on = None - - -def upgrade(): - # ### commands auto generated by Alembic - please adjust! ### - op.alter_column( - "credential", - "organization_id", - existing_type=sa.INTEGER(), - comment="Reference to the organization", - existing_nullable=False, - ) - op.alter_column( - "credential", - "project_id", - existing_type=sa.INTEGER(), - comment="Reference to the project", - existing_nullable=False, - ) - op.alter_column( - "credential", - "is_active", - existing_type=sa.BOOLEAN(), - comment="Flag indicating if this credential is currently active and usable", - existing_nullable=False, - ) - op.alter_column( - "credential", - "id", - existing_type=sa.INTEGER(), - comment="Unique ID for the credential", - existing_nullable=False, - autoincrement=True, - ) - op.alter_column( - "credential", - "provider", - existing_type=sa.VARCHAR(), - comment="Provider name like 'openai', 'gemini'", - existing_nullable=False, - ) - op.alter_column( - "credential", - "credential", - existing_type=sa.VARCHAR(), - comment="Encrypted JSON string containing provider-specific API credentials", - existing_nullable=False, - ) - op.alter_column( - "credential", - "inserted_at", - existing_type=postgresql.TIMESTAMP(), - comment="Timestamp when the credential was created", - existing_nullable=False, - ) - op.alter_column( - "credential", - "updated_at", - existing_type=postgresql.TIMESTAMP(), - comment="Timestamp when the credential was last updated", - existing_nullable=False, - ) - # ### end Alembic commands ### - - -def downgrade(): - # ### commands auto generated by Alembic - please adjust! ### - op.alter_column( - "credential", - "updated_at", - existing_type=postgresql.TIMESTAMP(), - comment=None, - existing_comment="Timestamp when the credential was last updated", - existing_nullable=False, - ) - op.alter_column( - "credential", - "inserted_at", - existing_type=postgresql.TIMESTAMP(), - comment=None, - existing_comment="Timestamp when the credential was created", - existing_nullable=False, - ) - op.alter_column( - "credential", - "credential", - existing_type=sa.VARCHAR(), - comment=None, - existing_comment="Encrypted JSON string containing provider-specific API credentials", - existing_nullable=False, - ) - op.alter_column( - "credential", - "provider", - existing_type=sa.VARCHAR(), - comment=None, - existing_comment="Provider name like 'openai', 'gemini'", - existing_nullable=False, - ) - op.alter_column( - "credential", - "id", - existing_type=sa.INTEGER(), - comment=None, - existing_comment="Unique ID for the credential", - existing_nullable=False, - autoincrement=True, - ) - op.alter_column( - "credential", - "is_active", - existing_type=sa.BOOLEAN(), - comment=None, - existing_comment="Flag indicating if this credential is currently active and usable", - existing_nullable=False, - ) - op.alter_column( - "credential", - "project_id", - existing_type=sa.INTEGER(), - comment=None, - existing_comment="Reference to the project", - existing_nullable=False, - ) - op.alter_column( - "credential", - "organization_id", - existing_type=sa.INTEGER(), - comment=None, - existing_comment="Reference to the organization", - existing_nullable=False, - ) - # ### end Alembic commands ### From c7c4214a4e8d78581536a3df0eeeab2dd032a795 Mon Sep 17 00:00:00 2001 From: AkhileshNegi Date: Thu, 11 Dec 2025 14:07:27 +0530 Subject: [PATCH 07/13] cleanup --- backend/app/models/assistants.py | 11 ++++------- backend/app/models/credentials.py | 12 +++++------- backend/app/models/fine_tuning.py | 8 ++------ backend/app/models/model_evaluation.py | 8 ++------ backend/app/models/openai_conversation.py | 11 ++++------- 5 files changed, 17 insertions(+), 33 deletions(-) diff --git a/backend/app/models/assistants.py b/backend/app/models/assistants.py index 42d7c99a6..bb9b3318b 100644 --- a/backend/app/models/assistants.py +++ b/backend/app/models/assistants.py @@ -1,15 +1,12 @@ from datetime import datetime -from typing import TYPE_CHECKING from sqlalchemy import Column, String, Text from sqlalchemy.dialects.postgresql import ARRAY from sqlmodel import Field, Relationship, SQLModel, UniqueConstraint from app.core.util import now - -if TYPE_CHECKING: - from .organization import Organization - from .project import Project +from app.models.organization import Organization +from app.models.project import Project class AssistantBase(SQLModel): @@ -115,8 +112,8 @@ class Assistant(AssistantBase, table=True): ) # Relationships - project: "Project" = Relationship(back_populates="assistants") - organization: "Organization" = Relationship(back_populates="assistants") + project: Project = Relationship(back_populates="assistants") + organization: Organization = Relationship(back_populates="assistants") class AssistantCreate(SQLModel): diff --git a/backend/app/models/credentials.py b/backend/app/models/credentials.py index 31c85303a..6e284dbf9 100644 --- a/backend/app/models/credentials.py +++ b/backend/app/models/credentials.py @@ -1,14 +1,12 @@ from datetime import datetime -from typing import TYPE_CHECKING, Any +from typing import Any import sqlalchemy as sa from sqlmodel import Field, Relationship, SQLModel from app.core.util import now - -if TYPE_CHECKING: - from .organization import Organization - from .project import Project +from app.models.organization import Organization +from app.models.project import Project class CredsBase(SQLModel): @@ -112,8 +110,8 @@ class Credential(CredsBase, table=True): ) # Relationships - organization: "Organization | None" = Relationship(back_populates="creds") - project: "Project | None" = Relationship(back_populates="creds") + organization: Organization | None = Relationship(back_populates="creds") + project: Project | None = Relationship(back_populates="creds") def to_public(self) -> "CredsPublic": """Convert the database model to a public model with decrypted credentials.""" diff --git a/backend/app/models/fine_tuning.py b/backend/app/models/fine_tuning.py index 5cad32378..3647475f2 100644 --- a/backend/app/models/fine_tuning.py +++ b/backend/app/models/fine_tuning.py @@ -1,6 +1,5 @@ from datetime import datetime from enum import Enum -from typing import TYPE_CHECKING from uuid import UUID from pydantic import field_validator @@ -8,10 +7,7 @@ from sqlmodel import Field, Relationship, SQLModel from app.core.util import now - -if TYPE_CHECKING: - from .model_evaluation import ModelEvaluation - from .project import Project +from app.models.project import Project class FineTuningStatus(str, Enum): @@ -147,7 +143,7 @@ class Fine_Tuning(FineTuningJobBase, table=True): ) # Relationships - project: "Project" = Relationship(back_populates="fine_tuning") + project: Project = Relationship(back_populates="fine_tuning") model_evaluation: "ModelEvaluation" = Relationship(back_populates="fine_tuning") diff --git a/backend/app/models/model_evaluation.py b/backend/app/models/model_evaluation.py index 72bbdaf0f..46cea4191 100644 --- a/backend/app/models/model_evaluation.py +++ b/backend/app/models/model_evaluation.py @@ -1,6 +1,5 @@ from datetime import datetime from enum import Enum -from typing import TYPE_CHECKING from uuid import UUID from pydantic import field_validator @@ -9,10 +8,7 @@ from sqlmodel import Field, Relationship, SQLModel from app.core.util import now - -if TYPE_CHECKING: - from .fine_tuning import Fine_Tuning - from .project import Project +from app.models.project import Project class ModelEvaluationStatus(str, Enum): @@ -133,7 +129,7 @@ class ModelEvaluation(ModelEvaluationBase, table=True): ) # Relationships - project: "Project" = Relationship() + project: Project = Relationship() fine_tuning: "Fine_Tuning" = Relationship(back_populates="model_evaluation") diff --git a/backend/app/models/openai_conversation.py b/backend/app/models/openai_conversation.py index 7305aec4b..54d0d932e 100644 --- a/backend/app/models/openai_conversation.py +++ b/backend/app/models/openai_conversation.py @@ -1,15 +1,12 @@ import re from datetime import datetime -from typing import TYPE_CHECKING from pydantic import field_validator from sqlmodel import Field, Relationship, SQLModel from app.core.util import now - -if TYPE_CHECKING: - from .organization import Organization - from .project import Project +from app.models.organization import Organization +from app.models.project import Project def validate_response_id_pattern(v: str) -> str: @@ -137,8 +134,8 @@ class OpenAIConversation(OpenAIConversationBase, table=True): ) # Relationships - project: "Project" = Relationship(back_populates="openai_conversations") - organization: "Organization" = Relationship(back_populates="openai_conversations") + project: Project = Relationship(back_populates="openai_conversations") + organization: Organization = Relationship(back_populates="openai_conversations") class OpenAIConversationCreate(SQLModel): From ee3b646b3ad2f4af65bb72d2db37d49da0939bb1 Mon Sep 17 00:00:00 2001 From: AkhileshNegi Date: Thu, 11 Dec 2025 15:45:42 +0530 Subject: [PATCH 08/13] coderabbit suggestions --- backend/app/models/collection_job.py | 1 + backend/app/models/document_collection.py | 10 ++- backend/app/models/evaluation.py | 2 + backend/app/models/openai_conversation.py | 81 +++++++++-------------- backend/app/models/threads.py | 5 +- 5 files changed, 47 insertions(+), 52 deletions(-) diff --git a/backend/app/models/collection_job.py b/backend/app/models/collection_job.py index d60817a7f..60be4eec7 100644 --- a/backend/app/models/collection_job.py +++ b/backend/app/models/collection_job.py @@ -44,6 +44,7 @@ class CollectionJob(SQLModel, table=True): sa_column_kwargs={"comment": "Type of operation (CREATE, DELETE)"}, ) task_id: str = Field( + default=None, nullable=True, sa_column_kwargs={"comment": "Celery task ID for async processing"}, ) diff --git a/backend/app/models/document_collection.py b/backend/app/models/document_collection.py index 93db6df31..4ec1de7f3 100644 --- a/backend/app/models/document_collection.py +++ b/backend/app/models/document_collection.py @@ -17,11 +17,17 @@ class DocumentCollection(SQLModel, table=True): foreign_key="document.id", nullable=False, ondelete="CASCADE", - sa_column_kwargs={"comment": "Reference to the document"}, + sa_column_kwargs={ + "comment": "Reference to the document", + "ondelete": "CASCADE", + }, ) collection_id: UUID = Field( foreign_key="collection.id", nullable=False, ondelete="CASCADE", - sa_column_kwargs={"comment": "Reference to the collection"}, + sa_column_kwargs={ + "comment": "Reference to the collection", + "ondelete": "CASCADE", + }, ) diff --git a/backend/app/models/evaluation.py b/backend/app/models/evaluation.py index cac6a4582..f99fbb27e 100644 --- a/backend/app/models/evaluation.py +++ b/backend/app/models/evaluation.py @@ -290,11 +290,13 @@ class EvaluationRun(SQLModel, table=True): # Timestamps inserted_at: datetime = Field( default_factory=now, + nullable=False, description="The timestamp when the evaluation run was started", sa_column_kwargs={"comment": "Timestamp when the evaluation run was started"}, ) updated_at: datetime = Field( default_factory=now, + nullable=False, description="The timestamp when the evaluation run was last updated", sa_column_kwargs={ "comment": "Timestamp when the evaluation run was last updated" diff --git a/backend/app/models/openai_conversation.py b/backend/app/models/openai_conversation.py index 54d0d932e..c319f9de7 100644 --- a/backend/app/models/openai_conversation.py +++ b/backend/app/models/openai_conversation.py @@ -22,19 +22,37 @@ def validate_response_id_pattern(v: str) -> str: class OpenAIConversationBase(SQLModel): # usually follow the pattern of resp_688704e41190819db512c30568xxxxxxx - response_id: str = Field(index=True, min_length=10) + response_id: str = Field( + index=True, + min_length=10, + sa_column_kwargs={"comment": "OpenAI response identifier"}, + ) ancestor_response_id: str = Field( index=True, description="Ancestor response ID for conversation threading", + sa_column_kwargs={"comment": "Root response ID for conversation threading"}, ) previous_response_id: str | None = Field( - default=None, index=True, description="Previous response ID in the conversation" + default=None, + index=True, + description="Previous response ID in the conversation", + sa_column_kwargs={"comment": "Previous response ID in the conversation chain"}, + ) + user_question: str = Field( + description="User's question/input", + sa_column_kwargs={"comment": "User's question or input text"}, + ) + response: str | None = Field( + default=None, + description="AI response", + sa_column_kwargs={"comment": "Response generated by OpenAI"}, ) - user_question: str = Field(description="User's question/input") - response: str | None = Field(default=None, description="AI response") # there are models with small name like o1 and usually fine tuned models have long names model: str = Field( - description="The model used for the response", min_length=1, max_length=150 + description="The model used for the response", + min_length=1, + max_length=150, + sa_column_kwargs={"comment": "Model used to generate the response"}, ) # usually follow the pattern of asst_WD9bumYqTtpSvxxxxx assistant_id: str | None = Field( @@ -42,12 +60,19 @@ class OpenAIConversationBase(SQLModel): description="The assistant ID used", min_length=10, max_length=50, + sa_column_kwargs={"comment": "OpenAI assistant identifier if used"}, ) project_id: int = Field( - foreign_key="project.id", nullable=False, ondelete="CASCADE" + foreign_key="project.id", + nullable=False, + ondelete="CASCADE", + sa_column_kwargs={"comment": "Reference to the project"}, ) organization_id: int = Field( - foreign_key="organization.id", nullable=False, ondelete="CASCADE" + foreign_key="organization.id", + nullable=False, + ondelete="CASCADE", + sa_column_kwargs={"comment": "Reference to the organization"}, ) @field_validator("response_id", "ancestor_response_id", "previous_response_id") @@ -66,54 +91,12 @@ class OpenAIConversation(OpenAIConversationBase, table=True): primary_key=True, sa_column_kwargs={"comment": "Unique identifier for the conversation record"}, ) - response_id: str = Field( - index=True, - min_length=10, - sa_column_kwargs={"comment": "OpenAI response identifier"}, - ) - ancestor_response_id: str = Field( - index=True, - sa_column_kwargs={"comment": "Root response ID for conversation threading"}, - ) - previous_response_id: str | None = Field( - default=None, - index=True, - sa_column_kwargs={"comment": "Previous response ID in the conversation chain"}, - ) - user_question: str = Field( - sa_column_kwargs={"comment": "User's question or input text"}, - ) - response: str | None = Field( - default=None, - sa_column_kwargs={"comment": "Response generated by OpenAI"}, - ) - model: str = Field( - sa_column_kwargs={"comment": "Model used to generate the response"}, - ) - assistant_id: str | None = Field( - default=None, - sa_column_kwargs={"comment": "OpenAI assistant identifier if used"}, - ) is_deleted: bool = Field( default=False, nullable=False, sa_column_kwargs={"comment": "Soft delete flag"}, ) - # Foreign keys - project_id: int = Field( - foreign_key="project.id", - nullable=False, - ondelete="CASCADE", - sa_column_kwargs={"comment": "Reference to the project"}, - ) - organization_id: int = Field( - foreign_key="organization.id", - nullable=False, - ondelete="CASCADE", - sa_column_kwargs={"comment": "Reference to the organization"}, - ) - # Timestamps inserted_at: datetime = Field( default_factory=now, diff --git a/backend/app/models/threads.py b/backend/app/models/threads.py index 37d605087..2753f72cb 100644 --- a/backend/app/models/threads.py +++ b/backend/app/models/threads.py @@ -53,5 +53,8 @@ class OpenAI_Thread(OpenAIThreadBase, table=True): ) updated_at: datetime = Field( default_factory=now, - sa_column_kwargs={"comment": "Timestamp when the record was last updated"}, + sa_column_kwargs={ + "comment": "Timestamp when the record was last updated", + "onupdate": now, + }, ) From ec359a1f202992f53b3ed8221d046c6ee4fb3b34 Mon Sep 17 00:00:00 2001 From: AkhileshNegi Date: Thu, 11 Dec 2025 16:05:05 +0530 Subject: [PATCH 09/13] cleanups --- backend/app/models/fine_tuning.py | 7 +++++++ backend/app/models/model_evaluation.py | 12 +++++++++++- backend/app/models/user.py | 2 +- 3 files changed, 19 insertions(+), 2 deletions(-) diff --git a/backend/app/models/fine_tuning.py b/backend/app/models/fine_tuning.py index 3647475f2..6b174da9b 100644 --- a/backend/app/models/fine_tuning.py +++ b/backend/app/models/fine_tuning.py @@ -61,6 +61,7 @@ class Fine_Tuning(FineTuningJobBase, table=True): ) base_model: str = Field( nullable=False, + description="Base model for fine-tuning", sa_column_kwargs={"comment": "Base model used for fine-tuning"}, ) split_ratio: float = Field( @@ -78,26 +79,32 @@ class Fine_Tuning(FineTuningJobBase, table=True): ) provider_job_id: str | None = Field( default=None, + description="Fine tuning Job ID returned by OpenAI", sa_column_kwargs={"comment": "Fine-tuning job ID returned by the provider"}, ) status: FineTuningStatus = Field( default=FineTuningStatus.pending, + description="Fine tuning status", sa_column_kwargs={"comment": "Current status of the fine-tuning job"}, ) fine_tuned_model: str | None = Field( default=None, + description="Final fine tuned model name from OpenAI", sa_column_kwargs={"comment": "Name of the resulting fine-tuned model"}, ) train_data_s3_object: str | None = Field( default=None, + description="S3 URI of the training data stored in S3", sa_column_kwargs={"comment": "S3 URI of the training data"}, ) test_data_s3_object: str | None = Field( default=None, + description="S3 URI of the testing data stored in S3", sa_column_kwargs={"comment": "S3 URI of the testing data"}, ) error_message: str | None = Field( default=None, + description="Error message for when something failed", sa_column_kwargs={"comment": "Error message if the job failed"}, ) is_deleted: bool = Field( diff --git a/backend/app/models/model_evaluation.py b/backend/app/models/model_evaluation.py index 46cea4191..5354d6c35 100644 --- a/backend/app/models/model_evaluation.py +++ b/backend/app/models/model_evaluation.py @@ -45,44 +45,54 @@ class ModelEvaluation(ModelEvaluationBase, table=True): sa_column_kwargs={"comment": "Unique identifier for the evaluation"}, ) fine_tuned_model: str = Field( + description="Fine-tuned model name from OpenAI", sa_column_kwargs={"comment": "Name of the fine-tuned model being evaluated"}, ) test_data_s3_object: str = Field( + description="S3 URI of the testing data stored in S3", sa_column_kwargs={"comment": "S3 URI of the testing data"}, ) base_model: str = Field( nullable=False, + description="Base model used for fine-tuning", sa_column_kwargs={"comment": "Base model used for fine-tuning"}, ) split_ratio: float = Field( nullable=False, + description="The ratio the dataset was divided in", sa_column_kwargs={"comment": "Train/test split ratio used"}, ) system_prompt: str = Field( + description="System prompt used during evaluation", sa_column=Column( Text, nullable=False, comment="System prompt used during evaluation" - ) + ), ) score: dict[str, float] | None = Field( + description="Evaluation scores per metric (e.g., {'mcc': 0.85})", sa_column=Column( JSON, nullable=True, comment="Evaluation scores per metric (e.g., MCC)" ), ) prediction_data_s3_object: str | None = Field( default=None, + description="S3 URL where the prediction data generated by the fine-tuned model is stored", sa_column_kwargs={"comment": "S3 URL where the prediction data is stored"}, ) status: ModelEvaluationStatus = Field( default=ModelEvaluationStatus.pending, + description="Current status of the evaluation", sa_column_kwargs={"comment": "Current status of the evaluation"}, ) error_message: str | None = Field( default=None, + description="Error message if evaluation failed", sa_column_kwargs={"comment": "Error message if evaluation failed"}, ) is_deleted: bool = Field( default=False, nullable=False, + description="Soft delete flag", sa_column_kwargs={"comment": "Soft delete flag"}, ) diff --git a/backend/app/models/user.py b/backend/app/models/user.py index 413a1ab96..b3d309741 100644 --- a/backend/app/models/user.py +++ b/backend/app/models/user.py @@ -10,7 +10,7 @@ class UserBase(SQLModel): unique=True, index=True, max_length=255, - sa_column_kwargs={"comment": "User's email address (unique identifier)"}, + sa_column_kwargs={"comment": "User's email address"}, ) is_active: bool = Field( default=True, From 7214199d7ad534b6643ec6090fe456a91f9a9773 Mon Sep 17 00:00:00 2001 From: AkhileshNegi Date: Fri, 12 Dec 2025 09:57:16 +0530 Subject: [PATCH 10/13] cleanups --- backend/app/models/document_collection.py | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/backend/app/models/document_collection.py b/backend/app/models/document_collection.py index 4ec1de7f3..93db6df31 100644 --- a/backend/app/models/document_collection.py +++ b/backend/app/models/document_collection.py @@ -17,17 +17,11 @@ class DocumentCollection(SQLModel, table=True): foreign_key="document.id", nullable=False, ondelete="CASCADE", - sa_column_kwargs={ - "comment": "Reference to the document", - "ondelete": "CASCADE", - }, + sa_column_kwargs={"comment": "Reference to the document"}, ) collection_id: UUID = Field( foreign_key="collection.id", nullable=False, ondelete="CASCADE", - sa_column_kwargs={ - "comment": "Reference to the collection", - "ondelete": "CASCADE", - }, + sa_column_kwargs={"comment": "Reference to the collection"}, ) From 89792061a0203b2f63cb55173dc44adb652e6d9d Mon Sep 17 00:00:00 2001 From: AkhileshNegi Date: Fri, 12 Dec 2025 16:17:43 +0530 Subject: [PATCH 11/13] generated new migration with updated format --- ...d_db_comments.py => 040_add_db_comments.py} | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) rename backend/app/alembic/versions/{707b8035b64c_add_db_comments.py => 040_add_db_comments.py} (99%) diff --git a/backend/app/alembic/versions/707b8035b64c_add_db_comments.py b/backend/app/alembic/versions/040_add_db_comments.py similarity index 99% rename from backend/app/alembic/versions/707b8035b64c_add_db_comments.py rename to backend/app/alembic/versions/040_add_db_comments.py index 265c1d55e..4b96d017e 100644 --- a/backend/app/alembic/versions/707b8035b64c_add_db_comments.py +++ b/backend/app/alembic/versions/040_add_db_comments.py @@ -1,8 +1,8 @@ """add_db_comments -Revision ID: 707b8035b64c -Revises: eed36ae3c79a -Create Date: 2025-12-11 13:55:04.965381 +Revision ID: 040 +Revises: 039 +Create Date: 2025-12-12 16:17:16.115000 """ from alembic import op @@ -11,8 +11,8 @@ from sqlalchemy.dialects import postgresql # revision identifiers, used by Alembic. -revision = "707b8035b64c" -down_revision = "eed36ae3c79a" +revision = "040" +down_revision = "039" branch_labels = None depends_on = None @@ -96,6 +96,7 @@ def upgrade(): comment="Unique identifier for the batch job", existing_nullable=False, autoincrement=True, + existing_server_default=sa.text("nextval('batch_job_id_seq'::regclass)"), ) op.alter_column( "batch_job", @@ -1381,7 +1382,7 @@ def upgrade(): "user", "email", existing_type=sa.VARCHAR(length=255), - comment="User's email address (unique identifier)", + comment="User's email address", existing_nullable=False, ) op.alter_column( @@ -1412,7 +1413,6 @@ def upgrade(): comment="Unique identifier for the user", existing_nullable=False, autoincrement=True, - existing_server_default=sa.text("nextval('user_id_seq'::regclass)"), ) op.alter_column( "user", @@ -1442,7 +1442,6 @@ def downgrade(): existing_comment="Unique identifier for the user", existing_nullable=False, autoincrement=True, - existing_server_default=sa.text("nextval('user_id_seq'::regclass)"), ) op.alter_column( "user", @@ -1473,7 +1472,7 @@ def downgrade(): "email", existing_type=sa.VARCHAR(length=255), comment=None, - existing_comment="User's email address (unique identifier)", + existing_comment="User's email address", existing_nullable=False, ) op.alter_column( @@ -2942,6 +2941,7 @@ def downgrade(): existing_comment="Unique identifier for the batch job", existing_nullable=False, autoincrement=True, + existing_server_default=sa.text("nextval('batch_job_id_seq'::regclass)"), ) op.alter_column( "apikey", From 1f281d0ad02f08f0aa9cc33507552cd025602cb3 Mon Sep 17 00:00:00 2001 From: AkhileshNegi Date: Fri, 12 Dec 2025 16:30:08 +0530 Subject: [PATCH 12/13] added comment for config as well --- .../alembic/versions/040_add_db_comments.py | 229 +++++++++++++++++- backend/app/models/config/config.py | 41 +++- backend/app/models/config/version.py | 39 ++- 3 files changed, 295 insertions(+), 14 deletions(-) diff --git a/backend/app/alembic/versions/040_add_db_comments.py b/backend/app/alembic/versions/040_add_db_comments.py index 4b96d017e..12f5d717f 100644 --- a/backend/app/alembic/versions/040_add_db_comments.py +++ b/backend/app/alembic/versions/040_add_db_comments.py @@ -2,7 +2,7 @@ Revision ID: 040 Revises: 039 -Create Date: 2025-12-12 16:17:16.115000 +Create Date: 2025-12-12 16:29:47.694694 """ from alembic import op @@ -326,6 +326,111 @@ def upgrade(): comment="Timestamp when the job was last updated", existing_nullable=False, ) + op.alter_column( + "config", + "name", + existing_type=sa.VARCHAR(length=128), + comment="Configuration name", + existing_nullable=False, + ) + op.alter_column( + "config", + "description", + existing_type=sa.VARCHAR(length=512), + comment="Description of the configuration", + existing_nullable=True, + ) + op.alter_column( + "config", + "id", + existing_type=sa.UUID(), + comment="Unique identifier for the configuration", + existing_nullable=False, + ) + op.alter_column( + "config", + "project_id", + existing_type=sa.INTEGER(), + comment="Reference to the project", + existing_nullable=False, + ) + op.alter_column( + "config", + "inserted_at", + existing_type=postgresql.TIMESTAMP(), + comment="Timestamp when the configuration was created", + existing_nullable=False, + ) + op.alter_column( + "config", + "updated_at", + existing_type=postgresql.TIMESTAMP(), + comment="Timestamp when the configuration was last updated", + existing_nullable=False, + ) + op.alter_column( + "config", + "deleted_at", + existing_type=postgresql.TIMESTAMP(), + comment="Timestamp when the configuration was deleted", + existing_nullable=True, + ) + op.alter_column( + "config_version", + "config_blob", + existing_type=postgresql.JSONB(astext_type=sa.Text()), + comment="Provider-specific configuration parameters (temperature, max_tokens, etc.)", + existing_nullable=False, + ) + op.alter_column( + "config_version", + "commit_message", + existing_type=sa.VARCHAR(length=512), + comment="Optional message describing the changes in this version", + existing_nullable=True, + ) + op.alter_column( + "config_version", + "id", + existing_type=sa.UUID(), + comment="Unique identifier for the configuration version", + existing_nullable=False, + ) + op.alter_column( + "config_version", + "config_id", + existing_type=sa.UUID(), + comment="Reference to the parent configuration", + existing_nullable=False, + ) + op.alter_column( + "config_version", + "version", + existing_type=sa.INTEGER(), + comment="Version number starting at 1", + existing_nullable=False, + ) + op.alter_column( + "config_version", + "inserted_at", + existing_type=postgresql.TIMESTAMP(), + comment="Timestamp when the version was created", + existing_nullable=False, + ) + op.alter_column( + "config_version", + "updated_at", + existing_type=postgresql.TIMESTAMP(), + comment="Timestamp when the version was last updated", + existing_nullable=False, + ) + op.alter_column( + "config_version", + "deleted_at", + existing_type=postgresql.TIMESTAMP(), + comment="Timestamp when the version was soft-deleted", + existing_nullable=True, + ) op.alter_column( "credential", "is_active", @@ -1413,6 +1518,7 @@ def upgrade(): comment="Unique identifier for the user", existing_nullable=False, autoincrement=True, + existing_server_default=sa.text("nextval('user_id_seq'::regclass)"), ) op.alter_column( "user", @@ -1442,6 +1548,7 @@ def downgrade(): existing_comment="Unique identifier for the user", existing_nullable=False, autoincrement=True, + existing_server_default=sa.text("nextval('user_id_seq'::regclass)"), ) op.alter_column( "user", @@ -2673,6 +2780,126 @@ def downgrade(): existing_comment="Flag indicating if this credential is currently active and usable", existing_nullable=False, ) + op.alter_column( + "config_version", + "deleted_at", + existing_type=postgresql.TIMESTAMP(), + comment=None, + existing_comment="Timestamp when the version was soft-deleted", + existing_nullable=True, + ) + op.alter_column( + "config_version", + "updated_at", + existing_type=postgresql.TIMESTAMP(), + comment=None, + existing_comment="Timestamp when the version was last updated", + existing_nullable=False, + ) + op.alter_column( + "config_version", + "inserted_at", + existing_type=postgresql.TIMESTAMP(), + comment=None, + existing_comment="Timestamp when the version was created", + existing_nullable=False, + ) + op.alter_column( + "config_version", + "version", + existing_type=sa.INTEGER(), + comment=None, + existing_comment="Version number starting at 1", + existing_nullable=False, + ) + op.alter_column( + "config_version", + "config_id", + existing_type=sa.UUID(), + comment=None, + existing_comment="Reference to the parent configuration", + existing_nullable=False, + ) + op.alter_column( + "config_version", + "id", + existing_type=sa.UUID(), + comment=None, + existing_comment="Unique identifier for the configuration version", + existing_nullable=False, + ) + op.alter_column( + "config_version", + "commit_message", + existing_type=sa.VARCHAR(length=512), + comment=None, + existing_comment="Optional message describing the changes in this version", + existing_nullable=True, + ) + op.alter_column( + "config_version", + "config_blob", + existing_type=postgresql.JSONB(astext_type=sa.Text()), + comment=None, + existing_comment="Provider-specific configuration parameters (temperature, max_tokens, etc.)", + existing_nullable=False, + ) + op.alter_column( + "config", + "deleted_at", + existing_type=postgresql.TIMESTAMP(), + comment=None, + existing_comment="Timestamp when the configuration was deleted", + existing_nullable=True, + ) + op.alter_column( + "config", + "updated_at", + existing_type=postgresql.TIMESTAMP(), + comment=None, + existing_comment="Timestamp when the configuration was last updated", + existing_nullable=False, + ) + op.alter_column( + "config", + "inserted_at", + existing_type=postgresql.TIMESTAMP(), + comment=None, + existing_comment="Timestamp when the configuration was created", + existing_nullable=False, + ) + op.alter_column( + "config", + "project_id", + existing_type=sa.INTEGER(), + comment=None, + existing_comment="Reference to the project", + existing_nullable=False, + ) + op.alter_column( + "config", + "id", + existing_type=sa.UUID(), + comment=None, + existing_comment="Unique identifier for the configuration", + existing_nullable=False, + ) + op.alter_column( + "config", + "description", + existing_type=sa.VARCHAR(length=512), + comment=None, + existing_comment="Description of the configuration", + existing_nullable=True, + ) + op.alter_column( + "config", + "name", + existing_type=sa.VARCHAR(length=128), + comment=None, + existing_comment="Configuration name", + existing_nullable=False, + ) op.alter_column( "collection_jobs", "updated_at", diff --git a/backend/app/models/config/config.py b/backend/app/models/config/config.py index 18bbbcdfa..9155254a2 100644 --- a/backend/app/models/config/config.py +++ b/backend/app/models/config/config.py @@ -13,9 +13,17 @@ class ConfigBase(SQLModel): """Base model for LLM configuration metadata""" - name: str = Field(min_length=1, max_length=128, description="Config name") + name: str = Field( + min_length=1, + max_length=128, + description="Config name", + sa_column_kwargs={"comment": "Configuration name"}, + ) description: str | None = Field( - default=None, max_length=512, description="Optional description" + default=None, + max_length=512, + description="Description of the configuration", + sa_column_kwargs={"comment": "Description of the configuration"}, ) @@ -39,18 +47,37 @@ class Config(ConfigBase, table=True): ), ) - id: UUID = Field(default_factory=uuid4, primary_key=True) + id: UUID = Field( + default_factory=uuid4, + primary_key=True, + sa_column_kwargs={"comment": "Unique identifier for the configuration"}, + ) project_id: int = Field( foreign_key="project.id", nullable=False, ondelete="CASCADE", + sa_column_kwargs={"comment": "Reference to the project"}, ) - inserted_at: datetime = Field(default_factory=now, nullable=False) - updated_at: datetime = Field(default_factory=now, nullable=False) + inserted_at: datetime = Field( + default_factory=now, + nullable=False, + sa_column_kwargs={"comment": "Timestamp when the configuration was created"}, + ) + updated_at: datetime = Field( + default_factory=now, + nullable=False, + sa_column_kwargs={ + "comment": "Timestamp when the configuration was last updated" + }, + ) - deleted_at: datetime | None = Field(default=None, nullable=True) + deleted_at: datetime | None = Field( + default=None, + nullable=True, + sa_column_kwargs={"comment": "Timestamp when the configuration was deleted"}, + ) class ConfigCreate(ConfigBase): @@ -61,7 +88,7 @@ class ConfigCreate(ConfigBase): commit_message: str | None = Field( default=None, max_length=512, - description="Optional message describing the changes in this version", + description="Message describing the changes in this version", ) @field_validator("config_blob") diff --git a/backend/app/models/config/version.py b/backend/app/models/config/version.py index bb44531d8..5a374582e 100644 --- a/backend/app/models/config/version.py +++ b/backend/app/models/config/version.py @@ -13,13 +13,20 @@ class ConfigVersionBase(SQLModel): config_blob: dict[str, Any] = Field( - sa_column=sa.Column(JSONB, nullable=False), + sa_column=sa.Column( + JSONB, + nullable=False, + comment="Provider-specific configuration parameters (temperature, max_tokens, etc.)", + ), description="Provider-specific configuration parameters (temperature, max_tokens, etc.)", ) commit_message: str | None = Field( default=None, max_length=512, description="Optional message describing the changes in this version", + sa_column_kwargs={ + "comment": "Optional message describing the changes in this version" + }, ) @field_validator("config_blob") @@ -43,21 +50,41 @@ class ConfigVersion(ConfigVersionBase, table=True): ), ) - id: UUID = Field(default_factory=uuid4, primary_key=True) + id: UUID = Field( + default_factory=uuid4, + primary_key=True, + sa_column_kwargs={"comment": "Unique identifier for the configuration version"}, + ) config_id: UUID = Field( foreign_key="config.id", nullable=False, ondelete="CASCADE", + sa_column_kwargs={"comment": "Reference to the parent configuration"}, ) version: int = Field( - nullable=False, description="Version number starting at 1", ge=1 + nullable=False, + description="Version number starting at 1", + ge=1, + sa_column_kwargs={"comment": "Version number starting at 1"}, ) - inserted_at: datetime = Field(default_factory=now, nullable=False) - updated_at: datetime = Field(default_factory=now, nullable=False) + inserted_at: datetime = Field( + default_factory=now, + nullable=False, + sa_column_kwargs={"comment": "Timestamp when the version was created"}, + ) + updated_at: datetime = Field( + default_factory=now, + nullable=False, + sa_column_kwargs={"comment": "Timestamp when the version was last updated"}, + ) - deleted_at: datetime | None = Field(default=None, nullable=True) + deleted_at: datetime | None = Field( + default=None, + nullable=True, + sa_column_kwargs={"comment": "Timestamp when the version was soft-deleted"}, + ) class ConfigVersionCreate(ConfigVersionBase): From c0217a75353cf7fdf37e2da41d09cca0a670dce6 Mon Sep 17 00:00:00 2001 From: AkhileshNegi Date: Mon, 15 Dec 2025 13:17:14 +0530 Subject: [PATCH 13/13] fixed minor comments --- .../app/alembic/versions/040_add_db_comments.py | 16 +++++++++------- backend/app/models/collection_job.py | 3 ++- backend/app/models/config/config.py | 2 +- backend/app/models/fine_tuning.py | 8 ++++---- backend/app/models/model_evaluation.py | 2 +- 5 files changed, 17 insertions(+), 14 deletions(-) diff --git a/backend/app/alembic/versions/040_add_db_comments.py b/backend/app/alembic/versions/040_add_db_comments.py index 12f5d717f..18a4f28e8 100644 --- a/backend/app/alembic/versions/040_add_db_comments.py +++ b/backend/app/alembic/versions/040_add_db_comments.py @@ -2,7 +2,7 @@ Revision ID: 040 Revises: 039 -Create Date: 2025-12-12 16:29:47.694694 +Create Date: 2025-12-15 13:17:01.138399 """ from alembic import op @@ -847,6 +847,7 @@ def upgrade(): comment="Unique identifier for the fine-tuning job", existing_nullable=False, autoincrement=True, + existing_server_default=sa.text("nextval('fine_tuning_id_seq'::regclass)"), ) op.alter_column( "fine_tuning", @@ -876,14 +877,14 @@ def upgrade(): "fine_tuning", "train_data_s3_object", existing_type=sa.VARCHAR(), - comment="S3 URI of the training data", + comment="S3 URL of the training data", existing_nullable=True, ) op.alter_column( "fine_tuning", "test_data_s3_object", existing_type=sa.VARCHAR(), - comment="S3 URI of the testing data", + comment="S3 URL of the testing data", existing_nullable=True, ) op.alter_column( @@ -1004,7 +1005,7 @@ def upgrade(): "model_evaluation", "id", existing_type=sa.INTEGER(), - comment="Unique identifier for the evaluation", + comment="Unique identifier for the model evaluation", existing_nullable=False, autoincrement=True, ) @@ -2128,7 +2129,7 @@ def downgrade(): "id", existing_type=sa.INTEGER(), comment=None, - existing_comment="Unique identifier for the evaluation", + existing_comment="Unique identifier for the model evaluation", existing_nullable=False, autoincrement=True, ) @@ -2267,7 +2268,7 @@ def downgrade(): "test_data_s3_object", existing_type=sa.VARCHAR(), comment=None, - existing_comment="S3 URI of the testing data", + existing_comment="S3 URL of the testing data", existing_nullable=True, ) op.alter_column( @@ -2275,7 +2276,7 @@ def downgrade(): "train_data_s3_object", existing_type=sa.VARCHAR(), comment=None, - existing_comment="S3 URI of the training data", + existing_comment="S3 URL of the training data", existing_nullable=True, ) op.alter_column( @@ -2313,6 +2314,7 @@ def downgrade(): existing_comment="Unique identifier for the fine-tuning job", existing_nullable=False, autoincrement=True, + existing_server_default=sa.text("nextval('fine_tuning_id_seq'::regclass)"), ) op.alter_column( "fine_tuning", diff --git a/backend/app/models/collection_job.py b/backend/app/models/collection_job.py index 60be4eec7..7c55e8562 100644 --- a/backend/app/models/collection_job.py +++ b/backend/app/models/collection_job.py @@ -43,7 +43,7 @@ class CollectionJob(SQLModel, table=True): description="Type of operation", sa_column_kwargs={"comment": "Type of operation (CREATE, DELETE)"}, ) - task_id: str = Field( + task_id: str | None = Field( default=None, nullable=True, sa_column_kwargs={"comment": "Celery task ID for async processing"}, @@ -54,6 +54,7 @@ class CollectionJob(SQLModel, table=True): sa_column_kwargs={"comment": "Tracing ID for correlating logs and traces"}, ) error_message: str | None = Field( + default=None, sa_column=Column( Text, nullable=True, comment="Error message if the job failed" ), diff --git a/backend/app/models/config/config.py b/backend/app/models/config/config.py index 9155254a2..df3577e45 100644 --- a/backend/app/models/config/config.py +++ b/backend/app/models/config/config.py @@ -88,7 +88,7 @@ class ConfigCreate(ConfigBase): commit_message: str | None = Field( default=None, max_length=512, - description="Message describing the changes in this version", + description="Optional message describing the changes in this version", ) @field_validator("config_blob") diff --git a/backend/app/models/fine_tuning.py b/backend/app/models/fine_tuning.py index 6b174da9b..d16576fdb 100644 --- a/backend/app/models/fine_tuning.py +++ b/backend/app/models/fine_tuning.py @@ -94,13 +94,13 @@ class Fine_Tuning(FineTuningJobBase, table=True): ) train_data_s3_object: str | None = Field( default=None, - description="S3 URI of the training data stored in S3", - sa_column_kwargs={"comment": "S3 URI of the training data"}, + description="S3 URL of the training data stored in S3", + sa_column_kwargs={"comment": "S3 URL of the training data"}, ) test_data_s3_object: str | None = Field( default=None, - description="S3 URI of the testing data stored in S3", - sa_column_kwargs={"comment": "S3 URI of the testing data"}, + description="S3 URL of the testing data stored in S3", + sa_column_kwargs={"comment": "S3 URL of the testing data"}, ) error_message: str | None = Field( default=None, diff --git a/backend/app/models/model_evaluation.py b/backend/app/models/model_evaluation.py index 5354d6c35..3dbadb1b3 100644 --- a/backend/app/models/model_evaluation.py +++ b/backend/app/models/model_evaluation.py @@ -42,7 +42,7 @@ class ModelEvaluation(ModelEvaluationBase, table=True): id: int = Field( primary_key=True, - sa_column_kwargs={"comment": "Unique identifier for the evaluation"}, + sa_column_kwargs={"comment": "Unique identifier for the model evaluation"}, ) fine_tuned_model: str = Field( description="Fine-tuned model name from OpenAI",