Commit 5be0bdc

Evaluation (#405)
* first stab at setting up langfuse evaluation
* bringing traces to sync
* cleanup
* getting it up and running with sync
* using utils
* cleanups
* added logs
* code refactoring
* cleanups
* moving to separate files
* using pydantic types
* Remove project_id dependency from evaluation endpoints
  - Remove project_id parameter from /evaluate endpoint
  - Update get_provider_credential calls to not require project_id
  - Credentials now retrieved via API key authentication
  - Clean up logging configuration and imports
  - Fix linting errors and update type annotations
* using hardcoded
* adding endpoint for uploading dataset
* added steps for starting evaluation using batchAPI
* added testcase
* using celery beat and evaluation batch
* first stab at running evaluation
* cleaning up traces in langfuse
* cleanup unnecessary code
* syncing with master changes
* moving to batch table
* checking out AWS
* cleanup migration
* added support for cosine similarity score
* first stab at pushing cosine to langfuse
* cleanup logs
* optimizing similarity
* added evaluation dataset
* update endpoints
* updated testcases
* using single migration file
* code cleanups
* few more cleanups and tests
* added support for sanitizing dataset name
* fix import issues in testcases
* fixing imports
* minor cleanups for evaluation
* passing project id as well
* updated testcases and error codes
* using util for file uploads
* optimizing cosine similarities
* added support for duplication factor limit
* cleanup for dataset id in evaluation
* file validations
* refactoring file structure
* Evaluation: Add cron job endpoint and script for periodic evaluation processing (#428)
* add cron job endpoint and script for periodic evaluation processing
* add cron job endpoint and update invocation script for periodic evaluation processing
* refactor: remove redundant dependency from evaluation cron job
* minor fixes
* cleanup cruds
* removed celery beat
* cleanup evaluation run update and context runs
* cleanup logs
* using response id
* type checking for clean code
* cleaner documentation
* added indexes
* removing unnecessary asyncs
* using get_langfuse_client instead
* update migration head
* refactoring and cleanups
* cleanup cron
* moving to env for cron
* formatting code
* updated endpoints

---------

Co-authored-by: Kartikeya Pophali <kartikeyapophali@gmail.com>
1 parent: 04103ad

43 files changed: 7,418 additions & 31 deletions


.env.example

Lines changed: 6 additions & 0 deletions
```diff
@@ -23,6 +23,12 @@ FIRST_SUPERUSER=superuser@example.com
 FIRST_SUPERUSER_PASSWORD=changethis
 EMAIL_TEST_USER="test@example.com"
 
+# API Base URL for cron scripts (defaults to http://localhost:8000 if not set)
+API_BASE_URL=http://localhost:8000
+
+# Cron interval in minutes (defaults to 5 minutes if not set)
+CRON_INTERVAL_MINUTES=5
+
 # Postgres
 POSTGRES_SERVER=localhost
 POSTGRES_PORT=5432
```
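The cron invocation script reads these values at startup. Below is a minimal sketch of how they might be consumed, assuming a `requests`-based script and a hypothetical processing route; the repo's actual script and endpoint path may differ:

```python
import os

import requests  # assumed HTTP client; the repo's actual script may differ

# Defaults mirror the comments in .env.example above.
API_BASE_URL = os.getenv("API_BASE_URL", "http://localhost:8000")
CRON_INTERVAL_MINUTES = int(os.getenv("CRON_INTERVAL_MINUTES", "5"))


def trigger_periodic_processing() -> None:
    """Call the (hypothetical) cron endpoint that advances pending evaluations."""
    resp = requests.post(f"{API_BASE_URL}/api/v1/evaluations/process", timeout=30)
    resp.raise_for_status()


if __name__ == "__main__":
    # CRON_INTERVAL_MINUTES is typically consumed by the crontab entry that
    # schedules this script (e.g. "*/5 * * * *"), not by the script body itself.
    trigger_periodic_processing()
```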
Lines changed: 249 additions & 0 deletions
```python
"""create_evaluation_run_table, batch_job_table, and evaluation_dataset_table

Revision ID: 6fe772038a5a
Revises: 219033c644de
Create Date: 2025-11-05 22:47:18.266070

"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
import sqlmodel.sql.sqltypes


# revision identifiers, used by Alembic.
revision = "6fe772038a5a"
down_revision = "219033c644de"
branch_labels = None
depends_on = None


def upgrade():
    # Create batch_job table first (as evaluation_run will reference it)
    op.create_table(
        "batch_job",
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column(
            "provider",
            sa.String(),
            nullable=False,
            comment="LLM provider name (e.g., 'openai', 'anthropic')",
        ),
        sa.Column(
            "job_type",
            sa.String(),
            nullable=False,
            comment="Type of batch job (e.g., 'evaluation', 'classification', 'embedding')",
        ),
        sa.Column(
            "config",
            postgresql.JSONB(astext_type=sa.Text()),
            nullable=False,
            server_default=sa.text("'{}'::jsonb"),
            comment="Complete batch configuration",
        ),
        sa.Column(
            "provider_batch_id",
            sa.String(),
            nullable=True,
            comment="Provider's batch job ID",
        ),
        sa.Column(
            "provider_file_id",
            sa.String(),
            nullable=True,
            comment="Provider's input file ID",
        ),
        sa.Column(
            "provider_output_file_id",
            sa.String(),
            nullable=True,
            comment="Provider's output file ID",
        ),
        sa.Column(
            "provider_status",
            sa.String(),
            nullable=True,
            comment="Provider-specific status (e.g., OpenAI: validating, in_progress, completed, failed)",
        ),
        sa.Column(
            "raw_output_url",
            sa.String(),
            nullable=True,
            comment="S3 URL of raw batch output file",
        ),
        sa.Column(
            "total_items",
            sa.Integer(),
            nullable=False,
            server_default=sa.text("0"),
            comment="Total number of items in the batch",
        ),
        sa.Column(
            "error_message",
            sa.Text(),
            nullable=True,
            comment="Error message if batch failed",
        ),
        sa.Column("organization_id", sa.Integer(), nullable=False),
        sa.Column("project_id", sa.Integer(), nullable=False),
        sa.Column("inserted_at", sa.DateTime(), nullable=False),
        sa.Column("updated_at", sa.DateTime(), nullable=False),
        sa.ForeignKeyConstraint(
            ["organization_id"], ["organization.id"], ondelete="CASCADE"
        ),
        sa.ForeignKeyConstraint(["project_id"], ["project.id"], ondelete="CASCADE"),
        sa.PrimaryKeyConstraint("id"),
    )
    op.create_index(
        op.f("ix_batch_job_job_type"), "batch_job", ["job_type"], unique=False
    )
    op.create_index(
        op.f("ix_batch_job_organization_id"),
        "batch_job",
        ["organization_id"],
        unique=False,
    )
    op.create_index(
        op.f("ix_batch_job_project_id"), "batch_job", ["project_id"], unique=False
    )
    op.create_index(
        "idx_batch_job_status_org",
        "batch_job",
        ["provider_status", "organization_id"],
        unique=False,
    )
    op.create_index(
        "idx_batch_job_status_project",
        "batch_job",
        ["provider_status", "project_id"],
        unique=False,
    )

    # Create evaluation_dataset table
    op.create_table(
        "evaluation_dataset",
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column("name", sqlmodel.sql.sqltypes.AutoString(), nullable=False),
        sa.Column("description", sqlmodel.sql.sqltypes.AutoString(), nullable=True),
        sa.Column(
            "dataset_metadata",
            postgresql.JSONB(astext_type=sa.Text()),
            nullable=False,
            server_default=sa.text("'{}'::jsonb"),
        ),
        sa.Column(
            "object_store_url", sqlmodel.sql.sqltypes.AutoString(), nullable=True
        ),
        sa.Column(
            "langfuse_dataset_id",
            sqlmodel.sql.sqltypes.AutoString(),
            nullable=True,
        ),
        sa.Column("organization_id", sa.Integer(), nullable=False),
        sa.Column("project_id", sa.Integer(), nullable=False),
        sa.Column("inserted_at", sa.DateTime(), nullable=False),
        sa.Column("updated_at", sa.DateTime(), nullable=False),
        sa.ForeignKeyConstraint(
            ["organization_id"], ["organization.id"], ondelete="CASCADE"
        ),
        sa.ForeignKeyConstraint(["project_id"], ["project.id"], ondelete="CASCADE"),
        sa.PrimaryKeyConstraint("id"),
        sa.UniqueConstraint(
            "name",
            "organization_id",
            "project_id",
            name="uq_evaluation_dataset_name_org_project",
        ),
    )
    op.create_index(
        op.f("ix_evaluation_dataset_name"),
        "evaluation_dataset",
        ["name"],
        unique=False,
    )

    # Create evaluation_run table with all columns and foreign key references
    op.create_table(
        "evaluation_run",
        sa.Column("run_name", sqlmodel.sql.sqltypes.AutoString(), nullable=False),
        sa.Column("dataset_name", sqlmodel.sql.sqltypes.AutoString(), nullable=False),
        sa.Column("config", sa.JSON(), nullable=False),
        sa.Column("batch_job_id", sa.Integer(), nullable=True),
        sa.Column(
            "embedding_batch_job_id",
            sa.Integer(),
            nullable=True,
            comment="Reference to the batch_job for embedding-based similarity scoring",
        ),
        sa.Column("dataset_id", sa.Integer(), nullable=False),
        sa.Column("status", sqlmodel.sql.sqltypes.AutoString(), nullable=False),
        sa.Column(
            "object_store_url", sqlmodel.sql.sqltypes.AutoString(), nullable=True
        ),
        sa.Column("total_items", sa.Integer(), nullable=False),
        sa.Column("score", sa.JSON(), nullable=True),
        sa.Column("error_message", sa.Text(), nullable=True),
        sa.Column("organization_id", sa.Integer(), nullable=False),
        sa.Column("project_id", sa.Integer(), nullable=False),
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column("inserted_at", sa.DateTime(), nullable=False),
        sa.Column("updated_at", sa.DateTime(), nullable=False),
        sa.ForeignKeyConstraint(
            ["batch_job_id"],
            ["batch_job.id"],
            ondelete="SET NULL",
        ),
        sa.ForeignKeyConstraint(
            ["embedding_batch_job_id"],
            ["batch_job.id"],
            name="fk_evaluation_run_embedding_batch_job_id",
            ondelete="SET NULL",
        ),
        sa.ForeignKeyConstraint(
            ["dataset_id"],
            ["evaluation_dataset.id"],
            name="fk_evaluation_run_dataset_id",
            ondelete="CASCADE",
        ),
        sa.ForeignKeyConstraint(
            ["organization_id"], ["organization.id"], ondelete="CASCADE"
        ),
        sa.ForeignKeyConstraint(["project_id"], ["project.id"], ondelete="CASCADE"),
        sa.PrimaryKeyConstraint("id"),
    )
    op.create_index(
        op.f("ix_evaluation_run_run_name"), "evaluation_run", ["run_name"], unique=False
    )
    op.create_index(
        "idx_eval_run_status_org",
        "evaluation_run",
        ["status", "organization_id"],
        unique=False,
    )
    op.create_index(
        "idx_eval_run_status_project",
        "evaluation_run",
        ["status", "project_id"],
        unique=False,
    )


def downgrade():
    # Drop evaluation_run table first (has foreign keys to batch_job and evaluation_dataset)
    op.drop_index("idx_eval_run_status_project", table_name="evaluation_run")
    op.drop_index("idx_eval_run_status_org", table_name="evaluation_run")
    op.drop_index(op.f("ix_evaluation_run_run_name"), table_name="evaluation_run")
    op.drop_table("evaluation_run")

    # Drop evaluation_dataset table
    op.drop_index(op.f("ix_evaluation_dataset_name"), table_name="evaluation_dataset")
    op.drop_table("evaluation_dataset")

    # Drop batch_job table
    op.drop_index("idx_batch_job_status_project", table_name="batch_job")
    op.drop_index("idx_batch_job_status_org", table_name="batch_job")
    op.drop_index(op.f("ix_batch_job_project_id"), table_name="batch_job")
    op.drop_index(op.f("ix_batch_job_organization_id"), table_name="batch_job")
    op.drop_table("batch_job")
```

backend/app/api/deps.py

Lines changed: 1 addition & 1 deletion
```diff
@@ -70,7 +70,7 @@ def get_current_user(
     if not user:
         raise HTTPException(status_code=404, detail="User not found")
     if not user.is_active:
-        raise HTTPException(status_code=400, detail="Inactive user")
+        raise HTTPException(status_code=403, detail="Inactive user")
 
     return user  # Return only User object
 
```
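A hedged regression-test sketch for this status-code change; the route, app import, and fixture are assumptions based on a typical FastAPI layout, not the repo's actual test suite:

```python
from fastapi.testclient import TestClient

from app.main import app  # assumed application entry point


def test_inactive_user_is_forbidden(inactive_user_token_headers: dict) -> None:
    # Hypothetical fixture: yields auth headers for a user with is_active=False.
    client = TestClient(app)
    resp = client.get("/api/v1/users/me", headers=inactive_user_token_headers)
    assert resp.status_code == 403  # was 400 before this commit
    assert resp.json()["detail"] == "Inactive user"
```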

Lines changed: 80 additions & 0 deletions
Start an evaluation using the OpenAI Batch API.

This endpoint:

1. Fetches the dataset from the database and validates that it has a Langfuse dataset ID
2. Creates an EvaluationRun record in the database
3. Fetches dataset items from Langfuse
4. Builds JSONL for batch processing (config is used as-is)
5. Creates a batch job via the generic batch infrastructure
6. Returns the evaluation run details with batch_job_id

The batch is processed asynchronously by Celery Beat (every 60s). Use GET /evaluations/{evaluation_id} to check progress.

## Request Body

- **dataset_id** (required): ID of the evaluation dataset (from /evaluations/datasets)
- **experiment_name** (required): Name for this evaluation experiment/run
- **config** (optional): Configuration dict used as-is in JSONL generation. Can include any OpenAI Responses API parameters, such as:
  - model: str (e.g., "gpt-4o", "gpt-5")
  - instructions: str
  - tools: list (e.g., [{"type": "file_search", "vector_store_ids": [...]}])
  - reasoning: dict (e.g., {"effort": "low"})
  - text: dict (e.g., {"verbosity": "low"})
  - temperature: float
  - include: list (e.g., ["file_search_call.results"])
  - Note: "input" is added automatically from the dataset
- **assistant_id** (optional): Assistant ID to fetch configuration from. If provided, the configuration is fetched from the assistant in the database, and config can be passed as an empty dict {}.

## Example with config

```json
{
  "dataset_id": 123,
  "experiment_name": "test_run",
  "config": {
    "model": "gpt-4.1",
    "instructions": "You are a helpful FAQ assistant.",
    "tools": [
      {
        "type": "file_search",
        "vector_store_ids": ["vs_12345"],
        "max_num_results": 3
      }
    ],
    "include": ["file_search_call.results"]
  }
}
```

## Example with assistant_id

```json
{
  "dataset_id": 123,
  "experiment_name": "test_run",
  "config": {},
  "assistant_id": "asst_xyz"
}
```

## Returns

EvaluationRunPublic with batch details and status:

- id: Evaluation run ID
- run_name: Name of the evaluation run
- dataset_name: Name of the dataset used
- dataset_id: ID of the dataset used
- config: Configuration used for the evaluation
- batch_job_id: ID of the batch job processing this evaluation
- status: Current status (pending, running, completed, failed)
- total_items: Total number of items being evaluated
- completed_items: Number of items completed so far
- results: Evaluation results (when completed)
- error_message: Error message if failed

## Error Responses

- **404**: Dataset or assistant not found or not accessible
- **400**: Missing required credentials (OpenAI or Langfuse), dataset missing a Langfuse ID, or config missing required fields
- **500**: Failed to configure API clients or start the batch evaluation
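For reference, a minimal client-side sketch that starts a run and polls it; the route prefix and auth header are assumptions, not the documented contract:

```python
import requests

BASE = "http://localhost:8000/api/v1"  # assumed route prefix
HEADERS = {"X-API-KEY": "<project-api-key>"}  # auth scheme assumed

payload = {
    "dataset_id": 123,
    "experiment_name": "test_run",
    "config": {"model": "gpt-4.1", "instructions": "You are a helpful FAQ assistant."},
}

resp = requests.post(
    f"{BASE}/evaluations/evaluate", json=payload, headers=HEADERS, timeout=30
)
resp.raise_for_status()
run = resp.json()

# The batch is processed asynchronously; poll the run for progress.
status = requests.get(f"{BASE}/evaluations/{run['id']}", headers=HEADERS, timeout=30)
print(status.json()["status"])  # pending | running | completed | failed
```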
Lines changed: 18 additions & 0 deletions
Delete a dataset by ID.

This removes the dataset record from the database. The CSV file in the object store (if it exists) is kept for audit purposes, but the dataset is no longer available for creating new evaluations.

## Path Parameters

- **dataset_id**: ID of the dataset to delete

## Returns

Success message with deleted dataset details:

- message: Confirmation message
- dataset_id: ID of the deleted dataset

## Error Responses

- **404**: Dataset not found or not accessible to your organization/project
- **400**: Dataset cannot be deleted (e.g., it has active evaluation runs)
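A matching client sketch (same assumed prefix and auth as the example above):

```python
import requests

resp = requests.delete(
    "http://localhost:8000/api/v1/evaluations/datasets/123",  # path assumed
    headers={"X-API-KEY": "<project-api-key>"},  # auth scheme assumed
    timeout=30,
)
if resp.status_code == 400:
    # e.g., the dataset still has active evaluation runs
    print(resp.json()["detail"])
else:
    resp.raise_for_status()
    print(resp.json()["message"])
```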
Lines changed: 22 additions & 0 deletions
Get details of a specific dataset by ID.

Retrieves comprehensive information about a dataset, including its metadata, object store URL, and Langfuse integration details.

## Path Parameters

- **dataset_id**: ID of the dataset to retrieve

## Returns

DatasetUploadResponse with dataset details:

- dataset_id: Unique identifier for the dataset
- dataset_name: Name of the dataset (sanitized)
- total_items: Total number of items, including duplication
- original_items: Number of original items before duplication
- duplication_factor: Factor by which items were duplicated
- langfuse_dataset_id: ID of the dataset in Langfuse
- object_store_url: URL of the CSV file in object storage

## Error Responses

- **404**: Dataset not found or not accessible to your organization/project
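And retrieving the same record (assumed prefix and auth as above):

```python
import requests

resp = requests.get(
    "http://localhost:8000/api/v1/evaluations/datasets/123",  # path assumed
    headers={"X-API-KEY": "<project-api-key>"},
    timeout=30,
)
resp.raise_for_status()
ds = resp.json()
print(ds["dataset_name"], ds["total_items"], ds["langfuse_dataset_id"])
```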
