ProjectTech4DevAI · vprashrex · May 6, 2026 · Mar 31, 2026 · Apr 27, 2026 · Apr 27, 2026
diff --git a/backend/app/alembic/versions/055_add_assessment_manager_table.py b/backend/app/alembic/versions/055_add_assessment_manager_table.py
@@ -0,0 +1,227 @@
+"""add assessment and assessment_run tables
+
+Revision ID: 055
+Revises: 054
+Create Date: 2026-03-26 23:30:00.000000
+
+"""
+
+import sqlalchemy as sa
+import sqlmodel.sql.sqltypes
+from alembic import op
+from sqlalchemy.dialects import postgresql
+
+# Revision identifiers, used by Alembic: this migration is revision 055 and
+# applies directly on top of revision 054.
+revision = "055"
+down_revision = "054"
+branch_labels = None
+depends_on = None
+
+
+def upgrade():
+    """Create the ``assessment`` and ``assessment_run`` tables and their indexes.
+
+    ``assessment`` is created first because ``assessment_run`` carries a
+    foreign key to ``assessment.id``.
+    """
+    # Parent table: one row per assessment, tied to a dataset, an organization
+    # and a project.
+    op.create_table(
+        "assessment",
+        sa.Column(
+            "id",
+            sa.Integer(),
+            nullable=False,
+            comment="Unique identifier for the assessment",
+        ),
+        sa.Column(
+            "experiment_name",
+            sqlmodel.sql.sqltypes.AutoString(),
+            nullable=False,
+            comment="Name of the experiment grouping its config runs",
+        ),
+        sa.Column(
+            "dataset_id",
+            sa.Integer(),
+            nullable=False,
+            comment="Reference to the evaluation dataset",
+        ),
+        sa.Column(
+            "status",
+            sqlmodel.sql.sqltypes.AutoString(),
+            nullable=False,
+            server_default="pending",
+            comment=(
+                "Aggregate status: pending, processing, completed, "
+                "completed_with_errors, failed"
+            ),
+        ),
+        sa.Column(
+            "organization_id",
+            sa.Integer(),
+            nullable=False,
+            comment="Reference to the organization",
+        ),
+        sa.Column(
+            "project_id",
+            sa.Integer(),
+            nullable=False,
+            comment="Reference to the project",
+        ),
+        # Neither timestamp column declares a server default here.
+        # NOTE(review): presumably the application sets both on insert - confirm.
+        sa.Column(
+            "inserted_at",
+            sa.DateTime(),
+            nullable=False,
+            comment="Timestamp when the assessment was created",
+        ),
+        sa.Column(
+            "updated_at",
+            sa.DateTime(),
+            nullable=False,
+            comment="Timestamp when the assessment was last updated",
+        ),
+        # All three foreign keys use ON DELETE CASCADE: removing the referenced
+        # dataset, organization, or project also removes the assessment row.
+        sa.ForeignKeyConstraint(
+            ["dataset_id"],
+            ["evaluation_dataset.id"],
+            name="fk_assessment_dataset_id",
+            ondelete="CASCADE",
+        ),
+        sa.ForeignKeyConstraint(
+            ["organization_id"],
+            ["organization.id"],
+            name="fk_assessment_organization_id",
+            ondelete="CASCADE",
+        ),
+        sa.ForeignKeyConstraint(
+            ["project_id"],
+            ["project.id"],
+            name="fk_assessment_project_id",
+            ondelete="CASCADE",
+        ),
+        sa.PrimaryKeyConstraint("id"),
+    )
+    # Non-unique indexes on assessment: by experiment name, by
+    # (organization_id, project_id, inserted_at), and by status.
+    op.create_index(
+        op.f("ix_assessment_experiment_name"),
+        "assessment",
+        ["experiment_name"],
+        unique=False,
+    )
+    op.create_index(
+        "idx_assessment_org_project",
+        "assessment",
+        ["organization_id", "project_id", "inserted_at"],
+        unique=False,
+    )
+    op.create_index(
+        "idx_assessment_status",
+        "assessment",
+        ["status"],
+        unique=False,
+    )
+
+    # Child table: one row per run of an assessment against a specific
+    # config id and config version.
+    op.create_table(
+        "assessment_run",
+        sa.Column(
+            "id",
+            sa.Integer(),
+            nullable=False,
+            comment="Unique identifier for the assessment run",
+        ),
+        sa.Column(
+            "assessment_id",
+            sa.Integer(),
+            nullable=False,
+            comment="Reference to the parent assessment",
+        ),
+        sa.Column(
+            "config_id",
+            sa.Uuid(),
+            nullable=False,
+            comment="Reference to the stored config used",
+        ),
+        sa.Column(
+            "config_version",
+            sa.Integer(),
+            nullable=False,
+            comment="Version of the config used",
+        ),
+        sa.Column(
+            "status",
+            sqlmodel.sql.sqltypes.AutoString(),
+            nullable=False,
+            server_default="pending",
+            comment="Run status: pending, processing, completed, failed",
+        ),
+        # Nullable: a run row can exist without an associated batch job.
+        sa.Column(
+            "batch_job_id",
+            sa.Integer(),
+            nullable=True,
+            comment="Reference to the batch job processing this run",
+        ),
+        sa.Column(
+            "total_items",
+            sa.Integer(),
+            nullable=False,
+            server_default="0",
+            comment="Total number of dataset items in this run",
+        ),
+        sa.Column(
+            "input",
+            postgresql.JSONB(astext_type=sa.Text()),
+            nullable=False,
+            comment=(
+                "Assessment input: prompt_template, text_columns, attachments, "
+                "output_schema"
+            ),
+        ),
+        sa.Column(
+            "object_store_url",
+            sqlmodel.sql.sqltypes.AutoString(),
+            nullable=True,
+            comment="S3 URL of processed batch results",
+        ),
+        sa.Column(
+            "error_message",
+            sa.Text(),
+            nullable=True,
+            comment="Error message if the run failed",
+        ),
+        sa.Column(
+            "inserted_at",
+            sa.DateTime(),
+            nullable=False,
+            comment="Timestamp when the run was created",
+        ),
+        sa.Column(
+            "updated_at",
+            sa.DateTime(),
+            nullable=False,
+            comment="Timestamp when the run was last updated",
+        ),
+        # Runs are removed together with their parent assessment.
+        sa.ForeignKeyConstraint(
+            ["assessment_id"],
+            ["assessment.id"],
+            name="fk_assessment_run_assessment_id",
+            ondelete="CASCADE",
+        ),
+        # No ondelete rule is given for the config reference, so the
+        # database's default referential action applies.
+        sa.ForeignKeyConstraint(
+            ["config_id"],
+            ["config.id"],
+            name="fk_assessment_run_config_id",
+        ),
+        # Deleting the batch job keeps the run and clears batch_job_id.
+        sa.ForeignKeyConstraint(
+            ["batch_job_id"],
+            ["batch_job.id"],
+            name="fk_assessment_run_batch_job_id",
+            ondelete="SET NULL",
+        ),
+        sa.PrimaryKeyConstraint("id"),
+    )
+    # Non-unique index for looking up the runs of a given assessment.
+    op.create_index(
+        "idx_assessment_run_assessment_id",
+        "assessment_run",
+        ["assessment_id"],
+        unique=False,
+    )
+
+
+def downgrade():
+    """Drop the ``assessment_run`` and ``assessment`` tables and their indexes.
+
+    The child table ``assessment_run`` is dropped before ``assessment``
+    because it holds a foreign key to ``assessment.id``.
+    """
+    op.drop_index("idx_assessment_run_assessment_id", table_name="assessment_run")
+    op.drop_table("assessment_run")
+    # Indexes on the parent table are dropped explicitly before the table itself.
+    op.drop_index("idx_assessment_status", table_name="assessment")
+    op.drop_index("idx_assessment_org_project", table_name="assessment")
+    op.drop_index(op.f("ix_assessment_experiment_name"), table_name="assessment")
+    op.drop_table("assessment")
diff --git a/backend/app/alembic/versions/056_add_config_tag.py b/backend/app/alembic/versions/056_add_config_tag.py
@@ -0,0 +1,82 @@
+"""add tag column to config table
+
+Revision ID: 056
+Revises: 055
+Create Date: 2026-05-03 12:00:00.000000
+
+"""
+
+import sqlalchemy as sa
+from alembic import op
+from sqlalchemy.dialects import postgresql
+
+# Revision identifiers, used by Alembic: this migration is revision 056 and
+# applies directly on top of revision 055.
+revision = "056"
+down_revision = "055"
+branch_labels = None
+depends_on = None
+
+
+# Labels of the PostgreSQL ``config_tag`` enum type created by this migration.
+CONFIG_TAG_VALUES = ("default", "ASSESSMENT")
+# Server-side default for ``config.tag``: the 'default' label, cast explicitly
+# to the ``config_tag`` enum type.
+DEFAULT_TAG_SERVER_DEFAULT = sa.text("'default'::config_tag")
+
+
+def upgrade():
+    """Add a non-null ``tag`` enum column to ``config``, backfill it, and index it.
+
+    Steps, in order: ensure the ``config_tag`` enum type exists with both
+    labels, add ``config.tag`` defaulting to 'default', mark configs that are
+    referenced by ``assessment_run`` as 'ASSESSMENT', then build a partial
+    index over non-deleted configs.
+    """
+    # create_type=False together with the explicit create(..., checkfirst=True)
+    # below means the type is created here, and only if it does not exist yet.
+    config_tag = postgresql.ENUM(
+        *CONFIG_TAG_VALUES,
+        name="config_tag",
+        create_type=False,
+    )
+    config_tag.create(op.get_bind(), checkfirst=True)
+
+    # Covers the case where ``config_tag`` already existed without one of the
+    # labels; IF NOT EXISTS makes each statement a no-op otherwise.
+    # NOTE(review): run in an autocommit block presumably because PostgreSQL
+    # restricts using a newly added enum value inside the same transaction.
+    with op.get_context().autocommit_block():
+        op.execute("ALTER TYPE config_tag ADD VALUE IF NOT EXISTS 'default'")
+        op.execute("ALTER TYPE config_tag ADD VALUE IF NOT EXISTS 'ASSESSMENT'")
+
+    # Existing rows receive 'default' through the server default, which is
+    # what allows the column to be added as NOT NULL.
+    op.add_column(
+        "config",
+        sa.Column(
+            "tag",
+            config_tag,
+            nullable=False,
+            server_default=DEFAULT_TAG_SERVER_DEFAULT,
+            comment=(
+                "Tag classifying the config: "
+                "'default' for general use, 'ASSESSMENT' for configs used in assessments. "
+            ),
+        ),
+    )
+
+    # Backfill: every config referenced by at least one assessment_run row is
+    # re-tagged 'ASSESSMENT'; all other configs keep 'default'.
+    op.execute(
+        """
+        UPDATE config
+        SET tag = 'ASSESSMENT'
+        FROM (
+            SELECT DISTINCT config_id
+            FROM assessment_run
+        ) AS assessment_configs
+        WHERE config.id = assessment_configs.config_id
+        """
+    )
+
+    # Partial index restricted to rows where deleted_at IS NULL, ordered by
+    # updated_at descending within (project_id, tag). It is built with
+    # CONCURRENTLY, which PostgreSQL does not allow inside a transaction
+    # block, hence the autocommit block.
+    with op.get_context().autocommit_block():
+        op.create_index(
+            "idx_config_project_id_tag_active",
+            "config",
+            ["project_id", "tag", sa.text("updated_at DESC")],
+            unique=False,
+            postgresql_where=sa.text("deleted_at IS NULL"),
+            postgresql_concurrently=True,
+        )
+
+
+def downgrade():
+    """Remove the index, the ``config.tag`` column, and the ``config_tag`` type.
+
+    The order mirrors ``upgrade`` in reverse: the enum type is dropped last,
+    after the column that uses it is gone.
+    """
+    # DROP INDEX CONCURRENTLY cannot run inside a transaction block, so it is
+    # issued from an autocommit block.
+    with op.get_context().autocommit_block():
+        op.drop_index(
+            "idx_config_project_id_tag_active",
+            table_name="config",
+            postgresql_concurrently=True,
+        )
+
+    op.drop_column("config", "tag")
+    # checkfirst=True: the type is dropped only if it currently exists.
+    sa.Enum(name="config_tag").drop(op.get_bind(), checkfirst=True)
diff --git a/backend/app/api/docs/assessment/create_run.md b/backend/app/api/docs/assessment/create_run.md
@@ -0,0 +1,7 @@
+Start an assessment across one or more stored config versions.
+
+Creates an assessment and one child assessment run per config, then submits each
+run to batch processing.
+
+Optional `system_instruction` is forwarded into each generated provider request
+as the system/developer instruction for that assessment run.
diff --git a/backend/app/api/docs/assessment/delete_dataset.md b/backend/app/api/docs/assessment/delete_dataset.md
@@ -0,0 +1,4 @@
+Delete an assessment dataset.
+
+This removes dataset metadata and associated storage references for the
+given dataset in the current organization and project.
diff --git a/backend/app/api/docs/assessment/export_assessment_results.md b/backend/app/api/docs/assessment/export_assessment_results.md
@@ -0,0 +1,4 @@
+Export results for all child runs under an assessment.
+
+For `json`, returns a flat list in the API response. For `csv`/`xlsx`,
+returns one file for a single run or a ZIP archive when multiple runs exist.
diff --git a/backend/app/api/docs/assessment/export_run_results.md b/backend/app/api/docs/assessment/export_run_results.md
@@ -0,0 +1,3 @@
+Export results for a single assessment run.
+
+Supports `json`, `csv`, and `xlsx` output formats.
diff --git a/backend/app/api/docs/assessment/get_assessment.md b/backend/app/api/docs/assessment/get_assessment.md
@@ -0,0 +1,3 @@
+Get an assessment by ID.
+
+Returns aggregate run counts and status metadata for the assessment.
diff --git a/backend/app/api/docs/assessment/get_dataset.md b/backend/app/api/docs/assessment/get_dataset.md
@@ -0,0 +1,3 @@
+Get a single assessment dataset by ID.
+
+Optionally include a signed URL to download the original uploaded file.
diff --git a/backend/app/api/docs/assessment/get_run.md b/backend/app/api/docs/assessment/get_run.md
@@ -0,0 +1,3 @@
+Get a single assessment run by ID.
+
+Returns run metadata, status, config reference, and assessment input payload.
diff --git a/backend/app/api/docs/assessment/list_assessments.md b/backend/app/api/docs/assessment/list_assessments.md
@@ -0,0 +1,3 @@
+List assessments for the current organization/project.
+
+Each record includes aggregate status counters across its child runs.
diff --git a/backend/app/api/docs/assessment/list_datasets.md b/backend/app/api/docs/assessment/list_datasets.md
@@ -0,0 +1,3 @@
+List assessment datasets for the current organization and project.
+
+Supports pagination via `limit` and `offset`.
diff --git a/backend/app/api/docs/assessment/list_runs.md b/backend/app/api/docs/assessment/list_runs.md
@@ -0,0 +1,4 @@
+List assessment runs for the current organization/project.
+
+Optionally filter by `assessment_id` to list runs for a specific parent
+assessment.
diff --git a/backend/app/api/docs/assessment/retry_assessment.md b/backend/app/api/docs/assessment/retry_assessment.md
@@ -0,0 +1,4 @@
+Retry an existing assessment.
+
+Reuses the original dataset and config references from the selected
+assessment and creates a fresh assessment with new child runs.
diff --git a/backend/app/api/docs/assessment/retry_run.md b/backend/app/api/docs/assessment/retry_run.md
@@ -0,0 +1,4 @@
+Retry a single assessment run.
+
+Creates a new assessment using the same dataset and config used by the
+selected child run.
diff --git a/backend/app/api/docs/assessment/upload_dataset.md b/backend/app/api/docs/assessment/upload_dataset.md
@@ -0,0 +1,4 @@
+Upload a CSV or Excel dataset for assessment workflows.
+
+The file is stored in object storage and indexed as an assessment dataset
+for the current organization and project.
diff --git a/backend/app/api/docs/config/create_version.md b/backend/app/api/docs/config/create_version.md
@@ -6,6 +6,10 @@ create a new version under the same configuration with an incremented version nu
 Version numbers are automatically incremented sequentially (1, 2, 3, etc.)
 and cannot be manually set or skipped.
 
+When `tag` is omitted, this endpoint only resolves general configurations,
+i.e. configs tagged `default`. Pass an explicit tag such as `ASSESSMENT` to
+target configs that carry that tag.
+
 ## Important
 - This endpoint accepts partial updates using dict[str, Any] for config_blob.
 - Only the fields that need to be updated should be provided.

diff --git a/backend/app/api/docs/config/get_version.md b/backend/app/api/docs/config/get_version.md
@@ -1,4 +1,8 @@
 Retrieve a specific version of a configuration.
 
+When `tag` is omitted, this endpoint only resolves versions of general
+configurations, i.e. configs tagged `default`. Pass an explicit tag such as
+`ASSESSMENT` to target configs that carry that tag.
+
 Returns the complete version details including the full configuration
 blob (config_blob) with all LLM parameters.
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,3 @@
		Export results for a single assessment run.

		Supports `json`, `csv`, and `xlsx` output formats.
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,3 @@
		Get an assessment by ID.

		Returns aggregate run counts and status metadata for the assessment.
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,3 @@
		Get a single assessment dataset by ID.

		Optionally include a signed URL to download the original uploaded file.
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,3 @@
		Get a single assessment run by ID.

		Returns run metadata, status, config reference, and assessment input payload.
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,3 @@
		List assessments for the current organization/project.

		Each record includes aggregate status counters across its child runs.
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,3 @@
		List assessment datasets for the current organization and project.

		Supports pagination via `limit` and `offset`.