-
Notifications
You must be signed in to change notification settings - Fork 10
Model Config: Add model configuration table and API endpoints #669
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
df0e840
8a51cf5
2b9ffef
4b68444
6e322c9
244b43f
951f9ba
97cb65a
f07fb2f
7c9f2a5
ee99e2e
ac87722
433581f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,130 @@ | ||
| """create model_config table | ||
|
|
||
| Revision ID: 052 | ||
| Revises: 051 | ||
| Create Date: 2026-03-12 00:00:00.000000 | ||
|
|
||
| """ | ||
|
|
||
| import sqlalchemy as sa | ||
| from alembic import op | ||
| from sqlalchemy.dialects import postgresql | ||
|
|
||
| # revision identifiers, used by Alembic. | ||
| revision = "052" | ||
| down_revision = "051" | ||
| branch_labels = None | ||
| depends_on = None | ||
|
|
||
|
|
||
def upgrade() -> None:
    """Create the ``global.model_config`` table and seed the default models.

    Creates one row per (provider, model_name) pair describing the model's
    tunable config parameters (JSONB), supported input/output modalities,
    per-1M-token pricing, and active flag, then resets the id sequence so
    future inserts continue after the seeded ids.
    """
    op.create_table(
        "model_config",
        sa.Column(
            "id",
            sa.Integer(),
            sa.Identity(always=False),
            nullable=False,
            comment="unique identifier for model config table",
        ),
        sa.Column(
            "provider",
            sa.String(),
            nullable=False,
            comment="provider name (e.g. openai, google)",
        ),
        sa.Column(
            "model_name",
            sa.String(),
            nullable=False,
            comment="model name (e.g. gpt-4o, gemini-3-flash-preview)",
        ),
        sa.Column(
            "config",
            postgresql.JSONB(astext_type=sa.Text()),
            nullable=False,
            comment="model adhoc configuration",
        ),
        sa.Column(
            "input_modalities",
            postgresql.ARRAY(sa.String()),
            nullable=False,
            server_default="{}",
            comment="supported input modalities: TEXT, IMAGE, FILES, AUDIO",
        ),
        sa.Column(
            "output_modalities",
            postgresql.ARRAY(sa.String()),
            nullable=False,
            server_default="{}",
            comment="supported output modalities: TEXT, AUDIO",
        ),
        sa.Column(
            "pricing",
            postgresql.JSONB(astext_type=sa.Text()),
            nullable=True,
            comment=(
                "pricing per 1M tokens in USD. "
                "Structure: {response: {input_token_cost, output_token_cost}, batch: {input_token_cost, output_token_cost}}"
            ),
        ),
        sa.Column(
            "is_active",
            sa.Boolean(),
            nullable=False,
            server_default=sa.text("true"),
            comment="whether this model is available",
        ),
        sa.Column(
            "inserted_at",
            sa.DateTime(),
            nullable=False,
            comment="timestamp when model configuration was created",
        ),
        sa.Column(
            "updated_at",
            sa.DateTime(),
            nullable=False,
            comment="timestamp when model configuration was updated",
        ),
        sa.PrimaryKeyConstraint("id"),
        sa.UniqueConstraint("provider", "model_name"),
        schema="global",
    )

    # Seed default model configurations
    op.execute(
        """
        INSERT INTO global.model_config (id, provider, model_name, config, input_modalities, output_modalities, pricing, is_active, inserted_at, updated_at)
        VALUES
        (1, 'openai', 'gpt-4o-mini', '{"temperature": {"type": "float", "default": 1.0, "min": 0.0, "max": 2.0, "description": "Controls randomness. Lower = more deterministic."}, "top_p": {"type": "float", "default": 1.0, "min": 0.0, "max": 1.0, "description": "Nucleus sampling. Use either this or temperature, not both."}, "max_output_tokens": {"type": "int", "default": 2048, "min": 1, "max": 32768, "description": "Max tokens in the response."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 0.15, "output_token_cost": 0.6}, "batch": {"input_token_cost": 0.075, "output_token_cost": 0.3}}', true, NOW(), NOW()),
        (2, 'openai', 'gpt-4o', '{"temperature": {"type": "float", "default": 1.0, "min": 0.0, "max": 2.0, "description": "Controls randomness. Lower = more deterministic."}, "top_p": {"type": "float", "default": 1.0, "min": 0.0, "max": 1.0, "description": "Nucleus sampling. Use either this or temperature, not both."}, "max_output_tokens": {"type": "int", "default": 2048, "min": 1, "max": 32768, "description": "Max tokens in the response."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 2.5, "output_token_cost": 10}, "batch": {"input_token_cost": 1.25, "output_token_cost": 5}}', true, NOW(), NOW()),
        (3, 'openai', 'gpt-4.1', '{"temperature": {"type": "float", "default": 1.0, "min": 0.0, "max": 2.0, "description": "Controls randomness. Lower = more deterministic."}, "top_p": {"type": "float", "default": 1.0, "min": 0.0, "max": 1.0, "description": "Nucleus sampling. Use either this or temperature, not both."}, "max_output_tokens": {"type": "int", "default": 2048, "min": 1, "max": 32768, "description": "Max tokens in the response."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 2, "output_token_cost": 8}, "batch": {"input_token_cost": 1, "output_token_cost": 4}}', true, NOW(), NOW()),
        (4, 'openai', 'gpt-4.1-mini', '{"temperature": {"type": "float", "default": 1.0, "min": 0.0, "max": 2.0, "description": "Controls randomness. Lower = more deterministic."}, "top_p": {"type": "float", "default": 1.0, "min": 0.0, "max": 1.0, "description": "Nucleus sampling. Use either this or temperature, not both."}, "max_output_tokens": {"type": "int", "default": 2048, "min": 1, "max": 32768, "description": "Max tokens in the response."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 0.4, "output_token_cost": 1.6}, "batch": {"input_token_cost": 0.2, "output_token_cost": 0.8}}', true, NOW(), NOW()),
        (5, 'openai', 'gpt-4.1-nano', '{"temperature": {"type": "float", "default": 1.0, "min": 0.0, "max": 2.0, "description": "Controls randomness. Lower = more deterministic."}, "top_p": {"type": "float", "default": 1.0, "min": 0.0, "max": 1.0, "description": "Nucleus sampling. Use either this or temperature, not both."}, "max_output_tokens": {"type": "int", "default": 2048, "min": 1, "max": 32768, "description": "Max tokens in the response."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 0.1, "output_token_cost": 0.4}, "batch": {"input_token_cost": 0.05, "output_token_cost": 0.2}}', true, NOW(), NOW()),
        (6, 'openai', 'o3-mini', '{"effort": {"type": "enum", "default": "medium", "options": ["low", "medium", "high"], "description": "How long the model spends reasoning. Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 1.1, "output_token_cost": 4.4}, "batch": {"input_token_cost": 0.55, "output_token_cost": 2.2}}', true, NOW(), NOW()),
        (7, 'openai', 'o3', '{"effort": {"type": "enum", "default": "medium", "options": ["low", "medium", "high"], "description": "How long the model spends reasoning. Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 2, "output_token_cost": 8}, "batch": {"input_token_cost": 1, "output_token_cost": 4}}', true, NOW(), NOW()),
        (8, 'openai', 'o4-mini', '{"effort": {"type": "enum", "default": "medium", "options": ["low", "medium", "high"], "description": "How long the model spends reasoning. Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 1.1, "output_token_cost": 4.4}, "batch": {"input_token_cost": 0.55, "output_token_cost": 2.2}}', true, NOW(), NOW()),
        (9, 'openai', 'gpt-5', '{"effort": {"type": "enum", "default": "medium", "options": ["minimal", "low", "medium", "high"], "description": "How long the model spends reasoning. Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 1.25, "output_token_cost": 10}, "batch": {"input_token_cost": 0.625, "output_token_cost": 5}}', true, NOW(), NOW()),
        (10, 'openai', 'gpt-5-mini', '{"effort": {"type": "enum", "default": "medium", "options": ["minimal", "low", "medium", "high"], "description": "How long the model spends reasoning. Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 0.25, "output_token_cost": 2}, "batch": {"input_token_cost": 0.125, "output_token_cost": 1}}', true, NOW(), NOW()),
        (11, 'openai', 'gpt-5-nano', '{"effort": {"type": "enum", "default": "medium", "options": ["minimal", "low", "medium", "high"], "description": "How long the model spends reasoning. Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 0.05, "output_token_cost": 0.4}, "batch": {"input_token_cost": 0.025, "output_token_cost": 0.2}}', true, NOW(), NOW()),
        (12, 'openai', 'gpt-5.1', '{"effort": {"type": "enum", "default": "medium", "options": ["none", "low", "medium", "high"], "description": "How long the model spends reasoning. Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 1.25, "output_token_cost": 10}, "batch": {"input_token_cost": 0.625, "output_token_cost": 5}}', true, NOW(), NOW()),
        (13, 'openai', 'gpt-5.2', '{"effort": {"type": "enum", "default": "medium", "options": ["none", "low", "medium", "high", "xhigh"], "description": "How long the model spends reasoning. Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 1.75, "output_token_cost": 14}, "batch": {"input_token_cost": 0.875, "output_token_cost": 7}}', true, NOW(), NOW()),
        (14, 'openai', 'gpt-5.2-pro', '{"summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 21, "output_token_cost": 168}, "batch": {"input_token_cost": 10.5, "output_token_cost": 84}}', true, NOW(), NOW()),
        (15, 'openai', 'gpt-5.3-chat-latest', '{"summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 1.75, "output_token_cost": 14}, "batch": {"input_token_cost": 0.875, "output_token_cost": 7}}', true, NOW(), NOW()),
        (16, 'openai', 'gpt-5.4', '{"effort": {"type": "enum", "default": "medium", "options": ["none", "low", "medium", "high", "xhigh"], "description": "How long the model spends reasoning. Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 2.5, "output_token_cost": 15}, "batch": {"input_token_cost": 1.25, "output_token_cost": 7.5}}', true, NOW(), NOW()),
        (17, 'openai', 'gpt-5.4-mini', '{"effort": {"type": "enum", "default": "medium", "options": ["none", "low", "medium", "high", "xhigh"], "description": "How long the model spends reasoning. Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 0.75, "output_token_cost": 4.5}, "batch": {"input_token_cost": 0.375, "output_token_cost": 2.25}}', true, NOW(), NOW()),
        (18, 'openai', 'gpt-5.4-nano', '{"effort": {"type": "enum", "default": "medium", "options": ["none", "low", "medium", "high", "xhigh"], "description": "How long the model spends reasoning. Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 0.2, "output_token_cost": 1.25}, "batch": {"input_token_cost": 0.1, "output_token_cost": 0.625}}', true, NOW(), NOW()),
        (19, 'openai', 'gpt-5.4-pro', '{"effort": {"type": "enum", "default": "medium", "options": ["none", "low", "medium", "high", "xhigh"], "description": "How long the model spends reasoning. Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 30, "output_token_cost": 180}, "batch": {"input_token_cost": 15, "output_token_cost": 90}}', true, NOW(), NOW())
        """
    )

    # Reset the id sequence to continue after the last seeded id, since the
    # seed rows above insert explicit ids that bypass the identity sequence.
    op.execute(
        "SELECT setval(pg_get_serial_sequence('global.model_config', 'id'), "
        "(SELECT MAX(id) FROM global.model_config))"
    )
|
|
||
|
|
||
def downgrade() -> None:
    """Drop the ``global.model_config`` table, reverting this migration."""
    op.drop_table("model_config", schema="global")
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,67 @@ | ||
| ## Endpoint | ||
|
|
||
| **GET** `/api/v1/models/{provider}/{model_name}` | ||
|
|
||
| Retrieve a specific model configuration by provider and model name. | ||
|
|
||
| Returns model details including supported config parameters, input/output modalities, pricing, and active status. | ||
|
|
||
| ### Path Parameters | ||
|
|
||
| - **`provider`** (required) — Provider name (e.g. `openai`, `google`) | ||
| - **`model_name`** (required) — Model name (e.g. `gpt-4o`, `gpt-4o-mini`) | ||
|
|
||
| ### Example Response | ||
|
|
||
| ```json | ||
| { | ||
| "success": true, | ||
| "data": { | ||
| "id": 2, | ||
| "provider": "openai", | ||
| "model_name": "gpt-4o", | ||
| "config": { | ||
| "temperature": { | ||
| "type": "float", | ||
| "default": 1.0, | ||
| "min": 0.0, | ||
| "max": 2.0, | ||
| "description": "Controls randomness. Lower = more deterministic." | ||
| }, | ||
| "top_p": { | ||
| "type": "float", | ||
| "default": 1.0, | ||
| "min": 0.0, | ||
| "max": 1.0, | ||
| "description": "Nucleus sampling. Use either this or temperature, not both." | ||
| }, | ||
| "max_output_tokens": { | ||
| "type": "int", | ||
| "default": 2048, | ||
| "min": 1, | ||
| "max": 32768, | ||
| "description": "Max tokens in the response." | ||
| } | ||
| }, | ||
| "input_modalities": ["TEXT", "IMAGE"], | ||
| "output_modalities": ["TEXT"], | ||
| "pricing": { | ||
| "response": { | ||
| "input_token_cost": 2.5, | ||
| "output_token_cost": 10 | ||
| }, | ||
| "batch": { | ||
| "input_token_cost": 1.25, | ||
| "output_token_cost": 5 | ||
| } | ||
| }, | ||
| "is_active": true, | ||
| "inserted_at": "2026-03-12T00:00:00", | ||
| "updated_at": "2026-03-12T00:00:00" | ||
| } | ||
| } | ||
| ``` | ||
|
|
||
| ### Error Response | ||
|
|
||
| - `404 Not Found` — Model not found for the given `provider` and `model_name`. |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,74 @@ | ||
| ## Endpoint | ||
|
|
||
| **GET** `/api/v1/models` | ||
|
|
||
| Retrieve a list of all active model configurations. | ||
|
|
||
| Returns model details including provider, model name, supported config parameters, input/output modalities, pricing, and active status. | ||
|
|
||
| Optionally filter by provider (e.g. openai, google). | ||
|
|
||
| ### Query Parameters | ||
|
|
||
| - **`provider`** (optional) — Filter by provider name (e.g. `openai`, `google`) | ||
| - **`skip`** (optional, default 0) — Number of records to skip for pagination | ||
| - **`limit`** (optional, default 100, max 100) — Maximum number of records to return | ||
|
|
||
| ### Example Response | ||
|
|
||
| ```json | ||
| { | ||
| "success": true, | ||
| "metadata": { | ||
| "has_more": true | ||
| }, | ||
| "data": { | ||
| "data": [ | ||
| { | ||
| "id": 1, | ||
| "provider": "openai", | ||
| "model_name": "gpt-4o-mini", | ||
| "config": { | ||
| "temperature": { | ||
| "type": "float", | ||
| "default": 1.0, | ||
| "min": 0.0, | ||
| "max": 2.0, | ||
| "description": "Controls randomness. Lower = more deterministic." | ||
| }, | ||
| "top_p": { | ||
| "type": "float", | ||
| "default": 1.0, | ||
| "min": 0.0, | ||
| "max": 1.0, | ||
| "description": "Nucleus sampling. Use either this or temperature, not both." | ||
| }, | ||
| "max_output_tokens": { | ||
| "type": "int", | ||
| "default": 2048, | ||
| "min": 1, | ||
| "max": 32768, | ||
| "description": "Max tokens in the response." | ||
| } | ||
| }, | ||
| "input_modalities": ["TEXT", "IMAGE"], | ||
| "output_modalities": ["TEXT"], | ||
| "pricing": { | ||
| "response": { | ||
| "input_token_cost": 0.15, | ||
| "output_token_cost": 0.6 | ||
| }, | ||
| "batch": { | ||
| "input_token_cost": 0.075, | ||
| "output_token_cost": 0.3 | ||
| } | ||
| }, | ||
| "is_active": true, | ||
| "inserted_at": "2026-03-12T00:00:00", | ||
| "updated_at": "2026-03-12T00:00:00" | ||
| } | ||
| ], | ||
| "count": 1 | ||
| } | ||
| } | ||
| ``` |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,52 @@ | ||
| ## Endpoint | ||
|
|
||
| **GET** `/api/v1/models/grouped` | ||
|
|
||
| Retrieve active models grouped by provider. | ||
|
|
||
| Supports pagination of model rows before grouping: | ||
| - `skip` (default `0`) | ||
| - `limit` (default `100`, max `100`) | ||
|
|
||
| Returns a dictionary where each key is a provider present in the paginated slice, and each value is a list of active model configurations for that provider. | ||
| Includes `metadata.has_more` when additional model rows exist. | ||
|
|
||
| ### Example Response | ||
|
|
||
| ```json | ||
| { | ||
| "success": true, | ||
| "metadata": { | ||
| "has_more": true | ||
| }, | ||
| "data": { | ||
| "openai": [ | ||
| { | ||
| "id": 2, | ||
| "provider": "openai", | ||
| "model_name": "gpt-4o", | ||
| "config": { | ||
| "temperature": { | ||
| "type": "float", | ||
| "default": 1.0, | ||
| "min": 0.0, | ||
| "max": 2.0, | ||
| "description": "Controls randomness. Lower = more deterministic." | ||
| } | ||
| }, | ||
| "input_modalities": ["TEXT", "IMAGE"], | ||
| "output_modalities": ["TEXT"], | ||
| "pricing": { | ||
| "response": { | ||
| "input_token_cost": 2.5, | ||
| "output_token_cost": 10 | ||
| } | ||
| }, | ||
| "is_active": true, | ||
| "inserted_at": "2026-03-12T00:00:00", | ||
| "updated_at": "2026-03-12T00:00:00" | ||
| } | ||
| ] | ||
| } | ||
| } | ||
| ``` |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,16 @@ | ||
| ## Endpoint | ||
|
|
||
| **GET** `/api/v1/models/providers` | ||
|
|
||
| Retrieve the list of providers that currently have active models. | ||
|
|
||
| Returns provider names sorted in ascending order. | ||
|
|
||
| ### Example Response | ||
|
|
||
| ```json | ||
| { | ||
| "success": true, | ||
| "data": ["google", "openai"] | ||
| } | ||
| ``` |
Uh oh!
There was an error while loading. Please reload this page.