Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
130 changes: 130 additions & 0 deletions backend/app/alembic/versions/052_create_model_config_table.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
"""create model_config table

Revision ID: 052
Revises: 051
Create Date: 2026-03-12 00:00:00.000000

"""

import sqlalchemy as sa
from alembic import op
from sqlalchemy.dialects import postgresql

# revision identifiers, used by Alembic.
revision = "052"
down_revision = "051"
branch_labels = None
depends_on = None
Comment thread
coderabbitai[bot] marked this conversation as resolved.


def upgrade() -> None:
    """Create the ``global.model_config`` table and seed default model rows.

    The table stores per-provider model metadata: an adhoc ``config`` schema
    (JSONB), supported input/output modalities, optional pricing (per 1M
    tokens, USD), and an ``is_active`` flag. A unique constraint on
    (provider, model_name) prevents duplicate registrations.
    """
    op.create_table(
        "model_config",
        sa.Column(
            "id",
            sa.Integer(),
            sa.Identity(always=False),
            nullable=False,
            comment="unique identifier for model config table",
        ),
        sa.Column(
            "provider",
            sa.String(),
            nullable=False,
            comment="provider name (e.g. openai, google)",
        ),
        sa.Column(
            "model_name",
            sa.String(),
            nullable=False,
            comment="model name (e.g. gpt-4o, gemini-3-flash-preview)",
        ),
        sa.Column(
            "config",
            postgresql.JSONB(astext_type=sa.Text()),
            nullable=False,
            comment="model adhoc configuration",
        ),
        sa.Column(
            "input_modalities",
            postgresql.ARRAY(sa.String()),
            nullable=False,
            server_default="{}",
            comment="supported input modalities: TEXT, IMAGE, FILES, AUDIO",
        ),
        sa.Column(
            "output_modalities",
            postgresql.ARRAY(sa.String()),
            nullable=False,
            server_default="{}",
            comment="supported output modalities: TEXT, AUDIO",
        ),
        sa.Column(
            "pricing",
            postgresql.JSONB(astext_type=sa.Text()),
            nullable=True,
            comment=(
                "pricing per 1M tokens in USD. "
                "Structure: {response: {input_token_cost, output_token_cost}, batch: {input_token_cost, output_token_cost}}"
            ),
        ),
        sa.Column(
            "is_active",
            sa.Boolean(),
            nullable=False,
            server_default=sa.text("true"),
            comment="whether this model is available",
        ),
        sa.Column(
            "inserted_at",
            sa.DateTime(),
            nullable=False,
            comment="timestamp when model configuration was created",
        ),
        sa.Column(
            "updated_at",
            sa.DateTime(),
            nullable=False,
            comment="timestamp when model configuration was updated",
        ),
        sa.PrimaryKeyConstraint("id"),
        sa.UniqueConstraint("provider", "model_name"),
        schema="global",
    )

    # Seed default model configurations with explicit ids so the sequence
    # reset below can pick up after the last seeded row.
    op.execute(
        """
        INSERT INTO global.model_config (id, provider, model_name, config, input_modalities, output_modalities, pricing, is_active, inserted_at, updated_at)
        VALUES
        (1, 'openai', 'gpt-4o-mini', '{"temperature": {"type": "float", "default": 1.0, "min": 0.0, "max": 2.0, "description": "Controls randomness. Lower = more deterministic."}, "top_p": {"type": "float", "default": 1.0, "min": 0.0, "max": 1.0, "description": "Nucleus sampling. Use either this or temperature, not both."}, "max_output_tokens": {"type": "int", "default": 2048, "min": 1, "max": 32768, "description": "Max tokens in the response."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 0.15, "output_token_cost": 0.6}, "batch": {"input_token_cost": 0.075, "output_token_cost": 0.3}}', true, NOW(), NOW()),
        (2, 'openai', 'gpt-4o', '{"temperature": {"type": "float", "default": 1.0, "min": 0.0, "max": 2.0, "description": "Controls randomness. Lower = more deterministic."}, "top_p": {"type": "float", "default": 1.0, "min": 0.0, "max": 1.0, "description": "Nucleus sampling. Use either this or temperature, not both."}, "max_output_tokens": {"type": "int", "default": 2048, "min": 1, "max": 32768, "description": "Max tokens in the response."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 2.5, "output_token_cost": 10}, "batch": {"input_token_cost": 1.25, "output_token_cost": 5}}', true, NOW(), NOW()),
        (3, 'openai', 'gpt-4.1', '{"temperature": {"type": "float", "default": 1.0, "min": 0.0, "max": 2.0, "description": "Controls randomness. Lower = more deterministic."}, "top_p": {"type": "float", "default": 1.0, "min": 0.0, "max": 1.0, "description": "Nucleus sampling. Use either this or temperature, not both."}, "max_output_tokens": {"type": "int", "default": 2048, "min": 1, "max": 32768, "description": "Max tokens in the response."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 2, "output_token_cost": 8}, "batch": {"input_token_cost": 1, "output_token_cost": 4}}', true, NOW(), NOW()),
        (4, 'openai', 'gpt-4.1-mini', '{"temperature": {"type": "float", "default": 1.0, "min": 0.0, "max": 2.0, "description": "Controls randomness. Lower = more deterministic."}, "top_p": {"type": "float", "default": 1.0, "min": 0.0, "max": 1.0, "description": "Nucleus sampling. Use either this or temperature, not both."}, "max_output_tokens": {"type": "int", "default": 2048, "min": 1, "max": 32768, "description": "Max tokens in the response."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 0.4, "output_token_cost": 1.6}, "batch": {"input_token_cost": 0.2, "output_token_cost": 0.8}}', true, NOW(), NOW()),
        (5, 'openai', 'gpt-4.1-nano', '{"temperature": {"type": "float", "default": 1.0, "min": 0.0, "max": 2.0, "description": "Controls randomness. Lower = more deterministic."}, "top_p": {"type": "float", "default": 1.0, "min": 0.0, "max": 1.0, "description": "Nucleus sampling. Use either this or temperature, not both."}, "max_output_tokens": {"type": "int", "default": 2048, "min": 1, "max": 32768, "description": "Max tokens in the response."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 0.1, "output_token_cost": 0.4}, "batch": {"input_token_cost": 0.05, "output_token_cost": 0.2}}', true, NOW(), NOW()),
        (6, 'openai', 'o3-mini', '{"effort": {"type": "enum", "default": "medium", "options": ["low", "medium", "high"], "description": "How long the model spends reasoning. Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 1.1, "output_token_cost": 4.4}, "batch": {"input_token_cost": 0.55, "output_token_cost": 2.2}}', true, NOW(), NOW()),
        (7, 'openai', 'o3', '{"effort": {"type": "enum", "default": "medium", "options": ["low", "medium", "high"], "description": "How long the model spends reasoning. Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 2, "output_token_cost": 8}, "batch": {"input_token_cost": 1, "output_token_cost": 4}}', true, NOW(), NOW()),
        (8, 'openai', 'o4-mini', '{"effort": {"type": "enum", "default": "medium", "options": ["low", "medium", "high"], "description": "How long the model spends reasoning. Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 1.1, "output_token_cost": 4.4}, "batch": {"input_token_cost": 0.55, "output_token_cost": 2.2}}', true, NOW(), NOW()),
        (9, 'openai', 'gpt-5', '{"effort": {"type": "enum", "default": "medium", "options": ["minimal", "low", "medium", "high"], "description": "How long the model spends reasoning. Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 1.25, "output_token_cost": 10}, "batch": {"input_token_cost": 0.625, "output_token_cost": 5}}', true, NOW(), NOW()),
        (10, 'openai', 'gpt-5-mini', '{"effort": {"type": "enum", "default": "medium", "options": ["minimal", "low", "medium", "high"], "description": "How long the model spends reasoning. Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 0.25, "output_token_cost": 2}, "batch": {"input_token_cost": 0.125, "output_token_cost": 1}}', true, NOW(), NOW()),
        (11, 'openai', 'gpt-5-nano', '{"effort": {"type": "enum", "default": "medium", "options": ["minimal", "low", "medium", "high"], "description": "How long the model spends reasoning. Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 0.05, "output_token_cost": 0.4}, "batch": {"input_token_cost": 0.025, "output_token_cost": 0.2}}', true, NOW(), NOW()),
        (12, 'openai', 'gpt-5.1', '{"effort": {"type": "enum", "default": "medium", "options": ["none", "low", "medium", "high"], "description": "How long the model spends reasoning. Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 1.25, "output_token_cost": 10}, "batch": {"input_token_cost": 0.625, "output_token_cost": 5}}', true, NOW(), NOW()),
        (13, 'openai', 'gpt-5.2', '{"effort": {"type": "enum", "default": "medium", "options": ["none", "low", "medium", "high", "xhigh"], "description": "How long the model spends reasoning. Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 1.75, "output_token_cost": 14}, "batch": {"input_token_cost": 0.875, "output_token_cost": 7}}', true, NOW(), NOW()),
        (14, 'openai', 'gpt-5.2-pro', '{"summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 21, "output_token_cost": 168}, "batch": {"input_token_cost": 10.5, "output_token_cost": 84}}', true, NOW(), NOW()),
        (15, 'openai', 'gpt-5.3-chat-latest', '{"summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 1.75, "output_token_cost": 14}, "batch": {"input_token_cost": 0.875, "output_token_cost": 7}}', true, NOW(), NOW()),
        (16, 'openai', 'gpt-5.4', '{"effort": {"type": "enum", "default": "medium", "options": ["none", "low", "medium", "high", "xhigh"], "description": "How long the model spends reasoning. Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 2.5, "output_token_cost": 15}, "batch": {"input_token_cost": 1.25, "output_token_cost": 7.5}}', true, NOW(), NOW()),
        (17, 'openai', 'gpt-5.4-mini', '{"effort": {"type": "enum", "default": "medium", "options": ["none", "low", "medium", "high", "xhigh"], "description": "How long the model spends reasoning. Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 0.75, "output_token_cost": 4.5}, "batch": {"input_token_cost": 0.375, "output_token_cost": 2.25}}', true, NOW(), NOW()),
        (18, 'openai', 'gpt-5.4-nano', '{"effort": {"type": "enum", "default": "medium", "options": ["none", "low", "medium", "high", "xhigh"], "description": "How long the model spends reasoning. Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 0.2, "output_token_cost": 1.25}, "batch": {"input_token_cost": 0.1, "output_token_cost": 0.625}}', true, NOW(), NOW()),
        (19, 'openai', 'gpt-5.4-pro', '{"effort": {"type": "enum", "default": "medium", "options": ["none", "low", "medium", "high", "xhigh"], "description": "How long the model spends reasoning. Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 30, "output_token_cost": 180}, "batch": {"input_token_cost": 15, "output_token_cost": 90}}', true, NOW(), NOW())
        """
    )

    # Reset the id sequence to continue after the last seeded id, so future
    # inserts that rely on the identity column do not collide with ids 1-19.
    op.execute(
        "SELECT setval(pg_get_serial_sequence('global.model_config', 'id'), "
        "(SELECT MAX(id) FROM global.model_config))"
    )


def downgrade() -> None:
    """Drop the ``global.model_config`` table (including all seeded rows)."""
    op.drop_table("model_config", schema="global")
67 changes: 67 additions & 0 deletions backend/app/api/docs/model_config/get_model.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
## Endpoint

**GET** `/api/v1/models/{provider}/{model_name}`

Retrieve a specific model configuration by provider and model name.

Returns model details including supported config parameters, input/output modalities, pricing, and active status.

### Path Parameters

- **`provider`** (required) — Provider name (e.g. `openai`, `google`)
- **`model_name`** (required) — Model name (e.g. `gpt-4o`, `gpt-4o-mini`)

### Example Response

```json
{
"success": true,
"data": {
"id": 2,
"provider": "openai",
"model_name": "gpt-4o",
"config": {
"temperature": {
"type": "float",
"default": 1.0,
"min": 0.0,
"max": 2.0,
"description": "Controls randomness. Lower = more deterministic."
},
"top_p": {
"type": "float",
"default": 1.0,
"min": 0.0,
"max": 1.0,
"description": "Nucleus sampling. Use either this or temperature, not both."
},
"max_output_tokens": {
"type": "int",
"default": 2048,
"min": 1,
"max": 32768,
"description": "Max tokens in the response."
}
},
"input_modalities": ["TEXT", "IMAGE"],
"output_modalities": ["TEXT"],
"pricing": {
"response": {
"input_token_cost": 2.5,
"output_token_cost": 10
},
"batch": {
"input_token_cost": 1.25,
"output_token_cost": 5
}
},
"is_active": true,
"inserted_at": "2026-03-12T00:00:00",
"updated_at": "2026-03-12T00:00:00"
}
}
```

### Error Response

- `404 Not Found` — Model not found for the given `provider` and `model_name`.
74 changes: 74 additions & 0 deletions backend/app/api/docs/model_config/list_models.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
## Endpoint

**GET** `/api/v1/models`

Retrieve a list of all active model configurations.

Returns model details including provider, model name, supported config parameters, input/output modalities, pricing, and active status.

Optionally filter by provider (e.g. openai, google).

### Query Parameters

- **`provider`** (optional) — Filter by provider name (e.g. `openai`, `google`)
- **`skip`** (optional, default 0) — Number of records to skip for pagination
- **`limit`** (optional, default 100, max 100) — Maximum number of records to return

### Example Response

```json
{
"success": true,
"metadata": {
"has_more": true
},
"data": {
"data": [
{
"id": 1,
"provider": "openai",
"model_name": "gpt-4o-mini",
"config": {
"temperature": {
"type": "float",
"default": 1.0,
"min": 0.0,
"max": 2.0,
"description": "Controls randomness. Lower = more deterministic."
},
"top_p": {
"type": "float",
"default": 1.0,
"min": 0.0,
"max": 1.0,
"description": "Nucleus sampling. Use either this or temperature, not both."
},
"max_output_tokens": {
"type": "int",
"default": 2048,
"min": 1,
"max": 32768,
"description": "Max tokens in the response."
}
},
"input_modalities": ["TEXT", "IMAGE"],
"output_modalities": ["TEXT"],
"pricing": {
"response": {
"input_token_cost": 0.15,
"output_token_cost": 0.6
},
"batch": {
"input_token_cost": 0.075,
"output_token_cost": 0.3
}
},
"is_active": true,
"inserted_at": "2026-03-12T00:00:00",
"updated_at": "2026-03-12T00:00:00"
}
],
"count": 1
}
}
```
52 changes: 52 additions & 0 deletions backend/app/api/docs/model_config/list_models_grouped.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
## Endpoint

**GET** `/api/v1/models/grouped`

Retrieve active models grouped by provider.

Supports pagination of model rows before grouping:
- `skip` (default `0`)
- `limit` (default `100`, max `100`)

Returns a dictionary where each key is a provider present in the paginated slice, and each value is a list of active model configurations for that provider.
Includes `metadata.has_more` when additional model rows exist.

### Example Response

```json
{
"success": true,
"metadata": {
"has_more": true
},
"data": {
"openai": [
{
"id": 2,
"provider": "openai",
"model_name": "gpt-4o",
"config": {
"temperature": {
"type": "float",
"default": 1.0,
"min": 0.0,
"max": 2.0,
"description": "Controls randomness. Lower = more deterministic."
}
},
"input_modalities": ["TEXT", "IMAGE"],
"output_modalities": ["TEXT"],
"pricing": {
"response": {
"input_token_cost": 2.5,
"output_token_cost": 10
}
},
"is_active": true,
"inserted_at": "2026-03-12T00:00:00",
"updated_at": "2026-03-12T00:00:00"
}
]
}
}
```
16 changes: 16 additions & 0 deletions backend/app/api/docs/model_config/list_providers.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
## Endpoint

**GET** `/api/v1/models/providers`

Retrieve the list of providers that currently have active models.

Returns provider names sorted in ascending order.

### Example Response

```json
{
"success": true,
"data": ["google", "openai"]
}
```
3 changes: 2 additions & 1 deletion backend/app/api/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
fine_tuning,
model_evaluation,
collection_job,
model_config,
)
from app.api.routes import evaluations
from app.core.config import settings
Expand Down Expand Up @@ -58,7 +59,7 @@
api_router.include_router(utils.router)
api_router.include_router(fine_tuning.router)
api_router.include_router(model_evaluation.router)

api_router.include_router(model_config.router)

if settings.ENVIRONMENT in ["development", "testing"]:
api_router.include_router(private.router)
Loading
Loading