-
Notifications
You must be signed in to change notification settings - Fork 10
Model Config: Add model configuration table and API endpoints #669
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
df0e840
8a51cf5
2b9ffef
4b68444
6e322c9
244b43f
951f9ba
97cb65a
f07fb2f
7c9f2a5
ee99e2e
ac87722
433581f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,130 @@ | ||
| """create model_config table | ||
|
|
||
| Revision ID: 052 | ||
| Revises: 051 | ||
| Create Date: 2026-03-12 00:00:00.000000 | ||
|
|
||
| """ | ||
|
|
||
| import sqlalchemy as sa | ||
| from alembic import op | ||
| from sqlalchemy.dialects import postgresql | ||
|
|
||
| # revision identifiers, used by Alembic. | ||
| revision = "052" | ||
| down_revision = "051" | ||
| branch_labels = None | ||
| depends_on = None | ||
|
|
||
|
|
||
def upgrade() -> None:
    """Create the ``global.model_config`` table and seed the default models.

    Creates one row per (provider, model_name) pair describing the model's
    tunable config parameters (JSONB), supported input/output modalities,
    per-1M-token pricing, and active flag, then resets the id sequence so
    future inserts continue after the seeded ids.
    """
    op.create_table(
        "model_config",
        sa.Column(
            "id",
            sa.Integer(),
            sa.Identity(always=False),
            nullable=False,
            comment="unique identifier for model config table",
        ),
        sa.Column(
            "provider",
            sa.String(),
            nullable=False,
            comment="provider name (e.g. openai, google)",
        ),
        sa.Column(
            "model_name",
            sa.String(),
            nullable=False,
            comment="model name (e.g. gpt-4o, gemini-3-flash-preview)",
        ),
        sa.Column(
            "config",
            postgresql.JSONB(astext_type=sa.Text()),
            nullable=False,
            comment="model adhoc configuration",
        ),
        sa.Column(
            "input_modalities",
            postgresql.ARRAY(sa.String()),
            nullable=False,
            server_default="{}",
            comment="supported input modalities: TEXT, IMAGE, FILES, AUDIO",
        ),
        sa.Column(
            "output_modalities",
            postgresql.ARRAY(sa.String()),
            nullable=False,
            server_default="{}",
            comment="supported output modalities: TEXT, AUDIO",
        ),
        sa.Column(
            "pricing",
            postgresql.JSONB(astext_type=sa.Text()),
            nullable=True,
            comment=(
                "pricing per 1M tokens in USD. "
                "Structure: {response: {input_token_cost, output_token_cost}, batch: {input_token_cost, output_token_cost}}"
            ),
        ),
        sa.Column(
            "is_active",
            sa.Boolean(),
            nullable=False,
            server_default=sa.text("true"),
            comment="whether this model is available",
        ),
        sa.Column(
            "inserted_at",
            sa.DateTime(),
            nullable=False,
            comment="timestamp when model configuration was created",
        ),
        sa.Column(
            "updated_at",
            sa.DateTime(),
            nullable=False,
            comment="timestamp when model configuration was updated",
        ),
        sa.PrimaryKeyConstraint("id"),
        sa.UniqueConstraint("provider", "model_name"),
        schema="global",
    )

    # Seed default model configurations
    op.execute(
        """
        INSERT INTO global.model_config (id, provider, model_name, config, input_modalities, output_modalities, pricing, is_active, inserted_at, updated_at)
        VALUES
        (1, 'openai', 'gpt-4o-mini', '{"temperature": {"type": "float", "default": 1.0, "min": 0.0, "max": 2.0, "description": "Controls randomness. Lower = more deterministic."}, "top_p": {"type": "float", "default": 1.0, "min": 0.0, "max": 1.0, "description": "Nucleus sampling. Use either this or temperature, not both."}, "max_output_tokens": {"type": "int", "default": 2048, "min": 1, "max": 32768, "description": "Max tokens in the response."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 0.15, "output_token_cost": 0.6}, "batch": {"input_token_cost": 0.075, "output_token_cost": 0.3}}', true, NOW(), NOW()),
        (2, 'openai', 'gpt-4o', '{"temperature": {"type": "float", "default": 1.0, "min": 0.0, "max": 2.0, "description": "Controls randomness. Lower = more deterministic."}, "top_p": {"type": "float", "default": 1.0, "min": 0.0, "max": 1.0, "description": "Nucleus sampling. Use either this or temperature, not both."}, "max_output_tokens": {"type": "int", "default": 2048, "min": 1, "max": 32768, "description": "Max tokens in the response."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 2.5, "output_token_cost": 10}, "batch": {"input_token_cost": 1.25, "output_token_cost": 5}}', true, NOW(), NOW()),
        (3, 'openai', 'gpt-4.1', '{"temperature": {"type": "float", "default": 1.0, "min": 0.0, "max": 2.0, "description": "Controls randomness. Lower = more deterministic."}, "top_p": {"type": "float", "default": 1.0, "min": 0.0, "max": 1.0, "description": "Nucleus sampling. Use either this or temperature, not both."}, "max_output_tokens": {"type": "int", "default": 2048, "min": 1, "max": 32768, "description": "Max tokens in the response."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 2, "output_token_cost": 8}, "batch": {"input_token_cost": 1, "output_token_cost": 4}}', true, NOW(), NOW()),
        (4, 'openai', 'gpt-4.1-mini', '{"temperature": {"type": "float", "default": 1.0, "min": 0.0, "max": 2.0, "description": "Controls randomness. Lower = more deterministic."}, "top_p": {"type": "float", "default": 1.0, "min": 0.0, "max": 1.0, "description": "Nucleus sampling. Use either this or temperature, not both."}, "max_output_tokens": {"type": "int", "default": 2048, "min": 1, "max": 32768, "description": "Max tokens in the response."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 0.4, "output_token_cost": 1.6}, "batch": {"input_token_cost": 0.2, "output_token_cost": 0.8}}', true, NOW(), NOW()),
        (5, 'openai', 'gpt-4.1-nano', '{"temperature": {"type": "float", "default": 1.0, "min": 0.0, "max": 2.0, "description": "Controls randomness. Lower = more deterministic."}, "top_p": {"type": "float", "default": 1.0, "min": 0.0, "max": 1.0, "description": "Nucleus sampling. Use either this or temperature, not both."}, "max_output_tokens": {"type": "int", "default": 2048, "min": 1, "max": 32768, "description": "Max tokens in the response."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 0.1, "output_token_cost": 0.4}, "batch": {"input_token_cost": 0.05, "output_token_cost": 0.2}}', true, NOW(), NOW()),
        (6, 'openai', 'o3-mini', '{"effort": {"type": "enum", "default": "medium", "options": ["low", "medium", "high"], "description": "How long the model spends reasoning. Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 1.1, "output_token_cost": 4.4}, "batch": {"input_token_cost": 0.55, "output_token_cost": 2.2}}', true, NOW(), NOW()),
        (7, 'openai', 'o3', '{"effort": {"type": "enum", "default": "medium", "options": ["low", "medium", "high"], "description": "How long the model spends reasoning. Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 2, "output_token_cost": 8}, "batch": {"input_token_cost": 1, "output_token_cost": 4}}', true, NOW(), NOW()),
        (8, 'openai', 'o4-mini', '{"effort": {"type": "enum", "default": "medium", "options": ["low", "medium", "high"], "description": "How long the model spends reasoning. Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 1.1, "output_token_cost": 4.4}, "batch": {"input_token_cost": 0.55, "output_token_cost": 2.2}}', true, NOW(), NOW()),
        (9, 'openai', 'gpt-5', '{"effort": {"type": "enum", "default": "medium", "options": ["minimal", "low", "medium", "high"], "description": "How long the model spends reasoning. Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 1.25, "output_token_cost": 10}, "batch": {"input_token_cost": 0.625, "output_token_cost": 5}}', true, NOW(), NOW()),
        (10, 'openai', 'gpt-5-mini', '{"effort": {"type": "enum", "default": "medium", "options": ["minimal", "low", "medium", "high"], "description": "How long the model spends reasoning. Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 0.25, "output_token_cost": 2}, "batch": {"input_token_cost": 0.125, "output_token_cost": 1}}', true, NOW(), NOW()),
        (11, 'openai', 'gpt-5-nano', '{"effort": {"type": "enum", "default": "medium", "options": ["minimal", "low", "medium", "high"], "description": "How long the model spends reasoning. Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 0.05, "output_token_cost": 0.4}, "batch": {"input_token_cost": 0.025, "output_token_cost": 0.2}}', true, NOW(), NOW()),
        (12, 'openai', 'gpt-5.1', '{"effort": {"type": "enum", "default": "medium", "options": ["none", "low", "medium", "high"], "description": "How long the model spends reasoning. Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 1.25, "output_token_cost": 10}, "batch": {"input_token_cost": 0.625, "output_token_cost": 5}}', true, NOW(), NOW()),
        (13, 'openai', 'gpt-5.2', '{"effort": {"type": "enum", "default": "medium", "options": ["none", "low", "medium", "high", "xhigh"], "description": "How long the model spends reasoning. Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 1.75, "output_token_cost": 14}, "batch": {"input_token_cost": 0.875, "output_token_cost": 7}}', true, NOW(), NOW()),
        (14, 'openai', 'gpt-5.2-pro', '{"summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 21, "output_token_cost": 168}, "batch": {"input_token_cost": 10.5, "output_token_cost": 84}}', true, NOW(), NOW()),
        (15, 'openai', 'gpt-5.3-chat-latest', '{"summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 1.75, "output_token_cost": 14}, "batch": {"input_token_cost": 0.875, "output_token_cost": 7}}', true, NOW(), NOW()),
        (16, 'openai', 'gpt-5.4', '{"effort": {"type": "enum", "default": "medium", "options": ["none", "low", "medium", "high", "xhigh"], "description": "How long the model spends reasoning. Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 2.5, "output_token_cost": 15}, "batch": {"input_token_cost": 1.25, "output_token_cost": 7.5}}', true, NOW(), NOW()),
        (17, 'openai', 'gpt-5.4-mini', '{"effort": {"type": "enum", "default": "medium", "options": ["none", "low", "medium", "high", "xhigh"], "description": "How long the model spends reasoning. Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 0.75, "output_token_cost": 4.5}, "batch": {"input_token_cost": 0.375, "output_token_cost": 2.25}}', true, NOW(), NOW()),
        (18, 'openai', 'gpt-5.4-nano', '{"effort": {"type": "enum", "default": "medium", "options": ["none", "low", "medium", "high", "xhigh"], "description": "How long the model spends reasoning. Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 0.2, "output_token_cost": 1.25}, "batch": {"input_token_cost": 0.1, "output_token_cost": 0.625}}', true, NOW(), NOW()),
        (19, 'openai', 'gpt-5.4-pro', '{"effort": {"type": "enum", "default": "medium", "options": ["none", "low", "medium", "high", "xhigh"], "description": "How long the model spends reasoning. Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 30, "output_token_cost": 180}, "batch": {"input_token_cost": 15, "output_token_cost": 90}}', true, NOW(), NOW())
        """
    )

    # Reset the id sequence to continue after the last seeded id, since the
    # seed rows above insert explicit ids that bypass the identity sequence.
    op.execute(
        "SELECT setval(pg_get_serial_sequence('global.model_config', 'id'), "
        "(SELECT MAX(id) FROM global.model_config))"
    )
|
|
||
|
|
||
def downgrade() -> None:
    """Drop the ``global.model_config`` table, reverting this migration."""
    op.drop_table("model_config", schema="global")
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,67 @@ | ||
| ## Endpoint | ||
|
|
||
| **GET** `/api/v1/models/{provider}/{model_name}` | ||
|
|
||
| Retrieve a specific model configuration by provider and model name. | ||
|
|
||
| Returns model details including supported config parameters, input/output modalities, pricing, and active status. | ||
|
|
||
| ### Path Parameters | ||
|
|
||
| - **`provider`** (required) — Provider name (e.g. `openai`, `google`) | ||
| - **`model_name`** (required) — Model name (e.g. `gpt-4o`, `gpt-4o-mini`) | ||
|
|
||
| ### Example Response | ||
|
|
||
| ```json | ||
| { | ||
| "success": true, | ||
| "data": { | ||
| "id": 2, | ||
| "provider": "openai", | ||
| "model_name": "gpt-4o", | ||
| "config": { | ||
| "temperature": { | ||
| "type": "float", | ||
| "default": 1.0, | ||
| "min": 0.0, | ||
| "max": 2.0, | ||
| "description": "Controls randomness. Lower = more deterministic." | ||
| }, | ||
| "top_p": { | ||
| "type": "float", | ||
| "default": 1.0, | ||
| "min": 0.0, | ||
| "max": 1.0, | ||
| "description": "Nucleus sampling. Use either this or temperature, not both." | ||
| }, | ||
| "max_output_tokens": { | ||
| "type": "int", | ||
| "default": 2048, | ||
| "min": 1, | ||
| "max": 32768, | ||
| "description": "Max tokens in the response." | ||
| } | ||
| }, | ||
| "input_modalities": ["TEXT", "IMAGE"], | ||
| "output_modalities": ["TEXT"], | ||
| "pricing": { | ||
| "response": { | ||
| "input_token_cost": 2.5, | ||
| "output_token_cost": 10 | ||
| }, | ||
| "batch": { | ||
| "input_token_cost": 1.25, | ||
| "output_token_cost": 5 | ||
| } | ||
| }, | ||
| "is_active": true, | ||
| "inserted_at": "2026-03-12T00:00:00", | ||
| "updated_at": "2026-03-12T00:00:00" | ||
| } | ||
| } | ||
| ``` | ||
|
|
||
| ### Error Response | ||
|
|
||
| - `404 Not Found` — Model not found for the given `provider` and `model_name`. |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,74 @@ | ||
| ## Endpoint | ||
|
|
||
| **GET** `/api/v1/models` | ||
|
|
||
| Retrieve a list of all active model configurations. | ||
|
|
||
| Returns model details including provider, model name, supported config parameters, input/output modalities, pricing, and active status. | ||
|
|
||
| Optionally filter by provider (e.g. openai, google). | ||
|
|
||
| ### Query Parameters | ||
|
|
||
| - **`provider`** (optional) — Filter by provider name (e.g. `openai`, `google`) | ||
| - **`skip`** (optional, default 0) — Number of records to skip for pagination | ||
| - **`limit`** (optional, default 100, max 100) — Maximum number of records to return | ||
|
|
||
| ### Example Response | ||
|
|
||
| ```json | ||
| { | ||
| "success": true, | ||
| "metadata": { | ||
| "has_more": true | ||
| }, | ||
| "data": { | ||
| "data": [ | ||
| { | ||
| "id": 1, | ||
| "provider": "openai", | ||
| "model_name": "gpt-4o-mini", | ||
| "config": { | ||
| "temperature": { | ||
| "type": "float", | ||
| "default": 1.0, | ||
| "min": 0.0, | ||
| "max": 2.0, | ||
| "description": "Controls randomness. Lower = more deterministic." | ||
| }, | ||
| "top_p": { | ||
| "type": "float", | ||
| "default": 1.0, | ||
| "min": 0.0, | ||
| "max": 1.0, | ||
| "description": "Nucleus sampling. Use either this or temperature, not both." | ||
| }, | ||
| "max_output_tokens": { | ||
| "type": "int", | ||
| "default": 2048, | ||
| "min": 1, | ||
| "max": 32768, | ||
| "description": "Max tokens in the response." | ||
| } | ||
| }, | ||
| "input_modalities": ["TEXT", "IMAGE"], | ||
| "output_modalities": ["TEXT"], | ||
| "pricing": { | ||
| "response": { | ||
| "input_token_cost": 0.15, | ||
| "output_token_cost": 0.6 | ||
| }, | ||
| "batch": { | ||
| "input_token_cost": 0.075, | ||
| "output_token_cost": 0.3 | ||
| } | ||
| }, | ||
| "is_active": true, | ||
| "inserted_at": "2026-03-12T00:00:00", | ||
| "updated_at": "2026-03-12T00:00:00" | ||
| } | ||
| ], | ||
| "count": 1 | ||
| } | ||
| } | ||
| ``` |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,52 @@ | ||
| ## Endpoint | ||
|
|
||
| **GET** `/api/v1/models/grouped` | ||
|
|
||
| Retrieve active models grouped by provider. | ||
|
|
||
| Supports pagination of model rows before grouping: | ||
| - `skip` (default `0`) | ||
| - `limit` (default `100`, max `100`) | ||
|
|
||
| Returns a dictionary where each key is a provider present in the paginated slice, and each value is a list of active model configurations for that provider. | ||
| Includes `metadata.has_more` when additional model rows exist. | ||
|
|
||
| ### Example Response | ||
|
|
||
| ```json | ||
| { | ||
| "success": true, | ||
| "metadata": { | ||
| "has_more": true | ||
| }, | ||
| "data": { | ||
| "openai": [ | ||
| { | ||
| "id": 2, | ||
| "provider": "openai", | ||
| "model_name": "gpt-4o", | ||
| "config": { | ||
| "temperature": { | ||
| "type": "float", | ||
| "default": 1.0, | ||
| "min": 0.0, | ||
| "max": 2.0, | ||
| "description": "Controls randomness. Lower = more deterministic." | ||
| } | ||
| }, | ||
| "input_modalities": ["TEXT", "IMAGE"], | ||
| "output_modalities": ["TEXT"], | ||
| "pricing": { | ||
| "response": { | ||
| "input_token_cost": 2.5, | ||
| "output_token_cost": 10 | ||
| } | ||
| }, | ||
| "is_active": true, | ||
| "inserted_at": "2026-03-12T00:00:00", | ||
| "updated_at": "2026-03-12T00:00:00" | ||
| } | ||
| ] | ||
| } | ||
| } | ||
| ``` |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,16 @@ | ||
| ## Endpoint | ||
|
|
||
| **GET** `/api/v1/models/providers` | ||
|
|
||
| Retrieve the list of providers that currently have active models. | ||
|
|
||
| Returns provider names sorted in ascending order. | ||
|
|
||
| ### Example Response | ||
|
|
||
| ```json | ||
| { | ||
| "success": true, | ||
| "data": ["google", "openai"] | ||
| } | ||
| ``` |
Uh oh!
There was an error while loading. Please reload this page.