From df0e840b198d12340dc7766f5dc6bb15665816d9 Mon Sep 17 00:00:00 2001 From: Prashant Vasudevan <71649489+vprashrex@users.noreply.github.com> Date: Thu, 12 Mar 2026 09:47:38 +0530 Subject: [PATCH 01/10] Model Config: Add model configuration table and API endpoints --- .../versions/050_create_model_config_table.py | 132 ++++++++++++++++++ .../app/api/docs/model_config/get_model.md | 54 +++++++ .../app/api/docs/model_config/list_models.md | 62 ++++++++ backend/app/api/main.py | 3 +- backend/app/api/routes/model_config.py | 50 +++++++ backend/app/crud/model_config.py | 50 +++++++ backend/app/models/__init__.py | 8 ++ backend/app/models/model_config.py | 124 ++++++++++++++++ 8 files changed, 482 insertions(+), 1 deletion(-) create mode 100644 backend/app/alembic/versions/050_create_model_config_table.py create mode 100644 backend/app/api/docs/model_config/get_model.md create mode 100644 backend/app/api/docs/model_config/list_models.md create mode 100644 backend/app/api/routes/model_config.py create mode 100644 backend/app/crud/model_config.py create mode 100644 backend/app/models/model_config.py diff --git a/backend/app/alembic/versions/050_create_model_config_table.py b/backend/app/alembic/versions/050_create_model_config_table.py new file mode 100644 index 000000000..aa5481c17 --- /dev/null +++ b/backend/app/alembic/versions/050_create_model_config_table.py @@ -0,0 +1,132 @@ +"""create model_config table + +Revision ID: 050 +Revises: 049 +Create Date: 2026-03-12 00:00:00.000000 + +""" + +import sqlalchemy as sa +from alembic import op +from sqlalchemy.dialects import postgresql + +# revision identifiers, used by Alembic. +revision = "050" +down_revision = "049" +branch_labels = None +depends_on = None + + +def upgrade(): + op.create_table( + "model_config", + sa.Column( + "id", + sa.Integer(), + sa.Identity(always=False), + nullable=False, + comment="unique identifier for model config table", + ), + sa.Column( + "provider", + sa.String(), + nullable=False, + comment="provider name (e.g. openai, google)", + ), + sa.Column( + "model_name", + sa.String(), + nullable=False, + comment="model name (e.g. gpt-4o, gemini-3-flash-preview)", + ), + sa.Column( + "config", + postgresql.JSONB(astext_type=sa.Text()), + nullable=False, + comment="model adhoc configuration", + ), + sa.Column( + "input_modalities", + postgresql.ARRAY(sa.String()), + nullable=False, + server_default="{}", + comment="supported input modalities: TEXT, IMAGE, PDF, AUDIO", + ), + sa.Column( + "output_modalities", + postgresql.ARRAY(sa.String()), + nullable=False, + server_default="{}", + comment="supported output modalities: TEXT, AUDIO", + ), + sa.Column( + "default_for", + sa.String(), + nullable=True, + comment=( + "completion types this model is the default for. " + "e.g. [text, stt, tts]. " + "NULL means not a default. 
" + "Supported: text, stt, tts" + ), + ), + sa.Column( + "is_active", + sa.Boolean(), + nullable=False, + server_default=sa.text("true"), + comment="whether this model is available", + ), + sa.Column( + "inserted_at", + sa.DateTime(), + nullable=False, + comment="timestamp when model configuration was created", + ), + sa.Column( + "updated_at", + sa.DateTime(), + nullable=False, + comment="timestamp when model configuration was updated", + ), + sa.PrimaryKeyConstraint("id"), + sa.UniqueConstraint("provider", "model_name"), + schema="global", + ) + + # Seed default model configurations + op.execute( + """ + INSERT INTO global.model_config (id, provider, model_name, config, input_modalities, output_modalities, default_for, is_active, inserted_at, updated_at) + VALUES + (1, 'openai', 'gpt-4o-mini', '{"temperature": {"type": "float", "default": 1.0, "min": 0.0, "max": 2.0, "description": "Controls randomness. Lower = more deterministic."}, "top_p": {"type": "float", "default": 1.0, "min": 0.0, "max": 1.0, "description": "Nucleus sampling. Use either this or temperature, not both."}, "max_output_tokens": {"type": "int", "default": 2048, "min": 1, "max": 32768, "description": "Max tokens in the response."}}', '{TEXT,IMAGE}', '{TEXT}', NULL, true, NOW(), NOW()), + (2, 'openai', 'gpt-4o', '{"temperature": {"type": "float", "default": 1.0, "min": 0.0, "max": 2.0, "description": "Controls randomness. Lower = more deterministic."}, "top_p": {"type": "float", "default": 1.0, "min": 0.0, "max": 1.0, "description": "Nucleus sampling. Use either this or temperature, not both."}, "max_output_tokens": {"type": "int", "default": 2048, "min": 1, "max": 32768, "description": "Max tokens in the response."}}', '{TEXT,IMAGE}', '{TEXT}', 'text', true, NOW(), NOW()), + (3, 'openai', 'gpt-4.1', '{"temperature": {"type": "float", "default": 1.0, "min": 0.0, "max": 2.0, "description": "Controls randomness. Lower = more deterministic."}, "top_p": {"type": "float", "default": 1.0, "min": 0.0, "max": 1.0, "description": "Nucleus sampling. Use either this or temperature, not both."}, "max_output_tokens": {"type": "int", "default": 2048, "min": 1, "max": 32768, "description": "Max tokens in the response."}}', '{TEXT,IMAGE}', '{TEXT}', NULL, true, NOW(), NOW()), + (4, 'openai', 'gpt-4.1-mini', '{"temperature": {"type": "float", "default": 1.0, "min": 0.0, "max": 2.0, "description": "Controls randomness. Lower = more deterministic."}, "top_p": {"type": "float", "default": 1.0, "min": 0.0, "max": 1.0, "description": "Nucleus sampling. Use either this or temperature, not both."}, "max_output_tokens": {"type": "int", "default": 2048, "min": 1, "max": 32768, "description": "Max tokens in the response."}}', '{TEXT,IMAGE}', '{TEXT}', NULL, true, NOW(), NOW()), + (5, 'openai', 'gpt-4.1-nano', '{"temperature": {"type": "float", "default": 1.0, "min": 0.0, "max": 2.0, "description": "Controls randomness. Lower = more deterministic."}, "top_p": {"type": "float", "default": 1.0, "min": 0.0, "max": 1.0, "description": "Nucleus sampling. Use either this or temperature, not both."}, "max_output_tokens": {"type": "int", "default": 2048, "min": 1, "max": 32768, "description": "Max tokens in the response."}}', '{TEXT,IMAGE}', '{TEXT}', NULL, true, NOW(), NOW()), + (6, 'openai', 'o3-mini', '{"effort": {"type": "enum", "default": "medium", "options": ["low", "medium", "high"], "description": "How long the model spends reasoning. 
Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', NULL, true, NOW(), NOW()), + (7, 'openai', 'o3', '{"effort": {"type": "enum", "default": "medium", "options": ["low", "medium", "high"], "description": "How long the model spends reasoning. Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', NULL, true, NOW(), NOW()), + (8, 'openai', 'o4-mini', '{"effort": {"type": "enum", "default": "medium", "options": ["low", "medium", "high"], "description": "How long the model spends reasoning. Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', NULL, true, NOW(), NOW()), + (9, 'openai', 'gpt-5', '{"effort": {"type": "enum", "default": "medium", "options": ["minimal", "low", "medium", "high"], "description": "How long the model spends reasoning. Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', NULL, true, NOW(), NOW()), + (10, 'openai', 'gpt-5-mini', '{"effort": {"type": "enum", "default": "medium", "options": ["minimal", "low", "medium", "high"], "description": "How long the model spends reasoning. Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', NULL, true, NOW(), NOW()), + (11, 'openai', 'gpt-5-nano', '{"effort": {"type": "enum", "default": "medium", "options": ["minimal", "low", "medium", "high"], "description": "How long the model spends reasoning. Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', NULL, true, NOW(), NOW()), + (12, 'openai', 'gpt-5.1', '{"effort": {"type": "enum", "default": "medium", "options": ["none", "low", "medium", "high"], "description": "How long the model spends reasoning. Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', NULL, true, NOW(), NOW()), + (13, 'openai', 'gpt-5.1-chat-latest', '{"summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', NULL, true, NOW(), NOW()), + (14, 'openai', 'gpt-5.2', '{"effort": {"type": "enum", "default": "medium", "options": ["none", "low", "medium", "high", "xhigh"], "description": "How long the model spends reasoning. 
Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', NULL, true, NOW(), NOW()), + (15, 'openai', 'gpt-5.2-chat-latest', '{"summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', NULL, true, NOW(), NOW()), + (16, 'openai', 'gpt-5.2-pro', '{"summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', NULL, true, NOW(), NOW()), + (17, 'openai', 'gpt-5.3-chat-latest', '{"summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', NULL, true, NOW(), NOW()), + (18, 'openai', 'gpt-5.4-2026-03-05', '{"effort": {"type": "enum", "default": "medium", "options": ["none", "low", "medium", "high", "xhigh"], "description": "How long the model spends reasoning. Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', NULL, true, NOW(), NOW()), + (19, 'openai', 'gpt-5.4-pro', '{"effort": {"type": "enum", "default": "medium", "options": ["none", "low", "medium", "high", "xhigh"], "description": "How long the model spends reasoning. Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', NULL, true, NOW(), NOW()) + """ + ) + + # Reset the id sequence to continue after the last seeded id + op.execute( + "SELECT setval(pg_get_serial_sequence('global.model_config', 'id'), " + "(SELECT MAX(id) FROM global.model_config))" + ) + + +def downgrade(): + op.drop_table("model_config", schema="global") diff --git a/backend/app/api/docs/model_config/get_model.md b/backend/app/api/docs/model_config/get_model.md new file mode 100644 index 000000000..0f4080081 --- /dev/null +++ b/backend/app/api/docs/model_config/get_model.md @@ -0,0 +1,54 @@ +## Endpoint + +**GET** `/api/v1/models/{provider}/{model_name}` + +Retrieve a specific model configuration by provider and model name. + +Returns model details including supported config parameters, input/output modalities, and default assignment. + +### Path Parameters + +- **`provider`** (required) — Provider name (e.g. `openai`, `google`) +- **`model_name`** (required) — Model name (e.g. `gpt-4o`, `gpt-4o-mini`) + +### Example Response + +```json +{ + "success": true, + "data": { + "id": 2, + "provider": "openai", + "model_name": "gpt-4o", + "config": { + "temperature": { + "type": "float", + "default": 1.0, + "min": 0.0, + "max": 2.0, + "description": "Controls randomness. Lower = more deterministic." + }, + "top_p": { + "type": "float", + "default": 1.0, + "min": 0.0, + "max": 1.0, + "description": "Nucleus sampling. Use either this or temperature, not both." + }, + "max_output_tokens": { + "type": "int", + "default": 2048, + "min": 1, + "max": 32768, + "description": "Max tokens in the response." 
+ } + }, + "input_modalities": ["TEXT", "IMAGE"], + "output_modalities": ["TEXT"], + "default_for": "text", + "is_active": true, + "inserted_at": "2026-03-12T00:00:00", + "updated_at": "2026-03-12T00:00:00" + } +} +``` diff --git a/backend/app/api/docs/model_config/list_models.md b/backend/app/api/docs/model_config/list_models.md new file mode 100644 index 000000000..82eac1a91 --- /dev/null +++ b/backend/app/api/docs/model_config/list_models.md @@ -0,0 +1,62 @@ +## Endpoint + +**GET** `/api/v1/models` + +Retrieve a list of all active model configurations. + +Returns model details including provider, model name, supported config parameters, input/output modalities, and default assignment. + +Optionally filter by provider (e.g. openai, google). + +### Query Parameters + +- **`provider`** (optional) — Filter by provider name (e.g. `openai`, `google`) +- **`skip`** (optional, default 0) — Number of records to skip for pagination +- **`limit`** (optional, default 100) — Maximum number of records to return + +### Example Response + +```json +{ + "success": true, + "data": { + "data": [ + { + "id": 1, + "provider": "openai", + "model_name": "gpt-4o-mini", + "config": { + "temperature": { + "type": "float", + "default": 1.0, + "min": 0.0, + "max": 2.0, + "description": "Controls randomness. Lower = more deterministic." + }, + "top_p": { + "type": "float", + "default": 1.0, + "min": 0.0, + "max": 1.0, + "description": "Nucleus sampling. Use either this or temperature, not both." + }, + "max_output_tokens": { + "type": "int", + "default": 2048, + "min": 1, + "max": 32768, + "description": "Max tokens in the response." + } + }, + "input_modalities": ["TEXT", "IMAGE"], + "output_modalities": ["TEXT"], + "default_for": null, + "is_active": true, + "inserted_at": "2026-03-12T00:00:00", + "updated_at": "2026-03-12T00:00:00" + } + ], + "count": 1 + } +} +``` diff --git a/backend/app/api/main.py b/backend/app/api/main.py index 5ab1cbd9e..49c081562 100644 --- a/backend/app/api/main.py +++ b/backend/app/api/main.py @@ -25,6 +25,7 @@ fine_tuning, model_evaluation, collection_job, + model_config, ) from app.api.routes import evaluations from app.core.config import settings @@ -54,7 +55,7 @@ api_router.include_router(utils.router) api_router.include_router(fine_tuning.router) api_router.include_router(model_evaluation.router) - +api_router.include_router(model_config.router) if settings.ENVIRONMENT in ["development", "testing"]: api_router.include_router(private.router) diff --git a/backend/app/api/routes/model_config.py b/backend/app/api/routes/model_config.py new file mode 100644 index 000000000..7b7caf05f --- /dev/null +++ b/backend/app/api/routes/model_config.py @@ -0,0 +1,50 @@ +import logging + +from fastapi import APIRouter, HTTPException + +from app.api.deps import AuthContextDep, SessionDep +from app.crud.model_config import get_active_models, get_model_config +from app.models import ModelConfigPublic, ModelConfigListPublic +from app.utils import APIResponse, load_description + +logger = logging.getLogger(__name__) +router = APIRouter(prefix="/models", tags=["Model Config"]) + + +@router.get( + "/", + response_model=APIResponse[ModelConfigListPublic], + description=load_description("model_config/list_models.md"), +) +def list_models( + session: SessionDep, + auth_context: AuthContextDep, + provider: str | None = None, + skip: int = 0, + limit: int = 100, +) -> APIResponse[ModelConfigListPublic]: + models = get_active_models( + session=session, provider=provider, skip=skip, limit=limit + ) + return 
APIResponse.success_response( + ModelConfigListPublic(data=models, count=len(models)) + ) + + +@router.get( + "/{provider}/{model_name:path}", + response_model=APIResponse[ModelConfigPublic], + description=load_description("model_config/get_model.md"), +) +def get_model( + session: SessionDep, auth_context: AuthContextDep, provider: str, model_name: str +) -> APIResponse[ModelConfigPublic]: + model = get_model_config(session=session, provider=provider, model_name=model_name) + + if model is None: + logger.error( + f"[get_model] Model not found | provider={provider}, model_name={model_name}" + ) + raise HTTPException(status_code=404, detail="Model not found") + + return APIResponse.success_response(model) diff --git a/backend/app/crud/model_config.py b/backend/app/crud/model_config.py new file mode 100644 index 000000000..546b051e3 --- /dev/null +++ b/backend/app/crud/model_config.py @@ -0,0 +1,50 @@ +import logging +from typing import Optional, Literal + +from sqlmodel import Session, select + +from app.models import ModelConfig + +logger = logging.getLogger(__name__) + + +def get_default_model_for_type( + session: Session, completion_type: Literal["text", "stt", "tts"] +) -> Optional[ModelConfig]: + statement = ( + select(ModelConfig) + .where( + ModelConfig.is_active == True, + ModelConfig.default_for == completion_type, + ) + .limit(1) + ) + + return session.exec(statement).first() + + +def get_active_models( + session: Session, + provider: Literal["openai", "google"] | None = None, + skip: int = 0, + limit: int = 100, +) -> list[ModelConfig]: + statement = select(ModelConfig).where(ModelConfig.is_active == True) + + if provider: + statement = statement.where(ModelConfig.provider == provider) + + statement = statement.order_by(ModelConfig.provider, ModelConfig.model_name) + statement = statement.offset(skip).limit(limit) + return list(session.exec(statement).all()) + + +def get_model_config( + session: Session, provider: Literal["openai", "google"], model_name: str +) -> Optional[ModelConfig]: + statement = select(ModelConfig).where( + ModelConfig.provider == provider, + ModelConfig.model_name == model_name, + ModelConfig.is_active == True, + ) + return session.exec(statement).first() diff --git a/backend/app/models/__init__.py b/backend/app/models/__init__.py index b5cb3f0c6..9aeeee1da 100644 --- a/backend/app/models/__init__.py +++ b/backend/app/models/__init__.py @@ -118,6 +118,14 @@ ) from .message import Message + +from .model_config import ( + ModelConfig, + ModelConfigBase, + ModelConfigListPublic, + ModelConfigPublic, +) + from .model_evaluation import ( ModelEvaluation, ModelEvaluationBase, diff --git a/backend/app/models/model_config.py b/backend/app/models/model_config.py new file mode 100644 index 000000000..73d2c2963 --- /dev/null +++ b/backend/app/models/model_config.py @@ -0,0 +1,124 @@ +from datetime import datetime +from typing import Any, Literal + +import sqlalchemy as sa +from app.core.util import now +from sqlmodel import Field, SQLModel +from sqlalchemy.dialects.postgresql import JSONB, ARRAY + + +class ModelConfigBase(SQLModel): + provider: Literal["openai", "google"] = Field( + default="openai", + sa_column=sa.Column( + sa.String, nullable=False, comment="provider name (e.g. openai, google)" + ), + ) + + model_name: str = Field( + ..., + sa_column=sa.Column( + sa.String, + nullable=False, + comment="model name (e.g. 
gpt-4o, gemini-3-flash-preview)", + ), + ) + + config: dict[str, Any] = Field( + default_factory=dict, + sa_column=sa.Column(JSONB, nullable=False, comment="model adhoc configuration"), + ) + + input_modalities: list[str] = Field( + default_factory=list, + sa_column=sa.Column( + ARRAY(sa.String), + nullable=False, + server_default="{}", + comment="supported input modalities: TEXT, IMAGE, PDF, AUDIO", + ), + ) + + output_modalities: list[str] = Field( + default_factory=list, + sa_column=sa.Column( + ARRAY(sa.String), + nullable=False, + server_default="{}", + comment="supported output modalities: TEXT, AUDIO", + ), + ) + + # NOTE: can we use this default_for column to help in routing? + default_for: Literal["text", "stt", "tts"] | None = Field( + default=None, + sa_column=sa.Column( + sa.String, + nullable=True, + comment=( + "completion types this model is the default for. " + "e.g. [text, stt, tts]. " + "NULL means not a default. " + "Supported: text, stt, tts" + ), + ), + ) + + is_active: bool = Field( + default=True, + sa_column=sa.Column( + sa.Boolean, + nullable=False, + server_default=sa.text("true"), + comment="whether this model is available", + ), + ) + + +class ModelConfig(ModelConfigBase, table=True): + __tablename__ = "model_config" + __table_args__ = ( + sa.UniqueConstraint("provider", "model_name"), + {"schema": "global"}, + ) + + id: int | None = Field( + default=None, + sa_column=sa.Column( + sa.Integer, + primary_key=True, + comment="unique identifier for model config table", + ), + ) + + inserted_at: datetime = Field( + default_factory=now, + sa_column=sa.Column( + sa.DateTime, + default=now, + nullable=False, + comment="timestamp when model configuration was created", + ), + ) + + updated_at: datetime = Field( + default_factory=now, + sa_column=sa.Column( + sa.DateTime, + default=now, + nullable=False, + onupdate=now, + comment="timestamp when model configuration was updated", + ), + ) + + +class ModelConfigPublic(ModelConfigBase): + id: int + inserted_at: datetime + updated_at: datetime + + +class ModelConfigListPublic(SQLModel): + data: list[ModelConfigPublic] + count: int From 2b9ffefacb77cf207adab4eeb1f62baaf316a600 Mon Sep 17 00:00:00 2001 From: Prashant Vasudevan <71649489+vprashrex@users.noreply.github.com> Date: Mon, 13 Apr 2026 07:00:01 +0530 Subject: [PATCH 02/10] feat: add model_config table with default configurations --- ...ate_model_config_table.py => 051_create_model_config_table.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename backend/app/alembic/versions/{050_create_model_config_table.py => 051_create_model_config_table.py} (100%) diff --git a/backend/app/alembic/versions/050_create_model_config_table.py b/backend/app/alembic/versions/051_create_model_config_table.py similarity index 100% rename from backend/app/alembic/versions/050_create_model_config_table.py rename to backend/app/alembic/versions/051_create_model_config_table.py From 4b68444c6e1fc8053a0ca54230cf544c0d7b8a67 Mon Sep 17 00:00:00 2001 From: Prashant Vasudevan <71649489+vprashrex@users.noreply.github.com> Date: Mon, 13 Apr 2026 07:02:35 +0530 Subject: [PATCH 03/10] fix: update revision identifiers for model_config table migration --- .../app/alembic/versions/051_create_model_config_table.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/backend/app/alembic/versions/051_create_model_config_table.py b/backend/app/alembic/versions/051_create_model_config_table.py index aa5481c17..229523995 100644 --- 
a/backend/app/alembic/versions/051_create_model_config_table.py +++ b/backend/app/alembic/versions/051_create_model_config_table.py @@ -1,7 +1,7 @@ """create model_config table -Revision ID: 050 -Revises: 049 +Revision ID: 051 +Revises: 050 Create Date: 2026-03-12 00:00:00.000000 """ @@ -11,8 +11,8 @@ from sqlalchemy.dialects import postgresql # revision identifiers, used by Alembic. -revision = "050" -down_revision = "049" +revision = "051" +down_revision = "050" branch_labels = None depends_on = None From 6e322c9e8bb9b6986f19e439092031a19509b976 Mon Sep 17 00:00:00 2001 From: Prashant Vasudevan <71649489+vprashrex@users.noreply.github.com> Date: Mon, 13 Apr 2026 14:44:38 +0530 Subject: [PATCH 04/10] feat: add tests for model configuration endpoints --- .../app/tests/api/routes/test_model_config.py | 68 +++++++++++++++++++ 1 file changed, 68 insertions(+) create mode 100644 backend/app/tests/api/routes/test_model_config.py diff --git a/backend/app/tests/api/routes/test_model_config.py b/backend/app/tests/api/routes/test_model_config.py new file mode 100644 index 000000000..60beda986 --- /dev/null +++ b/backend/app/tests/api/routes/test_model_config.py @@ -0,0 +1,68 @@ +from fastapi.testclient import TestClient +from sqlmodel import Session + +from app.core.config import settings +from app.crud.model_config import get_default_model_for_type + + +def test_list_models( + client: TestClient, superuser_token_headers: dict[str, str] +) -> None: + response = client.get( + f"{settings.API_V1_STR}/models/", + headers=superuser_token_headers, + ) + + assert response.status_code == 200 + body = response.json() + assert body["success"] is True + assert body["data"]["count"] > 0 + assert all(m["is_active"] for m in body["data"]["data"]) + + +def test_list_models_filter_by_provider( + client: TestClient, superuser_token_headers: dict[str, str] +) -> None: + response = client.get( + f"{settings.API_V1_STR}/models/?provider=openai&limit=5", + headers=superuser_token_headers, + ) + + assert response.status_code == 200 + data = response.json()["data"]["data"] + assert len(data) <= 5 + assert all(m["provider"] == "openai" for m in data) + + +def test_get_model( + client: TestClient, superuser_token_headers: dict[str, str] +) -> None: + response = client.get( + f"{settings.API_V1_STR}/models/openai/gpt-4o", + headers=superuser_token_headers, + ) + + assert response.status_code == 200 + model = response.json()["data"] + assert model["provider"] == "openai" + assert model["model_name"] == "gpt-4o" + + +def test_get_model_not_found( + client: TestClient, superuser_token_headers: dict[str, str] +) -> None: + response = client.get( + f"{settings.API_V1_STR}/models/openai/does-not-exist", + headers=superuser_token_headers, + ) + + assert response.status_code == 404 + assert response.json()["error"] == "Model not found" + + +def test_get_default_model_for_type(db: Session) -> None: + model = get_default_model_for_type(session=db, completion_type="text") + + assert model is not None + assert model.default_for == "text" + assert model.is_active is True From 244b43fb21a2e2a8d37f7014b2cd3ebcc903bc2c Mon Sep 17 00:00:00 2001 From: Prashant Vasudevan <71649489+vprashrex@users.noreply.github.com> Date: Mon, 13 Apr 2026 15:37:57 +0530 Subject: [PATCH 05/10] refactor: simplify function definition for test_get_model --- backend/app/tests/api/routes/test_model_config.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/backend/app/tests/api/routes/test_model_config.py 
b/backend/app/tests/api/routes/test_model_config.py index 60beda986..779d22e50 100644 --- a/backend/app/tests/api/routes/test_model_config.py +++ b/backend/app/tests/api/routes/test_model_config.py @@ -34,9 +34,7 @@ def test_list_models_filter_by_provider( assert all(m["provider"] == "openai" for m in data) -def test_get_model( - client: TestClient, superuser_token_headers: dict[str, str] -) -> None: +def test_get_model(client: TestClient, superuser_token_headers: dict[str, str]) -> None: response = client.get( f"{settings.API_V1_STR}/models/openai/gpt-4o", headers=superuser_token_headers, From 951f9ba89410f9b4e4e48728664bf2db75625d76 Mon Sep 17 00:00:00 2001 From: Prashant Vasudevan <71649489+vprashrex@users.noreply.github.com> Date: Tue, 14 Apr 2026 14:20:51 +0530 Subject: [PATCH 06/10] feat: add endpoints to list models grouped by provider and list active providers --- .../versions/051_create_model_config_table.py | 52 ++++++++-------- .../docs/model_config/list_models_grouped.md | 45 ++++++++++++++ .../api/docs/model_config/list_providers.md | 16 +++++ backend/app/api/routes/model_config.py | 41 +++++++++++- backend/app/crud/model_config.py | 62 ++++++++++++++----- backend/app/models/model_config.py | 51 ++++++++++++--- .../app/tests/api/routes/test_model_config.py | 44 ++++++++++--- 7 files changed, 250 insertions(+), 61 deletions(-) create mode 100644 backend/app/api/docs/model_config/list_models_grouped.md create mode 100644 backend/app/api/docs/model_config/list_providers.md diff --git a/backend/app/alembic/versions/051_create_model_config_table.py b/backend/app/alembic/versions/051_create_model_config_table.py index 229523995..e913bcb5e 100644 --- a/backend/app/alembic/versions/051_create_model_config_table.py +++ b/backend/app/alembic/versions/051_create_model_config_table.py @@ -50,7 +50,7 @@ def upgrade(): postgresql.ARRAY(sa.String()), nullable=False, server_default="{}", - comment="supported input modalities: TEXT, IMAGE, PDF, AUDIO", + comment="supported input modalities: TEXT, IMAGE, FILES, AUDIO", ), sa.Column( "output_modalities", @@ -60,14 +60,12 @@ def upgrade(): comment="supported output modalities: TEXT, AUDIO", ), sa.Column( - "default_for", - sa.String(), + "pricing", + postgresql.JSONB(astext_type=sa.Text()), nullable=True, comment=( - "completion types this model is the default for. " - "e.g. [text, stt, tts]. " - "NULL means not a default. " - "Supported: text, stt, tts" + "pricing per 1M tokens in USD. " + "Structure: {response: {input_token_cost, output_token_cost}, batch: {input_token_cost, output_token_cost}}" ), ), sa.Column( @@ -97,27 +95,27 @@ def upgrade(): # Seed default model configurations op.execute( """ - INSERT INTO global.model_config (id, provider, model_name, config, input_modalities, output_modalities, default_for, is_active, inserted_at, updated_at) + INSERT INTO global.model_config (id, provider, model_name, config, input_modalities, output_modalities, pricing, is_active, inserted_at, updated_at) VALUES - (1, 'openai', 'gpt-4o-mini', '{"temperature": {"type": "float", "default": 1.0, "min": 0.0, "max": 2.0, "description": "Controls randomness. Lower = more deterministic."}, "top_p": {"type": "float", "default": 1.0, "min": 0.0, "max": 1.0, "description": "Nucleus sampling. 
Use either this or temperature, not both."}, "max_output_tokens": {"type": "int", "default": 2048, "min": 1, "max": 32768, "description": "Max tokens in the response."}}', '{TEXT,IMAGE}', '{TEXT}', NULL, true, NOW(), NOW()), - (2, 'openai', 'gpt-4o', '{"temperature": {"type": "float", "default": 1.0, "min": 0.0, "max": 2.0, "description": "Controls randomness. Lower = more deterministic."}, "top_p": {"type": "float", "default": 1.0, "min": 0.0, "max": 1.0, "description": "Nucleus sampling. Use either this or temperature, not both."}, "max_output_tokens": {"type": "int", "default": 2048, "min": 1, "max": 32768, "description": "Max tokens in the response."}}', '{TEXT,IMAGE}', '{TEXT}', 'text', true, NOW(), NOW()), - (3, 'openai', 'gpt-4.1', '{"temperature": {"type": "float", "default": 1.0, "min": 0.0, "max": 2.0, "description": "Controls randomness. Lower = more deterministic."}, "top_p": {"type": "float", "default": 1.0, "min": 0.0, "max": 1.0, "description": "Nucleus sampling. Use either this or temperature, not both."}, "max_output_tokens": {"type": "int", "default": 2048, "min": 1, "max": 32768, "description": "Max tokens in the response."}}', '{TEXT,IMAGE}', '{TEXT}', NULL, true, NOW(), NOW()), - (4, 'openai', 'gpt-4.1-mini', '{"temperature": {"type": "float", "default": 1.0, "min": 0.0, "max": 2.0, "description": "Controls randomness. Lower = more deterministic."}, "top_p": {"type": "float", "default": 1.0, "min": 0.0, "max": 1.0, "description": "Nucleus sampling. Use either this or temperature, not both."}, "max_output_tokens": {"type": "int", "default": 2048, "min": 1, "max": 32768, "description": "Max tokens in the response."}}', '{TEXT,IMAGE}', '{TEXT}', NULL, true, NOW(), NOW()), - (5, 'openai', 'gpt-4.1-nano', '{"temperature": {"type": "float", "default": 1.0, "min": 0.0, "max": 2.0, "description": "Controls randomness. Lower = more deterministic."}, "top_p": {"type": "float", "default": 1.0, "min": 0.0, "max": 1.0, "description": "Nucleus sampling. Use either this or temperature, not both."}, "max_output_tokens": {"type": "int", "default": 2048, "min": 1, "max": 32768, "description": "Max tokens in the response."}}', '{TEXT,IMAGE}', '{TEXT}', NULL, true, NOW(), NOW()), - (6, 'openai', 'o3-mini', '{"effort": {"type": "enum", "default": "medium", "options": ["low", "medium", "high"], "description": "How long the model spends reasoning. Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', NULL, true, NOW(), NOW()), - (7, 'openai', 'o3', '{"effort": {"type": "enum", "default": "medium", "options": ["low", "medium", "high"], "description": "How long the model spends reasoning. Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', NULL, true, NOW(), NOW()), - (8, 'openai', 'o4-mini', '{"effort": {"type": "enum", "default": "medium", "options": ["low", "medium", "high"], "description": "How long the model spends reasoning. 
Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', NULL, true, NOW(), NOW()), - (9, 'openai', 'gpt-5', '{"effort": {"type": "enum", "default": "medium", "options": ["minimal", "low", "medium", "high"], "description": "How long the model spends reasoning. Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', NULL, true, NOW(), NOW()), - (10, 'openai', 'gpt-5-mini', '{"effort": {"type": "enum", "default": "medium", "options": ["minimal", "low", "medium", "high"], "description": "How long the model spends reasoning. Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', NULL, true, NOW(), NOW()), - (11, 'openai', 'gpt-5-nano', '{"effort": {"type": "enum", "default": "medium", "options": ["minimal", "low", "medium", "high"], "description": "How long the model spends reasoning. Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', NULL, true, NOW(), NOW()), - (12, 'openai', 'gpt-5.1', '{"effort": {"type": "enum", "default": "medium", "options": ["none", "low", "medium", "high"], "description": "How long the model spends reasoning. Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', NULL, true, NOW(), NOW()), - (13, 'openai', 'gpt-5.1-chat-latest', '{"summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', NULL, true, NOW(), NOW()), - (14, 'openai', 'gpt-5.2', '{"effort": {"type": "enum", "default": "medium", "options": ["none", "low", "medium", "high", "xhigh"], "description": "How long the model spends reasoning. Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', NULL, true, NOW(), NOW()), - (15, 'openai', 'gpt-5.2-chat-latest', '{"summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', NULL, true, NOW(), NOW()), - (16, 'openai', 'gpt-5.2-pro', '{"summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', NULL, true, NOW(), NOW()), - (17, 'openai', 'gpt-5.3-chat-latest', '{"summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', NULL, true, NOW(), NOW()), - (18, 'openai', 'gpt-5.4-2026-03-05', '{"effort": {"type": "enum", "default": "medium", "options": ["none", "low", "medium", "high", "xhigh"], "description": "How long the model spends reasoning. 
Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', NULL, true, NOW(), NOW()), - (19, 'openai', 'gpt-5.4-pro', '{"effort": {"type": "enum", "default": "medium", "options": ["none", "low", "medium", "high", "xhigh"], "description": "How long the model spends reasoning. Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', NULL, true, NOW(), NOW()) + (1, 'openai', 'gpt-4o-mini', '{"temperature": {"type": "float", "default": 1.0, "min": 0.0, "max": 2.0, "description": "Controls randomness. Lower = more deterministic."}, "top_p": {"type": "float", "default": 1.0, "min": 0.0, "max": 1.0, "description": "Nucleus sampling. Use either this or temperature, not both."}, "max_output_tokens": {"type": "int", "default": 2048, "min": 1, "max": 32768, "description": "Max tokens in the response."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 0.15, "output_token_cost": 0.6}, "batch": {"input_token_cost": 0.075, "output_token_cost": 0.3}}', true, NOW(), NOW()), + (2, 'openai', 'gpt-4o', '{"temperature": {"type": "float", "default": 1.0, "min": 0.0, "max": 2.0, "description": "Controls randomness. Lower = more deterministic."}, "top_p": {"type": "float", "default": 1.0, "min": 0.0, "max": 1.0, "description": "Nucleus sampling. Use either this or temperature, not both."}, "max_output_tokens": {"type": "int", "default": 2048, "min": 1, "max": 32768, "description": "Max tokens in the response."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 2.5, "output_token_cost": 10}, "batch": {"input_token_cost": 1.25, "output_token_cost": 5}}', true, NOW(), NOW()), + (3, 'openai', 'gpt-4.1', '{"temperature": {"type": "float", "default": 1.0, "min": 0.0, "max": 2.0, "description": "Controls randomness. Lower = more deterministic."}, "top_p": {"type": "float", "default": 1.0, "min": 0.0, "max": 1.0, "description": "Nucleus sampling. Use either this or temperature, not both."}, "max_output_tokens": {"type": "int", "default": 2048, "min": 1, "max": 32768, "description": "Max tokens in the response."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 2, "output_token_cost": 8}, "batch": {"input_token_cost": 1, "output_token_cost": 4}}', true, NOW(), NOW()), + (4, 'openai', 'gpt-4.1-mini', '{"temperature": {"type": "float", "default": 1.0, "min": 0.0, "max": 2.0, "description": "Controls randomness. Lower = more deterministic."}, "top_p": {"type": "float", "default": 1.0, "min": 0.0, "max": 1.0, "description": "Nucleus sampling. Use either this or temperature, not both."}, "max_output_tokens": {"type": "int", "default": 2048, "min": 1, "max": 32768, "description": "Max tokens in the response."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 0.4, "output_token_cost": 1.6}, "batch": {"input_token_cost": 0.2, "output_token_cost": 0.8}}', true, NOW(), NOW()), + (5, 'openai', 'gpt-4.1-nano', '{"temperature": {"type": "float", "default": 1.0, "min": 0.0, "max": 2.0, "description": "Controls randomness. Lower = more deterministic."}, "top_p": {"type": "float", "default": 1.0, "min": 0.0, "max": 1.0, "description": "Nucleus sampling. 
Use either this or temperature, not both."}, "max_output_tokens": {"type": "int", "default": 2048, "min": 1, "max": 32768, "description": "Max tokens in the response."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 0.1, "output_token_cost": 0.4}, "batch": {"input_token_cost": 0.05, "output_token_cost": 0.2}}', true, NOW(), NOW()), + (6, 'openai', 'o3-mini', '{"effort": {"type": "enum", "default": "medium", "options": ["low", "medium", "high"], "description": "How long the model spends reasoning. Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 1.1, "output_token_cost": 4.4}, "batch": {"input_token_cost": 0.55, "output_token_cost": 2.2}}', true, NOW(), NOW()), + (7, 'openai', 'o3', '{"effort": {"type": "enum", "default": "medium", "options": ["low", "medium", "high"], "description": "How long the model spends reasoning. Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 2, "output_token_cost": 8}, "batch": {"input_token_cost": 1, "output_token_cost": 4}}', true, NOW(), NOW()), + (8, 'openai', 'o4-mini', '{"effort": {"type": "enum", "default": "medium", "options": ["low", "medium", "high"], "description": "How long the model spends reasoning. Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 1.1, "output_token_cost": 4.4}, "batch": {"input_token_cost": 0.55, "output_token_cost": 2.2}}', true, NOW(), NOW()), + (9, 'openai', 'gpt-5', '{"effort": {"type": "enum", "default": "medium", "options": ["minimal", "low", "medium", "high"], "description": "How long the model spends reasoning. Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 1.25, "output_token_cost": 10}, "batch": {"input_token_cost": 0.625, "output_token_cost": 5}}', true, NOW(), NOW()), + (10, 'openai', 'gpt-5-mini', '{"effort": {"type": "enum", "default": "medium", "options": ["minimal", "low", "medium", "high"], "description": "How long the model spends reasoning. Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 0.25, "output_token_cost": 2}, "batch": {"input_token_cost": 0.125, "output_token_cost": 1}}', true, NOW(), NOW()), + (11, 'openai', 'gpt-5-nano', '{"effort": {"type": "enum", "default": "medium", "options": ["minimal", "low", "medium", "high"], "description": "How long the model spends reasoning. 
Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 0.05, "output_token_cost": 0.4}, "batch": {"input_token_cost": 0.025, "output_token_cost": 0.2}}', true, NOW(), NOW()), + (12, 'openai', 'gpt-5.1', '{"effort": {"type": "enum", "default": "medium", "options": ["none", "low", "medium", "high"], "description": "How long the model spends reasoning. Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 1.25, "output_token_cost": 10}, "batch": {"input_token_cost": 0.625, "output_token_cost": 5}}', true, NOW(), NOW()), + (14, 'openai', 'gpt-5.2', '{"effort": {"type": "enum", "default": "medium", "options": ["none", "low", "medium", "high", "xhigh"], "description": "How long the model spends reasoning. Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 1.75, "output_token_cost": 14}, "batch": {"input_token_cost": 0.875, "output_token_cost": 7}}', true, NOW(), NOW()), + (16, 'openai', 'gpt-5.2-pro', '{"summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 21, "output_token_cost": 168}, "batch": {"input_token_cost": 10.5, "output_token_cost": 84}}', true, NOW(), NOW()), + (17, 'openai', 'gpt-5.3-chat-latest', '{"summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 1.75, "output_token_cost": 14}, "batch": {"input_token_cost": 0.875, "output_token_cost": 7}}', true, NOW(), NOW()), + (18, 'openai', 'gpt-5.4', '{"effort": {"type": "enum", "default": "medium", "options": ["none", "low", "medium", "high", "xhigh"], "description": "How long the model spends reasoning. Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 2.5, "output_token_cost": 15}, "batch": {"input_token_cost": 1.25, "output_token_cost": 7.5}}', true, NOW(), NOW()), + (19, 'openai', 'gpt-5.4-mini', '{"effort": {"type": "enum", "default": "medium", "options": ["none", "low", "medium", "high", "xhigh"], "description": "How long the model spends reasoning. Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 0.75, "output_token_cost": 4.5}, "batch": {"input_token_cost": 0.375, "output_token_cost": 2.25}}', true, NOW(), NOW()), + (20, 'openai', 'gpt-5.4-nano', '{"effort": {"type": "enum", "default": "medium", "options": ["none", "low", "medium", "high", "xhigh"], "description": "How long the model spends reasoning. 
Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 0.2, "output_token_cost": 1.25}, "batch": {"input_token_cost": 0.1, "output_token_cost": 0.625}}', true, NOW(), NOW()), + (21, 'openai', 'gpt-5.4-pro', '{"effort": {"type": "enum", "default": "medium", "options": ["none", "low", "medium", "high", "xhigh"], "description": "How long the model spends reasoning. Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 30, "output_token_cost": 180}, "batch": {"input_token_cost": 15, "output_token_cost": 90}}', true, NOW(), NOW()) """ ) diff --git a/backend/app/api/docs/model_config/list_models_grouped.md b/backend/app/api/docs/model_config/list_models_grouped.md new file mode 100644 index 000000000..0e0b725c0 --- /dev/null +++ b/backend/app/api/docs/model_config/list_models_grouped.md @@ -0,0 +1,45 @@ +## Endpoint + +**GET** `/api/v1/models/grouped` + +Retrieve all active models grouped by provider. + +Returns a dictionary where each key is a provider and each value is a list of active model configurations for that provider. + +### Example Response + +```json +{ + "success": true, + "data": { + "openai": [ + { + "id": 2, + "provider": "openai", + "model_name": "gpt-4o", + "config": { + "temperature": { + "type": "float", + "default": 1.0, + "min": 0.0, + "max": 2.0, + "description": "Controls randomness. Lower = more deterministic." + } + }, + "input_modalities": ["TEXT", "IMAGE"], + "output_modalities": ["TEXT"], + "pricing": { + "response": { + "input_token_cost": 2.5, + "output_token_cost": 10 + } + }, + "is_active": true, + "inserted_at": "2026-03-12T00:00:00", + "updated_at": "2026-03-12T00:00:00" + } + ], + "google": [] + } +} +``` diff --git a/backend/app/api/docs/model_config/list_providers.md b/backend/app/api/docs/model_config/list_providers.md new file mode 100644 index 000000000..aa498bf68 --- /dev/null +++ b/backend/app/api/docs/model_config/list_providers.md @@ -0,0 +1,16 @@ +## Endpoint + +**GET** `/api/v1/models/providers` + +Retrieve the list of providers that currently have active models. + +Returns provider names sorted in ascending order. 
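+
+### Example Request
+
+A minimal request sketch in Python. The base URL, the bearer-token header, and the `httpx` client are illustrative assumptions, not part of this patch; actual auth depends on the deployment:
+
+```python
+import httpx
+
+BASE_URL = "http://localhost:8000"  # assumed local deployment
+TOKEN = "<your-api-token>"  # assumed bearer token; replace with a real credential
+
+# Fetch the list of providers that currently have active models.
+response = httpx.get(
+    f"{BASE_URL}/api/v1/models/providers",
+    headers={"Authorization": f"Bearer {TOKEN}"},
+)
+response.raise_for_status()
+print(response.json())  # -> {'success': True, 'data': ['google', 'openai']}
+```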
+ +### Example Response + +```json +{ + "success": true, + "data": ["google", "openai"] +} +``` diff --git a/backend/app/api/routes/model_config.py b/backend/app/api/routes/model_config.py index 7b7caf05f..33089eb80 100644 --- a/backend/app/api/routes/model_config.py +++ b/backend/app/api/routes/model_config.py @@ -1,9 +1,15 @@ import logging +from typing import Literal +from collections import defaultdict from fastapi import APIRouter, HTTPException from app.api.deps import AuthContextDep, SessionDep -from app.crud.model_config import get_active_models, get_model_config +from app.crud.model_config import ( + estimate_model_cost, + get_active_models, + get_model_config, +) from app.models import ModelConfigPublic, ModelConfigListPublic from app.utils import APIResponse, load_description @@ -32,7 +38,38 @@ def list_models( @router.get( - "/{provider}/{model_name:path}", + "/grouped", + response_model=APIResponse[dict[str, list[ModelConfigPublic]]], + description=load_description("model_config/list_models_grouped.md"), +) +def list_models_grouped( + session: SessionDep, + auth_context: AuthContextDep, +) -> APIResponse[dict[str, list[ModelConfigPublic]]]: + models = get_active_models(session=session, skip=0, limit=1000) + grouped: dict[str, list[ModelConfigPublic]] = defaultdict(list) + for model in models: + grouped[model.provider].append(model) + + return APIResponse.success_response(dict(grouped)) + + +@router.get( + "/providers", + response_model=APIResponse[list[str]], + description=load_description("model_config/list_providers.md"), +) +def list_providers( + session: SessionDep, + auth_context: AuthContextDep, +) -> APIResponse[list[str]]: + models = get_active_models(session=session, skip=0, limit=1000) + providers = sorted({model.provider for model in models}) + return APIResponse.success_response(providers) + + +@router.get( + "/{provider}/{model_name}", response_model=APIResponse[ModelConfigPublic], description=load_description("model_config/get_model.md"), ) diff --git a/backend/app/crud/model_config.py b/backend/app/crud/model_config.py index 546b051e3..603110127 100644 --- a/backend/app/crud/model_config.py +++ b/backend/app/crud/model_config.py @@ -1,5 +1,5 @@ import logging -from typing import Optional, Literal +from typing import Any, Optional, Literal from sqlmodel import Session, select @@ -8,21 +8,6 @@ logger = logging.getLogger(__name__) -def get_default_model_for_type( - session: Session, completion_type: Literal["text", "stt", "tts"] -) -> Optional[ModelConfig]: - statement = ( - select(ModelConfig) - .where( - ModelConfig.is_active == True, - ModelConfig.default_for == completion_type, - ) - .limit(1) - ) - - return session.exec(statement).first() - - def get_active_models( session: Session, provider: Literal["openai", "google"] | None = None, @@ -48,3 +33,48 @@ def get_model_config( ModelConfig.is_active == True, ) return session.exec(statement).first() + + +def estimate_model_cost( + session: Session, + provider: Literal["openai", "google"], + model_name: str, + input_tokens: int, + output_tokens: int, + tag: Literal["response", "batch"] = "response", +) -> Optional[dict[str, Any]]: + model = get_model_config(session=session, provider=provider, model_name=model_name) + if model is None or model.pricing is None: + return None + + if not isinstance(model.pricing, dict): + return None + + pricing_source: dict[str, Any] = model.pricing + tag_pricing = pricing_source.get(tag) + if not isinstance(tag_pricing, dict): + return None + + input_price = 
tag_pricing.get("input_token_cost") + output_price = tag_pricing.get("output_token_cost") + + if not isinstance(input_price, (int, float)) or not isinstance( + output_price, (int, float) + ): + return None + + input_cost = (input_tokens / 1_000_000) * float(input_price) + output_cost = (output_tokens / 1_000_000) * float(output_price) + total_cost = round(input_cost + output_cost, 4) + + return { + "provider": provider, + "model_name": model_name, + "tag": tag, + "input_tokens": input_tokens, + "output_tokens": output_tokens, + "input_cost": input_cost, + "output_cost": output_cost, + "total_cost": total_cost, + "currency": "USD", + } diff --git a/backend/app/models/model_config.py b/backend/app/models/model_config.py index 73d2c2963..464c5f746 100644 --- a/backend/app/models/model_config.py +++ b/backend/app/models/model_config.py @@ -1,5 +1,5 @@ from datetime import datetime -from typing import Any, Literal +from typing import Any, Literal, Optional import sqlalchemy as sa from app.core.util import now @@ -35,7 +35,7 @@ class ModelConfigBase(SQLModel): ARRAY(sa.String), nullable=False, server_default="{}", - comment="supported input modalities: TEXT, IMAGE, PDF, AUDIO", + comment="supported input modalities: TEXT, IMAGE, FILES, AUDIO", ), ) @@ -49,17 +49,15 @@ class ModelConfigBase(SQLModel): ), ) - # NOTE: can we use this default_for column to help in routing? - default_for: Literal["text", "stt", "tts"] | None = Field( + pricing: Optional[dict[str, Any]] = Field( default=None, sa_column=sa.Column( - sa.String, + JSONB, nullable=True, comment=( - "completion types this model is the default for. " - "e.g. [text, stt, tts]. " - "NULL means not a default. " - "Supported: text, stt, tts" + "pricing per 1M tokens in USD. " + "Structure: {response: {input_token_cost, output_token_cost}, " + "batch: {input_token_cost, output_token_cost}}" ), ), ) @@ -122,3 +120,38 @@ class ModelConfigPublic(ModelConfigBase): class ModelConfigListPublic(SQLModel): data: list[ModelConfigPublic] count: int + + +# if __name__ == "__main__": +# import os + +# from sqlmodel import Session, create_engine + +# from app.crud.model_config import estimate_model_cost + +# database_url = "postgresql+psycopg://postgres:postgres@localhost:5432/kaapi" +# engine = create_engine(database_url) + +# with Session(engine) as session: +# input_tokens = 5000 +# output_tokens = 10000 + +# response_cost_info = estimate_model_cost( +# session=session, +# provider="openai", +# model_name="gpt-4o", +# input_tokens=input_tokens, +# output_tokens=output_tokens, +# tag="response", +# ) +# print(response_cost_info) + +# batch_cost_info = estimate_model_cost( +# session=session, +# provider="openai", +# model_name="gpt-4o", +# input_tokens=input_tokens, +# output_tokens=output_tokens, +# tag="batch", +# ) +# print(batch_cost_info) diff --git a/backend/app/tests/api/routes/test_model_config.py b/backend/app/tests/api/routes/test_model_config.py index 779d22e50..792918026 100644 --- a/backend/app/tests/api/routes/test_model_config.py +++ b/backend/app/tests/api/routes/test_model_config.py @@ -1,8 +1,6 @@ from fastapi.testclient import TestClient -from sqlmodel import Session from app.core.config import settings -from app.crud.model_config import get_default_model_for_type def test_list_models( @@ -58,9 +56,41 @@ def test_get_model_not_found( assert response.json()["error"] == "Model not found" -def test_get_default_model_for_type(db: Session) -> None: - model = get_default_model_for_type(session=db, completion_type="text") +def 
test_list_models_grouped( + client: TestClient, superuser_token_headers: dict[str, str] +) -> None: + response = client.get( + f"{settings.API_V1_STR}/models/grouped", + headers=superuser_token_headers, + ) + + assert response.status_code == 200 + body = response.json() + assert body["success"] is True + + grouped_models = body["data"] + assert grouped_models + for provider, models in grouped_models.items(): + assert isinstance(provider, str) + assert isinstance(models, list) + assert all(model["provider"] == provider for model in models) + assert all(model["is_active"] for model in models) + + +def test_list_providers( + client: TestClient, superuser_token_headers: dict[str, str] +) -> None: + response = client.get( + f"{settings.API_V1_STR}/models/providers", + headers=superuser_token_headers, + ) + + assert response.status_code == 200 + body = response.json() + assert body["success"] is True - assert model is not None - assert model.default_for == "text" - assert model.is_active is True + providers = body["data"] + assert isinstance(providers, list) + assert providers == sorted(providers) + assert len(providers) == len(set(providers)) + assert "openai" in providers From f07fb2facaa3e59b5cc76937bf8c93e688607995 Mon Sep 17 00:00:00 2001 From: Prashant Vasudevan <71649489+vprashrex@users.noreply.github.com> Date: Tue, 14 Apr 2026 14:30:46 +0530 Subject: [PATCH 07/10] feat: create model_config table with initial seed data --- ...l_config_table.py => 052_create_model_config_table.py} | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) rename backend/app/alembic/versions/{051_create_model_config_table.py => 052_create_model_config_table.py} (99%) diff --git a/backend/app/alembic/versions/051_create_model_config_table.py b/backend/app/alembic/versions/052_create_model_config_table.py similarity index 99% rename from backend/app/alembic/versions/051_create_model_config_table.py rename to backend/app/alembic/versions/052_create_model_config_table.py index e913bcb5e..4a6b1bff1 100644 --- a/backend/app/alembic/versions/051_create_model_config_table.py +++ b/backend/app/alembic/versions/052_create_model_config_table.py @@ -1,7 +1,7 @@ """create model_config table -Revision ID: 051 -Revises: 050 +Revision ID: 052 +Revises: 051 Create Date: 2026-03-12 00:00:00.000000 """ @@ -11,8 +11,8 @@ from sqlalchemy.dialects import postgresql # revision identifiers, used by Alembic. 
-revision = "051" -down_revision = "050" +revision = "052" +down_revision = "051" branch_labels = None depends_on = None From ee99e2eedf878c4b6666786cb5f42f2f3c27db6f Mon Sep 17 00:00:00 2001 From: Prashant Vasudevan <71649489+vprashrex@users.noreply.github.com> Date: Wed, 15 Apr 2026 11:35:40 +0530 Subject: [PATCH 08/10] chore: comments resolved --- .../versions/052_create_model_config_table.py | 14 +- .../app/api/docs/model_config/get_model.md | 17 +- .../app/api/docs/model_config/list_models.md | 13 +- .../docs/model_config/list_models_grouped.md | 5 +- backend/app/api/routes/model_config.py | 22 +-- backend/app/crud/model_config.py | 27 ++- backend/app/models/model_config.py | 37 +--- backend/app/tests/crud/test_model_config.py | 162 ++++++++++++++++++ 8 files changed, 217 insertions(+), 80 deletions(-) create mode 100644 backend/app/tests/crud/test_model_config.py diff --git a/backend/app/alembic/versions/052_create_model_config_table.py b/backend/app/alembic/versions/052_create_model_config_table.py index 4a6b1bff1..e74b94641 100644 --- a/backend/app/alembic/versions/052_create_model_config_table.py +++ b/backend/app/alembic/versions/052_create_model_config_table.py @@ -109,13 +109,13 @@ def upgrade(): (10, 'openai', 'gpt-5-mini', '{"effort": {"type": "enum", "default": "medium", "options": ["minimal", "low", "medium", "high"], "description": "How long the model spends reasoning. Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 0.25, "output_token_cost": 2}, "batch": {"input_token_cost": 0.125, "output_token_cost": 1}}', true, NOW(), NOW()), (11, 'openai', 'gpt-5-nano', '{"effort": {"type": "enum", "default": "medium", "options": ["minimal", "low", "medium", "high"], "description": "How long the model spends reasoning. Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 0.05, "output_token_cost": 0.4}, "batch": {"input_token_cost": 0.025, "output_token_cost": 0.2}}', true, NOW(), NOW()), (12, 'openai', 'gpt-5.1', '{"effort": {"type": "enum", "default": "medium", "options": ["none", "low", "medium", "high"], "description": "How long the model spends reasoning. Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 1.25, "output_token_cost": 10}, "batch": {"input_token_cost": 0.625, "output_token_cost": 5}}', true, NOW(), NOW()), - (14, 'openai', 'gpt-5.2', '{"effort": {"type": "enum", "default": "medium", "options": ["none", "low", "medium", "high", "xhigh"], "description": "How long the model spends reasoning. 
Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 1.75, "output_token_cost": 14}, "batch": {"input_token_cost": 0.875, "output_token_cost": 7}}', true, NOW(), NOW()), - (16, 'openai', 'gpt-5.2-pro', '{"summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 21, "output_token_cost": 168}, "batch": {"input_token_cost": 10.5, "output_token_cost": 84}}', true, NOW(), NOW()), - (17, 'openai', 'gpt-5.3-chat-latest', '{"summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 1.75, "output_token_cost": 14}, "batch": {"input_token_cost": 0.875, "output_token_cost": 7}}', true, NOW(), NOW()), - (18, 'openai', 'gpt-5.4', '{"effort": {"type": "enum", "default": "medium", "options": ["none", "low", "medium", "high", "xhigh"], "description": "How long the model spends reasoning. Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 2.5, "output_token_cost": 15}, "batch": {"input_token_cost": 1.25, "output_token_cost": 7.5}}', true, NOW(), NOW()), - (19, 'openai', 'gpt-5.4-mini', '{"effort": {"type": "enum", "default": "medium", "options": ["none", "low", "medium", "high", "xhigh"], "description": "How long the model spends reasoning. Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 0.75, "output_token_cost": 4.5}, "batch": {"input_token_cost": 0.375, "output_token_cost": 2.25}}', true, NOW(), NOW()), - (20, 'openai', 'gpt-5.4-nano', '{"effort": {"type": "enum", "default": "medium", "options": ["none", "low", "medium", "high", "xhigh"], "description": "How long the model spends reasoning. Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 0.2, "output_token_cost": 1.25}, "batch": {"input_token_cost": 0.1, "output_token_cost": 0.625}}', true, NOW(), NOW()), - (21, 'openai', 'gpt-5.4-pro', '{"effort": {"type": "enum", "default": "medium", "options": ["none", "low", "medium", "high", "xhigh"], "description": "How long the model spends reasoning. Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 30, "output_token_cost": 180}, "batch": {"input_token_cost": 15, "output_token_cost": 90}}', true, NOW(), NOW()) + (13, 'openai', 'gpt-5.2', '{"effort": {"type": "enum", "default": "medium", "options": ["none", "low", "medium", "high", "xhigh"], "description": "How long the model spends reasoning. 
Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 1.75, "output_token_cost": 14}, "batch": {"input_token_cost": 0.875, "output_token_cost": 7}}', true, NOW(), NOW()), + (14, 'openai', 'gpt-5.2-pro', '{"summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 21, "output_token_cost": 168}, "batch": {"input_token_cost": 10.5, "output_token_cost": 84}}', true, NOW(), NOW()), + (15, 'openai', 'gpt-5.3-chat-latest', '{"summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 1.75, "output_token_cost": 14}, "batch": {"input_token_cost": 0.875, "output_token_cost": 7}}', true, NOW(), NOW()), + (16, 'openai', 'gpt-5.4', '{"effort": {"type": "enum", "default": "medium", "options": ["none", "low", "medium", "high", "xhigh"], "description": "How long the model spends reasoning. Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 2.5, "output_token_cost": 15}, "batch": {"input_token_cost": 1.25, "output_token_cost": 7.5}}', true, NOW(), NOW()), + (17, 'openai', 'gpt-5.4-mini', '{"effort": {"type": "enum", "default": "medium", "options": ["none", "low", "medium", "high", "xhigh"], "description": "How long the model spends reasoning. Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 0.75, "output_token_cost": 4.5}, "batch": {"input_token_cost": 0.375, "output_token_cost": 2.25}}', true, NOW(), NOW()), + (18, 'openai', 'gpt-5.4-nano', '{"effort": {"type": "enum", "default": "medium", "options": ["none", "low", "medium", "high", "xhigh"], "description": "How long the model spends reasoning. Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 0.2, "output_token_cost": 1.25}, "batch": {"input_token_cost": 0.1, "output_token_cost": 0.625}}', true, NOW(), NOW()), + (19, 'openai', 'gpt-5.4-pro', '{"effort": {"type": "enum", "default": "medium", "options": ["none", "low", "medium", "high", "xhigh"], "description": "How long the model spends reasoning. 
Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 30, "output_token_cost": 180}, "batch": {"input_token_cost": 15, "output_token_cost": 90}}', true, NOW(), NOW()) """ ) diff --git a/backend/app/api/docs/model_config/get_model.md b/backend/app/api/docs/model_config/get_model.md index 0f4080081..b2f3700cb 100644 --- a/backend/app/api/docs/model_config/get_model.md +++ b/backend/app/api/docs/model_config/get_model.md @@ -4,7 +4,7 @@ Retrieve a specific model configuration by provider and model name. -Returns model details including supported config parameters, input/output modalities, and default assignment. +Returns model details including supported config parameters, input/output modalities, pricing, and active status. ### Path Parameters @@ -45,10 +45,23 @@ Returns model details including supported config parameters, input/output modali }, "input_modalities": ["TEXT", "IMAGE"], "output_modalities": ["TEXT"], - "default_for": "text", + "pricing": { + "response": { + "input_token_cost": 2.5, + "output_token_cost": 10 + }, + "batch": { + "input_token_cost": 1.25, + "output_token_cost": 5 + } + }, "is_active": true, "inserted_at": "2026-03-12T00:00:00", "updated_at": "2026-03-12T00:00:00" } } ``` + +### Error Response + +- `404 Not Found` — Model not found for the given `provider` and `model_name`. diff --git a/backend/app/api/docs/model_config/list_models.md b/backend/app/api/docs/model_config/list_models.md index 82eac1a91..412f0bfc3 100644 --- a/backend/app/api/docs/model_config/list_models.md +++ b/backend/app/api/docs/model_config/list_models.md @@ -4,7 +4,7 @@ Retrieve a list of all active model configurations. -Returns model details including provider, model name, supported config parameters, input/output modalities, and default assignment. +Returns model details including provider, model name, supported config parameters, input/output modalities, pricing, and active status. Optionally filter by provider (e.g. openai, google). @@ -50,7 +50,16 @@ Optionally filter by provider (e.g. openai, google). }, "input_modalities": ["TEXT", "IMAGE"], "output_modalities": ["TEXT"], - "default_for": null, + "pricing": { + "response": { + "input_token_cost": 0.15, + "output_token_cost": 0.6 + }, + "batch": { + "input_token_cost": 0.075, + "output_token_cost": 0.3 + } + }, "is_active": true, "inserted_at": "2026-03-12T00:00:00", "updated_at": "2026-03-12T00:00:00" diff --git a/backend/app/api/docs/model_config/list_models_grouped.md b/backend/app/api/docs/model_config/list_models_grouped.md index 0e0b725c0..eeb3a0246 100644 --- a/backend/app/api/docs/model_config/list_models_grouped.md +++ b/backend/app/api/docs/model_config/list_models_grouped.md @@ -4,7 +4,7 @@ Retrieve all active models grouped by provider. -Returns a dictionary where each key is a provider and each value is a list of active model configurations for that provider. +Returns a dictionary where each key is a provider present in active records, and each value is a list of active model configurations for that provider. 
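The grouping itself is a single pass that buckets active rows by `provider` (the route uses a `defaultdict`). A minimal standalone sketch, with plain dicts standing in for `ModelConfigPublic` objects:

```python
from collections import defaultdict

# Bucket active model rows by provider, as the /models/grouped route does.
rows = [
    {"provider": "openai", "model_name": "gpt-4o"},
    {"provider": "openai", "model_name": "gpt-4o-mini"},
]

grouped: dict[str, list[dict]] = defaultdict(list)
for row in rows:
    grouped[row["provider"]].append(row)

print(dict(grouped))  # {'openai': [<gpt-4o row>, <gpt-4o-mini row>]}
```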
### Example Response @@ -38,8 +38,7 @@ Returns a dictionary where each key is a provider and each value is a list of ac "inserted_at": "2026-03-12T00:00:00", "updated_at": "2026-03-12T00:00:00" } - ], - "google": [] + ] } } ``` diff --git a/backend/app/api/routes/model_config.py b/backend/app/api/routes/model_config.py index 33089eb80..486528e0d 100644 --- a/backend/app/api/routes/model_config.py +++ b/backend/app/api/routes/model_config.py @@ -1,16 +1,11 @@ import logging -from typing import Literal from collections import defaultdict from fastapi import APIRouter, HTTPException -from app.api.deps import AuthContextDep, SessionDep -from app.crud.model_config import ( - estimate_model_cost, - get_active_models, - get_model_config, -) -from app.models import ModelConfigPublic, ModelConfigListPublic +from app.api.deps import SessionDep +from app.crud.model_config import get_model_config, list_active_model_configs +from app.models import ModelConfigListPublic, ModelConfigPublic from app.utils import APIResponse, load_description logger = logging.getLogger(__name__) @@ -24,12 +19,11 @@ ) def list_models( session: SessionDep, - auth_context: AuthContextDep, provider: str | None = None, skip: int = 0, limit: int = 100, ) -> APIResponse[ModelConfigListPublic]: - models = get_active_models( + models = list_active_model_configs( session=session, provider=provider, skip=skip, limit=limit ) return APIResponse.success_response( @@ -44,9 +38,8 @@ def list_models( ) def list_models_grouped( session: SessionDep, - auth_context: AuthContextDep, ) -> APIResponse[dict[str, list[ModelConfigPublic]]]: - models = get_active_models(session=session, skip=0, limit=1000) + models = list_active_model_configs(session=session, skip=0, limit=1000) grouped: dict[str, list[ModelConfigPublic]] = defaultdict(list) for model in models: grouped[model.provider].append(model) @@ -61,9 +54,8 @@ def list_models_grouped( ) def list_providers( session: SessionDep, - auth_context: AuthContextDep, ) -> APIResponse[list[str]]: - models = get_active_models(session=session, skip=0, limit=1000) + models = list_active_model_configs(session=session, skip=0, limit=1000) providers = sorted({model.provider for model in models}) return APIResponse.success_response(providers) @@ -74,7 +66,7 @@ def list_providers( description=load_description("model_config/get_model.md"), ) def get_model( - session: SessionDep, auth_context: AuthContextDep, provider: str, model_name: str + session: SessionDep, provider: str, model_name: str ) -> APIResponse[ModelConfigPublic]: model = get_model_config(session=session, provider=provider, model_name=model_name) diff --git a/backend/app/crud/model_config.py b/backend/app/crud/model_config.py index 603110127..2bf9cb674 100644 --- a/backend/app/crud/model_config.py +++ b/backend/app/crud/model_config.py @@ -1,20 +1,17 @@ -import logging -from typing import Any, Optional, Literal +from typing import Any, Literal from sqlmodel import Session, select from app.models import ModelConfig -logger = logging.getLogger(__name__) - -def get_active_models( +def list_active_model_configs( session: Session, provider: Literal["openai", "google"] | None = None, skip: int = 0, limit: int = 100, ) -> list[ModelConfig]: - statement = select(ModelConfig).where(ModelConfig.is_active == True) + statement = select(ModelConfig).where(ModelConfig.is_active) if provider: statement = statement.where(ModelConfig.provider == provider) @@ -26,11 +23,11 @@ def get_active_models( def get_model_config( session: Session, provider: Literal["openai", 
"google"], model_name: str -) -> Optional[ModelConfig]: +) -> ModelConfig | None: statement = select(ModelConfig).where( ModelConfig.provider == provider, ModelConfig.model_name == model_name, - ModelConfig.is_active == True, + ModelConfig.is_active, ) return session.exec(statement).first() @@ -41,8 +38,8 @@ def estimate_model_cost( model_name: str, input_tokens: int, output_tokens: int, - tag: Literal["response", "batch"] = "response", -) -> Optional[dict[str, Any]]: + usage_type: Literal["response", "batch"] = "response", +) -> dict[str, Any] | None: model = get_model_config(session=session, provider=provider, model_name=model_name) if model is None or model.pricing is None: return None @@ -51,12 +48,12 @@ def estimate_model_cost( return None pricing_source: dict[str, Any] = model.pricing - tag_pricing = pricing_source.get(tag) - if not isinstance(tag_pricing, dict): + usage_pricing = pricing_source.get(usage_type) + if not isinstance(usage_pricing, dict): return None - input_price = tag_pricing.get("input_token_cost") - output_price = tag_pricing.get("output_token_cost") + input_price = usage_pricing.get("input_token_cost") + output_price = usage_pricing.get("output_token_cost") if not isinstance(input_price, (int, float)) or not isinstance( output_price, (int, float) @@ -70,7 +67,7 @@ def estimate_model_cost( return { "provider": provider, "model_name": model_name, - "tag": tag, + "usage_type": usage_type, "input_tokens": input_tokens, "output_tokens": output_tokens, "input_cost": input_cost, diff --git a/backend/app/models/model_config.py b/backend/app/models/model_config.py index 464c5f746..b66c9efe6 100644 --- a/backend/app/models/model_config.py +++ b/backend/app/models/model_config.py @@ -4,7 +4,7 @@ import sqlalchemy as sa from app.core.util import now from sqlmodel import Field, SQLModel -from sqlalchemy.dialects.postgresql import JSONB, ARRAY +from sqlalchemy.dialects.postgresql import ARRAY, JSONB class ModelConfigBase(SQLModel): @@ -120,38 +120,3 @@ class ModelConfigPublic(ModelConfigBase): class ModelConfigListPublic(SQLModel): data: list[ModelConfigPublic] count: int - - -# if __name__ == "__main__": -# import os - -# from sqlmodel import Session, create_engine - -# from app.crud.model_config import estimate_model_cost - -# database_url = "postgresql+psycopg://postgres:postgres@localhost:5432/kaapi" -# engine = create_engine(database_url) - -# with Session(engine) as session: -# input_tokens = 5000 -# output_tokens = 10000 - -# response_cost_info = estimate_model_cost( -# session=session, -# provider="openai", -# model_name="gpt-4o", -# input_tokens=input_tokens, -# output_tokens=output_tokens, -# tag="response", -# ) -# print(response_cost_info) - -# batch_cost_info = estimate_model_cost( -# session=session, -# provider="openai", -# model_name="gpt-4o", -# input_tokens=input_tokens, -# output_tokens=output_tokens, -# tag="batch", -# ) -# print(batch_cost_info) diff --git a/backend/app/tests/crud/test_model_config.py b/backend/app/tests/crud/test_model_config.py new file mode 100644 index 000000000..be606f296 --- /dev/null +++ b/backend/app/tests/crud/test_model_config.py @@ -0,0 +1,162 @@ +from types import SimpleNamespace +from typing import Any + +import pytest + +from app.crud import model_config as model_config_crud + + +def _patch_model( + monkeypatch: pytest.MonkeyPatch, + pricing: Any, +) -> None: + model = SimpleNamespace(pricing=pricing) + monkeypatch.setattr( + model_config_crud, + "get_model_config", + lambda session, provider, model_name: model, + ) + + +def 
test_estimate_model_cost_response_success(monkeypatch: pytest.MonkeyPatch) -> None: + _patch_model( + monkeypatch, + pricing={ + "response": {"input_token_cost": 2.5, "output_token_cost": 10.0}, + "batch": {"input_token_cost": 1.25, "output_token_cost": 5.0}, + }, + ) + + result = model_config_crud.estimate_model_cost( + session=None, # type: ignore[arg-type] + provider="openai", + model_name="gpt-4o", + input_tokens=1_000_000, + output_tokens=500_000, + usage_type="response", + ) + + assert result is not None + assert result["usage_type"] == "response" + assert result["input_cost"] == 2.5 + assert result["output_cost"] == 5.0 + assert result["total_cost"] == 7.5 + + +def test_estimate_model_cost_batch_success(monkeypatch: pytest.MonkeyPatch) -> None: + _patch_model( + monkeypatch, + pricing={ + "response": {"input_token_cost": 2.5, "output_token_cost": 10.0}, + "batch": {"input_token_cost": 1.25, "output_token_cost": 5.0}, + }, + ) + + result = model_config_crud.estimate_model_cost( + session=None, # type: ignore[arg-type] + provider="openai", + model_name="gpt-4o", + input_tokens=1_000_000, + output_tokens=500_000, + usage_type="batch", + ) + + assert result is not None + assert result["usage_type"] == "batch" + assert result["input_cost"] == 1.25 + assert result["output_cost"] == 2.5 + assert result["total_cost"] == 3.75 + + +def test_estimate_model_cost_returns_none_for_missing_model( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.setattr( + model_config_crud, + "get_model_config", + lambda session, provider, model_name: None, + ) + + result = model_config_crud.estimate_model_cost( + session=None, # type: ignore[arg-type] + provider="openai", + model_name="does-not-exist", + input_tokens=1000, + output_tokens=1000, + ) + + assert result is None + + +def test_estimate_model_cost_returns_none_for_null_pricing( + monkeypatch: pytest.MonkeyPatch, +) -> None: + _patch_model(monkeypatch, pricing=None) + + result = model_config_crud.estimate_model_cost( + session=None, # type: ignore[arg-type] + provider="openai", + model_name="gpt-4o", + input_tokens=1000, + output_tokens=1000, + ) + + assert result is None + + +def test_estimate_model_cost_returns_none_for_non_dict_pricing( + monkeypatch: pytest.MonkeyPatch, +) -> None: + _patch_model(monkeypatch, pricing=["invalid"]) + + result = model_config_crud.estimate_model_cost( + session=None, # type: ignore[arg-type] + provider="openai", + model_name="gpt-4o", + input_tokens=1000, + output_tokens=1000, + ) + + assert result is None + + +def test_estimate_model_cost_returns_none_for_missing_usage_type( + monkeypatch: pytest.MonkeyPatch, +) -> None: + _patch_model( + monkeypatch, + pricing={"response": {"input_token_cost": 1.0, "output_token_cost": 2.0}}, + ) + + result = model_config_crud.estimate_model_cost( + session=None, # type: ignore[arg-type] + provider="openai", + model_name="gpt-4o", + input_tokens=1000, + output_tokens=1000, + usage_type="batch", + ) + + assert result is None + + +def test_estimate_model_cost_returns_none_for_non_numeric_prices( + monkeypatch: pytest.MonkeyPatch, +) -> None: + _patch_model( + monkeypatch, + pricing={ + "response": {"input_token_cost": "cheap", "output_token_cost": "expensive"} + }, + ) + + result = model_config_crud.estimate_model_cost( + session=None, # type: ignore[arg-type] + provider="openai", + model_name="gpt-4o", + input_tokens=1000, + output_tokens=1000, + usage_type="response", + ) + + assert result is None From ac87722fbe50ed6d1f648a6966a69f2a3b5bbdd8 Mon Sep 17 00:00:00 2001 From: 
Prashant Vasudevan <71649489+vprashrex@users.noreply.github.com> Date: Wed, 15 Apr 2026 11:47:28 +0530 Subject: [PATCH 09/10] refactor: remove unnecessary logging and clean up imports in model_config --- backend/app/api/routes/model_config.py | 3 --- backend/app/models/model_config.py | 13 ++++++++----- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/backend/app/api/routes/model_config.py b/backend/app/api/routes/model_config.py index 486528e0d..814c8c012 100644 --- a/backend/app/api/routes/model_config.py +++ b/backend/app/api/routes/model_config.py @@ -71,9 +71,6 @@ def get_model( model = get_model_config(session=session, provider=provider, model_name=model_name) if model is None: - logger.error( - f"[get_model] Model not found | provider={provider}, model_name={model_name}" - ) raise HTTPException(status_code=404, detail="Model not found") return APIResponse.success_response(model) diff --git a/backend/app/models/model_config.py b/backend/app/models/model_config.py index b66c9efe6..bc1d14115 100644 --- a/backend/app/models/model_config.py +++ b/backend/app/models/model_config.py @@ -1,17 +1,20 @@ from datetime import datetime -from typing import Any, Literal, Optional +from typing import Any, Literal import sqlalchemy as sa -from app.core.util import now -from sqlmodel import Field, SQLModel from sqlalchemy.dialects.postgresql import ARRAY, JSONB +from sqlmodel import Field, SQLModel + +from app.core.util import now class ModelConfigBase(SQLModel): provider: Literal["openai", "google"] = Field( default="openai", sa_column=sa.Column( - sa.String, nullable=False, comment="provider name (e.g. openai, google)" + sa.String, + nullable=False, + comment="provider name (e.g. openai, google)", ), ) @@ -49,7 +52,7 @@ class ModelConfigBase(SQLModel): ), ) - pricing: Optional[dict[str, Any]] = Field( + pricing: dict[str, Any] | None = Field( default=None, sa_column=sa.Column( JSONB, From 433581f00e62dc64b4426e6d1d3d75e4ae8e3e2b Mon Sep 17 00:00:00 2001 From: Prashant Vasudevan <71649489+vprashrex@users.noreply.github.com> Date: Wed, 15 Apr 2026 13:08:47 +0530 Subject: [PATCH 10/10] feat: add pagination support and metadata for model listing endpoints --- .../app/api/docs/model_config/list_models.md | 5 +- .../docs/model_config/list_models_grouped.md | 12 ++++- backend/app/api/routes/model_config.py | 29 +++++++---- backend/app/crud/model_config.py | 22 +++++++- .../app/tests/api/routes/test_model_config.py | 52 +++++++++++++++++++ 5 files changed, 107 insertions(+), 13 deletions(-) diff --git a/backend/app/api/docs/model_config/list_models.md b/backend/app/api/docs/model_config/list_models.md index 412f0bfc3..321a3d673 100644 --- a/backend/app/api/docs/model_config/list_models.md +++ b/backend/app/api/docs/model_config/list_models.md @@ -12,13 +12,16 @@ Optionally filter by provider (e.g. openai, google). - **`provider`** (optional) — Filter by provider name (e.g. 
`openai`, `google`) - **`skip`** (optional, default 0) — Number of records to skip for pagination -- **`limit`** (optional, default 100) — Maximum number of records to return +- **`limit`** (optional, default 100, max 100) — Maximum number of records to return ### Example Response ```json { "success": true, + "metadata": { + "has_more": true + }, "data": { "data": [ { diff --git a/backend/app/api/docs/model_config/list_models_grouped.md b/backend/app/api/docs/model_config/list_models_grouped.md index eeb3a0246..706beba37 100644 --- a/backend/app/api/docs/model_config/list_models_grouped.md +++ b/backend/app/api/docs/model_config/list_models_grouped.md @@ -2,15 +2,23 @@ **GET** `/api/v1/models/grouped` -Retrieve all active models grouped by provider. +Retrieve active models grouped by provider. -Returns a dictionary where each key is a provider present in active records, and each value is a list of active model configurations for that provider. +Supports pagination of model rows before grouping: +- `skip` (default `0`) +- `limit` (default `100`, max `100`) + +Returns a dictionary where each key is a provider present in the paginated slice, and each value is a list of active model configurations for that provider. +Includes `metadata.has_more` when additional model rows exist. ### Example Response ```json { "success": true, + "metadata": { + "has_more": true + }, "data": { "openai": [ { diff --git a/backend/app/api/routes/model_config.py b/backend/app/api/routes/model_config.py index 814c8c012..565149c7e 100644 --- a/backend/app/api/routes/model_config.py +++ b/backend/app/api/routes/model_config.py @@ -1,10 +1,14 @@ import logging from collections import defaultdict -from fastapi import APIRouter, HTTPException +from fastapi import APIRouter, HTTPException, Query from app.api.deps import SessionDep -from app.crud.model_config import get_model_config, list_active_model_configs +from app.crud.model_config import ( + get_model_config, + list_active_model_configs, + list_all_active_model_configs, +) from app.models import ModelConfigListPublic, ModelConfigPublic from app.utils import APIResponse, load_description @@ -20,14 +24,15 @@ def list_models( session: SessionDep, provider: str | None = None, - skip: int = 0, - limit: int = 100, + skip: int = Query(0, ge=0, description="Number of records to skip"), + limit: int = Query(100, ge=1, le=100, description="Maximum records to return"), ) -> APIResponse[ModelConfigListPublic]: - models = list_active_model_configs( + models, has_more = list_active_model_configs( session=session, provider=provider, skip=skip, limit=limit ) return APIResponse.success_response( - ModelConfigListPublic(data=models, count=len(models)) + ModelConfigListPublic(data=models, count=len(models)), + metadata={"has_more": has_more}, ) @@ -38,13 +43,19 @@ def list_models( ) def list_models_grouped( session: SessionDep, + skip: int = Query(0, ge=0, description="Number of model records to skip"), + limit: int = Query( + 100, ge=1, le=100, description="Maximum model records to return before grouping" + ), ) -> APIResponse[dict[str, list[ModelConfigPublic]]]: - models = list_active_model_configs(session=session, skip=0, limit=1000) + models, has_more = list_active_model_configs( + session=session, skip=skip, limit=limit + ) grouped: dict[str, list[ModelConfigPublic]] = defaultdict(list) for model in models: grouped[model.provider].append(model) - return APIResponse.success_response(dict(grouped)) + return APIResponse.success_response(dict(grouped), metadata={"has_more": has_more}) 
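With `has_more` returned in the response metadata, a caller can page through the full list without a separate count query. A minimal client-side sketch (assumes a local deployment at a placeholder `BASE_URL` and a valid bearer token; neither is part of this patch):

```python
import requests

BASE_URL = "http://localhost:8000/api/v1"  # placeholder deployment URL
TOKEN = "<superuser-token>"  # hypothetical bearer token

def fetch_all_models() -> list[dict]:
    # Page through /models/ with skip/limit until metadata.has_more is False.
    models: list[dict] = []
    skip, limit = 0, 100
    while True:
        resp = requests.get(
            f"{BASE_URL}/models/",
            params={"skip": skip, "limit": limit},
            headers={"Authorization": f"Bearer {TOKEN}"},
        )
        resp.raise_for_status()
        body = resp.json()
        models.extend(body["data"]["data"])
        if not body["metadata"]["has_more"]:
            return models
        skip += limit
```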
@router.get( @@ -55,7 +66,7 @@ def list_models_grouped( def list_providers( session: SessionDep, ) -> APIResponse[list[str]]: - models = list_active_model_configs(session=session, skip=0, limit=1000) + models = list_all_active_model_configs(session=session) providers = sorted({model.provider for model in models}) return APIResponse.success_response(providers) diff --git a/backend/app/crud/model_config.py b/backend/app/crud/model_config.py index 2bf9cb674..fed1f71c7 100644 --- a/backend/app/crud/model_config.py +++ b/backend/app/crud/model_config.py @@ -10,6 +10,27 @@ def list_active_model_configs( provider: Literal["openai", "google"] | None = None, skip: int = 0, limit: int = 100, +) -> tuple[list[ModelConfig], bool]: + statement = select(ModelConfig).where(ModelConfig.is_active) + + if provider: + statement = statement.where(ModelConfig.provider == provider) + + statement = statement.order_by(ModelConfig.provider, ModelConfig.model_name) + statement = statement.offset(skip).limit(limit + 1) + models = list(session.exec(statement).all()) + + has_more = False + if len(models) > limit: + has_more = True + models = models[:limit] + + return models, has_more + + +def list_all_active_model_configs( + session: Session, + provider: Literal["openai", "google"] | None = None, ) -> list[ModelConfig]: statement = select(ModelConfig).where(ModelConfig.is_active) @@ -17,7 +38,6 @@ def list_active_model_configs( statement = statement.where(ModelConfig.provider == provider) statement = statement.order_by(ModelConfig.provider, ModelConfig.model_name) - statement = statement.offset(skip).limit(limit) return list(session.exec(statement).all()) diff --git a/backend/app/tests/api/routes/test_model_config.py b/backend/app/tests/api/routes/test_model_config.py index 792918026..4f111194e 100644 --- a/backend/app/tests/api/routes/test_model_config.py +++ b/backend/app/tests/api/routes/test_model_config.py @@ -14,10 +14,26 @@ def test_list_models( assert response.status_code == 200 body = response.json() assert body["success"] is True + assert "has_more" in body["metadata"] assert body["data"]["count"] > 0 assert all(m["is_active"] for m in body["data"]["data"]) +def test_list_models_has_more( + client: TestClient, superuser_token_headers: dict[str, str] +) -> None: + response = client.get( + f"{settings.API_V1_STR}/models/?skip=0&limit=1", + headers=superuser_token_headers, + ) + + assert response.status_code == 200 + body = response.json() + assert body["success"] is True + assert body["data"]["count"] == 1 + assert body["metadata"]["has_more"] is True + + def test_list_models_filter_by_provider( client: TestClient, superuser_token_headers: dict[str, str] ) -> None: @@ -32,6 +48,16 @@ def test_list_models_filter_by_provider( assert all(m["provider"] == "openai" for m in data) +def test_list_models_invalid_limit( + client: TestClient, superuser_token_headers: dict[str, str] +) -> None: + response = client.get( + f"{settings.API_V1_STR}/models/?skip=0&limit=0", + headers=superuser_token_headers, + ) + assert response.status_code == 422 + + def test_get_model(client: TestClient, superuser_token_headers: dict[str, str]) -> None: response = client.get( f"{settings.API_V1_STR}/models/openai/gpt-4o", @@ -67,6 +93,7 @@ def test_list_models_grouped( assert response.status_code == 200 body = response.json() assert body["success"] is True + assert "has_more" in body["metadata"] grouped_models = body["data"] assert grouped_models @@ -77,6 +104,31 @@ def test_list_models_grouped( assert all(model["is_active"] for model in 
models) +def test_list_models_grouped_has_more( + client: TestClient, superuser_token_headers: dict[str, str] +) -> None: + response = client.get( + f"{settings.API_V1_STR}/models/grouped?skip=0&limit=1", + headers=superuser_token_headers, + ) + + assert response.status_code == 200 + body = response.json() + assert body["success"] is True + assert body["metadata"]["has_more"] is True + + +def test_list_models_grouped_invalid_limit( + client: TestClient, superuser_token_headers: dict[str, str] +) -> None: + response = client.get( + f"{settings.API_V1_STR}/models/grouped?skip=0&limit=0", + headers=superuser_token_headers, + ) + + assert response.status_code == 422 + + def test_list_providers( client: TestClient, superuser_token_headers: dict[str, str] ) -> None:
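A closing note on the cost arithmetic this series settles on: `estimate_model_cost` prices tokens per 1M and rounds the total to 4 decimal places. A standalone replica of that math, using the gpt-4o `response` prices from `test_estimate_model_cost_response_success` above (2.5 / 10.0 USD per 1M tokens):

```python
# Same arithmetic as estimate_model_cost, minus the DB lookup and validation.
INPUT_PRICE = 2.5    # USD per 1M input tokens (gpt-4o "response" tier)
OUTPUT_PRICE = 10.0  # USD per 1M output tokens

def estimate_cost(input_tokens: int, output_tokens: int) -> float:
    input_cost = (input_tokens / 1_000_000) * INPUT_PRICE
    output_cost = (output_tokens / 1_000_000) * OUTPUT_PRICE
    return round(input_cost + output_cost, 4)

# 5,000 input + 10,000 output tokens: 0.0125 + 0.1000 = 0.1125 USD
assert estimate_cost(5_000, 10_000) == 0.1125
```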