diff --git a/backend/app/alembic/versions/052_create_model_config_table.py b/backend/app/alembic/versions/052_create_model_config_table.py new file mode 100644 index 000000000..e74b94641 --- /dev/null +++ b/backend/app/alembic/versions/052_create_model_config_table.py @@ -0,0 +1,130 @@ +"""create model_config table + +Revision ID: 052 +Revises: 051 +Create Date: 2026-03-12 00:00:00.000000 + +""" + +import sqlalchemy as sa +from alembic import op +from sqlalchemy.dialects import postgresql + +# revision identifiers, used by Alembic. +revision = "052" +down_revision = "051" +branch_labels = None +depends_on = None + + +def upgrade(): + op.create_table( + "model_config", + sa.Column( + "id", + sa.Integer(), + sa.Identity(always=False), + nullable=False, + comment="unique identifier for model config table", + ), + sa.Column( + "provider", + sa.String(), + nullable=False, + comment="provider name (e.g. openai, google)", + ), + sa.Column( + "model_name", + sa.String(), + nullable=False, + comment="model name (e.g. gpt-4o, gemini-3-flash-preview)", + ), + sa.Column( + "config", + postgresql.JSONB(astext_type=sa.Text()), + nullable=False, + comment="model adhoc configuration", + ), + sa.Column( + "input_modalities", + postgresql.ARRAY(sa.String()), + nullable=False, + server_default="{}", + comment="supported input modalities: TEXT, IMAGE, FILES, AUDIO", + ), + sa.Column( + "output_modalities", + postgresql.ARRAY(sa.String()), + nullable=False, + server_default="{}", + comment="supported output modalities: TEXT, AUDIO", + ), + sa.Column( + "pricing", + postgresql.JSONB(astext_type=sa.Text()), + nullable=True, + comment=( + "pricing per 1M tokens in USD. " + "Structure: {response: {input_token_cost, output_token_cost}, batch: {input_token_cost, output_token_cost}}" + ), + ), + sa.Column( + "is_active", + sa.Boolean(), + nullable=False, + server_default=sa.text("true"), + comment="whether this model is available", + ), + sa.Column( + "inserted_at", + sa.DateTime(), + nullable=False, + comment="timestamp when model configuration was created", + ), + sa.Column( + "updated_at", + sa.DateTime(), + nullable=False, + comment="timestamp when model configuration was updated", + ), + sa.PrimaryKeyConstraint("id"), + sa.UniqueConstraint("provider", "model_name"), + schema="global", + ) + + # Seed default model configurations + op.execute( + """ + INSERT INTO global.model_config (id, provider, model_name, config, input_modalities, output_modalities, pricing, is_active, inserted_at, updated_at) + VALUES + (1, 'openai', 'gpt-4o-mini', '{"temperature": {"type": "float", "default": 1.0, "min": 0.0, "max": 2.0, "description": "Controls randomness. Lower = more deterministic."}, "top_p": {"type": "float", "default": 1.0, "min": 0.0, "max": 1.0, "description": "Nucleus sampling. Use either this or temperature, not both."}, "max_output_tokens": {"type": "int", "default": 2048, "min": 1, "max": 32768, "description": "Max tokens in the response."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 0.15, "output_token_cost": 0.6}, "batch": {"input_token_cost": 0.075, "output_token_cost": 0.3}}', true, NOW(), NOW()), + (2, 'openai', 'gpt-4o', '{"temperature": {"type": "float", "default": 1.0, "min": 0.0, "max": 2.0, "description": "Controls randomness. Lower = more deterministic."}, "top_p": {"type": "float", "default": 1.0, "min": 0.0, "max": 1.0, "description": "Nucleus sampling. 
Use either this or temperature, not both."}, "max_output_tokens": {"type": "int", "default": 2048, "min": 1, "max": 32768, "description": "Max tokens in the response."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 2.5, "output_token_cost": 10}, "batch": {"input_token_cost": 1.25, "output_token_cost": 5}}', true, NOW(), NOW()), + (3, 'openai', 'gpt-4.1', '{"temperature": {"type": "float", "default": 1.0, "min": 0.0, "max": 2.0, "description": "Controls randomness. Lower = more deterministic."}, "top_p": {"type": "float", "default": 1.0, "min": 0.0, "max": 1.0, "description": "Nucleus sampling. Use either this or temperature, not both."}, "max_output_tokens": {"type": "int", "default": 2048, "min": 1, "max": 32768, "description": "Max tokens in the response."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 2, "output_token_cost": 8}, "batch": {"input_token_cost": 1, "output_token_cost": 4}}', true, NOW(), NOW()), + (4, 'openai', 'gpt-4.1-mini', '{"temperature": {"type": "float", "default": 1.0, "min": 0.0, "max": 2.0, "description": "Controls randomness. Lower = more deterministic."}, "top_p": {"type": "float", "default": 1.0, "min": 0.0, "max": 1.0, "description": "Nucleus sampling. Use either this or temperature, not both."}, "max_output_tokens": {"type": "int", "default": 2048, "min": 1, "max": 32768, "description": "Max tokens in the response."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 0.4, "output_token_cost": 1.6}, "batch": {"input_token_cost": 0.2, "output_token_cost": 0.8}}', true, NOW(), NOW()), + (5, 'openai', 'gpt-4.1-nano', '{"temperature": {"type": "float", "default": 1.0, "min": 0.0, "max": 2.0, "description": "Controls randomness. Lower = more deterministic."}, "top_p": {"type": "float", "default": 1.0, "min": 0.0, "max": 1.0, "description": "Nucleus sampling. Use either this or temperature, not both."}, "max_output_tokens": {"type": "int", "default": 2048, "min": 1, "max": 32768, "description": "Max tokens in the response."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 0.1, "output_token_cost": 0.4}, "batch": {"input_token_cost": 0.05, "output_token_cost": 0.2}}', true, NOW(), NOW()), + (6, 'openai', 'o3-mini', '{"effort": {"type": "enum", "default": "medium", "options": ["low", "medium", "high"], "description": "How long the model spends reasoning. Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 1.1, "output_token_cost": 4.4}, "batch": {"input_token_cost": 0.55, "output_token_cost": 2.2}}', true, NOW(), NOW()), + (7, 'openai', 'o3', '{"effort": {"type": "enum", "default": "medium", "options": ["low", "medium", "high"], "description": "How long the model spends reasoning. Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 2, "output_token_cost": 8}, "batch": {"input_token_cost": 1, "output_token_cost": 4}}', true, NOW(), NOW()), + (8, 'openai', 'o4-mini', '{"effort": {"type": "enum", "default": "medium", "options": ["low", "medium", "high"], "description": "How long the model spends reasoning. 
Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 1.1, "output_token_cost": 4.4}, "batch": {"input_token_cost": 0.55, "output_token_cost": 2.2}}', true, NOW(), NOW()), + (9, 'openai', 'gpt-5', '{"effort": {"type": "enum", "default": "medium", "options": ["minimal", "low", "medium", "high"], "description": "How long the model spends reasoning. Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 1.25, "output_token_cost": 10}, "batch": {"input_token_cost": 0.625, "output_token_cost": 5}}', true, NOW(), NOW()), + (10, 'openai', 'gpt-5-mini', '{"effort": {"type": "enum", "default": "medium", "options": ["minimal", "low", "medium", "high"], "description": "How long the model spends reasoning. Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 0.25, "output_token_cost": 2}, "batch": {"input_token_cost": 0.125, "output_token_cost": 1}}', true, NOW(), NOW()), + (11, 'openai', 'gpt-5-nano', '{"effort": {"type": "enum", "default": "medium", "options": ["minimal", "low", "medium", "high"], "description": "How long the model spends reasoning. Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 0.05, "output_token_cost": 0.4}, "batch": {"input_token_cost": 0.025, "output_token_cost": 0.2}}', true, NOW(), NOW()), + (12, 'openai', 'gpt-5.1', '{"effort": {"type": "enum", "default": "medium", "options": ["none", "low", "medium", "high"], "description": "How long the model spends reasoning. Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 1.25, "output_token_cost": 10}, "batch": {"input_token_cost": 0.625, "output_token_cost": 5}}', true, NOW(), NOW()), + (13, 'openai', 'gpt-5.2', '{"effort": {"type": "enum", "default": "medium", "options": ["none", "low", "medium", "high", "xhigh"], "description": "How long the model spends reasoning. 
Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 1.75, "output_token_cost": 14}, "batch": {"input_token_cost": 0.875, "output_token_cost": 7}}', true, NOW(), NOW()), + (14, 'openai', 'gpt-5.2-pro', '{"summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 21, "output_token_cost": 168}, "batch": {"input_token_cost": 10.5, "output_token_cost": 84}}', true, NOW(), NOW()), + (15, 'openai', 'gpt-5.3-chat-latest', '{"summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 1.75, "output_token_cost": 14}, "batch": {"input_token_cost": 0.875, "output_token_cost": 7}}', true, NOW(), NOW()), + (16, 'openai', 'gpt-5.4', '{"effort": {"type": "enum", "default": "medium", "options": ["none", "low", "medium", "high", "xhigh"], "description": "How long the model spends reasoning. Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 2.5, "output_token_cost": 15}, "batch": {"input_token_cost": 1.25, "output_token_cost": 7.5}}', true, NOW(), NOW()), + (17, 'openai', 'gpt-5.4-mini', '{"effort": {"type": "enum", "default": "medium", "options": ["none", "low", "medium", "high", "xhigh"], "description": "How long the model spends reasoning. Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 0.75, "output_token_cost": 4.5}, "batch": {"input_token_cost": 0.375, "output_token_cost": 2.25}}', true, NOW(), NOW()), + (18, 'openai', 'gpt-5.4-nano', '{"effort": {"type": "enum", "default": "medium", "options": ["none", "low", "medium", "high", "xhigh"], "description": "How long the model spends reasoning. Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 0.2, "output_token_cost": 1.25}, "batch": {"input_token_cost": 0.1, "output_token_cost": 0.625}}', true, NOW(), NOW()), + (19, 'openai', 'gpt-5.4-pro', '{"effort": {"type": "enum", "default": "medium", "options": ["none", "low", "medium", "high", "xhigh"], "description": "How long the model spends reasoning. 
Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 30, "output_token_cost": 180}, "batch": {"input_token_cost": 15, "output_token_cost": 90}}', true, NOW(), NOW()) + """ + ) + + # Reset the id sequence to continue after the last seeded id + op.execute( + "SELECT setval(pg_get_serial_sequence('global.model_config', 'id'), " + "(SELECT MAX(id) FROM global.model_config))" + ) + + +def downgrade(): + op.drop_table("model_config", schema="global") diff --git a/backend/app/api/docs/model_config/get_model.md b/backend/app/api/docs/model_config/get_model.md new file mode 100644 index 000000000..b2f3700cb --- /dev/null +++ b/backend/app/api/docs/model_config/get_model.md @@ -0,0 +1,67 @@ +## Endpoint + +**GET** `/api/v1/models/{provider}/{model_name}` + +Retrieve a specific model configuration by provider and model name. + +Returns model details including supported config parameters, input/output modalities, pricing, and active status. + +### Path Parameters + +- **`provider`** (required) — Provider name (e.g. `openai`, `google`) +- **`model_name`** (required) — Model name (e.g. `gpt-4o`, `gpt-4o-mini`) + +### Example Response + +```json +{ + "success": true, + "data": { + "id": 2, + "provider": "openai", + "model_name": "gpt-4o", + "config": { + "temperature": { + "type": "float", + "default": 1.0, + "min": 0.0, + "max": 2.0, + "description": "Controls randomness. Lower = more deterministic." + }, + "top_p": { + "type": "float", + "default": 1.0, + "min": 0.0, + "max": 1.0, + "description": "Nucleus sampling. Use either this or temperature, not both." + }, + "max_output_tokens": { + "type": "int", + "default": 2048, + "min": 1, + "max": 32768, + "description": "Max tokens in the response." + } + }, + "input_modalities": ["TEXT", "IMAGE"], + "output_modalities": ["TEXT"], + "pricing": { + "response": { + "input_token_cost": 2.5, + "output_token_cost": 10 + }, + "batch": { + "input_token_cost": 1.25, + "output_token_cost": 5 + } + }, + "is_active": true, + "inserted_at": "2026-03-12T00:00:00", + "updated_at": "2026-03-12T00:00:00" + } +} +``` + +### Error Response + +- `404 Not Found` — Model not found for the given `provider` and `model_name`. diff --git a/backend/app/api/docs/model_config/list_models.md b/backend/app/api/docs/model_config/list_models.md new file mode 100644 index 000000000..321a3d673 --- /dev/null +++ b/backend/app/api/docs/model_config/list_models.md @@ -0,0 +1,74 @@ +## Endpoint + +**GET** `/api/v1/models` + +Retrieve a list of all active model configurations. + +Returns model details including provider, model name, supported config parameters, input/output modalities, pricing, and active status. + +Optionally filter by provider (e.g. openai, google). + +### Query Parameters + +- **`provider`** (optional) — Filter by provider name (e.g. `openai`, `google`) +- **`skip`** (optional, default 0) — Number of records to skip for pagination +- **`limit`** (optional, default 100, max 100) — Maximum number of records to return + +### Example Response + +```json +{ + "success": true, + "metadata": { + "has_more": true + }, + "data": { + "data": [ + { + "id": 1, + "provider": "openai", + "model_name": "gpt-4o-mini", + "config": { + "temperature": { + "type": "float", + "default": 1.0, + "min": 0.0, + "max": 2.0, + "description": "Controls randomness. Lower = more deterministic." 
+ }, + "top_p": { + "type": "float", + "default": 1.0, + "min": 0.0, + "max": 1.0, + "description": "Nucleus sampling. Use either this or temperature, not both." + }, + "max_output_tokens": { + "type": "int", + "default": 2048, + "min": 1, + "max": 32768, + "description": "Max tokens in the response." + } + }, + "input_modalities": ["TEXT", "IMAGE"], + "output_modalities": ["TEXT"], + "pricing": { + "response": { + "input_token_cost": 0.15, + "output_token_cost": 0.6 + }, + "batch": { + "input_token_cost": 0.075, + "output_token_cost": 0.3 + } + }, + "is_active": true, + "inserted_at": "2026-03-12T00:00:00", + "updated_at": "2026-03-12T00:00:00" + } + ], + "count": 1 + } +} +``` diff --git a/backend/app/api/docs/model_config/list_models_grouped.md b/backend/app/api/docs/model_config/list_models_grouped.md new file mode 100644 index 000000000..706beba37 --- /dev/null +++ b/backend/app/api/docs/model_config/list_models_grouped.md @@ -0,0 +1,52 @@ +## Endpoint + +**GET** `/api/v1/models/grouped` + +Retrieve active models grouped by provider. + +Supports pagination of model rows before grouping: +- `skip` (default `0`) +- `limit` (default `100`, max `100`) + +Returns a dictionary where each key is a provider present in the paginated slice, and each value is a list of active model configurations for that provider. +Includes `metadata.has_more` when additional model rows exist. + +### Example Response + +```json +{ + "success": true, + "metadata": { + "has_more": true + }, + "data": { + "openai": [ + { + "id": 2, + "provider": "openai", + "model_name": "gpt-4o", + "config": { + "temperature": { + "type": "float", + "default": 1.0, + "min": 0.0, + "max": 2.0, + "description": "Controls randomness. Lower = more deterministic." + } + }, + "input_modalities": ["TEXT", "IMAGE"], + "output_modalities": ["TEXT"], + "pricing": { + "response": { + "input_token_cost": 2.5, + "output_token_cost": 10 + } + }, + "is_active": true, + "inserted_at": "2026-03-12T00:00:00", + "updated_at": "2026-03-12T00:00:00" + } + ] + } +} +``` diff --git a/backend/app/api/docs/model_config/list_providers.md b/backend/app/api/docs/model_config/list_providers.md new file mode 100644 index 000000000..aa498bf68 --- /dev/null +++ b/backend/app/api/docs/model_config/list_providers.md @@ -0,0 +1,16 @@ +## Endpoint + +**GET** `/api/v1/models/providers` + +Retrieve the list of providers that currently have active models. + +Returns provider names sorted in ascending order. 
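+
+### Example Request
+
+A minimal client sketch (the host and bearer token below are placeholders for illustration; any HTTP client works):
+
+```python
+import httpx
+
+# Hypothetical host and token; substitute your deployment's values.
+response = httpx.get(
+    "https://your-host/api/v1/models/providers",
+    headers={"Authorization": "Bearer <token>"},
+)
+response.raise_for_status()
+providers = response.json()["data"]  # e.g. ["google", "openai"]
+```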
+ +### Example Response + +```json +{ + "success": true, + "data": ["google", "openai"] +} +``` diff --git a/backend/app/api/main.py b/backend/app/api/main.py index 98ce324c5..292e4d1b7 100644 --- a/backend/app/api/main.py +++ b/backend/app/api/main.py @@ -27,6 +27,7 @@ fine_tuning, model_evaluation, collection_job, + model_config, ) from app.api.routes import evaluations from app.core.config import settings @@ -58,7 +59,7 @@ api_router.include_router(utils.router) api_router.include_router(fine_tuning.router) api_router.include_router(model_evaluation.router) - +api_router.include_router(model_config.router) if settings.ENVIRONMENT in ["development", "testing"]: api_router.include_router(private.router) diff --git a/backend/app/api/routes/model_config.py b/backend/app/api/routes/model_config.py new file mode 100644 index 000000000..565149c7e --- /dev/null +++ b/backend/app/api/routes/model_config.py @@ -0,0 +1,87 @@ +import logging +from collections import defaultdict + +from fastapi import APIRouter, HTTPException, Query + +from app.api.deps import SessionDep +from app.crud.model_config import ( + get_model_config, + list_active_model_configs, + list_all_active_model_configs, +) +from app.models import ModelConfigListPublic, ModelConfigPublic +from app.utils import APIResponse, load_description + +logger = logging.getLogger(__name__) +router = APIRouter(prefix="/models", tags=["Model Config"]) + + +@router.get( + "/", + response_model=APIResponse[ModelConfigListPublic], + description=load_description("model_config/list_models.md"), +) +def list_models( + session: SessionDep, + provider: str | None = None, + skip: int = Query(0, ge=0, description="Number of records to skip"), + limit: int = Query(100, ge=1, le=100, description="Maximum records to return"), +) -> APIResponse[ModelConfigListPublic]: + models, has_more = list_active_model_configs( + session=session, provider=provider, skip=skip, limit=limit + ) + return APIResponse.success_response( + ModelConfigListPublic(data=models, count=len(models)), + metadata={"has_more": has_more}, + ) + + +@router.get( + "/grouped", + response_model=APIResponse[dict[str, list[ModelConfigPublic]]], + description=load_description("model_config/list_models_grouped.md"), +) +def list_models_grouped( + session: SessionDep, + skip: int = Query(0, ge=0, description="Number of model records to skip"), + limit: int = Query( + 100, ge=1, le=100, description="Maximum model records to return before grouping" + ), +) -> APIResponse[dict[str, list[ModelConfigPublic]]]: + models, has_more = list_active_model_configs( + session=session, skip=skip, limit=limit + ) + grouped: dict[str, list[ModelConfigPublic]] = defaultdict(list) + for model in models: + grouped[model.provider].append(model) + + return APIResponse.success_response(dict(grouped), metadata={"has_more": has_more}) + + +@router.get( + "/providers", + response_model=APIResponse[list[str]], + description=load_description("model_config/list_providers.md"), +) +def list_providers( + session: SessionDep, +) -> APIResponse[list[str]]: + models = list_all_active_model_configs(session=session) + providers = sorted({model.provider for model in models}) + return APIResponse.success_response(providers) + + +@router.get( + "/{provider}/{model_name}", + response_model=APIResponse[ModelConfigPublic], + description=load_description("model_config/get_model.md"), +) +def get_model( + session: SessionDep, provider: str, model_name: str +) -> APIResponse[ModelConfigPublic]: + model = get_model_config(session=session, 
provider=provider, model_name=model_name) + + if model is None: + raise HTTPException(status_code=404, detail="Model not found") + + return APIResponse.success_response(model) diff --git a/backend/app/crud/model_config.py b/backend/app/crud/model_config.py new file mode 100644 index 000000000..fed1f71c7 --- /dev/null +++ b/backend/app/crud/model_config.py @@ -0,0 +1,97 @@ +from typing import Any, Literal + +from sqlmodel import Session, select + +from app.models import ModelConfig + + +def list_active_model_configs( + session: Session, + provider: Literal["openai", "google"] | None = None, + skip: int = 0, + limit: int = 100, +) -> tuple[list[ModelConfig], bool]: + statement = select(ModelConfig).where(ModelConfig.is_active) + + if provider: + statement = statement.where(ModelConfig.provider == provider) + + statement = statement.order_by(ModelConfig.provider, ModelConfig.model_name) + statement = statement.offset(skip).limit(limit + 1) + models = list(session.exec(statement).all()) + + has_more = False + if len(models) > limit: + has_more = True + models = models[:limit] + + return models, has_more + + +def list_all_active_model_configs( + session: Session, + provider: Literal["openai", "google"] | None = None, +) -> list[ModelConfig]: + statement = select(ModelConfig).where(ModelConfig.is_active) + + if provider: + statement = statement.where(ModelConfig.provider == provider) + + statement = statement.order_by(ModelConfig.provider, ModelConfig.model_name) + return list(session.exec(statement).all()) + + +def get_model_config( + session: Session, provider: Literal["openai", "google"], model_name: str +) -> ModelConfig | None: + statement = select(ModelConfig).where( + ModelConfig.provider == provider, + ModelConfig.model_name == model_name, + ModelConfig.is_active, + ) + return session.exec(statement).first() + + +def estimate_model_cost( + session: Session, + provider: Literal["openai", "google"], + model_name: str, + input_tokens: int, + output_tokens: int, + usage_type: Literal["response", "batch"] = "response", +) -> dict[str, Any] | None: + model = get_model_config(session=session, provider=provider, model_name=model_name) + if model is None or model.pricing is None: + return None + + if not isinstance(model.pricing, dict): + return None + + pricing_source: dict[str, Any] = model.pricing + usage_pricing = pricing_source.get(usage_type) + if not isinstance(usage_pricing, dict): + return None + + input_price = usage_pricing.get("input_token_cost") + output_price = usage_pricing.get("output_token_cost") + + if not isinstance(input_price, (int, float)) or not isinstance( + output_price, (int, float) + ): + return None + + input_cost = (input_tokens / 1_000_000) * float(input_price) + output_cost = (output_tokens / 1_000_000) * float(output_price) + total_cost = round(input_cost + output_cost, 4) + + return { + "provider": provider, + "model_name": model_name, + "usage_type": usage_type, + "input_tokens": input_tokens, + "output_tokens": output_tokens, + "input_cost": input_cost, + "output_cost": output_cost, + "total_cost": total_cost, + "currency": "USD", + } diff --git a/backend/app/models/__init__.py b/backend/app/models/__init__.py index 75fc97d9f..d3fc55a7f 100644 --- a/backend/app/models/__init__.py +++ b/backend/app/models/__init__.py @@ -127,6 +127,14 @@ ) from .message import Message + +from .model_config import ( + ModelConfig, + ModelConfigBase, + ModelConfigListPublic, + ModelConfigPublic, +) + from .model_evaluation import ( ModelEvaluation, ModelEvaluationBase, diff --git 
a/backend/app/models/model_config.py b/backend/app/models/model_config.py new file mode 100644 index 000000000..bc1d14115 --- /dev/null +++ b/backend/app/models/model_config.py @@ -0,0 +1,125 @@ +from datetime import datetime +from typing import Any, Literal + +import sqlalchemy as sa +from sqlalchemy.dialects.postgresql import ARRAY, JSONB +from sqlmodel import Field, SQLModel + +from app.core.util import now + + +class ModelConfigBase(SQLModel): + provider: Literal["openai", "google"] = Field( + default="openai", + sa_column=sa.Column( + sa.String, + nullable=False, + comment="provider name (e.g. openai, google)", + ), + ) + + model_name: str = Field( + ..., + sa_column=sa.Column( + sa.String, + nullable=False, + comment="model name (e.g. gpt-4o, gemini-3-flash-preview)", + ), + ) + + config: dict[str, Any] = Field( + default_factory=dict, + sa_column=sa.Column(JSONB, nullable=False, comment="model adhoc configuration"), + ) + + input_modalities: list[str] = Field( + default_factory=list, + sa_column=sa.Column( + ARRAY(sa.String), + nullable=False, + server_default="{}", + comment="supported input modalities: TEXT, IMAGE, FILES, AUDIO", + ), + ) + + output_modalities: list[str] = Field( + default_factory=list, + sa_column=sa.Column( + ARRAY(sa.String), + nullable=False, + server_default="{}", + comment="supported output modalities: TEXT, AUDIO", + ), + ) + + pricing: dict[str, Any] | None = Field( + default=None, + sa_column=sa.Column( + JSONB, + nullable=True, + comment=( + "pricing per 1M tokens in USD. " + "Structure: {response: {input_token_cost, output_token_cost}, " + "batch: {input_token_cost, output_token_cost}}" + ), + ), + ) + + is_active: bool = Field( + default=True, + sa_column=sa.Column( + sa.Boolean, + nullable=False, + server_default=sa.text("true"), + comment="whether this model is available", + ), + ) + + +class ModelConfig(ModelConfigBase, table=True): + __tablename__ = "model_config" + __table_args__ = ( + sa.UniqueConstraint("provider", "model_name"), + {"schema": "global"}, + ) + + id: int | None = Field( + default=None, + sa_column=sa.Column( + sa.Integer, + primary_key=True, + comment="unique identifier for model config table", + ), + ) + + inserted_at: datetime = Field( + default_factory=now, + sa_column=sa.Column( + sa.DateTime, + default=now, + nullable=False, + comment="timestamp when model configuration was created", + ), + ) + + updated_at: datetime = Field( + default_factory=now, + sa_column=sa.Column( + sa.DateTime, + default=now, + nullable=False, + onupdate=now, + comment="timestamp when model configuration was updated", + ), + ) + + +class ModelConfigPublic(ModelConfigBase): + id: int + inserted_at: datetime + updated_at: datetime + + +class ModelConfigListPublic(SQLModel): + data: list[ModelConfigPublic] + count: int diff --git a/backend/app/tests/api/routes/test_model_config.py b/backend/app/tests/api/routes/test_model_config.py new file mode 100644 index 000000000..4f111194e --- /dev/null +++ b/backend/app/tests/api/routes/test_model_config.py @@ -0,0 +1,148 @@ +from fastapi.testclient import TestClient + +from app.core.config import settings + + +def test_list_models( + client: TestClient, superuser_token_headers: dict[str, str] +) -> None: + response = client.get( + f"{settings.API_V1_STR}/models/", + headers=superuser_token_headers, + ) + + assert response.status_code == 200 + body = response.json() + assert body["success"] is True + assert "has_more" in body["metadata"] + assert body["data"]["count"] > 0 + assert all(m["is_active"] for m in 
body["data"]["data"]) + + +def test_list_models_has_more( + client: TestClient, superuser_token_headers: dict[str, str] +) -> None: + response = client.get( + f"{settings.API_V1_STR}/models/?skip=0&limit=1", + headers=superuser_token_headers, + ) + + assert response.status_code == 200 + body = response.json() + assert body["success"] is True + assert body["data"]["count"] == 1 + assert body["metadata"]["has_more"] is True + + +def test_list_models_filter_by_provider( + client: TestClient, superuser_token_headers: dict[str, str] +) -> None: + response = client.get( + f"{settings.API_V1_STR}/models/?provider=openai&limit=5", + headers=superuser_token_headers, + ) + + assert response.status_code == 200 + data = response.json()["data"]["data"] + assert len(data) <= 5 + assert all(m["provider"] == "openai" for m in data) + + +def test_list_models_invalid_limit( + client: TestClient, superuser_token_headers: dict[str, str] +) -> None: + response = client.get( + f"{settings.API_V1_STR}/models/?skip=0&limit=0", + headers=superuser_token_headers, + ) + assert response.status_code == 422 + + +def test_get_model(client: TestClient, superuser_token_headers: dict[str, str]) -> None: + response = client.get( + f"{settings.API_V1_STR}/models/openai/gpt-4o", + headers=superuser_token_headers, + ) + + assert response.status_code == 200 + model = response.json()["data"] + assert model["provider"] == "openai" + assert model["model_name"] == "gpt-4o" + + +def test_get_model_not_found( + client: TestClient, superuser_token_headers: dict[str, str] +) -> None: + response = client.get( + f"{settings.API_V1_STR}/models/openai/does-not-exist", + headers=superuser_token_headers, + ) + + assert response.status_code == 404 + assert response.json()["error"] == "Model not found" + + +def test_list_models_grouped( + client: TestClient, superuser_token_headers: dict[str, str] +) -> None: + response = client.get( + f"{settings.API_V1_STR}/models/grouped", + headers=superuser_token_headers, + ) + + assert response.status_code == 200 + body = response.json() + assert body["success"] is True + assert "has_more" in body["metadata"] + + grouped_models = body["data"] + assert grouped_models + for provider, models in grouped_models.items(): + assert isinstance(provider, str) + assert isinstance(models, list) + assert all(model["provider"] == provider for model in models) + assert all(model["is_active"] for model in models) + + +def test_list_models_grouped_has_more( + client: TestClient, superuser_token_headers: dict[str, str] +) -> None: + response = client.get( + f"{settings.API_V1_STR}/models/grouped?skip=0&limit=1", + headers=superuser_token_headers, + ) + + assert response.status_code == 200 + body = response.json() + assert body["success"] is True + assert body["metadata"]["has_more"] is True + + +def test_list_models_grouped_invalid_limit( + client: TestClient, superuser_token_headers: dict[str, str] +) -> None: + response = client.get( + f"{settings.API_V1_STR}/models/grouped?skip=0&limit=0", + headers=superuser_token_headers, + ) + + assert response.status_code == 422 + + +def test_list_providers( + client: TestClient, superuser_token_headers: dict[str, str] +) -> None: + response = client.get( + f"{settings.API_V1_STR}/models/providers", + headers=superuser_token_headers, + ) + + assert response.status_code == 200 + body = response.json() + assert body["success"] is True + + providers = body["data"] + assert isinstance(providers, list) + assert providers == sorted(providers) + assert len(providers) == 
len(set(providers)) + assert "openai" in providers diff --git a/backend/app/tests/crud/test_model_config.py b/backend/app/tests/crud/test_model_config.py new file mode 100644 index 000000000..be606f296 --- /dev/null +++ b/backend/app/tests/crud/test_model_config.py @@ -0,0 +1,162 @@ +from types import SimpleNamespace +from typing import Any + +import pytest + +from app.crud import model_config as model_config_crud + + +def _patch_model( + monkeypatch: pytest.MonkeyPatch, + pricing: Any, +) -> None: + model = SimpleNamespace(pricing=pricing) + monkeypatch.setattr( + model_config_crud, + "get_model_config", + lambda session, provider, model_name: model, + ) + + +def test_estimate_model_cost_response_success(monkeypatch: pytest.MonkeyPatch) -> None: + _patch_model( + monkeypatch, + pricing={ + "response": {"input_token_cost": 2.5, "output_token_cost": 10.0}, + "batch": {"input_token_cost": 1.25, "output_token_cost": 5.0}, + }, + ) + + result = model_config_crud.estimate_model_cost( + session=None, # type: ignore[arg-type] + provider="openai", + model_name="gpt-4o", + input_tokens=1_000_000, + output_tokens=500_000, + usage_type="response", + ) + + assert result is not None + assert result["usage_type"] == "response" + assert result["input_cost"] == 2.5 + assert result["output_cost"] == 5.0 + assert result["total_cost"] == 7.5 + + +def test_estimate_model_cost_batch_success(monkeypatch: pytest.MonkeyPatch) -> None: + _patch_model( + monkeypatch, + pricing={ + "response": {"input_token_cost": 2.5, "output_token_cost": 10.0}, + "batch": {"input_token_cost": 1.25, "output_token_cost": 5.0}, + }, + ) + + result = model_config_crud.estimate_model_cost( + session=None, # type: ignore[arg-type] + provider="openai", + model_name="gpt-4o", + input_tokens=1_000_000, + output_tokens=500_000, + usage_type="batch", + ) + + assert result is not None + assert result["usage_type"] == "batch" + assert result["input_cost"] == 1.25 + assert result["output_cost"] == 2.5 + assert result["total_cost"] == 3.75 + + +def test_estimate_model_cost_returns_none_for_missing_model( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.setattr( + model_config_crud, + "get_model_config", + lambda session, provider, model_name: None, + ) + + result = model_config_crud.estimate_model_cost( + session=None, # type: ignore[arg-type] + provider="openai", + model_name="does-not-exist", + input_tokens=1000, + output_tokens=1000, + ) + + assert result is None + + +def test_estimate_model_cost_returns_none_for_null_pricing( + monkeypatch: pytest.MonkeyPatch, +) -> None: + _patch_model(monkeypatch, pricing=None) + + result = model_config_crud.estimate_model_cost( + session=None, # type: ignore[arg-type] + provider="openai", + model_name="gpt-4o", + input_tokens=1000, + output_tokens=1000, + ) + + assert result is None + + +def test_estimate_model_cost_returns_none_for_non_dict_pricing( + monkeypatch: pytest.MonkeyPatch, +) -> None: + _patch_model(monkeypatch, pricing=["invalid"]) + + result = model_config_crud.estimate_model_cost( + session=None, # type: ignore[arg-type] + provider="openai", + model_name="gpt-4o", + input_tokens=1000, + output_tokens=1000, + ) + + assert result is None + + +def test_estimate_model_cost_returns_none_for_missing_usage_type( + monkeypatch: pytest.MonkeyPatch, +) -> None: + _patch_model( + monkeypatch, + pricing={"response": {"input_token_cost": 1.0, "output_token_cost": 2.0}}, + ) + + result = model_config_crud.estimate_model_cost( + session=None, # type: ignore[arg-type] + provider="openai", + 
model_name="gpt-4o", + input_tokens=1000, + output_tokens=1000, + usage_type="batch", + ) + + assert result is None + + +def test_estimate_model_cost_returns_none_for_non_numeric_prices( + monkeypatch: pytest.MonkeyPatch, +) -> None: + _patch_model( + monkeypatch, + pricing={ + "response": {"input_token_cost": "cheap", "output_token_cost": "expensive"} + }, + ) + + result = model_config_crud.estimate_model_cost( + session=None, # type: ignore[arg-type] + provider="openai", + model_name="gpt-4o", + input_tokens=1000, + output_tokens=1000, + usage_type="response", + ) + + assert result is None
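+
+
+# Illustrative sketch, not part of the endpoints above: with the seeded gpt-4o
+# pricing (every batch price is exactly half the response price), the estimator
+# should report batch usage at half the response cost for identical token counts.
+def test_estimate_model_cost_batch_is_half_of_response(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    _patch_model(
+        monkeypatch,
+        pricing={
+            "response": {"input_token_cost": 2.5, "output_token_cost": 10.0},
+            "batch": {"input_token_cost": 1.25, "output_token_cost": 5.0},
+        },
+    )
+
+    costs: dict[str, float] = {}
+    for usage_type in ("response", "batch"):
+        result = model_config_crud.estimate_model_cost(
+            session=None,  # type: ignore[arg-type]
+            provider="openai",
+            model_name="gpt-4o",
+            input_tokens=1_000_000,
+            output_tokens=1_000_000,
+            usage_type=usage_type,  # type: ignore[arg-type]
+        )
+        assert result is not None
+        costs[usage_type] = result["total_cost"]
+
+    # response: 2.5 + 10.0 = 12.5; batch: 1.25 + 5.0 = 6.25
+    assert costs["batch"] == costs["response"] / 2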