From df0e840b198d12340dc7766f5dc6bb15665816d9 Mon Sep 17 00:00:00 2001 From: Prashant Vasudevan <71649489+vprashrex@users.noreply.github.com> Date: Thu, 12 Mar 2026 09:47:38 +0530 Subject: [PATCH 01/10] Model Config: Add model configuration table and API endpoints --- .../versions/050_create_model_config_table.py | 132 ++++++++++++++++++ .../app/api/docs/model_config/get_model.md | 54 +++++++ .../app/api/docs/model_config/list_models.md | 62 ++++++++ backend/app/api/main.py | 3 +- backend/app/api/routes/model_config.py | 50 +++++++ backend/app/crud/model_config.py | 50 +++++++ backend/app/models/__init__.py | 8 ++ backend/app/models/model_config.py | 124 ++++++++++++++++ 8 files changed, 482 insertions(+), 1 deletion(-) create mode 100644 backend/app/alembic/versions/050_create_model_config_table.py create mode 100644 backend/app/api/docs/model_config/get_model.md create mode 100644 backend/app/api/docs/model_config/list_models.md create mode 100644 backend/app/api/routes/model_config.py create mode 100644 backend/app/crud/model_config.py create mode 100644 backend/app/models/model_config.py diff --git a/backend/app/alembic/versions/050_create_model_config_table.py b/backend/app/alembic/versions/050_create_model_config_table.py new file mode 100644 index 000000000..aa5481c17 --- /dev/null +++ b/backend/app/alembic/versions/050_create_model_config_table.py @@ -0,0 +1,132 @@ +"""create model_config table + +Revision ID: 050 +Revises: 049 +Create Date: 2026-03-12 00:00:00.000000 + +""" + +import sqlalchemy as sa +from alembic import op +from sqlalchemy.dialects import postgresql + +# revision identifiers, used by Alembic. +revision = "050" +down_revision = "049" +branch_labels = None +depends_on = None + + +def upgrade(): + op.create_table( + "model_config", + sa.Column( + "id", + sa.Integer(), + sa.Identity(always=False), + nullable=False, + comment="unique identifier for model config table", + ), + sa.Column( + "provider", + sa.String(), + nullable=False, + comment="provider name (e.g. openai, google)", + ), + sa.Column( + "model_name", + sa.String(), + nullable=False, + comment="model name (e.g. gpt-4o, gemini-3-flash-preview)", + ), + sa.Column( + "config", + postgresql.JSONB(astext_type=sa.Text()), + nullable=False, + comment="model adhoc configuration", + ), + sa.Column( + "input_modalities", + postgresql.ARRAY(sa.String()), + nullable=False, + server_default="{}", + comment="supported input modalities: TEXT, IMAGE, PDF, AUDIO", + ), + sa.Column( + "output_modalities", + postgresql.ARRAY(sa.String()), + nullable=False, + server_default="{}", + comment="supported output modalities: TEXT, AUDIO", + ), + sa.Column( + "default_for", + sa.String(), + nullable=True, + comment=( + "completion types this model is the default for. " + "e.g. [text, stt, tts]. " + "NULL means not a default. 
" + "Supported: text, stt, tts" + ), + ), + sa.Column( + "is_active", + sa.Boolean(), + nullable=False, + server_default=sa.text("true"), + comment="whether this model is available", + ), + sa.Column( + "inserted_at", + sa.DateTime(), + nullable=False, + comment="timestamp when model configuration was created", + ), + sa.Column( + "updated_at", + sa.DateTime(), + nullable=False, + comment="timestamp when model configuration was updated", + ), + sa.PrimaryKeyConstraint("id"), + sa.UniqueConstraint("provider", "model_name"), + schema="global", + ) + + # Seed default model configurations + op.execute( + """ + INSERT INTO global.model_config (id, provider, model_name, config, input_modalities, output_modalities, default_for, is_active, inserted_at, updated_at) + VALUES + (1, 'openai', 'gpt-4o-mini', '{"temperature": {"type": "float", "default": 1.0, "min": 0.0, "max": 2.0, "description": "Controls randomness. Lower = more deterministic."}, "top_p": {"type": "float", "default": 1.0, "min": 0.0, "max": 1.0, "description": "Nucleus sampling. Use either this or temperature, not both."}, "max_output_tokens": {"type": "int", "default": 2048, "min": 1, "max": 32768, "description": "Max tokens in the response."}}', '{TEXT,IMAGE}', '{TEXT}', NULL, true, NOW(), NOW()), + (2, 'openai', 'gpt-4o', '{"temperature": {"type": "float", "default": 1.0, "min": 0.0, "max": 2.0, "description": "Controls randomness. Lower = more deterministic."}, "top_p": {"type": "float", "default": 1.0, "min": 0.0, "max": 1.0, "description": "Nucleus sampling. Use either this or temperature, not both."}, "max_output_tokens": {"type": "int", "default": 2048, "min": 1, "max": 32768, "description": "Max tokens in the response."}}', '{TEXT,IMAGE}', '{TEXT}', 'text', true, NOW(), NOW()), + (3, 'openai', 'gpt-4.1', '{"temperature": {"type": "float", "default": 1.0, "min": 0.0, "max": 2.0, "description": "Controls randomness. Lower = more deterministic."}, "top_p": {"type": "float", "default": 1.0, "min": 0.0, "max": 1.0, "description": "Nucleus sampling. Use either this or temperature, not both."}, "max_output_tokens": {"type": "int", "default": 2048, "min": 1, "max": 32768, "description": "Max tokens in the response."}}', '{TEXT,IMAGE}', '{TEXT}', NULL, true, NOW(), NOW()), + (4, 'openai', 'gpt-4.1-mini', '{"temperature": {"type": "float", "default": 1.0, "min": 0.0, "max": 2.0, "description": "Controls randomness. Lower = more deterministic."}, "top_p": {"type": "float", "default": 1.0, "min": 0.0, "max": 1.0, "description": "Nucleus sampling. Use either this or temperature, not both."}, "max_output_tokens": {"type": "int", "default": 2048, "min": 1, "max": 32768, "description": "Max tokens in the response."}}', '{TEXT,IMAGE}', '{TEXT}', NULL, true, NOW(), NOW()), + (5, 'openai', 'gpt-4.1-nano', '{"temperature": {"type": "float", "default": 1.0, "min": 0.0, "max": 2.0, "description": "Controls randomness. Lower = more deterministic."}, "top_p": {"type": "float", "default": 1.0, "min": 0.0, "max": 1.0, "description": "Nucleus sampling. Use either this or temperature, not both."}, "max_output_tokens": {"type": "int", "default": 2048, "min": 1, "max": 32768, "description": "Max tokens in the response."}}', '{TEXT,IMAGE}', '{TEXT}', NULL, true, NOW(), NOW()), + (6, 'openai', 'o3-mini', '{"effort": {"type": "enum", "default": "medium", "options": ["low", "medium", "high"], "description": "How long the model spends reasoning. 
Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', NULL, true, NOW(), NOW()), + (7, 'openai', 'o3', '{"effort": {"type": "enum", "default": "medium", "options": ["low", "medium", "high"], "description": "How long the model spends reasoning. Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', NULL, true, NOW(), NOW()), + (8, 'openai', 'o4-mini', '{"effort": {"type": "enum", "default": "medium", "options": ["low", "medium", "high"], "description": "How long the model spends reasoning. Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', NULL, true, NOW(), NOW()), + (9, 'openai', 'gpt-5', '{"effort": {"type": "enum", "default": "medium", "options": ["minimal", "low", "medium", "high"], "description": "How long the model spends reasoning. Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', NULL, true, NOW(), NOW()), + (10, 'openai', 'gpt-5-mini', '{"effort": {"type": "enum", "default": "medium", "options": ["minimal", "low", "medium", "high"], "description": "How long the model spends reasoning. Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', NULL, true, NOW(), NOW()), + (11, 'openai', 'gpt-5-nano', '{"effort": {"type": "enum", "default": "medium", "options": ["minimal", "low", "medium", "high"], "description": "How long the model spends reasoning. Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', NULL, true, NOW(), NOW()), + (12, 'openai', 'gpt-5.1', '{"effort": {"type": "enum", "default": "medium", "options": ["none", "low", "medium", "high"], "description": "How long the model spends reasoning. Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', NULL, true, NOW(), NOW()), + (13, 'openai', 'gpt-5.1-chat-latest', '{"summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', NULL, true, NOW(), NOW()), + (14, 'openai', 'gpt-5.2', '{"effort": {"type": "enum", "default": "medium", "options": ["none", "low", "medium", "high", "xhigh"], "description": "How long the model spends reasoning. 
Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', NULL, true, NOW(), NOW()), + (15, 'openai', 'gpt-5.2-chat-latest', '{"summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', NULL, true, NOW(), NOW()), + (16, 'openai', 'gpt-5.2-pro', '{"summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', NULL, true, NOW(), NOW()), + (17, 'openai', 'gpt-5.3-chat-latest', '{"summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', NULL, true, NOW(), NOW()), + (18, 'openai', 'gpt-5.4-2026-03-05', '{"effort": {"type": "enum", "default": "medium", "options": ["none", "low", "medium", "high", "xhigh"], "description": "How long the model spends reasoning. Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', NULL, true, NOW(), NOW()), + (19, 'openai', 'gpt-5.4-pro', '{"effort": {"type": "enum", "default": "medium", "options": ["none", "low", "medium", "high", "xhigh"], "description": "How long the model spends reasoning. Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', NULL, true, NOW(), NOW()) + """ + ) + + # Reset the id sequence to continue after the last seeded id + op.execute( + "SELECT setval(pg_get_serial_sequence('global.model_config', 'id'), " + "(SELECT MAX(id) FROM global.model_config))" + ) + + +def downgrade(): + op.drop_table("model_config", schema="global") diff --git a/backend/app/api/docs/model_config/get_model.md b/backend/app/api/docs/model_config/get_model.md new file mode 100644 index 000000000..0f4080081 --- /dev/null +++ b/backend/app/api/docs/model_config/get_model.md @@ -0,0 +1,54 @@ +## Endpoint + +**GET** `/api/v1/models/{provider}/{model_name}` + +Retrieve a specific model configuration by provider and model name. + +Returns model details including supported config parameters, input/output modalities, and default assignment. + +### Path Parameters + +- **`provider`** (required) — Provider name (e.g. `openai`, `google`) +- **`model_name`** (required) — Model name (e.g. `gpt-4o`, `gpt-4o-mini`) + +### Example Response + +```json +{ + "success": true, + "data": { + "id": 2, + "provider": "openai", + "model_name": "gpt-4o", + "config": { + "temperature": { + "type": "float", + "default": 1.0, + "min": 0.0, + "max": 2.0, + "description": "Controls randomness. Lower = more deterministic." + }, + "top_p": { + "type": "float", + "default": 1.0, + "min": 0.0, + "max": 1.0, + "description": "Nucleus sampling. Use either this or temperature, not both." + }, + "max_output_tokens": { + "type": "int", + "default": 2048, + "min": 1, + "max": 32768, + "description": "Max tokens in the response." 
+ } + }, + "input_modalities": ["TEXT", "IMAGE"], + "output_modalities": ["TEXT"], + "default_for": "text", + "is_active": true, + "inserted_at": "2026-03-12T00:00:00", + "updated_at": "2026-03-12T00:00:00" + } +} +``` diff --git a/backend/app/api/docs/model_config/list_models.md b/backend/app/api/docs/model_config/list_models.md new file mode 100644 index 000000000..82eac1a91 --- /dev/null +++ b/backend/app/api/docs/model_config/list_models.md @@ -0,0 +1,62 @@ +## Endpoint + +**GET** `/api/v1/models` + +Retrieve a list of all active model configurations. + +Returns model details including provider, model name, supported config parameters, input/output modalities, and default assignment. + +Optionally filter by provider (e.g. openai, google). + +### Query Parameters + +- **`provider`** (optional) — Filter by provider name (e.g. `openai`, `google`) +- **`skip`** (optional, default 0) — Number of records to skip for pagination +- **`limit`** (optional, default 100) — Maximum number of records to return + +### Example Response + +```json +{ + "success": true, + "data": { + "data": [ + { + "id": 1, + "provider": "openai", + "model_name": "gpt-4o-mini", + "config": { + "temperature": { + "type": "float", + "default": 1.0, + "min": 0.0, + "max": 2.0, + "description": "Controls randomness. Lower = more deterministic." + }, + "top_p": { + "type": "float", + "default": 1.0, + "min": 0.0, + "max": 1.0, + "description": "Nucleus sampling. Use either this or temperature, not both." + }, + "max_output_tokens": { + "type": "int", + "default": 2048, + "min": 1, + "max": 32768, + "description": "Max tokens in the response." + } + }, + "input_modalities": ["TEXT", "IMAGE"], + "output_modalities": ["TEXT"], + "default_for": null, + "is_active": true, + "inserted_at": "2026-03-12T00:00:00", + "updated_at": "2026-03-12T00:00:00" + } + ], + "count": 1 + } +} +``` diff --git a/backend/app/api/main.py b/backend/app/api/main.py index 5ab1cbd9e..49c081562 100644 --- a/backend/app/api/main.py +++ b/backend/app/api/main.py @@ -25,6 +25,7 @@ fine_tuning, model_evaluation, collection_job, + model_config, ) from app.api.routes import evaluations from app.core.config import settings @@ -54,7 +55,7 @@ api_router.include_router(utils.router) api_router.include_router(fine_tuning.router) api_router.include_router(model_evaluation.router) - +api_router.include_router(model_config.router) if settings.ENVIRONMENT in ["development", "testing"]: api_router.include_router(private.router) diff --git a/backend/app/api/routes/model_config.py b/backend/app/api/routes/model_config.py new file mode 100644 index 000000000..7b7caf05f --- /dev/null +++ b/backend/app/api/routes/model_config.py @@ -0,0 +1,50 @@ +import logging + +from fastapi import APIRouter, HTTPException + +from app.api.deps import AuthContextDep, SessionDep +from app.crud.model_config import get_active_models, get_model_config +from app.models import ModelConfigPublic, ModelConfigListPublic +from app.utils import APIResponse, load_description + +logger = logging.getLogger(__name__) +router = APIRouter(prefix="/models", tags=["Model Config"]) + + +@router.get( + "/", + response_model=APIResponse[ModelConfigListPublic], + description=load_description("model_config/list_models.md"), +) +def list_models( + session: SessionDep, + auth_context: AuthContextDep, + provider: str | None = None, + skip: int = 0, + limit: int = 100, +) -> APIResponse[ModelConfigListPublic]: + models = get_active_models( + session=session, provider=provider, skip=skip, limit=limit + ) + return 
APIResponse.success_response( + ModelConfigListPublic(data=models, count=len(models)) + ) + + +@router.get( + "/{provider}/{model_name:path}", + response_model=APIResponse[ModelConfigPublic], + description=load_description("model_config/get_model.md"), +) +def get_model( + session: SessionDep, auth_context: AuthContextDep, provider: str, model_name: str +) -> APIResponse[ModelConfigPublic]: + model = get_model_config(session=session, provider=provider, model_name=model_name) + + if model is None: + logger.error( + f"[get_model] Model not found | provider={provider}, model_name={model_name}" + ) + raise HTTPException(status_code=404, detail="Model not found") + + return APIResponse.success_response(model) diff --git a/backend/app/crud/model_config.py b/backend/app/crud/model_config.py new file mode 100644 index 000000000..546b051e3 --- /dev/null +++ b/backend/app/crud/model_config.py @@ -0,0 +1,50 @@ +import logging +from typing import Optional, Literal + +from sqlmodel import Session, select + +from app.models import ModelConfig + +logger = logging.getLogger(__name__) + + +def get_default_model_for_type( + session: Session, completion_type: Literal["text", "stt", "tts"] +) -> Optional[ModelConfig]: + statement = ( + select(ModelConfig) + .where( + ModelConfig.is_active == True, + ModelConfig.default_for == completion_type, + ) + .limit(1) + ) + + return session.exec(statement).first() + + +def get_active_models( + session: Session, + provider: Literal["openai", "google"] | None = None, + skip: int = 0, + limit: int = 100, +) -> list[ModelConfig]: + statement = select(ModelConfig).where(ModelConfig.is_active == True) + + if provider: + statement = statement.where(ModelConfig.provider == provider) + + statement = statement.order_by(ModelConfig.provider, ModelConfig.model_name) + statement = statement.offset(skip).limit(limit) + return list(session.exec(statement).all()) + + +def get_model_config( + session: Session, provider: Literal["openai", "google"], model_name: str +) -> Optional[ModelConfig]: + statement = select(ModelConfig).where( + ModelConfig.provider == provider, + ModelConfig.model_name == model_name, + ModelConfig.is_active == True, + ) + return session.exec(statement).first() diff --git a/backend/app/models/__init__.py b/backend/app/models/__init__.py index b5cb3f0c6..9aeeee1da 100644 --- a/backend/app/models/__init__.py +++ b/backend/app/models/__init__.py @@ -118,6 +118,14 @@ ) from .message import Message + +from .model_config import ( + ModelConfig, + ModelConfigBase, + ModelConfigListPublic, + ModelConfigPublic, +) + from .model_evaluation import ( ModelEvaluation, ModelEvaluationBase, diff --git a/backend/app/models/model_config.py b/backend/app/models/model_config.py new file mode 100644 index 000000000..73d2c2963 --- /dev/null +++ b/backend/app/models/model_config.py @@ -0,0 +1,124 @@ +from datetime import datetime +from typing import Any, Literal + +import sqlalchemy as sa +from app.core.util import now +from sqlmodel import Field, SQLModel +from sqlalchemy.dialects.postgresql import JSONB, ARRAY + + +class ModelConfigBase(SQLModel): + provider: Literal["openai", "google"] = Field( + default="openai", + sa_column=sa.Column( + sa.String, nullable=False, comment="provider name (e.g. openai, google)" + ), + ) + + model_name: str = Field( + ..., + sa_column=sa.Column( + sa.String, + nullable=False, + comment="model name (e.g. 
gpt-4o, gemini-3-flash-preview)", + ), + ) + + config: dict[str, Any] = Field( + default_factory=dict, + sa_column=sa.Column(JSONB, nullable=False, comment="model adhoc configuration"), + ) + + input_modalities: list[str] = Field( + default_factory=list, + sa_column=sa.Column( + ARRAY(sa.String), + nullable=False, + server_default="{}", + comment="supported input modalities: TEXT, IMAGE, PDF, AUDIO", + ), + ) + + output_modalities: list[str] = Field( + default_factory=list, + sa_column=sa.Column( + ARRAY(sa.String), + nullable=False, + server_default="{}", + comment="supported output modalities: TEXT, AUDIO", + ), + ) + + # NOTE: can we use this default_for column to help in routing? + default_for: Literal["text", "stt", "tts"] | None = Field( + default=None, + sa_column=sa.Column( + sa.String, + nullable=True, + comment=( + "completion types this model is the default for. " + "e.g. [text, stt, tts]. " + "NULL means not a default. " + "Supported: text, stt, tts" + ), + ), + ) + + is_active: bool = Field( + default=True, + sa_column=sa.Column( + sa.Boolean, + nullable=False, + server_default=sa.text("true"), + comment="whether this model is available", + ), + ) + + +class ModelConfig(ModelConfigBase, table=True): + __tablename__ = "model_config" + __table_args__ = ( + sa.UniqueConstraint("provider", "model_name"), + {"schema": "global"}, + ) + + id: int | None = Field( + default=None, + sa_column=sa.Column( + sa.Integer, + primary_key=True, + comment="unique identifier for model config table", + ), + ) + + inserted_at: datetime = Field( + default_factory=now, + sa_column=sa.Column( + sa.DateTime, + default=now, + nullable=False, + comment="timestamp when model configuration was created", + ), + ) + + updated_at: datetime = Field( + default_factory=now, + sa_column=sa.Column( + sa.DateTime, + default=now, + nullable=False, + onupdate=now, + comment="timestamp when model configuration was updated", + ), + ) + + +class ModelConfigPublic(ModelConfigBase): + id: int + inserted_at: datetime + updated_at: datetime + + +class ModelConfigListPublic(SQLModel): + data: list[ModelConfigPublic] + count: int From 2b9ffefacb77cf207adab4eeb1f62baaf316a600 Mon Sep 17 00:00:00 2001 From: Prashant Vasudevan <71649489+vprashrex@users.noreply.github.com> Date: Mon, 13 Apr 2026 07:00:01 +0530 Subject: [PATCH 02/10] feat: add model_config table with default configurations --- ...ate_model_config_table.py => 051_create_model_config_table.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename backend/app/alembic/versions/{050_create_model_config_table.py => 051_create_model_config_table.py} (100%) diff --git a/backend/app/alembic/versions/050_create_model_config_table.py b/backend/app/alembic/versions/051_create_model_config_table.py similarity index 100% rename from backend/app/alembic/versions/050_create_model_config_table.py rename to backend/app/alembic/versions/051_create_model_config_table.py From 4b68444c6e1fc8053a0ca54230cf544c0d7b8a67 Mon Sep 17 00:00:00 2001 From: Prashant Vasudevan <71649489+vprashrex@users.noreply.github.com> Date: Mon, 13 Apr 2026 07:02:35 +0530 Subject: [PATCH 03/10] fix: update revision identifiers for model_config table migration --- .../app/alembic/versions/051_create_model_config_table.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/backend/app/alembic/versions/051_create_model_config_table.py b/backend/app/alembic/versions/051_create_model_config_table.py index aa5481c17..229523995 100644 --- 
a/backend/app/alembic/versions/051_create_model_config_table.py +++ b/backend/app/alembic/versions/051_create_model_config_table.py @@ -1,7 +1,7 @@ """create model_config table -Revision ID: 050 -Revises: 049 +Revision ID: 051 +Revises: 050 Create Date: 2026-03-12 00:00:00.000000 """ @@ -11,8 +11,8 @@ from sqlalchemy.dialects import postgresql # revision identifiers, used by Alembic. -revision = "050" -down_revision = "049" +revision = "051" +down_revision = "050" branch_labels = None depends_on = None From 6e322c9e8bb9b6986f19e439092031a19509b976 Mon Sep 17 00:00:00 2001 From: Prashant Vasudevan <71649489+vprashrex@users.noreply.github.com> Date: Mon, 13 Apr 2026 14:44:38 +0530 Subject: [PATCH 04/10] feat: add tests for model configuration endpoints --- .../app/tests/api/routes/test_model_config.py | 68 +++++++++++++++++++ 1 file changed, 68 insertions(+) create mode 100644 backend/app/tests/api/routes/test_model_config.py diff --git a/backend/app/tests/api/routes/test_model_config.py b/backend/app/tests/api/routes/test_model_config.py new file mode 100644 index 000000000..60beda986 --- /dev/null +++ b/backend/app/tests/api/routes/test_model_config.py @@ -0,0 +1,68 @@ +from fastapi.testclient import TestClient +from sqlmodel import Session + +from app.core.config import settings +from app.crud.model_config import get_default_model_for_type + + +def test_list_models( + client: TestClient, superuser_token_headers: dict[str, str] +) -> None: + response = client.get( + f"{settings.API_V1_STR}/models/", + headers=superuser_token_headers, + ) + + assert response.status_code == 200 + body = response.json() + assert body["success"] is True + assert body["data"]["count"] > 0 + assert all(m["is_active"] for m in body["data"]["data"]) + + +def test_list_models_filter_by_provider( + client: TestClient, superuser_token_headers: dict[str, str] +) -> None: + response = client.get( + f"{settings.API_V1_STR}/models/?provider=openai&limit=5", + headers=superuser_token_headers, + ) + + assert response.status_code == 200 + data = response.json()["data"]["data"] + assert len(data) <= 5 + assert all(m["provider"] == "openai" for m in data) + + +def test_get_model( + client: TestClient, superuser_token_headers: dict[str, str] +) -> None: + response = client.get( + f"{settings.API_V1_STR}/models/openai/gpt-4o", + headers=superuser_token_headers, + ) + + assert response.status_code == 200 + model = response.json()["data"] + assert model["provider"] == "openai" + assert model["model_name"] == "gpt-4o" + + +def test_get_model_not_found( + client: TestClient, superuser_token_headers: dict[str, str] +) -> None: + response = client.get( + f"{settings.API_V1_STR}/models/openai/does-not-exist", + headers=superuser_token_headers, + ) + + assert response.status_code == 404 + assert response.json()["error"] == "Model not found" + + +def test_get_default_model_for_type(db: Session) -> None: + model = get_default_model_for_type(session=db, completion_type="text") + + assert model is not None + assert model.default_for == "text" + assert model.is_active is True From 244b43fb21a2e2a8d37f7014b2cd3ebcc903bc2c Mon Sep 17 00:00:00 2001 From: Prashant Vasudevan <71649489+vprashrex@users.noreply.github.com> Date: Mon, 13 Apr 2026 15:37:57 +0530 Subject: [PATCH 05/10] refactor: simplify function definition for test_get_model --- backend/app/tests/api/routes/test_model_config.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/backend/app/tests/api/routes/test_model_config.py 
b/backend/app/tests/api/routes/test_model_config.py index 60beda986..779d22e50 100644 --- a/backend/app/tests/api/routes/test_model_config.py +++ b/backend/app/tests/api/routes/test_model_config.py @@ -34,9 +34,7 @@ def test_list_models_filter_by_provider( assert all(m["provider"] == "openai" for m in data) -def test_get_model( - client: TestClient, superuser_token_headers: dict[str, str] -) -> None: +def test_get_model(client: TestClient, superuser_token_headers: dict[str, str]) -> None: response = client.get( f"{settings.API_V1_STR}/models/openai/gpt-4o", headers=superuser_token_headers, From 951f9ba89410f9b4e4e48728664bf2db75625d76 Mon Sep 17 00:00:00 2001 From: Prashant Vasudevan <71649489+vprashrex@users.noreply.github.com> Date: Tue, 14 Apr 2026 14:20:51 +0530 Subject: [PATCH 06/10] feat: add endpoints to list models grouped by provider and list active providers --- .../versions/051_create_model_config_table.py | 52 ++++++++-------- .../docs/model_config/list_models_grouped.md | 45 ++++++++++++++ .../api/docs/model_config/list_providers.md | 16 +++++ backend/app/api/routes/model_config.py | 41 +++++++++++- backend/app/crud/model_config.py | 62 ++++++++++++++----- backend/app/models/model_config.py | 51 ++++++++++++--- .../app/tests/api/routes/test_model_config.py | 44 ++++++++++--- 7 files changed, 250 insertions(+), 61 deletions(-) create mode 100644 backend/app/api/docs/model_config/list_models_grouped.md create mode 100644 backend/app/api/docs/model_config/list_providers.md diff --git a/backend/app/alembic/versions/051_create_model_config_table.py b/backend/app/alembic/versions/051_create_model_config_table.py index 229523995..e913bcb5e 100644 --- a/backend/app/alembic/versions/051_create_model_config_table.py +++ b/backend/app/alembic/versions/051_create_model_config_table.py @@ -50,7 +50,7 @@ def upgrade(): postgresql.ARRAY(sa.String()), nullable=False, server_default="{}", - comment="supported input modalities: TEXT, IMAGE, PDF, AUDIO", + comment="supported input modalities: TEXT, IMAGE, FILES, AUDIO", ), sa.Column( "output_modalities", @@ -60,14 +60,12 @@ def upgrade(): comment="supported output modalities: TEXT, AUDIO", ), sa.Column( - "default_for", - sa.String(), + "pricing", + postgresql.JSONB(astext_type=sa.Text()), nullable=True, comment=( - "completion types this model is the default for. " - "e.g. [text, stt, tts]. " - "NULL means not a default. " - "Supported: text, stt, tts" + "pricing per 1M tokens in USD. " + "Structure: {response: {input_token_cost, output_token_cost}, batch: {input_token_cost, output_token_cost}}" ), ), sa.Column( @@ -97,27 +95,27 @@ def upgrade(): # Seed default model configurations op.execute( """ - INSERT INTO global.model_config (id, provider, model_name, config, input_modalities, output_modalities, default_for, is_active, inserted_at, updated_at) + INSERT INTO global.model_config (id, provider, model_name, config, input_modalities, output_modalities, pricing, is_active, inserted_at, updated_at) VALUES - (1, 'openai', 'gpt-4o-mini', '{"temperature": {"type": "float", "default": 1.0, "min": 0.0, "max": 2.0, "description": "Controls randomness. Lower = more deterministic."}, "top_p": {"type": "float", "default": 1.0, "min": 0.0, "max": 1.0, "description": "Nucleus sampling. 
Use either this or temperature, not both."}, "max_output_tokens": {"type": "int", "default": 2048, "min": 1, "max": 32768, "description": "Max tokens in the response."}}', '{TEXT,IMAGE}', '{TEXT}', NULL, true, NOW(), NOW()), - (2, 'openai', 'gpt-4o', '{"temperature": {"type": "float", "default": 1.0, "min": 0.0, "max": 2.0, "description": "Controls randomness. Lower = more deterministic."}, "top_p": {"type": "float", "default": 1.0, "min": 0.0, "max": 1.0, "description": "Nucleus sampling. Use either this or temperature, not both."}, "max_output_tokens": {"type": "int", "default": 2048, "min": 1, "max": 32768, "description": "Max tokens in the response."}}', '{TEXT,IMAGE}', '{TEXT}', 'text', true, NOW(), NOW()), - (3, 'openai', 'gpt-4.1', '{"temperature": {"type": "float", "default": 1.0, "min": 0.0, "max": 2.0, "description": "Controls randomness. Lower = more deterministic."}, "top_p": {"type": "float", "default": 1.0, "min": 0.0, "max": 1.0, "description": "Nucleus sampling. Use either this or temperature, not both."}, "max_output_tokens": {"type": "int", "default": 2048, "min": 1, "max": 32768, "description": "Max tokens in the response."}}', '{TEXT,IMAGE}', '{TEXT}', NULL, true, NOW(), NOW()), - (4, 'openai', 'gpt-4.1-mini', '{"temperature": {"type": "float", "default": 1.0, "min": 0.0, "max": 2.0, "description": "Controls randomness. Lower = more deterministic."}, "top_p": {"type": "float", "default": 1.0, "min": 0.0, "max": 1.0, "description": "Nucleus sampling. Use either this or temperature, not both."}, "max_output_tokens": {"type": "int", "default": 2048, "min": 1, "max": 32768, "description": "Max tokens in the response."}}', '{TEXT,IMAGE}', '{TEXT}', NULL, true, NOW(), NOW()), - (5, 'openai', 'gpt-4.1-nano', '{"temperature": {"type": "float", "default": 1.0, "min": 0.0, "max": 2.0, "description": "Controls randomness. Lower = more deterministic."}, "top_p": {"type": "float", "default": 1.0, "min": 0.0, "max": 1.0, "description": "Nucleus sampling. Use either this or temperature, not both."}, "max_output_tokens": {"type": "int", "default": 2048, "min": 1, "max": 32768, "description": "Max tokens in the response."}}', '{TEXT,IMAGE}', '{TEXT}', NULL, true, NOW(), NOW()), - (6, 'openai', 'o3-mini', '{"effort": {"type": "enum", "default": "medium", "options": ["low", "medium", "high"], "description": "How long the model spends reasoning. Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', NULL, true, NOW(), NOW()), - (7, 'openai', 'o3', '{"effort": {"type": "enum", "default": "medium", "options": ["low", "medium", "high"], "description": "How long the model spends reasoning. Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', NULL, true, NOW(), NOW()), - (8, 'openai', 'o4-mini', '{"effort": {"type": "enum", "default": "medium", "options": ["low", "medium", "high"], "description": "How long the model spends reasoning. 
Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', NULL, true, NOW(), NOW()), - (9, 'openai', 'gpt-5', '{"effort": {"type": "enum", "default": "medium", "options": ["minimal", "low", "medium", "high"], "description": "How long the model spends reasoning. Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', NULL, true, NOW(), NOW()), - (10, 'openai', 'gpt-5-mini', '{"effort": {"type": "enum", "default": "medium", "options": ["minimal", "low", "medium", "high"], "description": "How long the model spends reasoning. Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', NULL, true, NOW(), NOW()), - (11, 'openai', 'gpt-5-nano', '{"effort": {"type": "enum", "default": "medium", "options": ["minimal", "low", "medium", "high"], "description": "How long the model spends reasoning. Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', NULL, true, NOW(), NOW()), - (12, 'openai', 'gpt-5.1', '{"effort": {"type": "enum", "default": "medium", "options": ["none", "low", "medium", "high"], "description": "How long the model spends reasoning. Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', NULL, true, NOW(), NOW()), - (13, 'openai', 'gpt-5.1-chat-latest', '{"summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', NULL, true, NOW(), NOW()), - (14, 'openai', 'gpt-5.2', '{"effort": {"type": "enum", "default": "medium", "options": ["none", "low", "medium", "high", "xhigh"], "description": "How long the model spends reasoning. Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', NULL, true, NOW(), NOW()), - (15, 'openai', 'gpt-5.2-chat-latest', '{"summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', NULL, true, NOW(), NOW()), - (16, 'openai', 'gpt-5.2-pro', '{"summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', NULL, true, NOW(), NOW()), - (17, 'openai', 'gpt-5.3-chat-latest', '{"summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', NULL, true, NOW(), NOW()), - (18, 'openai', 'gpt-5.4-2026-03-05', '{"effort": {"type": "enum", "default": "medium", "options": ["none", "low", "medium", "high", "xhigh"], "description": "How long the model spends reasoning. 
Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', NULL, true, NOW(), NOW()), - (19, 'openai', 'gpt-5.4-pro', '{"effort": {"type": "enum", "default": "medium", "options": ["none", "low", "medium", "high", "xhigh"], "description": "How long the model spends reasoning. Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', NULL, true, NOW(), NOW()) + (1, 'openai', 'gpt-4o-mini', '{"temperature": {"type": "float", "default": 1.0, "min": 0.0, "max": 2.0, "description": "Controls randomness. Lower = more deterministic."}, "top_p": {"type": "float", "default": 1.0, "min": 0.0, "max": 1.0, "description": "Nucleus sampling. Use either this or temperature, not both."}, "max_output_tokens": {"type": "int", "default": 2048, "min": 1, "max": 32768, "description": "Max tokens in the response."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 0.15, "output_token_cost": 0.6}, "batch": {"input_token_cost": 0.075, "output_token_cost": 0.3}}', true, NOW(), NOW()), + (2, 'openai', 'gpt-4o', '{"temperature": {"type": "float", "default": 1.0, "min": 0.0, "max": 2.0, "description": "Controls randomness. Lower = more deterministic."}, "top_p": {"type": "float", "default": 1.0, "min": 0.0, "max": 1.0, "description": "Nucleus sampling. Use either this or temperature, not both."}, "max_output_tokens": {"type": "int", "default": 2048, "min": 1, "max": 32768, "description": "Max tokens in the response."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 2.5, "output_token_cost": 10}, "batch": {"input_token_cost": 1.25, "output_token_cost": 5}}', true, NOW(), NOW()), + (3, 'openai', 'gpt-4.1', '{"temperature": {"type": "float", "default": 1.0, "min": 0.0, "max": 2.0, "description": "Controls randomness. Lower = more deterministic."}, "top_p": {"type": "float", "default": 1.0, "min": 0.0, "max": 1.0, "description": "Nucleus sampling. Use either this or temperature, not both."}, "max_output_tokens": {"type": "int", "default": 2048, "min": 1, "max": 32768, "description": "Max tokens in the response."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 2, "output_token_cost": 8}, "batch": {"input_token_cost": 1, "output_token_cost": 4}}', true, NOW(), NOW()), + (4, 'openai', 'gpt-4.1-mini', '{"temperature": {"type": "float", "default": 1.0, "min": 0.0, "max": 2.0, "description": "Controls randomness. Lower = more deterministic."}, "top_p": {"type": "float", "default": 1.0, "min": 0.0, "max": 1.0, "description": "Nucleus sampling. Use either this or temperature, not both."}, "max_output_tokens": {"type": "int", "default": 2048, "min": 1, "max": 32768, "description": "Max tokens in the response."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 0.4, "output_token_cost": 1.6}, "batch": {"input_token_cost": 0.2, "output_token_cost": 0.8}}', true, NOW(), NOW()), + (5, 'openai', 'gpt-4.1-nano', '{"temperature": {"type": "float", "default": 1.0, "min": 0.0, "max": 2.0, "description": "Controls randomness. Lower = more deterministic."}, "top_p": {"type": "float", "default": 1.0, "min": 0.0, "max": 1.0, "description": "Nucleus sampling. 
Use either this or temperature, not both."}, "max_output_tokens": {"type": "int", "default": 2048, "min": 1, "max": 32768, "description": "Max tokens in the response."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 0.1, "output_token_cost": 0.4}, "batch": {"input_token_cost": 0.05, "output_token_cost": 0.2}}', true, NOW(), NOW()), + (6, 'openai', 'o3-mini', '{"effort": {"type": "enum", "default": "medium", "options": ["low", "medium", "high"], "description": "How long the model spends reasoning. Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 1.1, "output_token_cost": 4.4}, "batch": {"input_token_cost": 0.55, "output_token_cost": 2.2}}', true, NOW(), NOW()), + (7, 'openai', 'o3', '{"effort": {"type": "enum", "default": "medium", "options": ["low", "medium", "high"], "description": "How long the model spends reasoning. Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 2, "output_token_cost": 8}, "batch": {"input_token_cost": 1, "output_token_cost": 4}}', true, NOW(), NOW()), + (8, 'openai', 'o4-mini', '{"effort": {"type": "enum", "default": "medium", "options": ["low", "medium", "high"], "description": "How long the model spends reasoning. Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 1.1, "output_token_cost": 4.4}, "batch": {"input_token_cost": 0.55, "output_token_cost": 2.2}}', true, NOW(), NOW()), + (9, 'openai', 'gpt-5', '{"effort": {"type": "enum", "default": "medium", "options": ["minimal", "low", "medium", "high"], "description": "How long the model spends reasoning. Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 1.25, "output_token_cost": 10}, "batch": {"input_token_cost": 0.625, "output_token_cost": 5}}', true, NOW(), NOW()), + (10, 'openai', 'gpt-5-mini', '{"effort": {"type": "enum", "default": "medium", "options": ["minimal", "low", "medium", "high"], "description": "How long the model spends reasoning. Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 0.25, "output_token_cost": 2}, "batch": {"input_token_cost": 0.125, "output_token_cost": 1}}', true, NOW(), NOW()), + (11, 'openai', 'gpt-5-nano', '{"effort": {"type": "enum", "default": "medium", "options": ["minimal", "low", "medium", "high"], "description": "How long the model spends reasoning. 
Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 0.05, "output_token_cost": 0.4}, "batch": {"input_token_cost": 0.025, "output_token_cost": 0.2}}', true, NOW(), NOW()), + (12, 'openai', 'gpt-5.1', '{"effort": {"type": "enum", "default": "medium", "options": ["none", "low", "medium", "high"], "description": "How long the model spends reasoning. Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 1.25, "output_token_cost": 10}, "batch": {"input_token_cost": 0.625, "output_token_cost": 5}}', true, NOW(), NOW()), + (14, 'openai', 'gpt-5.2', '{"effort": {"type": "enum", "default": "medium", "options": ["none", "low", "medium", "high", "xhigh"], "description": "How long the model spends reasoning. Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 1.75, "output_token_cost": 14}, "batch": {"input_token_cost": 0.875, "output_token_cost": 7}}', true, NOW(), NOW()), + (16, 'openai', 'gpt-5.2-pro', '{"summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 21, "output_token_cost": 168}, "batch": {"input_token_cost": 10.5, "output_token_cost": 84}}', true, NOW(), NOW()), + (17, 'openai', 'gpt-5.3-chat-latest', '{"summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 1.75, "output_token_cost": 14}, "batch": {"input_token_cost": 0.875, "output_token_cost": 7}}', true, NOW(), NOW()), + (18, 'openai', 'gpt-5.4', '{"effort": {"type": "enum", "default": "medium", "options": ["none", "low", "medium", "high", "xhigh"], "description": "How long the model spends reasoning. Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 2.5, "output_token_cost": 15}, "batch": {"input_token_cost": 1.25, "output_token_cost": 7.5}}', true, NOW(), NOW()), + (19, 'openai', 'gpt-5.4-mini', '{"effort": {"type": "enum", "default": "medium", "options": ["none", "low", "medium", "high", "xhigh"], "description": "How long the model spends reasoning. Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 0.75, "output_token_cost": 4.5}, "batch": {"input_token_cost": 0.375, "output_token_cost": 2.25}}', true, NOW(), NOW()), + (20, 'openai', 'gpt-5.4-nano', '{"effort": {"type": "enum", "default": "medium", "options": ["none", "low", "medium", "high", "xhigh"], "description": "How long the model spends reasoning. 
Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 0.2, "output_token_cost": 1.25}, "batch": {"input_token_cost": 0.1, "output_token_cost": 0.625}}', true, NOW(), NOW()), + (21, 'openai', 'gpt-5.4-pro', '{"effort": {"type": "enum", "default": "medium", "options": ["none", "low", "medium", "high", "xhigh"], "description": "How long the model spends reasoning. Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 30, "output_token_cost": 180}, "batch": {"input_token_cost": 15, "output_token_cost": 90}}', true, NOW(), NOW()) """ ) diff --git a/backend/app/api/docs/model_config/list_models_grouped.md b/backend/app/api/docs/model_config/list_models_grouped.md new file mode 100644 index 000000000..0e0b725c0 --- /dev/null +++ b/backend/app/api/docs/model_config/list_models_grouped.md @@ -0,0 +1,45 @@ +## Endpoint + +**GET** `/api/v1/models/grouped` + +Retrieve all active models grouped by provider. + +Returns a dictionary where each key is a provider and each value is a list of active model configurations for that provider. + +### Example Response + +```json +{ + "success": true, + "data": { + "openai": [ + { + "id": 2, + "provider": "openai", + "model_name": "gpt-4o", + "config": { + "temperature": { + "type": "float", + "default": 1.0, + "min": 0.0, + "max": 2.0, + "description": "Controls randomness. Lower = more deterministic." + } + }, + "input_modalities": ["TEXT", "IMAGE"], + "output_modalities": ["TEXT"], + "pricing": { + "response": { + "input_token_cost": 2.5, + "output_token_cost": 10 + } + }, + "is_active": true, + "inserted_at": "2026-03-12T00:00:00", + "updated_at": "2026-03-12T00:00:00" + } + ], + "google": [] + } +} +``` diff --git a/backend/app/api/docs/model_config/list_providers.md b/backend/app/api/docs/model_config/list_providers.md new file mode 100644 index 000000000..aa498bf68 --- /dev/null +++ b/backend/app/api/docs/model_config/list_providers.md @@ -0,0 +1,16 @@ +## Endpoint + +**GET** `/api/v1/models/providers` + +Retrieve the list of providers that currently have active models. + +Returns provider names sorted in ascending order. 
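+
+### Example Request
+
+A minimal request sketch in Python. The base URL, the bearer-token header, and the `httpx` client are illustrative assumptions, not part of this patch; actual auth depends on the deployment:
+
+```python
+import httpx
+
+BASE_URL = "http://localhost:8000"  # assumed local deployment
+TOKEN = "<your-api-token>"  # assumed bearer token; replace with a real credential
+
+# Fetch the list of providers that currently have active models.
+response = httpx.get(
+    f"{BASE_URL}/api/v1/models/providers",
+    headers={"Authorization": f"Bearer {TOKEN}"},
+)
+response.raise_for_status()
+print(response.json())  # -> {'success': True, 'data': ['google', 'openai']}
+```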
+ +### Example Response + +```json +{ + "success": true, + "data": ["google", "openai"] +} +``` diff --git a/backend/app/api/routes/model_config.py b/backend/app/api/routes/model_config.py index 7b7caf05f..33089eb80 100644 --- a/backend/app/api/routes/model_config.py +++ b/backend/app/api/routes/model_config.py @@ -1,9 +1,15 @@ import logging +from typing import Literal +from collections import defaultdict from fastapi import APIRouter, HTTPException from app.api.deps import AuthContextDep, SessionDep -from app.crud.model_config import get_active_models, get_model_config +from app.crud.model_config import ( + estimate_model_cost, + get_active_models, + get_model_config, +) from app.models import ModelConfigPublic, ModelConfigListPublic from app.utils import APIResponse, load_description @@ -32,7 +38,38 @@ def list_models( @router.get( - "/{provider}/{model_name:path}", + "/grouped", + response_model=APIResponse[dict[str, list[ModelConfigPublic]]], + description=load_description("model_config/list_models_grouped.md"), +) +def list_models_grouped( + session: SessionDep, + auth_context: AuthContextDep, +) -> APIResponse[dict[str, list[ModelConfigPublic]]]: + models = get_active_models(session=session, skip=0, limit=1000) + grouped: dict[str, list[ModelConfigPublic]] = defaultdict(list) + for model in models: + grouped[model.provider].append(model) + + return APIResponse.success_response(dict(grouped)) + + +@router.get( + "/providers", + response_model=APIResponse[list[str]], + description=load_description("model_config/list_providers.md"), +) +def list_providers( + session: SessionDep, + auth_context: AuthContextDep, +) -> APIResponse[list[str]]: + models = get_active_models(session=session, skip=0, limit=1000) + providers = sorted({model.provider for model in models}) + return APIResponse.success_response(providers) + + +@router.get( + "/{provider}/{model_name}", response_model=APIResponse[ModelConfigPublic], description=load_description("model_config/get_model.md"), ) diff --git a/backend/app/crud/model_config.py b/backend/app/crud/model_config.py index 546b051e3..603110127 100644 --- a/backend/app/crud/model_config.py +++ b/backend/app/crud/model_config.py @@ -1,5 +1,5 @@ import logging -from typing import Optional, Literal +from typing import Any, Optional, Literal from sqlmodel import Session, select @@ -8,21 +8,6 @@ logger = logging.getLogger(__name__) -def get_default_model_for_type( - session: Session, completion_type: Literal["text", "stt", "tts"] -) -> Optional[ModelConfig]: - statement = ( - select(ModelConfig) - .where( - ModelConfig.is_active == True, - ModelConfig.default_for == completion_type, - ) - .limit(1) - ) - - return session.exec(statement).first() - - def get_active_models( session: Session, provider: Literal["openai", "google"] | None = None, @@ -48,3 +33,48 @@ def get_model_config( ModelConfig.is_active == True, ) return session.exec(statement).first() + + +def estimate_model_cost( + session: Session, + provider: Literal["openai", "google"], + model_name: str, + input_tokens: int, + output_tokens: int, + tag: Literal["response", "batch"] = "response", +) -> Optional[dict[str, Any]]: + model = get_model_config(session=session, provider=provider, model_name=model_name) + if model is None or model.pricing is None: + return None + + if not isinstance(model.pricing, dict): + return None + + pricing_source: dict[str, Any] = model.pricing + tag_pricing = pricing_source.get(tag) + if not isinstance(tag_pricing, dict): + return None + + input_price = 
tag_pricing.get("input_token_cost") + output_price = tag_pricing.get("output_token_cost") + + if not isinstance(input_price, (int, float)) or not isinstance( + output_price, (int, float) + ): + return None + + input_cost = (input_tokens / 1_000_000) * float(input_price) + output_cost = (output_tokens / 1_000_000) * float(output_price) + total_cost = round(input_cost + output_cost, 4) + + return { + "provider": provider, + "model_name": model_name, + "tag": tag, + "input_tokens": input_tokens, + "output_tokens": output_tokens, + "input_cost": input_cost, + "output_cost": output_cost, + "total_cost": total_cost, + "currency": "USD", + } diff --git a/backend/app/models/model_config.py b/backend/app/models/model_config.py index 73d2c2963..464c5f746 100644 --- a/backend/app/models/model_config.py +++ b/backend/app/models/model_config.py @@ -1,5 +1,5 @@ from datetime import datetime -from typing import Any, Literal +from typing import Any, Literal, Optional import sqlalchemy as sa from app.core.util import now @@ -35,7 +35,7 @@ class ModelConfigBase(SQLModel): ARRAY(sa.String), nullable=False, server_default="{}", - comment="supported input modalities: TEXT, IMAGE, PDF, AUDIO", + comment="supported input modalities: TEXT, IMAGE, FILES, AUDIO", ), ) @@ -49,17 +49,15 @@ class ModelConfigBase(SQLModel): ), ) - # NOTE: can we use this default_for column to help in routing? - default_for: Literal["text", "stt", "tts"] | None = Field( + pricing: Optional[dict[str, Any]] = Field( default=None, sa_column=sa.Column( - sa.String, + JSONB, nullable=True, comment=( - "completion types this model is the default for. " - "e.g. [text, stt, tts]. " - "NULL means not a default. " - "Supported: text, stt, tts" + "pricing per 1M tokens in USD. " + "Structure: {response: {input_token_cost, output_token_cost}, " + "batch: {input_token_cost, output_token_cost}}" ), ), ) @@ -122,3 +120,38 @@ class ModelConfigPublic(ModelConfigBase): class ModelConfigListPublic(SQLModel): data: list[ModelConfigPublic] count: int + + +# if __name__ == "__main__": +# import os + +# from sqlmodel import Session, create_engine + +# from app.crud.model_config import estimate_model_cost + +# database_url = "postgresql+psycopg://postgres:postgres@localhost:5432/kaapi" +# engine = create_engine(database_url) + +# with Session(engine) as session: +# input_tokens = 5000 +# output_tokens = 10000 + +# response_cost_info = estimate_model_cost( +# session=session, +# provider="openai", +# model_name="gpt-4o", +# input_tokens=input_tokens, +# output_tokens=output_tokens, +# tag="response", +# ) +# print(response_cost_info) + +# batch_cost_info = estimate_model_cost( +# session=session, +# provider="openai", +# model_name="gpt-4o", +# input_tokens=input_tokens, +# output_tokens=output_tokens, +# tag="batch", +# ) +# print(batch_cost_info) diff --git a/backend/app/tests/api/routes/test_model_config.py b/backend/app/tests/api/routes/test_model_config.py index 779d22e50..792918026 100644 --- a/backend/app/tests/api/routes/test_model_config.py +++ b/backend/app/tests/api/routes/test_model_config.py @@ -1,8 +1,6 @@ from fastapi.testclient import TestClient -from sqlmodel import Session from app.core.config import settings -from app.crud.model_config import get_default_model_for_type def test_list_models( @@ -58,9 +56,41 @@ def test_get_model_not_found( assert response.json()["error"] == "Model not found" -def test_get_default_model_for_type(db: Session) -> None: - model = get_default_model_for_type(session=db, completion_type="text") +def 
test_list_models_grouped( + client: TestClient, superuser_token_headers: dict[str, str] +) -> None: + response = client.get( + f"{settings.API_V1_STR}/models/grouped", + headers=superuser_token_headers, + ) + + assert response.status_code == 200 + body = response.json() + assert body["success"] is True + + grouped_models = body["data"] + assert grouped_models + for provider, models in grouped_models.items(): + assert isinstance(provider, str) + assert isinstance(models, list) + assert all(model["provider"] == provider for model in models) + assert all(model["is_active"] for model in models) + + +def test_list_providers( + client: TestClient, superuser_token_headers: dict[str, str] +) -> None: + response = client.get( + f"{settings.API_V1_STR}/models/providers", + headers=superuser_token_headers, + ) + + assert response.status_code == 200 + body = response.json() + assert body["success"] is True - assert model is not None - assert model.default_for == "text" - assert model.is_active is True + providers = body["data"] + assert isinstance(providers, list) + assert providers == sorted(providers) + assert len(providers) == len(set(providers)) + assert "openai" in providers From f07fb2facaa3e59b5cc76937bf8c93e688607995 Mon Sep 17 00:00:00 2001 From: Prashant Vasudevan <71649489+vprashrex@users.noreply.github.com> Date: Tue, 14 Apr 2026 14:30:46 +0530 Subject: [PATCH 07/10] feat: create model_config table with initial seed data --- ...l_config_table.py => 052_create_model_config_table.py} | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) rename backend/app/alembic/versions/{051_create_model_config_table.py => 052_create_model_config_table.py} (99%) diff --git a/backend/app/alembic/versions/051_create_model_config_table.py b/backend/app/alembic/versions/052_create_model_config_table.py similarity index 99% rename from backend/app/alembic/versions/051_create_model_config_table.py rename to backend/app/alembic/versions/052_create_model_config_table.py index e913bcb5e..4a6b1bff1 100644 --- a/backend/app/alembic/versions/051_create_model_config_table.py +++ b/backend/app/alembic/versions/052_create_model_config_table.py @@ -1,7 +1,7 @@ """create model_config table -Revision ID: 051 -Revises: 050 +Revision ID: 052 +Revises: 051 Create Date: 2026-03-12 00:00:00.000000 """ @@ -11,8 +11,8 @@ from sqlalchemy.dialects import postgresql # revision identifiers, used by Alembic. 
-revision = "051" -down_revision = "050" +revision = "052" +down_revision = "051" branch_labels = None depends_on = None From ee99e2eedf878c4b6666786cb5f42f2f3c27db6f Mon Sep 17 00:00:00 2001 From: Prashant Vasudevan <71649489+vprashrex@users.noreply.github.com> Date: Wed, 15 Apr 2026 11:35:40 +0530 Subject: [PATCH 08/10] chore: comments resolved --- .../versions/052_create_model_config_table.py | 14 +- .../app/api/docs/model_config/get_model.md | 17 +- .../app/api/docs/model_config/list_models.md | 13 +- .../docs/model_config/list_models_grouped.md | 5 +- backend/app/api/routes/model_config.py | 22 +-- backend/app/crud/model_config.py | 27 ++- backend/app/models/model_config.py | 37 +--- backend/app/tests/crud/test_model_config.py | 162 ++++++++++++++++++ 8 files changed, 217 insertions(+), 80 deletions(-) create mode 100644 backend/app/tests/crud/test_model_config.py diff --git a/backend/app/alembic/versions/052_create_model_config_table.py b/backend/app/alembic/versions/052_create_model_config_table.py index 4a6b1bff1..e74b94641 100644 --- a/backend/app/alembic/versions/052_create_model_config_table.py +++ b/backend/app/alembic/versions/052_create_model_config_table.py @@ -109,13 +109,13 @@ def upgrade(): (10, 'openai', 'gpt-5-mini', '{"effort": {"type": "enum", "default": "medium", "options": ["minimal", "low", "medium", "high"], "description": "How long the model spends reasoning. Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 0.25, "output_token_cost": 2}, "batch": {"input_token_cost": 0.125, "output_token_cost": 1}}', true, NOW(), NOW()), (11, 'openai', 'gpt-5-nano', '{"effort": {"type": "enum", "default": "medium", "options": ["minimal", "low", "medium", "high"], "description": "How long the model spends reasoning. Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 0.05, "output_token_cost": 0.4}, "batch": {"input_token_cost": 0.025, "output_token_cost": 0.2}}', true, NOW(), NOW()), (12, 'openai', 'gpt-5.1', '{"effort": {"type": "enum", "default": "medium", "options": ["none", "low", "medium", "high"], "description": "How long the model spends reasoning. Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 1.25, "output_token_cost": 10}, "batch": {"input_token_cost": 0.625, "output_token_cost": 5}}', true, NOW(), NOW()), - (14, 'openai', 'gpt-5.2', '{"effort": {"type": "enum", "default": "medium", "options": ["none", "low", "medium", "high", "xhigh"], "description": "How long the model spends reasoning. 
Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 1.75, "output_token_cost": 14}, "batch": {"input_token_cost": 0.875, "output_token_cost": 7}}', true, NOW(), NOW()), - (16, 'openai', 'gpt-5.2-pro', '{"summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 21, "output_token_cost": 168}, "batch": {"input_token_cost": 10.5, "output_token_cost": 84}}', true, NOW(), NOW()), - (17, 'openai', 'gpt-5.3-chat-latest', '{"summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 1.75, "output_token_cost": 14}, "batch": {"input_token_cost": 0.875, "output_token_cost": 7}}', true, NOW(), NOW()), - (18, 'openai', 'gpt-5.4', '{"effort": {"type": "enum", "default": "medium", "options": ["none", "low", "medium", "high", "xhigh"], "description": "How long the model spends reasoning. Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 2.5, "output_token_cost": 15}, "batch": {"input_token_cost": 1.25, "output_token_cost": 7.5}}', true, NOW(), NOW()), - (19, 'openai', 'gpt-5.4-mini', '{"effort": {"type": "enum", "default": "medium", "options": ["none", "low", "medium", "high", "xhigh"], "description": "How long the model spends reasoning. Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 0.75, "output_token_cost": 4.5}, "batch": {"input_token_cost": 0.375, "output_token_cost": 2.25}}', true, NOW(), NOW()), - (20, 'openai', 'gpt-5.4-nano', '{"effort": {"type": "enum", "default": "medium", "options": ["none", "low", "medium", "high", "xhigh"], "description": "How long the model spends reasoning. Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 0.2, "output_token_cost": 1.25}, "batch": {"input_token_cost": 0.1, "output_token_cost": 0.625}}', true, NOW(), NOW()), - (21, 'openai', 'gpt-5.4-pro', '{"effort": {"type": "enum", "default": "medium", "options": ["none", "low", "medium", "high", "xhigh"], "description": "How long the model spends reasoning. Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 30, "output_token_cost": 180}, "batch": {"input_token_cost": 15, "output_token_cost": 90}}', true, NOW(), NOW()) + (13, 'openai', 'gpt-5.2', '{"effort": {"type": "enum", "default": "medium", "options": ["none", "low", "medium", "high", "xhigh"], "description": "How long the model spends reasoning. 
Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 1.75, "output_token_cost": 14}, "batch": {"input_token_cost": 0.875, "output_token_cost": 7}}', true, NOW(), NOW()), + (14, 'openai', 'gpt-5.2-pro', '{"summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 21, "output_token_cost": 168}, "batch": {"input_token_cost": 10.5, "output_token_cost": 84}}', true, NOW(), NOW()), + (15, 'openai', 'gpt-5.3-chat-latest', '{"summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 1.75, "output_token_cost": 14}, "batch": {"input_token_cost": 0.875, "output_token_cost": 7}}', true, NOW(), NOW()), + (16, 'openai', 'gpt-5.4', '{"effort": {"type": "enum", "default": "medium", "options": ["none", "low", "medium", "high", "xhigh"], "description": "How long the model spends reasoning. Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 2.5, "output_token_cost": 15}, "batch": {"input_token_cost": 1.25, "output_token_cost": 7.5}}', true, NOW(), NOW()), + (17, 'openai', 'gpt-5.4-mini', '{"effort": {"type": "enum", "default": "medium", "options": ["none", "low", "medium", "high", "xhigh"], "description": "How long the model spends reasoning. Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 0.75, "output_token_cost": 4.5}, "batch": {"input_token_cost": 0.375, "output_token_cost": 2.25}}', true, NOW(), NOW()), + (18, 'openai', 'gpt-5.4-nano', '{"effort": {"type": "enum", "default": "medium", "options": ["none", "low", "medium", "high", "xhigh"], "description": "How long the model spends reasoning. Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 0.2, "output_token_cost": 1.25}, "batch": {"input_token_cost": 0.1, "output_token_cost": 0.625}}', true, NOW(), NOW()), + (19, 'openai', 'gpt-5.4-pro', '{"effort": {"type": "enum", "default": "medium", "options": ["none", "low", "medium", "high", "xhigh"], "description": "How long the model spends reasoning. 
Higher = better but slower."}, "summary": {"type": "enum", "default": "auto", "options": ["auto", "detailed", "concise", "null"], "description": "Summarize the reasoning result."}}', '{TEXT,IMAGE}', '{TEXT}', '{"response": {"input_token_cost": 30, "output_token_cost": 180}, "batch": {"input_token_cost": 15, "output_token_cost": 90}}', true, NOW(), NOW()) """ ) diff --git a/backend/app/api/docs/model_config/get_model.md b/backend/app/api/docs/model_config/get_model.md index 0f4080081..b2f3700cb 100644 --- a/backend/app/api/docs/model_config/get_model.md +++ b/backend/app/api/docs/model_config/get_model.md @@ -4,7 +4,7 @@ Retrieve a specific model configuration by provider and model name. -Returns model details including supported config parameters, input/output modalities, and default assignment. +Returns model details including supported config parameters, input/output modalities, pricing, and active status. ### Path Parameters @@ -45,10 +45,23 @@ Returns model details including supported config parameters, input/output modali }, "input_modalities": ["TEXT", "IMAGE"], "output_modalities": ["TEXT"], - "default_for": "text", + "pricing": { + "response": { + "input_token_cost": 2.5, + "output_token_cost": 10 + }, + "batch": { + "input_token_cost": 1.25, + "output_token_cost": 5 + } + }, "is_active": true, "inserted_at": "2026-03-12T00:00:00", "updated_at": "2026-03-12T00:00:00" } } ``` + +### Error Response + +- `404 Not Found` — Model not found for the given `provider` and `model_name`. diff --git a/backend/app/api/docs/model_config/list_models.md b/backend/app/api/docs/model_config/list_models.md index 82eac1a91..412f0bfc3 100644 --- a/backend/app/api/docs/model_config/list_models.md +++ b/backend/app/api/docs/model_config/list_models.md @@ -4,7 +4,7 @@ Retrieve a list of all active model configurations. -Returns model details including provider, model name, supported config parameters, input/output modalities, and default assignment. +Returns model details including provider, model name, supported config parameters, input/output modalities, pricing, and active status. Optionally filter by provider (e.g. openai, google). @@ -50,7 +50,16 @@ Optionally filter by provider (e.g. openai, google). }, "input_modalities": ["TEXT", "IMAGE"], "output_modalities": ["TEXT"], - "default_for": null, + "pricing": { + "response": { + "input_token_cost": 0.15, + "output_token_cost": 0.6 + }, + "batch": { + "input_token_cost": 0.075, + "output_token_cost": 0.3 + } + }, "is_active": true, "inserted_at": "2026-03-12T00:00:00", "updated_at": "2026-03-12T00:00:00" diff --git a/backend/app/api/docs/model_config/list_models_grouped.md b/backend/app/api/docs/model_config/list_models_grouped.md index 0e0b725c0..eeb3a0246 100644 --- a/backend/app/api/docs/model_config/list_models_grouped.md +++ b/backend/app/api/docs/model_config/list_models_grouped.md @@ -4,7 +4,7 @@ Retrieve all active models grouped by provider. -Returns a dictionary where each key is a provider and each value is a list of active model configurations for that provider. +Returns a dictionary where each key is a provider present in active records, and each value is a list of active model configurations for that provider. 
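The grouping itself is a single pass that buckets active rows by `provider` (the route uses a `defaultdict`). A minimal standalone sketch, with plain dicts standing in for `ModelConfigPublic` objects:

```python
from collections import defaultdict

# Bucket active model rows by provider, as the /models/grouped route does.
rows = [
    {"provider": "openai", "model_name": "gpt-4o"},
    {"provider": "openai", "model_name": "gpt-4o-mini"},
]

grouped: dict[str, list[dict]] = defaultdict(list)
for row in rows:
    grouped[row["provider"]].append(row)

print(dict(grouped))  # {'openai': [<gpt-4o row>, <gpt-4o-mini row>]}
```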
### Example Response @@ -38,8 +38,7 @@ Returns a dictionary where each key is a provider and each value is a list of ac "inserted_at": "2026-03-12T00:00:00", "updated_at": "2026-03-12T00:00:00" } - ], - "google": [] + ] } } ``` diff --git a/backend/app/api/routes/model_config.py b/backend/app/api/routes/model_config.py index 33089eb80..486528e0d 100644 --- a/backend/app/api/routes/model_config.py +++ b/backend/app/api/routes/model_config.py @@ -1,16 +1,11 @@ import logging -from typing import Literal from collections import defaultdict from fastapi import APIRouter, HTTPException -from app.api.deps import AuthContextDep, SessionDep -from app.crud.model_config import ( - estimate_model_cost, - get_active_models, - get_model_config, -) -from app.models import ModelConfigPublic, ModelConfigListPublic +from app.api.deps import SessionDep +from app.crud.model_config import get_model_config, list_active_model_configs +from app.models import ModelConfigListPublic, ModelConfigPublic from app.utils import APIResponse, load_description logger = logging.getLogger(__name__) @@ -24,12 +19,11 @@ ) def list_models( session: SessionDep, - auth_context: AuthContextDep, provider: str | None = None, skip: int = 0, limit: int = 100, ) -> APIResponse[ModelConfigListPublic]: - models = get_active_models( + models = list_active_model_configs( session=session, provider=provider, skip=skip, limit=limit ) return APIResponse.success_response( @@ -44,9 +38,8 @@ def list_models( ) def list_models_grouped( session: SessionDep, - auth_context: AuthContextDep, ) -> APIResponse[dict[str, list[ModelConfigPublic]]]: - models = get_active_models(session=session, skip=0, limit=1000) + models = list_active_model_configs(session=session, skip=0, limit=1000) grouped: dict[str, list[ModelConfigPublic]] = defaultdict(list) for model in models: grouped[model.provider].append(model) @@ -61,9 +54,8 @@ def list_models_grouped( ) def list_providers( session: SessionDep, - auth_context: AuthContextDep, ) -> APIResponse[list[str]]: - models = get_active_models(session=session, skip=0, limit=1000) + models = list_active_model_configs(session=session, skip=0, limit=1000) providers = sorted({model.provider for model in models}) return APIResponse.success_response(providers) @@ -74,7 +66,7 @@ def list_providers( description=load_description("model_config/get_model.md"), ) def get_model( - session: SessionDep, auth_context: AuthContextDep, provider: str, model_name: str + session: SessionDep, provider: str, model_name: str ) -> APIResponse[ModelConfigPublic]: model = get_model_config(session=session, provider=provider, model_name=model_name) diff --git a/backend/app/crud/model_config.py b/backend/app/crud/model_config.py index 603110127..2bf9cb674 100644 --- a/backend/app/crud/model_config.py +++ b/backend/app/crud/model_config.py @@ -1,20 +1,17 @@ -import logging -from typing import Any, Optional, Literal +from typing import Any, Literal from sqlmodel import Session, select from app.models import ModelConfig -logger = logging.getLogger(__name__) - -def get_active_models( +def list_active_model_configs( session: Session, provider: Literal["openai", "google"] | None = None, skip: int = 0, limit: int = 100, ) -> list[ModelConfig]: - statement = select(ModelConfig).where(ModelConfig.is_active == True) + statement = select(ModelConfig).where(ModelConfig.is_active) if provider: statement = statement.where(ModelConfig.provider == provider) @@ -26,11 +23,11 @@ def get_active_models( def get_model_config( session: Session, provider: Literal["openai", 
"google"], model_name: str -) -> Optional[ModelConfig]: +) -> ModelConfig | None: statement = select(ModelConfig).where( ModelConfig.provider == provider, ModelConfig.model_name == model_name, - ModelConfig.is_active == True, + ModelConfig.is_active, ) return session.exec(statement).first() @@ -41,8 +38,8 @@ def estimate_model_cost( model_name: str, input_tokens: int, output_tokens: int, - tag: Literal["response", "batch"] = "response", -) -> Optional[dict[str, Any]]: + usage_type: Literal["response", "batch"] = "response", +) -> dict[str, Any] | None: model = get_model_config(session=session, provider=provider, model_name=model_name) if model is None or model.pricing is None: return None @@ -51,12 +48,12 @@ def estimate_model_cost( return None pricing_source: dict[str, Any] = model.pricing - tag_pricing = pricing_source.get(tag) - if not isinstance(tag_pricing, dict): + usage_pricing = pricing_source.get(usage_type) + if not isinstance(usage_pricing, dict): return None - input_price = tag_pricing.get("input_token_cost") - output_price = tag_pricing.get("output_token_cost") + input_price = usage_pricing.get("input_token_cost") + output_price = usage_pricing.get("output_token_cost") if not isinstance(input_price, (int, float)) or not isinstance( output_price, (int, float) @@ -70,7 +67,7 @@ def estimate_model_cost( return { "provider": provider, "model_name": model_name, - "tag": tag, + "usage_type": usage_type, "input_tokens": input_tokens, "output_tokens": output_tokens, "input_cost": input_cost, diff --git a/backend/app/models/model_config.py b/backend/app/models/model_config.py index 464c5f746..b66c9efe6 100644 --- a/backend/app/models/model_config.py +++ b/backend/app/models/model_config.py @@ -4,7 +4,7 @@ import sqlalchemy as sa from app.core.util import now from sqlmodel import Field, SQLModel -from sqlalchemy.dialects.postgresql import JSONB, ARRAY +from sqlalchemy.dialects.postgresql import ARRAY, JSONB class ModelConfigBase(SQLModel): @@ -120,38 +120,3 @@ class ModelConfigPublic(ModelConfigBase): class ModelConfigListPublic(SQLModel): data: list[ModelConfigPublic] count: int - - -# if __name__ == "__main__": -# import os - -# from sqlmodel import Session, create_engine - -# from app.crud.model_config import estimate_model_cost - -# database_url = "postgresql+psycopg://postgres:postgres@localhost:5432/kaapi" -# engine = create_engine(database_url) - -# with Session(engine) as session: -# input_tokens = 5000 -# output_tokens = 10000 - -# response_cost_info = estimate_model_cost( -# session=session, -# provider="openai", -# model_name="gpt-4o", -# input_tokens=input_tokens, -# output_tokens=output_tokens, -# tag="response", -# ) -# print(response_cost_info) - -# batch_cost_info = estimate_model_cost( -# session=session, -# provider="openai", -# model_name="gpt-4o", -# input_tokens=input_tokens, -# output_tokens=output_tokens, -# tag="batch", -# ) -# print(batch_cost_info) diff --git a/backend/app/tests/crud/test_model_config.py b/backend/app/tests/crud/test_model_config.py new file mode 100644 index 000000000..be606f296 --- /dev/null +++ b/backend/app/tests/crud/test_model_config.py @@ -0,0 +1,162 @@ +from types import SimpleNamespace +from typing import Any + +import pytest + +from app.crud import model_config as model_config_crud + + +def _patch_model( + monkeypatch: pytest.MonkeyPatch, + pricing: Any, +) -> None: + model = SimpleNamespace(pricing=pricing) + monkeypatch.setattr( + model_config_crud, + "get_model_config", + lambda session, provider, model_name: model, + ) + + +def 
test_estimate_model_cost_response_success(monkeypatch: pytest.MonkeyPatch) -> None: + _patch_model( + monkeypatch, + pricing={ + "response": {"input_token_cost": 2.5, "output_token_cost": 10.0}, + "batch": {"input_token_cost": 1.25, "output_token_cost": 5.0}, + }, + ) + + result = model_config_crud.estimate_model_cost( + session=None, # type: ignore[arg-type] + provider="openai", + model_name="gpt-4o", + input_tokens=1_000_000, + output_tokens=500_000, + usage_type="response", + ) + + assert result is not None + assert result["usage_type"] == "response" + assert result["input_cost"] == 2.5 + assert result["output_cost"] == 5.0 + assert result["total_cost"] == 7.5 + + +def test_estimate_model_cost_batch_success(monkeypatch: pytest.MonkeyPatch) -> None: + _patch_model( + monkeypatch, + pricing={ + "response": {"input_token_cost": 2.5, "output_token_cost": 10.0}, + "batch": {"input_token_cost": 1.25, "output_token_cost": 5.0}, + }, + ) + + result = model_config_crud.estimate_model_cost( + session=None, # type: ignore[arg-type] + provider="openai", + model_name="gpt-4o", + input_tokens=1_000_000, + output_tokens=500_000, + usage_type="batch", + ) + + assert result is not None + assert result["usage_type"] == "batch" + assert result["input_cost"] == 1.25 + assert result["output_cost"] == 2.5 + assert result["total_cost"] == 3.75 + + +def test_estimate_model_cost_returns_none_for_missing_model( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.setattr( + model_config_crud, + "get_model_config", + lambda session, provider, model_name: None, + ) + + result = model_config_crud.estimate_model_cost( + session=None, # type: ignore[arg-type] + provider="openai", + model_name="does-not-exist", + input_tokens=1000, + output_tokens=1000, + ) + + assert result is None + + +def test_estimate_model_cost_returns_none_for_null_pricing( + monkeypatch: pytest.MonkeyPatch, +) -> None: + _patch_model(monkeypatch, pricing=None) + + result = model_config_crud.estimate_model_cost( + session=None, # type: ignore[arg-type] + provider="openai", + model_name="gpt-4o", + input_tokens=1000, + output_tokens=1000, + ) + + assert result is None + + +def test_estimate_model_cost_returns_none_for_non_dict_pricing( + monkeypatch: pytest.MonkeyPatch, +) -> None: + _patch_model(monkeypatch, pricing=["invalid"]) + + result = model_config_crud.estimate_model_cost( + session=None, # type: ignore[arg-type] + provider="openai", + model_name="gpt-4o", + input_tokens=1000, + output_tokens=1000, + ) + + assert result is None + + +def test_estimate_model_cost_returns_none_for_missing_usage_type( + monkeypatch: pytest.MonkeyPatch, +) -> None: + _patch_model( + monkeypatch, + pricing={"response": {"input_token_cost": 1.0, "output_token_cost": 2.0}}, + ) + + result = model_config_crud.estimate_model_cost( + session=None, # type: ignore[arg-type] + provider="openai", + model_name="gpt-4o", + input_tokens=1000, + output_tokens=1000, + usage_type="batch", + ) + + assert result is None + + +def test_estimate_model_cost_returns_none_for_non_numeric_prices( + monkeypatch: pytest.MonkeyPatch, +) -> None: + _patch_model( + monkeypatch, + pricing={ + "response": {"input_token_cost": "cheap", "output_token_cost": "expensive"} + }, + ) + + result = model_config_crud.estimate_model_cost( + session=None, # type: ignore[arg-type] + provider="openai", + model_name="gpt-4o", + input_tokens=1000, + output_tokens=1000, + usage_type="response", + ) + + assert result is None From ac87722fbe50ed6d1f648a6966a69f2a3b5bbdd8 Mon Sep 17 00:00:00 2001 From: 
Prashant Vasudevan <71649489+vprashrex@users.noreply.github.com> Date: Wed, 15 Apr 2026 11:47:28 +0530 Subject: [PATCH 09/10] refactor: remove unnecessary logging and clean up imports in model_config --- backend/app/api/routes/model_config.py | 3 --- backend/app/models/model_config.py | 13 ++++++++----- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/backend/app/api/routes/model_config.py b/backend/app/api/routes/model_config.py index 486528e0d..814c8c012 100644 --- a/backend/app/api/routes/model_config.py +++ b/backend/app/api/routes/model_config.py @@ -71,9 +71,6 @@ def get_model( model = get_model_config(session=session, provider=provider, model_name=model_name) if model is None: - logger.error( - f"[get_model] Model not found | provider={provider}, model_name={model_name}" - ) raise HTTPException(status_code=404, detail="Model not found") return APIResponse.success_response(model) diff --git a/backend/app/models/model_config.py b/backend/app/models/model_config.py index b66c9efe6..bc1d14115 100644 --- a/backend/app/models/model_config.py +++ b/backend/app/models/model_config.py @@ -1,17 +1,20 @@ from datetime import datetime -from typing import Any, Literal, Optional +from typing import Any, Literal import sqlalchemy as sa -from app.core.util import now -from sqlmodel import Field, SQLModel from sqlalchemy.dialects.postgresql import ARRAY, JSONB +from sqlmodel import Field, SQLModel + +from app.core.util import now class ModelConfigBase(SQLModel): provider: Literal["openai", "google"] = Field( default="openai", sa_column=sa.Column( - sa.String, nullable=False, comment="provider name (e.g. openai, google)" + sa.String, + nullable=False, + comment="provider name (e.g. openai, google)", ), ) @@ -49,7 +52,7 @@ class ModelConfigBase(SQLModel): ), ) - pricing: Optional[dict[str, Any]] = Field( + pricing: dict[str, Any] | None = Field( default=None, sa_column=sa.Column( JSONB, From 433581f00e62dc64b4426e6d1d3d75e4ae8e3e2b Mon Sep 17 00:00:00 2001 From: Prashant Vasudevan <71649489+vprashrex@users.noreply.github.com> Date: Wed, 15 Apr 2026 13:08:47 +0530 Subject: [PATCH 10/10] feat: add pagination support and metadata for model listing endpoints --- .../app/api/docs/model_config/list_models.md | 5 +- .../docs/model_config/list_models_grouped.md | 12 ++++- backend/app/api/routes/model_config.py | 29 +++++++---- backend/app/crud/model_config.py | 22 +++++++- .../app/tests/api/routes/test_model_config.py | 52 +++++++++++++++++++ 5 files changed, 107 insertions(+), 13 deletions(-) diff --git a/backend/app/api/docs/model_config/list_models.md b/backend/app/api/docs/model_config/list_models.md index 412f0bfc3..321a3d673 100644 --- a/backend/app/api/docs/model_config/list_models.md +++ b/backend/app/api/docs/model_config/list_models.md @@ -12,13 +12,16 @@ Optionally filter by provider (e.g. openai, google). - **`provider`** (optional) — Filter by provider name (e.g. 
`openai`, `google`) - **`skip`** (optional, default 0) — Number of records to skip for pagination -- **`limit`** (optional, default 100) — Maximum number of records to return +- **`limit`** (optional, default 100, max 100) — Maximum number of records to return ### Example Response ```json { "success": true, + "metadata": { + "has_more": true + }, "data": { "data": [ { diff --git a/backend/app/api/docs/model_config/list_models_grouped.md b/backend/app/api/docs/model_config/list_models_grouped.md index eeb3a0246..706beba37 100644 --- a/backend/app/api/docs/model_config/list_models_grouped.md +++ b/backend/app/api/docs/model_config/list_models_grouped.md @@ -2,15 +2,23 @@ **GET** `/api/v1/models/grouped` -Retrieve all active models grouped by provider. +Retrieve active models grouped by provider. -Returns a dictionary where each key is a provider present in active records, and each value is a list of active model configurations for that provider. +Supports pagination of model rows before grouping: +- `skip` (default `0`) +- `limit` (default `100`, max `100`) + +Returns a dictionary where each key is a provider present in the paginated slice, and each value is a list of active model configurations for that provider. +Includes `metadata.has_more` when additional model rows exist. ### Example Response ```json { "success": true, + "metadata": { + "has_more": true + }, "data": { "openai": [ { diff --git a/backend/app/api/routes/model_config.py b/backend/app/api/routes/model_config.py index 814c8c012..565149c7e 100644 --- a/backend/app/api/routes/model_config.py +++ b/backend/app/api/routes/model_config.py @@ -1,10 +1,14 @@ import logging from collections import defaultdict -from fastapi import APIRouter, HTTPException +from fastapi import APIRouter, HTTPException, Query from app.api.deps import SessionDep -from app.crud.model_config import get_model_config, list_active_model_configs +from app.crud.model_config import ( + get_model_config, + list_active_model_configs, + list_all_active_model_configs, +) from app.models import ModelConfigListPublic, ModelConfigPublic from app.utils import APIResponse, load_description @@ -20,14 +24,15 @@ def list_models( session: SessionDep, provider: str | None = None, - skip: int = 0, - limit: int = 100, + skip: int = Query(0, ge=0, description="Number of records to skip"), + limit: int = Query(100, ge=1, le=100, description="Maximum records to return"), ) -> APIResponse[ModelConfigListPublic]: - models = list_active_model_configs( + models, has_more = list_active_model_configs( session=session, provider=provider, skip=skip, limit=limit ) return APIResponse.success_response( - ModelConfigListPublic(data=models, count=len(models)) + ModelConfigListPublic(data=models, count=len(models)), + metadata={"has_more": has_more}, ) @@ -38,13 +43,19 @@ def list_models( ) def list_models_grouped( session: SessionDep, + skip: int = Query(0, ge=0, description="Number of model records to skip"), + limit: int = Query( + 100, ge=1, le=100, description="Maximum model records to return before grouping" + ), ) -> APIResponse[dict[str, list[ModelConfigPublic]]]: - models = list_active_model_configs(session=session, skip=0, limit=1000) + models, has_more = list_active_model_configs( + session=session, skip=skip, limit=limit + ) grouped: dict[str, list[ModelConfigPublic]] = defaultdict(list) for model in models: grouped[model.provider].append(model) - return APIResponse.success_response(dict(grouped)) + return APIResponse.success_response(dict(grouped), metadata={"has_more": has_more}) 
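With `has_more` returned in the response metadata, a caller can page through the full list without a separate count query. A minimal client-side sketch (assumes a local deployment at a placeholder `BASE_URL` and a valid bearer token; neither is part of this patch):

```python
import requests

BASE_URL = "http://localhost:8000/api/v1"  # placeholder deployment URL
TOKEN = "<superuser-token>"  # hypothetical bearer token

def fetch_all_models() -> list[dict]:
    # Page through /models/ with skip/limit until metadata.has_more is False.
    models: list[dict] = []
    skip, limit = 0, 100
    while True:
        resp = requests.get(
            f"{BASE_URL}/models/",
            params={"skip": skip, "limit": limit},
            headers={"Authorization": f"Bearer {TOKEN}"},
        )
        resp.raise_for_status()
        body = resp.json()
        models.extend(body["data"]["data"])
        if not body["metadata"]["has_more"]:
            return models
        skip += limit
```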
@router.get( @@ -55,7 +66,7 @@ def list_models_grouped( def list_providers( session: SessionDep, ) -> APIResponse[list[str]]: - models = list_active_model_configs(session=session, skip=0, limit=1000) + models = list_all_active_model_configs(session=session) providers = sorted({model.provider for model in models}) return APIResponse.success_response(providers) diff --git a/backend/app/crud/model_config.py b/backend/app/crud/model_config.py index 2bf9cb674..fed1f71c7 100644 --- a/backend/app/crud/model_config.py +++ b/backend/app/crud/model_config.py @@ -10,6 +10,27 @@ def list_active_model_configs( provider: Literal["openai", "google"] | None = None, skip: int = 0, limit: int = 100, +) -> tuple[list[ModelConfig], bool]: + statement = select(ModelConfig).where(ModelConfig.is_active) + + if provider: + statement = statement.where(ModelConfig.provider == provider) + + statement = statement.order_by(ModelConfig.provider, ModelConfig.model_name) + statement = statement.offset(skip).limit(limit + 1) + models = list(session.exec(statement).all()) + + has_more = False + if len(models) > limit: + has_more = True + models = models[:limit] + + return models, has_more + + +def list_all_active_model_configs( + session: Session, + provider: Literal["openai", "google"] | None = None, ) -> list[ModelConfig]: statement = select(ModelConfig).where(ModelConfig.is_active) @@ -17,7 +38,6 @@ def list_active_model_configs( statement = statement.where(ModelConfig.provider == provider) statement = statement.order_by(ModelConfig.provider, ModelConfig.model_name) - statement = statement.offset(skip).limit(limit) return list(session.exec(statement).all()) diff --git a/backend/app/tests/api/routes/test_model_config.py b/backend/app/tests/api/routes/test_model_config.py index 792918026..4f111194e 100644 --- a/backend/app/tests/api/routes/test_model_config.py +++ b/backend/app/tests/api/routes/test_model_config.py @@ -14,10 +14,26 @@ def test_list_models( assert response.status_code == 200 body = response.json() assert body["success"] is True + assert "has_more" in body["metadata"] assert body["data"]["count"] > 0 assert all(m["is_active"] for m in body["data"]["data"]) +def test_list_models_has_more( + client: TestClient, superuser_token_headers: dict[str, str] +) -> None: + response = client.get( + f"{settings.API_V1_STR}/models/?skip=0&limit=1", + headers=superuser_token_headers, + ) + + assert response.status_code == 200 + body = response.json() + assert body["success"] is True + assert body["data"]["count"] == 1 + assert body["metadata"]["has_more"] is True + + def test_list_models_filter_by_provider( client: TestClient, superuser_token_headers: dict[str, str] ) -> None: @@ -32,6 +48,16 @@ def test_list_models_filter_by_provider( assert all(m["provider"] == "openai" for m in data) +def test_list_models_invalid_limit( + client: TestClient, superuser_token_headers: dict[str, str] +) -> None: + response = client.get( + f"{settings.API_V1_STR}/models/?skip=0&limit=0", + headers=superuser_token_headers, + ) + assert response.status_code == 422 + + def test_get_model(client: TestClient, superuser_token_headers: dict[str, str]) -> None: response = client.get( f"{settings.API_V1_STR}/models/openai/gpt-4o", @@ -67,6 +93,7 @@ def test_list_models_grouped( assert response.status_code == 200 body = response.json() assert body["success"] is True + assert "has_more" in body["metadata"] grouped_models = body["data"] assert grouped_models @@ -77,6 +104,31 @@ def test_list_models_grouped( assert all(model["is_active"] for model in 
models) +def test_list_models_grouped_has_more( + client: TestClient, superuser_token_headers: dict[str, str] +) -> None: + response = client.get( + f"{settings.API_V1_STR}/models/grouped?skip=0&limit=1", + headers=superuser_token_headers, + ) + + assert response.status_code == 200 + body = response.json() + assert body["success"] is True + assert body["metadata"]["has_more"] is True + + +def test_list_models_grouped_invalid_limit( + client: TestClient, superuser_token_headers: dict[str, str] +) -> None: + response = client.get( + f"{settings.API_V1_STR}/models/grouped?skip=0&limit=0", + headers=superuser_token_headers, + ) + + assert response.status_code == 422 + + def test_list_providers( client: TestClient, superuser_token_headers: dict[str, str] ) -> None:
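A closing note on the cost arithmetic this series settles on: `estimate_model_cost` prices tokens per 1M and rounds the total to 4 decimal places. A standalone replica of that math, using the gpt-4o `response` prices from `test_estimate_model_cost_response_success` above (2.5 / 10.0 USD per 1M tokens):

```python
# Same arithmetic as estimate_model_cost, minus the DB lookup and validation.
INPUT_PRICE = 2.5    # USD per 1M input tokens (gpt-4o "response" tier)
OUTPUT_PRICE = 10.0  # USD per 1M output tokens

def estimate_cost(input_tokens: int, output_tokens: int) -> float:
    input_cost = (input_tokens / 1_000_000) * INPUT_PRICE
    output_cost = (output_tokens / 1_000_000) * OUTPUT_PRICE
    return round(input_cost + output_cost, 4)

# 5,000 input + 10,000 output tokens: 0.0125 + 0.1000 = 0.1125 USD
assert estimate_cost(5_000, 10_000) == 0.1125
```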