Skip to content
This repository was archived by the owner on Jul 4, 2025. It is now read-only.

Commit 0ce34f8

Browse files
committed
chore: add more request body parameters for models start
1 parent 4530042 commit 0ce34f8

File tree

1 file changed

+55
-3
lines changed

1 file changed

+55
-3
lines changed

docs/static/openapi/cortex.json

Lines changed: 55 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -457,7 +457,7 @@
457457
"post": {
458458
"operationId": "ModelsController_startModel",
459459
"summary": "Start model",
460-
"description": "Load a model into memory.",
460+
"description": "Load a model into memory. Note: Request body parameters will override those loaded from model.yml",
461461
"requestBody": {
462462
"required": true,
463463
"content": {
@@ -498,7 +498,7 @@
498498
"content": {
499499
"application/json": {
500500
"schema": {
501-
"$ref": "#/components/schemas/ModelStartDto"
501+
"$ref": "#/components/schemas/ModelStopDto"
502502
},
503503
"example": {
504504
"model": "llama3:8b-gguf-q6-k"
@@ -3173,14 +3173,66 @@
31733173
]
31743174
},
31753175
"ModelStartDto": {
3176+
"type": "object",
3177+
"properties": {
3178+
"model": {
3179+
"type": "string",
3180+
"example": "llama3:8b-gguf-q6-k",
3181+
"description": "A downloaded model name."
3182+
},
3183+
"ctx_len": {
3184+
"type": "number",
3185+
"description": "The context length for model operations varies; the maximum depends on the specific model used.",
3186+
"example": 4096
3187+
},
3188+
"ngl": {
3189+
"type": "number",
3190+
"description": "Determines GPU layer usage.",
3191+
"example": 32
3192+
},
3193+
"n_parallel": {
3194+
"type": "number",
3195+
"minimum": 1,
3196+
"description": "Number of parallel processing units to use.",
3197+
"example": 1
3198+
},
3199+
"cache_type": {
3200+
"type": "string",
3201+
"description": "KV cache type: f16, q8_0, q4_0, default is f16",
3202+
"example": "f16"
3203+
},
3204+
"caching_enabled": {
3205+
"type": "boolean",
3206+
"description": "To enable prompt caching or not",
3207+
"example": true
3208+
},
3209+
"model_path": {
3210+
"type": "string",
3211+
"description": "Local path to LLM model file",
3212+
"example": "/tmp/model.gguf"
3213+
},
3214+
"mmproj": {
3215+
"type": "string",
3216+
"description": "Local path to mmproj model file",
3217+
"example": "/tmp/model.gguf"
3218+
}
3219+
},
3220+
"required": [
3221+
"model"
3222+
]
3223+
},
3224+
"ModelStopDto": {
31763225
"type": "object",
31773226
"properties": {
31783227
"model": {
31793228
"type": "string",
31803229
"example": "llama3:8b-gguf-q6-k",
31813230
"description": "A downloaded model name."
31823231
}
3183-
}
3232+
},
3233+
"required": [
3234+
"model"
3235+
]
31843236
},
31853237
"ImportModelRequest": {
31863238
"type": "object",

0 commit comments

Comments
 (0)