|
457 | 457 | "post": { |
458 | 458 | "operationId": "ModelsController_startModel", |
459 | 459 | "summary": "Start model", |
460 | | - "description": "Load a model into memory.", |
| 460 | + "description": "Load a model into memory. Note: Request body parameters will override those loaded from model.yml.", |
461 | 461 | "requestBody": { |
462 | 462 | "required": true, |
463 | 463 | "content": { |
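
For context, a minimal client sketch of the updated "Start model" operation: the request body names a downloaded model and may carry optional overrides that take precedence over the values in model.yml. The base URL and the /v1/models/start route are assumptions that do not appear in this excerpt, so adjust them to the paths declared elsewhere in the spec.

```python
# Minimal sketch of a start-model call. The server address and the
# /v1/models/start route are assumptions -- neither is shown in this diff.
import requests

BASE_URL = "http://127.0.0.1:39281"  # assumed local server address

payload = {
    "model": "llama3:8b-gguf-q6-k",  # required: a downloaded model name
    "ctx_len": 2048,                 # optional: overrides the value loaded from model.yml
}

resp = requests.post(f"{BASE_URL}/v1/models/start", json=payload, timeout=60)
resp.raise_for_status()
print(resp.json())
```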
|
498 | 498 | "content": { |
499 | 499 | "application/json": { |
500 | 500 | "schema": { |
501 | | - "$ref": "#/components/schemas/ModelStartDto" |
| 501 | + "$ref": "#/components/schemas/ModelStopDto" |
502 | 502 | }, |
503 | 503 | "example": { |
504 | 504 | "model": "llama3:8b-gguf-q6-k" |
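
A companion sketch for the stop operation, whose schema reference is corrected above to ModelStopDto. As before, the /v1/models/stop route and the base URL are assumptions; check the full spec for the actual paths.

```python
# Sketch of a stop-model call, assuming the same local server and a
# /v1/models/stop route (both assumptions not present in this diff).
import requests

BASE_URL = "http://127.0.0.1:39281"  # assumed local server address

resp = requests.post(
    f"{BASE_URL}/v1/models/stop",
    json={"model": "llama3:8b-gguf-q6-k"},  # matches the ModelStopDto example
    timeout=60,
)
resp.raise_for_status()
print(resp.json())
```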
|
3173 | 3173 | ] |
3174 | 3174 | }, |
3175 | 3175 | "ModelStartDto": { |
| 3176 | + "type": "object", |
| 3177 | + "properties": { |
| 3178 | + "model": { |
| 3179 | + "type": "string", |
| 3180 | + "example": "llama3:8b-gguf-q6-k", |
| 3181 | + "description": "A downloaded model name." |
| 3182 | + }, |
| 3183 | + "ctx_len": { |
| 3184 | + "type": "number", |
| 3185 | + "description": "The context length for model operations; the maximum value depends on the specific model used.", |
| 3186 | + "example": 4096 |
| 3187 | + }, |
| 3188 | + "ngl": { |
| 3189 | + "type": "number", |
| 3190 | + "description": "The number of model layers to offload to the GPU.", |
| 3191 | + "example": 32 |
| 3192 | + }, |
| 3193 | + "n_parallel": { |
| 3194 | + "type": "number", |
| 3195 | + "minimum": 1, |
| 3196 | + "description": "The number of parallel sequences (concurrent requests) to process.", |
| 3197 | + "example": 1 |
| 3198 | + }, |
| 3199 | + "cache_type": { |
| 3200 | + "type": "string", |
| 3201 | + "description": "KV cache type: f16, q8_0, or q4_0. Defaults to f16.", |
| 3202 | + "example": "f16" |
| 3203 | + }, |
| 3204 | + "caching_enabled": { |
| 3205 | + "type": "boolean", |
| 3206 | + "description": "Whether to enable prompt caching.", |
| 3207 | + "example": true |
| 3208 | + }, |
| 3209 | + "model_path": { |
| 3210 | + "type": "string", |
| 3211 | + "description": "Local path to the LLM model file.", |
| 3212 | + "example": "/tmp/model.gguf" |
| 3213 | + }, |
| 3214 | + "mmproj": { |
| 3215 | + "type": "string", |
| 3216 | + "description": "Local path to the multimodal projector (mmproj) model file.", |
| 3217 | + "example": "/tmp/model.gguf" |
| 3218 | + } |
| 3219 | + }, |
| 3220 | + "required": [ |
| 3221 | + "model" |
| 3222 | + ] |
| 3223 | + }, |
| 3224 | + "ModelStopDto": { |
3176 | 3225 | "type": "object", |
3177 | 3226 | "properties": { |
3178 | 3227 | "model": { |
3179 | 3228 | "type": "string", |
3180 | 3229 | "example": "llama3:8b-gguf-q6-k", |
3181 | 3230 | "description": "A downloaded model name." |
3182 | 3231 | } |
3183 | | - } |
| 3232 | + }, |
| 3233 | + "required": [ |
| 3234 | + "model" |
| 3235 | + ] |
3184 | 3236 | }, |
3185 | 3237 | "ImportModelRequest": { |
3186 | 3238 | "type": "object", |
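
As a quick sanity check on the new schemas, the documented example values can be validated against a hand-transcribed copy of ModelStartDto (descriptions omitted, so treat it as an approximation of the spec above) using the third-party jsonschema package:

```python
# Validate a start-model payload against the ModelStartDto schema added above.
# The schema dict is transcribed from the diff; install the validator with
# `pip install jsonschema`.
from jsonschema import validate

MODEL_START_DTO = {
    "type": "object",
    "properties": {
        "model": {"type": "string"},
        "ctx_len": {"type": "number"},
        "ngl": {"type": "number"},
        "n_parallel": {"type": "number", "minimum": 1},
        "cache_type": {"type": "string"},
        "caching_enabled": {"type": "boolean"},
        "model_path": {"type": "string"},
        "mmproj": {"type": "string"},
    },
    "required": ["model"],
}

payload = {
    "model": "llama3:8b-gguf-q6-k",
    "ctx_len": 4096,
    "ngl": 32,
    "n_parallel": 1,
    "cache_type": "f16",
    "caching_enabled": True,
    "model_path": "/tmp/model.gguf",
}

validate(instance=payload, schema=MODEL_START_DTO)  # raises ValidationError on a bad payload
print("payload conforms to ModelStartDto")
```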
|