chore: models update request body

vansangpfiev · vansangpfiev · commit f7aa5fe4c310 · 2024-11-04T16:51:00.000+07:00
diff --git a/docs/static/openapi/cortex.json b/docs/static/openapi/cortex.json
@@ -3537,7 +3537,166 @@
       },
       "UpdateModelDto": {
         "type": "object",
-        "properties": {}
+        "properties": {
+          "model": {
+            "type": "string",
+            "description": "Unique model identifier used for request construction.",
+            "example": "tinyllama:1b-gguf"
+          },
+          "files": {
+            "type": "array",
+            "description": "List of file paths associated with the model. Can be relative or absolute.",
+            "items": {
+              "type": "string",
+              "example": "models\\cortex.so\\tinyllama\\1b-gguf\\model.gguf"
+            }
+          },
+          "stop": {
+            "type": "array",
+            "description": "Tokens that signal the end of generation.",
+            "items": {
+              "type": "string"
+            },
+            "example": [
+              "</s>"
+            ]
+          },
+          "stream": {
+            "type": "boolean",
+            "description": "Whether to stream the output as it is generated.",
+            "example": true
+          },
+          "top_p": {
+            "type": "number",
+            "description": "Nucleus sampling threshold; only the most probable tokens whose cumulative probability reaches top_p are considered.",
+            "example": 0.95
+          },
+          "temperature": {
+            "type": "number",
+            "description": "Controls randomness in token selection; lower values make the output more deterministic.",
+            "example": 0.7
+          },
+          "frequency_penalty": {
+            "type": "number",
+            "description": "Penalizes repeated tokens based on their frequency.",
+            "example": 0
+          },
+          "presence_penalty": {
+            "type": "number",
+            "description": "Penalizes tokens that have already appeared in the output.",
+            "example": 0
+          },
+          "max_tokens": {
+            "type": "integer",
+            "description": "Maximum number of tokens to generate.",
+            "example": 4096
+          },
+          "seed": {
+            "type": "integer",
+            "description": "Seed for random number generation to ensure reproducibility; -1 for random seed.",
+            "example": -1
+          },
+          "dynatemp_range": {
+            "type": "number",
+            "description": "Range for dynamic temperature adjustment.",
+            "example": 0
+          },
+          "dynatemp_exponent": {
+            "type": "number",
+            "description": "Exponent for dynamic temperature adjustment.",
+            "example": 1
+          },
+          "top_k": {
+            "type": "integer",
+            "description": "Limits the sampling pool to the top_k most probable tokens.",
+            "example": 40
+          },
+          "min_p": {
+            "type": "number",
+            "description": "Minimum probability threshold for token selection.",
+            "example": 0.05
+          },
+          "tfs_z": {
+            "type": "number",
+            "description": "Tail-free sampling parameter z; a value of 1 disables tail-free sampling.",
+            "example": 1
+          },
+          "typ_p": {
+            "type": "number",
+            "description": "Locally typical sampling parameter p; a value of 1 disables typical sampling.",
+            "example": 1
+          },
+          "repeat_last_n": {
+            "type": "integer",
+            "description": "Number of recent tokens to consider for repetition penalty.",
+            "example": 64
+          },
+          "repeat_penalty": {
+            "type": "number",
+            "description": "Penalty applied to repeated tokens.",
+            "example": 1
+          },
+          "mirostat": {
+            "type": "boolean",
+            "description": "Enables or disables Mirostat sampling.",
+            "example": false
+          },
+          "mirostat_tau": {
+            "type": "number",
+            "description": "Target entropy for Mirostat sampling.",
+            "example": 5
+          },
+          "mirostat_eta": {
+            "type": "number",
+            "description": "Learning rate for Mirostat sampling.",
+            "example": 0.1
+          },
+          "penalize_nl": {
+            "type": "boolean",
+            "description": "Whether to penalize newline tokens.",
+            "example": false
+          },
+          "ignore_eos": {
+            "type": "boolean",
+            "description": "Whether to ignore end-of-sequence tokens during generation.",
+            "example": false
+          },
+          "n_probs": {
+            "type": "integer",
+            "description": "Number of top-token probabilities to return for each generated token; 0 disables this output.",
+            "example": 0
+          },
+          "min_keep": {
+            "type": "integer",
+            "description": "Minimum number of candidate tokens that samplers must keep; 0 imposes no minimum.",
+            "example": 0
+          },
+          "engine": {
+            "type": "string",
+            "description": "The engine used to run the model.",
+            "example": "llama-cpp"
+          },
+          "prompt_template": {
+            "type": "string",
+            "description": "Template used for formatting prompts.",
+            "example": "\n\n<|system|>\n{system_message}</s>\n\n\n\n\n<|user|>\n{prompt}</s>\n\n\n<|assistant|>\n\n"
+          },
+          "ctx_len": {
+            "type": "integer",
+            "description": "Context length for the model.",
+            "example": 4096
+          },
+          "n_parallel": {
+            "type": "integer",
+            "description": "Number of sequences (requests) the model can process in parallel.",
+            "example": 1
+          },
+          "ngl": {
+            "type": "integer",
+            "description": "Number of model layers to offload to the GPU.",
+            "example": 33
+          }
+        }
       },
       "DeleteModelResponseDto": {
         "type": "object",