|
3537 | 3537 | }, |
3538 | 3538 | "UpdateModelDto": { |
3539 | 3539 | "type": "object", |
3540 | | - "properties": {} |
| 3540 | + "properties": { |
| 3541 | + "model": { |
| 3542 | + "type": "string", |
| 3543 | + "description": "Unique model identifier used for request construction.", |
| 3544 | + "example": "tinyllama:1b-gguf" |
| 3545 | + }, |
| 3546 | + "files": { |
| 3547 | + "type": "array", |
| 3548 | + "description": "List of file paths associated with the model. Can be relative or absolute.", |
| 3549 | + "items": { |
| 3550 | + "type": "string", |
| 3551 | + "example": "models\\cortex.so\\tinyllama\\1b-gguf\\model.gguf" |
| 3552 | + } |
| 3553 | + }, |
| 3554 | + "stop": { |
| 3555 | + "type": "array", |
| 3556 | + "description": "Tokens that signal the end of generation.", |
| 3557 | + "items": { |
| 3558 | + "type": "string" |
| 3559 | + }, |
| 3560 | + "example": [ |
| 3561 | + "</s>" |
| 3562 | + ] |
| 3563 | + }, |
| 3564 | + "stream": { |
| 3565 | + "type": "boolean", |
| 3566 | + "description": "Whether to stream the output as it is generated.", |
| 3567 | + "example": true |
| 3568 | + }, |
| 3569 | + "top_p": { |
| 3570 | + "type": "number", |
| 3571 | + "description": "Controls nucleus sampling; the model considers the results of the tokens with top_p probability mass.", |
| 3572 | + "example": 0.95 |
| 3573 | + }, |
| 3574 | + "temperature": { |
| 3575 | + "type": "number", |
| 3576 | + "description": "Controls randomness in token selection; lower values make the output more deterministic.", |
| 3577 | + "example": 0.7 |
| 3578 | + }, |
| 3579 | + "frequency_penalty": { |
| 3580 | + "type": "number", |
| 3581 | + "description": "Penalizes repeated tokens based on their frequency.", |
| 3582 | + "example": 0 |
| 3583 | + }, |
| 3584 | + "presence_penalty": { |
| 3585 | + "type": "number", |
| 3586 | + "description": "Penalizes tokens that have already appeared in the output.", |
| 3587 | + "example": 0 |
| 3588 | + }, |
| 3589 | + "max_tokens": { |
| 3590 | + "type": "integer", |
| 3591 | + "description": "Maximum number of tokens to generate.", |
| 3592 | + "example": 4096 |
| 3593 | + }, |
| 3594 | + "seed": { |
| 3595 | + "type": "integer", |
| 3596 | + "description": "Seed for random number generation to ensure reproducibility; -1 for random seed.", |
| 3597 | + "example": -1 |
| 3598 | + }, |
| 3599 | + "dynatemp_range": { |
| 3600 | + "type": "number", |
| 3601 | + "description": "Range for dynamic temperature adjustment.", |
| 3602 | + "example": 0 |
| 3603 | + }, |
| 3604 | + "dynatemp_exponent": { |
| 3605 | + "type": "number", |
| 3606 | + "description": "Exponent for dynamic temperature adjustment.", |
| 3607 | + "example": 1 |
| 3608 | + }, |
| 3609 | + "top_k": { |
| 3610 | + "type": "integer", |
| 3611 | + "description": "Limits the sampling pool to the top_k most probable tokens.", |
| 3612 | + "example": 40 |
| 3613 | + }, |
| 3614 | + "min_p": { |
| 3615 | + "type": "number", |
| 3616 | + "description": "Minimum probability threshold for token selection.", |
| 3617 | + "example": 0.05 |
| 3618 | + }, |
| 3619 | + "tfs_z": { |
| 3620 | + "type": "number", |
| 3621 | + "description": "Threshold for token frequency sampling.", |
| 3622 | + "example": 1 |
| 3623 | + }, |
| 3624 | + "typ_p": { |
| 3625 | + "type": "number", |
| 3626 | + "description": "Controls typical sampling; similar to top_p but focuses on local token distribution.", |
| 3627 | + "example": 1 |
| 3628 | + }, |
| 3629 | + "repeat_last_n": { |
| 3630 | + "type": "integer", |
| 3631 | + "description": "Number of recent tokens to consider for repetition penalty.", |
| 3632 | + "example": 64 |
| 3633 | + }, |
| 3634 | + "repeat_penalty": { |
| 3635 | + "type": "number", |
| 3636 | + "description": "Penalty applied to repeated tokens.", |
| 3637 | + "example": 1 |
| 3638 | + }, |
| 3639 | + "mirostat": { |
| 3640 | + "type": "boolean", |
| 3641 | + "description": "Enables or disables Mirostat sampling.", |
| 3642 | + "example": false |
| 3643 | + }, |
| 3644 | + "mirostat_tau": { |
| 3645 | + "type": "number", |
| 3646 | + "description": "Target entropy for Mirostat sampling.", |
| 3647 | + "example": 5 |
| 3648 | + }, |
| 3649 | + "mirostat_eta": { |
| 3650 | + "type": "number", |
| 3651 | + "description": "Learning rate for Mirostat sampling.", |
| 3652 | + "example": 0.1 |
| 3653 | + }, |
| 3654 | + "penalize_nl": { |
| 3655 | + "type": "boolean", |
| 3656 | + "description": "Whether to penalize newline tokens.", |
| 3657 | + "example": false |
| 3658 | + }, |
| 3659 | + "ignore_eos": { |
| 3660 | + "type": "boolean", |
| 3661 | + "description": "Whether to ignore end-of-sequence tokens during generation.", |
| 3662 | + "example": false |
| 3663 | + }, |
| 3664 | + "n_probs": { |
| 3665 | + "type": "integer", |
| 3666 | + "description": "Number of probabilities to consider for each token.", |
| 3667 | + "example": 0 |
| 3668 | + }, |
| 3669 | + "min_keep": { |
| 3670 | + "type": "integer", |
| 3671 | + "description": "Minimum number of tokens to keep in the buffer.", |
| 3672 | + "example": 0 |
| 3673 | + }, |
| 3674 | + "engine": { |
| 3675 | + "type": "string", |
| 3676 | + "description": "The engine used to run the model.", |
| 3677 | + "example": "llama-cpp" |
| 3678 | + }, |
| 3679 | + "prompt_template": { |
| 3680 | + "type": "string", |
| 3681 | + "description": "Template used for formatting prompts.", |
| 3682 | + "example": "\n\n<|system|>\n{system_message}</s>\n\n\n\n\n<|user|>\n{prompt}</s>\n\n\n<|assistant|>\n\n" |
| 3683 | + }, |
| 3684 | + "ctx_len": { |
| 3685 | + "type": "integer", |
| 3686 | + "description": "Context length for the model.", |
| 3687 | + "example": 4096 |
| 3688 | + }, |
| 3689 | + "n_parallel": { |
| 3690 | + "type": "integer", |
| 3691 | + "description": "Number of parallel threads for execution.", |
| 3692 | + "example": 1 |
| 3693 | + }, |
| 3694 | + "ngl": { |
| 3695 | + "type": "integer", |
| 3696 | + "description": "Number of GPU layers.", |
| 3697 | + "example": 33 |
| 3698 | + } |
| 3699 | + } |
3541 | 3700 | }, |
3542 | 3701 | "DeleteModelResponseDto": { |
3543 | 3702 | "type": "object", |
|
0 commit comments