diff --git a/backend/cpp/llama-cpp/Makefile b/backend/cpp/llama-cpp/Makefile index 53a19e853a94..f4d3f9ee6ab2 100644 --- a/backend/cpp/llama-cpp/Makefile +++ b/backend/cpp/llama-cpp/Makefile @@ -1,5 +1,5 @@ -LLAMA_VERSION?=d132f22fc92f36848f7ccf2fc9987cd0b0120825 +LLAMA_VERSION?=e62fa13c2497b2cd1958cb496e9489e86bbd5182 LLAMA_REPO?=https://github.com/ggerganov/llama.cpp CMAKE_ARGS?= diff --git a/backend/go/sam3-cpp/Makefile b/backend/go/sam3-cpp/Makefile index 689ed490a4fd..3983b596e217 100644 --- a/backend/go/sam3-cpp/Makefile +++ b/backend/go/sam3-cpp/Makefile @@ -8,7 +8,7 @@ JOBS?=$(shell nproc --ignore=1) # sam3.cpp SAM3_REPO?=https://github.com/PABannier/sam3.cpp -SAM3_VERSION?=8cc6e62bc740d7972746fcd47465ddf1c2b1e3c3 +SAM3_VERSION?=01832ef85fcc8eb6488f1d01cd247f07e96ff5a9 ifeq ($(NATIVE),false) CMAKE_ARGS+=-DGGML_NATIVE=OFF diff --git a/core/application/watchdog.go b/core/application/watchdog.go index f11655512e80..c1aee6c7adb5 100644 --- a/core/application/watchdog.go +++ b/core/application/watchdog.go @@ -5,6 +5,27 @@ import ( "github.com/mudler/xlog" ) +// SyncPinnedModelsToWatchdog reads pinned status from all model configs and updates the watchdog +func (a *Application) SyncPinnedModelsToWatchdog() { + cl := a.ModelConfigLoader() + if cl == nil { + return + } + wd := a.modelLoader.GetWatchDog() + if wd == nil { + return + } + configs := cl.GetAllModelsConfigs() + var pinned []string + for _, cfg := range configs { + if cfg.IsPinned() { + pinned = append(pinned, cfg.Name) + } + } + wd.SetPinnedModels(pinned) + xlog.Debug("Synced pinned models to watchdog", "count", len(pinned)) +} + func (a *Application) StopWatchdog() error { if a.watchdogStop != nil { close(a.watchdogStop) @@ -44,6 +65,9 @@ func (a *Application) startWatchdog() error { // Set the watchdog on the model loader a.modelLoader.SetWatchDog(wd) + // Sync pinned models from config to the watchdog + a.SyncPinnedModelsToWatchdog() + // Start watchdog goroutine if any periodic checks are enabled // LRU eviction doesn't need the Run() loop - it's triggered on model load // But memory reclaimer needs the Run() loop for periodic checking @@ -124,5 +148,8 @@ func (a *Application) RestartWatchdog() error { newWD.RestoreState(oldState) } + // Re-sync pinned models after restart + a.SyncPinnedModelsToWatchdog() + return nil } diff --git a/core/config/model_config.go b/core/config/model_config.go index d56d92fac585..5f1780b7650d 100644 --- a/core/config/model_config.go +++ b/core/config/model_config.go @@ -78,6 +78,7 @@ type ModelConfig struct { Description string `yaml:"description,omitempty" json:"description,omitempty"` Usage string `yaml:"usage,omitempty" json:"usage,omitempty"` Disabled *bool `yaml:"disabled,omitempty" json:"disabled,omitempty"` + Pinned *bool `yaml:"pinned,omitempty" json:"pinned,omitempty"` Options []string `yaml:"options,omitempty" json:"options,omitempty"` Overrides []string `yaml:"overrides,omitempty" json:"overrides,omitempty"` @@ -554,6 +555,11 @@ func (c *ModelConfig) IsDisabled() bool { return c.Disabled != nil && *c.Disabled } +// IsPinned returns true if the model is pinned (excluded from idle unloading and eviction) +func (c *ModelConfig) IsPinned() bool { + return c.Pinned != nil && *c.Pinned +} + type ModelConfigUsecase int const ( diff --git a/core/http/endpoints/localai/pin_model.go b/core/http/endpoints/localai/pin_model.go new file mode 100644 index 000000000000..78861825792f --- /dev/null +++ b/core/http/endpoints/localai/pin_model.go @@ -0,0 +1,144 @@ +package localai + +import ( + "fmt" + "net/http" + "net/url" + "os" + + "github.com/labstack/echo/v4" + "github.com/mudler/LocalAI/core/config" + "github.com/mudler/LocalAI/pkg/utils" + + "gopkg.in/yaml.v3" +) + +// TogglePinnedModelEndpoint handles pinning or unpinning a model. +// Pinned models are excluded from idle unloading, LRU eviction, and memory-pressure eviction. +// +// @Summary Toggle model pinned status +// @Description Pin or unpin a model. Pinned models stay loaded and are excluded from automatic eviction. +// @Tags config +// @Param name path string true "Model name" +// @Param action path string true "Action: 'pin' or 'unpin'" +// @Success 200 {object} ModelResponse +// @Failure 400 {object} ModelResponse +// @Failure 404 {object} ModelResponse +// @Failure 500 {object} ModelResponse +// @Router /api/models/toggle-pinned/{name}/{action} [put] +func TogglePinnedModelEndpoint(cl *config.ModelConfigLoader, appConfig *config.ApplicationConfig, syncPinnedFn func()) echo.HandlerFunc { + return func(c echo.Context) error { + modelName := c.Param("name") + if decoded, err := url.PathUnescape(modelName); err == nil { + modelName = decoded + } + if modelName == "" { + return c.JSON(http.StatusBadRequest, ModelResponse{ + Success: false, + Error: "Model name is required", + }) + } + + action := c.Param("action") + if action != "pin" && action != "unpin" { + return c.JSON(http.StatusBadRequest, ModelResponse{ + Success: false, + Error: "Action must be 'pin' or 'unpin'", + }) + } + + // Get existing model config + modelConfig, exists := cl.GetModelConfig(modelName) + if !exists { + return c.JSON(http.StatusNotFound, ModelResponse{ + Success: false, + Error: "Model configuration not found", + }) + } + + // Get the config file path + configPath := modelConfig.GetModelConfigFile() + if configPath == "" { + return c.JSON(http.StatusNotFound, ModelResponse{ + Success: false, + Error: "Model configuration file not found", + }) + } + + // Verify the path is trusted + if err := utils.VerifyPath(configPath, appConfig.SystemState.Model.ModelsPath); err != nil { + return c.JSON(http.StatusForbidden, ModelResponse{ + Success: false, + Error: "Model configuration not trusted: " + err.Error(), + }) + } + + // Read the existing config file + configData, err := os.ReadFile(configPath) + if err != nil { + return c.JSON(http.StatusInternalServerError, ModelResponse{ + Success: false, + Error: "Failed to read configuration file: " + err.Error(), + }) + } + + // Parse the YAML config as a generic map to preserve all fields + var configMap map[string]interface{} + if err := yaml.Unmarshal(configData, &configMap); err != nil { + return c.JSON(http.StatusInternalServerError, ModelResponse{ + Success: false, + Error: "Failed to parse configuration file: " + err.Error(), + }) + } + + // Update the pinned field + pinned := action == "pin" + if pinned { + configMap["pinned"] = true + } else { + // Remove the pinned key entirely when unpinning (clean YAML) + delete(configMap, "pinned") + } + + // Marshal back to YAML + updatedData, err := yaml.Marshal(configMap) + if err != nil { + return c.JSON(http.StatusInternalServerError, ModelResponse{ + Success: false, + Error: "Failed to serialize configuration: " + err.Error(), + }) + } + + // Write updated config back to file + if err := os.WriteFile(configPath, updatedData, 0644); err != nil { + return c.JSON(http.StatusInternalServerError, ModelResponse{ + Success: false, + Error: "Failed to write configuration file: " + err.Error(), + }) + } + + // Reload model configurations from disk + if err := cl.LoadModelConfigsFromPath(appConfig.SystemState.Model.ModelsPath, appConfig.ToConfigLoaderOptions()...); err != nil { + return c.JSON(http.StatusInternalServerError, ModelResponse{ + Success: false, + Error: "Failed to reload configurations: " + err.Error(), + }) + } + + // Sync pinned models to the watchdog + if syncPinnedFn != nil { + syncPinnedFn() + } + + msg := fmt.Sprintf("Model '%s' has been %sned successfully.", modelName, action) + if pinned { + msg += " The model will be excluded from automatic eviction." + } + + return c.JSON(http.StatusOK, ModelResponse{ + Success: true, + Message: msg, + Filename: configPath, + }) + } +} diff --git a/core/http/react-ui/src/pages/Manage.jsx b/core/http/react-ui/src/pages/Manage.jsx index 4c301dfa73f7..6ac429ff4051 100644 --- a/core/http/react-ui/src/pages/Manage.jsx +++ b/core/http/react-ui/src/pages/Manage.jsx @@ -25,6 +25,7 @@ export default function Manage() { const [confirmDialog, setConfirmDialog] = useState(null) const [distributedMode, setDistributedMode] = useState(false) const [togglingModels, setTogglingModels] = useState(new Set()) + const [pinningModels, setPinningModels] = useState(new Set()) const handleTabChange = (tab) => { setActiveTab(tab) @@ -122,6 +123,24 @@ export default function Manage() { } } + const handleTogglePinned = async (modelId, currentlyPinned) => { + const action = currentlyPinned ? 'unpin' : 'pin' + setPinningModels(prev => new Set(prev).add(modelId)) + try { + await modelsApi.togglePinned(modelId, action) + addToast(`Model ${modelId} ${action}ned`, 'success') + refetchModels() + } catch (err) { + addToast(`Failed to ${action} model: ${err.message}`, 'error') + } finally { + setPinningModels(prev => { + const next = new Set(prev) + next.delete(modelId) + return next + }) + } + } + const handleReload = async () => { setReloading(true) try { @@ -303,6 +322,22 @@ export default function Manage() { )} + {/* Pin button - prevents model from being unloaded */} + {/* Toggle switch for enabling/disabling model loading on demand */}