diff --git a/backend/cpp/llama-cpp/Makefile b/backend/cpp/llama-cpp/Makefile
index 53a19e853a94..f4d3f9ee6ab2 100644
--- a/backend/cpp/llama-cpp/Makefile
+++ b/backend/cpp/llama-cpp/Makefile
@@ -1,5 +1,5 @@
-LLAMA_VERSION?=d132f22fc92f36848f7ccf2fc9987cd0b0120825
+LLAMA_VERSION?=e62fa13c2497b2cd1958cb496e9489e86bbd5182
LLAMA_REPO?=https://github.com/ggerganov/llama.cpp
CMAKE_ARGS?=
diff --git a/backend/go/sam3-cpp/Makefile b/backend/go/sam3-cpp/Makefile
index 689ed490a4fd..3983b596e217 100644
--- a/backend/go/sam3-cpp/Makefile
+++ b/backend/go/sam3-cpp/Makefile
@@ -8,7 +8,7 @@ JOBS?=$(shell nproc --ignore=1)
# sam3.cpp
SAM3_REPO?=https://github.com/PABannier/sam3.cpp
-SAM3_VERSION?=8cc6e62bc740d7972746fcd47465ddf1c2b1e3c3
+SAM3_VERSION?=01832ef85fcc8eb6488f1d01cd247f07e96ff5a9
ifeq ($(NATIVE),false)
CMAKE_ARGS+=-DGGML_NATIVE=OFF
diff --git a/core/application/watchdog.go b/core/application/watchdog.go
index f11655512e80..c1aee6c7adb5 100644
--- a/core/application/watchdog.go
+++ b/core/application/watchdog.go
@@ -5,6 +5,27 @@ import (
"github.com/mudler/xlog"
)
+// SyncPinnedModelsToWatchdog collects the names of all model configs that report
+// themselves as pinned and passes that list to the watchdog attached to the model
+// loader. It returns without doing anything when either the config loader or the
+// watchdog is nil.
+func (a *Application) SyncPinnedModelsToWatchdog() {
+	loader := a.ModelConfigLoader()
+	if loader == nil {
+		return
+	}
+
+	watchdog := a.modelLoader.GetWatchDog()
+	if watchdog == nil {
+		return
+	}
+
+	// Stays nil when no config is pinned; the nil slice is still passed to
+	// SetPinnedModels so the watchdog's list is always overwritten.
+	var pinnedNames []string
+	for _, modelCfg := range loader.GetAllModelsConfigs() {
+		if !modelCfg.IsPinned() {
+			continue
+		}
+		pinnedNames = append(pinnedNames, modelCfg.Name)
+	}
+
+	watchdog.SetPinnedModels(pinnedNames)
+	xlog.Debug("Synced pinned models to watchdog", "count", len(pinnedNames))
+}
+
func (a *Application) StopWatchdog() error {
if a.watchdogStop != nil {
close(a.watchdogStop)
@@ -44,6 +65,9 @@ func (a *Application) startWatchdog() error {
// Set the watchdog on the model loader
a.modelLoader.SetWatchDog(wd)
+ // Sync pinned models from config to the watchdog
+ a.SyncPinnedModelsToWatchdog()
+
// Start watchdog goroutine if any periodic checks are enabled
// LRU eviction doesn't need the Run() loop - it's triggered on model load
// But memory reclaimer needs the Run() loop for periodic checking
@@ -124,5 +148,8 @@ func (a *Application) RestartWatchdog() error {
newWD.RestoreState(oldState)
}
+ // Re-sync pinned models after restart
+ a.SyncPinnedModelsToWatchdog()
+
return nil
}
diff --git a/core/config/model_config.go b/core/config/model_config.go
index d56d92fac585..5f1780b7650d 100644
--- a/core/config/model_config.go
+++ b/core/config/model_config.go
@@ -78,6 +78,7 @@ type ModelConfig struct {
Description string `yaml:"description,omitempty" json:"description,omitempty"`
Usage string `yaml:"usage,omitempty" json:"usage,omitempty"`
Disabled *bool `yaml:"disabled,omitempty" json:"disabled,omitempty"`
+ Pinned *bool `yaml:"pinned,omitempty" json:"pinned,omitempty"`
Options []string `yaml:"options,omitempty" json:"options,omitempty"`
Overrides []string `yaml:"overrides,omitempty" json:"overrides,omitempty"`
@@ -554,6 +555,11 @@ func (c *ModelConfig) IsDisabled() bool {
return c.Disabled != nil && *c.Disabled
}
+// IsPinned reports whether the model is pinned (excluded from idle unloading and
+// eviction). An unset Pinned field (nil) is treated as not pinned.
+func (c *ModelConfig) IsPinned() bool {
+	if c.Pinned == nil {
+		return false
+	}
+	return *c.Pinned
+}
+
type ModelConfigUsecase int
const (
diff --git a/core/http/endpoints/localai/pin_model.go b/core/http/endpoints/localai/pin_model.go
new file mode 100644
index 000000000000..78861825792f
--- /dev/null
+++ b/core/http/endpoints/localai/pin_model.go
@@ -0,0 +1,144 @@
+package localai
+
+import (
+ "fmt"
+ "net/http"
+ "net/url"
+ "os"
+
+ "github.com/labstack/echo/v4"
+ "github.com/mudler/LocalAI/core/config"
+ "github.com/mudler/LocalAI/pkg/utils"
+
+ "gopkg.in/yaml.v3"
+)
+
+// TogglePinnedModelEndpoint handles pinning or unpinning a model.
+// Pinned models are excluded from idle unloading, LRU eviction, and memory-pressure eviction.
+//
+// The handler rewrites the model's YAML config file in place (setting the top-level
+// "pinned" key to true, or removing it), reloads every model config from the models
+// path, and finally invokes syncPinnedFn when it is non-nil. The file is decoded into
+// a generic map so keys unknown to this handler survive the rewrite.
+// NOTE(review): a map round trip through yaml.v3 does not keep comments or key order.
+//
+// @Summary Toggle model pinned status
+// @Description Pin or unpin a model. Pinned models stay loaded and are excluded from automatic eviction.
+// @Tags config
+// @Param name path string true "Model name"
+// @Param action path string true "Action: 'pin' or 'unpin'"
+// @Success 200 {object} ModelResponse
+// @Failure 400 {object} ModelResponse
+// @Failure 403 {object} ModelResponse
+// @Failure 404 {object} ModelResponse
+// @Failure 500 {object} ModelResponse
+// @Router /api/models/toggle-pinned/{name}/{action} [put]
+func TogglePinnedModelEndpoint(cl *config.ModelConfigLoader, appConfig *config.ApplicationConfig, syncPinnedFn func()) echo.HandlerFunc {
+	return func(c echo.Context) error {
+		// fail sends the uniform error payload shared by every failure path below.
+		fail := func(status int, msg string) error {
+			return c.JSON(status, ModelResponse{
+				Success: false,
+				Error:   msg,
+			})
+		}
+
+		modelName := c.Param("name")
+		// Decode percent-escapes; if the value is not valid escaping, keep it as received.
+		if decoded, err := url.PathUnescape(modelName); err == nil {
+			modelName = decoded
+		}
+		if modelName == "" {
+			return fail(http.StatusBadRequest, "Model name is required")
+		}
+
+		action := c.Param("action")
+		if action != "pin" && action != "unpin" {
+			return fail(http.StatusBadRequest, "Action must be 'pin' or 'unpin'")
+		}
+
+		// Get existing model config
+		modelConfig, exists := cl.GetModelConfig(modelName)
+		if !exists {
+			return fail(http.StatusNotFound, "Model configuration not found")
+		}
+
+		// Get the config file path
+		configPath := modelConfig.GetModelConfigFile()
+		if configPath == "" {
+			return fail(http.StatusNotFound, "Model configuration file not found")
+		}
+
+		// Verify the path is trusted before reading or writing it
+		if err := utils.VerifyPath(configPath, appConfig.SystemState.Model.ModelsPath); err != nil {
+			return fail(http.StatusForbidden, "Model configuration not trusted: "+err.Error())
+		}
+
+		// Read the existing config file
+		configData, err := os.ReadFile(configPath)
+		if err != nil {
+			return fail(http.StatusInternalServerError, "Failed to read configuration file: "+err.Error())
+		}
+
+		// Parse the YAML config as a generic map to preserve all fields
+		var configMap map[string]interface{}
+		if err := yaml.Unmarshal(configData, &configMap); err != nil {
+			return fail(http.StatusInternalServerError, "Failed to parse configuration file: "+err.Error())
+		}
+		// An empty, comment-only or null document decodes without error but leaves
+		// the map nil; assigning into a nil map would panic on the pin path.
+		if configMap == nil {
+			configMap = map[string]interface{}{}
+		}
+
+		// Update the pinned field
+		pinned := action == "pin"
+		if pinned {
+			configMap["pinned"] = true
+		} else {
+			// Remove the pinned key entirely when unpinning (clean YAML)
+			delete(configMap, "pinned")
+		}
+
+		// Marshal back to YAML
+		updatedData, err := yaml.Marshal(configMap)
+		if err != nil {
+			return fail(http.StatusInternalServerError, "Failed to serialize configuration: "+err.Error())
+		}
+
+		// Write updated config back to file
+		if err := os.WriteFile(configPath, updatedData, 0644); err != nil {
+			return fail(http.StatusInternalServerError, "Failed to write configuration file: "+err.Error())
+		}
+
+		// Reload model configurations from disk so the in-memory state matches the file
+		if err := cl.LoadModelConfigsFromPath(appConfig.SystemState.Model.ModelsPath, appConfig.ToConfigLoaderOptions()...); err != nil {
+			return fail(http.StatusInternalServerError, "Failed to reload configurations: "+err.Error())
+		}
+
+		// Sync pinned models to the watchdog
+		if syncPinnedFn != nil {
+			syncPinnedFn()
+		}
+
+		// action is "pin" or "unpin", so appending "ned" yields "pinned"/"unpinned".
+		msg := fmt.Sprintf("Model '%s' has been %sned successfully.", modelName, action)
+		if pinned {
+			msg += " The model will be excluded from automatic eviction."
+		}
+
+		return c.JSON(http.StatusOK, ModelResponse{
+			Success:  true,
+			Message:  msg,
+			Filename: configPath,
+		})
+	}
+}
diff --git a/core/http/react-ui/src/pages/Manage.jsx b/core/http/react-ui/src/pages/Manage.jsx
index 4c301dfa73f7..6ac429ff4051 100644
--- a/core/http/react-ui/src/pages/Manage.jsx
+++ b/core/http/react-ui/src/pages/Manage.jsx
@@ -25,6 +25,7 @@ export default function Manage() {
const [confirmDialog, setConfirmDialog] = useState(null)
const [distributedMode, setDistributedMode] = useState(false)
const [togglingModels, setTogglingModels] = useState(new Set())
+ const [pinningModels, setPinningModels] = useState(new Set())
const handleTabChange = (tab) => {
setActiveTab(tab)
@@ -122,6 +123,24 @@ export default function Manage() {
}
}
+ const handleTogglePinned = async (modelId, currentlyPinned) => {
+ const action = currentlyPinned ? 'unpin' : 'pin'
+ setPinningModels(prev => new Set(prev).add(modelId))
+ try {
+ await modelsApi.togglePinned(modelId, action)
+ addToast(`Model ${modelId} ${action}ned`, 'success')
+ refetchModels()
+ } catch (err) {
+ addToast(`Failed to ${action} model: ${err.message}`, 'error')
+ } finally {
+ setPinningModels(prev => {
+ const next = new Set(prev)
+ next.delete(modelId)
+ return next
+ })
+ }
+ }
+
const handleReload = async () => {
setReloading(true)
try {
@@ -303,6 +322,22 @@ export default function Manage() {
)}
+ {/* Pin button - prevents model from being unloaded */}
+
{/* Toggle switch for enabling/disabling model loading on demand */}