Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion backend/cpp/llama-cpp/Makefile
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@

LLAMA_VERSION?=d132f22fc92f36848f7ccf2fc9987cd0b0120825
LLAMA_VERSION?=e62fa13c2497b2cd1958cb496e9489e86bbd5182
LLAMA_REPO?=https://github.com/ggerganov/llama.cpp

CMAKE_ARGS?=
Expand Down
2 changes: 1 addition & 1 deletion backend/go/sam3-cpp/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ JOBS?=$(shell nproc --ignore=1)

# sam3.cpp
SAM3_REPO?=https://github.com/PABannier/sam3.cpp
SAM3_VERSION?=8cc6e62bc740d7972746fcd47465ddf1c2b1e3c3
SAM3_VERSION?=01832ef85fcc8eb6488f1d01cd247f07e96ff5a9

ifeq ($(NATIVE),false)
CMAKE_ARGS+=-DGGML_NATIVE=OFF
Expand Down
27 changes: 27 additions & 0 deletions core/application/watchdog.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,27 @@ import (
"github.com/mudler/xlog"
)

// SyncPinnedModelsToWatchdog gathers the names of all model configs that
// report IsPinned() and passes that list to the watchdog via SetPinnedModels.
//
// It returns without doing anything when the model config loader or the
// watchdog is nil. When no model is pinned, SetPinnedModels receives a nil
// slice.
func (a *Application) SyncPinnedModelsToWatchdog() {
	loader := a.ModelConfigLoader()
	if loader == nil {
		return
	}

	watchdog := a.modelLoader.GetWatchDog()
	if watchdog == nil {
		return
	}

	// Collect only the pinned model names; everything else is skipped.
	var names []string
	for _, modelCfg := range loader.GetAllModelsConfigs() {
		if !modelCfg.IsPinned() {
			continue
		}
		names = append(names, modelCfg.Name)
	}

	watchdog.SetPinnedModels(names)
	xlog.Debug("Synced pinned models to watchdog", "count", len(names))
}

func (a *Application) StopWatchdog() error {
if a.watchdogStop != nil {
close(a.watchdogStop)
Expand Down Expand Up @@ -44,6 +65,9 @@ func (a *Application) startWatchdog() error {
// Set the watchdog on the model loader
a.modelLoader.SetWatchDog(wd)

// Sync pinned models from config to the watchdog
a.SyncPinnedModelsToWatchdog()

// Start watchdog goroutine if any periodic checks are enabled
// LRU eviction doesn't need the Run() loop - it's triggered on model load
// But memory reclaimer needs the Run() loop for periodic checking
Expand Down Expand Up @@ -124,5 +148,8 @@ func (a *Application) RestartWatchdog() error {
newWD.RestoreState(oldState)
}

// Re-sync pinned models after restart
a.SyncPinnedModelsToWatchdog()

return nil
}
6 changes: 6 additions & 0 deletions core/config/model_config.go
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ type ModelConfig struct {
Description string `yaml:"description,omitempty" json:"description,omitempty"`
Usage string `yaml:"usage,omitempty" json:"usage,omitempty"`
Disabled *bool `yaml:"disabled,omitempty" json:"disabled,omitempty"`
Pinned *bool `yaml:"pinned,omitempty" json:"pinned,omitempty"`

Options []string `yaml:"options,omitempty" json:"options,omitempty"`
Overrides []string `yaml:"overrides,omitempty" json:"overrides,omitempty"`
Expand Down Expand Up @@ -554,6 +555,11 @@ func (c *ModelConfig) IsDisabled() bool {
return c.Disabled != nil && *c.Disabled
}

// IsPinned reports whether the model is pinned (excluded from idle unloading
// and eviction). An unset Pinned field (nil pointer) counts as not pinned.
func (c *ModelConfig) IsPinned() bool {
	if c.Pinned == nil {
		return false
	}
	return *c.Pinned
}

type ModelConfigUsecase int

const (
Expand Down
144 changes: 144 additions & 0 deletions core/http/endpoints/localai/pin_model.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
package localai

import (
"fmt"
"net/http"
"net/url"
"os"

"github.com/labstack/echo/v4"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/pkg/utils"

"gopkg.in/yaml.v3"
)

// TogglePinnedModelEndpoint handles pinning or unpinning a model.
// Pinned models are excluded from idle unloading, LRU eviction, and memory-pressure eviction.
//
// The returned handler reads the model's YAML config file, sets `pinned: true`
// (pin) or removes the `pinned` key (unpin), writes the file back, reloads the
// model configs from the models path, and finally calls syncPinnedFn when it
// is non-nil. Every failure is reported as a ModelResponse with Success=false.
//
// @Summary Toggle model pinned status
// @Description Pin or unpin a model. Pinned models stay loaded and are excluded from automatic eviction.
// @Tags config
// @Param name path string true "Model name"
// @Param action path string true "Action: 'pin' or 'unpin'"
// @Success 200 {object} ModelResponse
// @Failure 400 {object} ModelResponse
// @Failure 403 {object} ModelResponse
// @Failure 404 {object} ModelResponse
// @Failure 500 {object} ModelResponse
// @Router /api/models/toggle-pinned/{name}/{action} [put]
func TogglePinnedModelEndpoint(cl *config.ModelConfigLoader, appConfig *config.ApplicationConfig, syncPinnedFn func()) echo.HandlerFunc {
	return func(c echo.Context) error {
		// fail sends the uniform error payload shared by every failure path.
		fail := func(status int, msg string) error {
			return c.JSON(status, ModelResponse{
				Success: false,
				Error:   msg,
			})
		}

		modelName := c.Param("name")
		// Best effort: if the parameter is not a valid escape sequence, keep
		// the raw value and let the config lookup decide.
		if decoded, err := url.PathUnescape(modelName); err == nil {
			modelName = decoded
		}
		if modelName == "" {
			return fail(http.StatusBadRequest, "Model name is required")
		}

		action := c.Param("action")
		if action != "pin" && action != "unpin" {
			return fail(http.StatusBadRequest, "Action must be 'pin' or 'unpin'")
		}

		// Get existing model config
		modelConfig, exists := cl.GetModelConfig(modelName)
		if !exists {
			return fail(http.StatusNotFound, "Model configuration not found")
		}

		// Get the config file path
		configPath := modelConfig.GetModelConfigFile()
		if configPath == "" {
			return fail(http.StatusNotFound, "Model configuration file not found")
		}

		// Verify the path is trusted
		if err := utils.VerifyPath(configPath, appConfig.SystemState.Model.ModelsPath); err != nil {
			return fail(http.StatusForbidden, "Model configuration not trusted: "+err.Error())
		}

		// Read the existing config file
		configData, err := os.ReadFile(configPath)
		if err != nil {
			return fail(http.StatusInternalServerError, "Failed to read configuration file: "+err.Error())
		}

		// Parse the YAML config as a generic map to preserve all fields
		var configMap map[string]interface{}
		if err := yaml.Unmarshal(configData, &configMap); err != nil {
			return fail(http.StatusInternalServerError, "Failed to parse configuration file: "+err.Error())
		}
		// An empty or comment-only file decodes to a nil map without an error.
		// Assigning into a nil map panics, and rewriting such a file would
		// persist a config that holds nothing but the pinned flag, so reject it.
		if configMap == nil {
			return fail(http.StatusInternalServerError, "Failed to parse configuration file: file contains no YAML mapping")
		}

		// Update the pinned field
		pinned := action == "pin"
		if pinned {
			configMap["pinned"] = true
		} else {
			// Remove the pinned key entirely when unpinning (clean YAML)
			delete(configMap, "pinned")
		}

		// Marshal back to YAML
		updatedData, err := yaml.Marshal(configMap)
		if err != nil {
			return fail(http.StatusInternalServerError, "Failed to serialize configuration: "+err.Error())
		}

		// Write updated config back to file
		if err := os.WriteFile(configPath, updatedData, 0644); err != nil {
			return fail(http.StatusInternalServerError, "Failed to write configuration file: "+err.Error())
		}

		// Reload model configurations from disk
		if err := cl.LoadModelConfigsFromPath(appConfig.SystemState.Model.ModelsPath, appConfig.ToConfigLoaderOptions()...); err != nil {
			return fail(http.StatusInternalServerError, "Failed to reload configurations: "+err.Error())
		}

		// Sync pinned models to the watchdog
		if syncPinnedFn != nil {
			syncPinnedFn()
		}

		// action is "pin" or "unpin", so appending "ned" yields "pinned"/"unpinned".
		msg := fmt.Sprintf("Model '%s' has been %sned successfully.", modelName, action)
		if pinned {
			msg += " The model will be excluded from automatic eviction."
		}

		return c.JSON(http.StatusOK, ModelResponse{
			Success:  true,
			Message:  msg,
			Filename: configPath,
		})
	}
}
35 changes: 35 additions & 0 deletions core/http/react-ui/src/pages/Manage.jsx
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ export default function Manage() {
const [confirmDialog, setConfirmDialog] = useState(null)
const [distributedMode, setDistributedMode] = useState(false)
const [togglingModels, setTogglingModels] = useState(new Set())
const [pinningModels, setPinningModels] = useState(new Set())

const handleTabChange = (tab) => {
setActiveTab(tab)
Expand Down Expand Up @@ -122,6 +123,24 @@ export default function Manage() {
}
}

// Pin or unpin a model. While the request is in flight the model id is kept
// in `pinningModels`; it is removed again whether the call succeeds or fails.
const handleTogglePinned = async (modelId, currentlyPinned) => {
  let action = 'pin'
  if (currentlyPinned) {
    action = 'unpin'
  }

  // Mark this model as busy.
  setPinningModels((inFlight) => {
    const withModel = new Set(inFlight)
    withModel.add(modelId)
    return withModel
  })

  try {
    await modelsApi.togglePinned(modelId, action)
    addToast(`Model ${modelId} ${action}ned`, 'success')
    refetchModels()
  } catch (err) {
    addToast(`Failed to ${action} model: ${err.message}`, 'error')
  } finally {
    // Always clear the busy marker for this model.
    setPinningModels((inFlight) => {
      const withoutModel = new Set(inFlight)
      withoutModel.delete(modelId)
      return withoutModel
    })
  }
}

const handleReload = async () => {
setReloading(true)
try {
Expand Down Expand Up @@ -303,6 +322,22 @@ export default function Manage() {
<i className="fas fa-stop" />
</button>
)}
{/* Pin button - prevents model from being unloaded */}
<button
className="btn btn-sm"
onClick={() => handleTogglePinned(model.id, model.pinned)}
disabled={pinningModels.has(model.id) || model.disabled}
title={model.pinned ? 'Unpin model (allow idle unloading)' : 'Pin model (prevent idle unloading)'}
style={{
padding: '2px 6px',
minWidth: 28,
color: model.pinned ? 'var(--color-warning, #f59e0b)' : 'var(--color-text-muted)',
opacity: model.disabled ? 0.3 : (pinningModels.has(model.id) ? 0.5 : 1),
cursor: pinningModels.has(model.id) ? 'wait' : (model.disabled ? 'not-allowed' : 'pointer'),
}}
>
<i className={`fas fa-thumbtack${pinningModels.has(model.id) ? ' fa-spin' : ''}`} />
</button>
{/* Toggle switch for enabling/disabling model loading on demand */}
<label
title={model.disabled ? 'Model is disabled — click to enable loading on demand' : 'Model is enabled — click to disable loading on demand'}
Expand Down
1 change: 1 addition & 0 deletions core/http/react-ui/src/utils/api.js
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ export const modelsApi = {
getEditConfig: (name) => fetchJSON(API_CONFIG.endpoints.modelEditGet(name)),
editConfig: (name, body) => postJSON(API_CONFIG.endpoints.modelEdit(name), body),
toggleState: (name, action) => fetchJSON(API_CONFIG.endpoints.modelToggleState(name, action), { method: 'PUT' }),
togglePinned: (name, action) => fetchJSON(API_CONFIG.endpoints.modelTogglePinned(name, action), { method: 'PUT' }),
getConfigMetadata: (section) => fetchJSON(
section ? `${API_CONFIG.endpoints.configMetadata}?section=${section}`
: API_CONFIG.endpoints.configMetadata
Expand Down
1 change: 1 addition & 0 deletions core/http/react-ui/src/utils/config.js
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ export const API_CONFIG = {
modelEditGet: (name) => `/api/models/edit/${name}`,
modelEdit: (name) => `/models/edit/${name}`,
modelToggleState: (name, action) => `/models/toggle-state/${name}/${action}`,
modelTogglePinned: (name, action) => `/models/toggle-pinned/${name}/${action}`,
backendsAvailable: '/backends/available',
backendsInstalled: '/backends',
version: '/version',
Expand Down
5 changes: 5 additions & 0 deletions core/http/routes/localai.go
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,11 @@ func RegisterLocalAIRoutes(router *echo.Echo,
// Toggle model enable/disable endpoint
router.PUT("/models/toggle-state/:name/:action", localai.ToggleStateModelEndpoint(cl, ml, appConfig), adminMiddleware)

// Toggle model pinned status endpoint
router.PUT("/models/toggle-pinned/:name/:action", localai.TogglePinnedModelEndpoint(cl, appConfig, func() {
app.SyncPinnedModelsToWatchdog()
}), adminMiddleware)

// Reload models endpoint
router.POST("/models/reload", localai.ReloadModelsEndpoint(cl, appConfig), adminMiddleware)
}
Expand Down
2 changes: 2 additions & 0 deletions core/http/routes/ui_api.go
Original file line number Diff line number Diff line change
Expand Up @@ -515,6 +515,7 @@ func RegisterUIAPIRoutes(app *echo.Echo, cl *config.ModelConfigLoader, ml *model
Capabilities []string `json:"capabilities"`
Backend string `json:"backend"`
Disabled bool `json:"disabled"`
Pinned bool `json:"pinned"`
}

result := make([]modelCapability, 0, len(modelConfigs)+len(modelsWithoutConfig))
Expand All @@ -524,6 +525,7 @@ func RegisterUIAPIRoutes(app *echo.Echo, cl *config.ModelConfigLoader, ml *model
Capabilities: cfg.KnownUsecaseStrings,
Backend: cfg.Backend,
Disabled: cfg.IsDisabled(),
Pinned: cfg.IsPinned(),
})
}
for _, name := range modelsWithoutConfig {
Expand Down
2 changes: 1 addition & 1 deletion gallery/z-image-ggml.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,5 +11,5 @@ config_file: |
- vae_path:ae.safetensors
- offload_params_to_cpu:true
parameters:
model: z_image_turbo-Q4_K.gguf
model: z_image_turbo-Q4_0.gguf
step: 25
Loading
Loading