From 6a20c4da4b628ab711b61c6ca26eb8fff2190f28 Mon Sep 17 00:00:00 2001 From: Jared Pleva Date: Thu, 2 Apr 2026 04:49:38 +0000 Subject: [PATCH] =?UTF-8?q?fix(setup):=20remote=20Ollama=20support=20+=20d?= =?UTF-8?q?ogfood=20readiness=20=E2=80=94=20closes=20#76?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - setup: add Remote Ollama option on headless Linux (prompts for OLLAMA_HOST instead of defaulting to API-drivers-only) - setup: offer Goose install on all platforms — it works headlessly and pairs with remote Ollama for server deployments - serve: mustOllama() gives a useful error for remote endpoints (no longer says "Start: ollama serve" for non-localhost hosts) - scheduler: add inference: remote to ServeConfig — skips RAM-based concurrency detection, defaults to max_parallel=4 (GPU-bound) - agents.yaml: document inference: remote option as a comment - main.go: version sentinel changed from stale "0.4.8" to "dev" (goreleaser ldflags still inject the real version at release time) - 5 new scheduler tests (inference field parsing, New() behavior) Co-Authored-By: Claude Sonnet 4.6 --- agents.yaml | 1 + cmd/shellforge/main.go | 61 +++++++++++++++---- internal/scheduler/scheduler.go | 10 +++- internal/scheduler/scheduler_test.go | 90 ++++++++++++++++++++++++++++ 4 files changed, 149 insertions(+), 13 deletions(-) create mode 100644 internal/scheduler/scheduler_test.go diff --git a/agents.yaml b/agents.yaml index 9946fed..f02a593 100644 --- a/agents.yaml +++ b/agents.yaml @@ -5,6 +5,7 @@ max_parallel: 0 # 0 = auto-detect from available RAM log_dir: outputs/logs model_ram_gb: 19 # qwen3:30b Q4_K_M — adjust for your model +# inference: remote # uncomment for remote Ollama (RunPod/GPU server) — disables RAM-based concurrency detection (max_parallel then defaults to 4) agents: - name: qa-agent diff --git a/cmd/shellforge/main.go b/cmd/shellforge/main.go index b4fa52d..5e2ffdb 100644 --- a/cmd/shellforge/main.go +++ b/cmd/shellforge/main.go @@ -25,7 +25,7 @@ import (
"github.com/AgentGuardHQ/shellforge/internal/scheduler" ) -var version = "0.4.8" +var version = "dev" func main() { if len(os.Args) < 2 { @@ -147,13 +147,35 @@ total := 6 // ── Detect environment ── isServer := !hasGPU() && runtime.GOOS == "linux" model := "" +remoteOllamaHost := "" // set if user configures a remote Ollama endpoint -// ── Step 1: Ollama (skip on headless server) ── +// ── Step 1: Ollama (local install or remote endpoint) ── steps++ if isServer { -fmt.Printf("── Step %d/%d: Ollama (skipped — server mode) ──\n", steps, total) -fmt.Println(" Detected: Linux, no GPU — skipping local model setup") -fmt.Println(" Use CLI drivers instead: shellforge run claude, copilot, codex, gemini") +fmt.Printf("── Step %d/%d: Ollama ──\n", steps, total) +fmt.Println(" Detected: Linux, no GPU") +fmt.Println(" Options:") +fmt.Println(" 1) Configure remote Ollama endpoint (OLLAMA_HOST)") +fmt.Println(" 2) Use API drivers only (Claude, Copilot, Codex, Gemini)") +fmt.Print(" Pick [2]: ") +serverChoice := readLine(reader) +if strings.TrimSpace(serverChoice) == "1" { +fmt.Print(" Remote Ollama URL [http://localhost:11434]: ") +hostInput := strings.TrimSpace(readLine(reader)) +if hostInput == "" { +hostInput = "http://localhost:11434" +} +remoteOllamaHost = hostInput +fmt.Printf(" → OLLAMA_HOST=%s\n", remoteOllamaHost) +if ollama.IsRunning() { +fmt.Printf(" ✓ Ollama reachable at %s\n", remoteOllamaHost) +} else { +fmt.Printf(" ⚠ Ollama not reachable at %s — verify it is running\n", remoteOllamaHost) +} +fmt.Printf(" Tip: export OLLAMA_HOST=%s\n", remoteOllamaHost) +} else { +fmt.Println(" Skipping Ollama — use CLI drivers: shellforge run claude, copilot, codex, gemini") +} fmt.Println() } else { fmt.Printf("── Step %d/%d: Ollama (local LLM inference) ──\n", steps, total) @@ -296,10 +318,9 @@ fmt.Println() steps++ fmt.Printf("── Step %d/%d: Agent drivers ──\n", steps, total) -// On Mac/GPU: offer Goose (local models via Ollama). On server: skip, show API drivers. 
-if !isServer { +// Offer Goose on all platforms — it runs headlessly and works with remote Ollama. if _, err := exec.LookPath("goose"); err != nil { -fmt.Println(" Goose — AI agent with native Ollama support (actually executes tools)") +fmt.Println(" Goose — AI agent with native Ollama support (works headlessly)") fmt.Print(" Install Goose? [Y/n] ") if confirm(reader) { fmt.Println(" → Installing Goose...") @@ -310,14 +331,17 @@ run("sh", "-c", "curl -fsSL https://github.com/block/goose/releases/download/sta } if _, err := exec.LookPath("goose"); err == nil { fmt.Println(" ✓ Goose installed") +if remoteOllamaHost != "" { +fmt.Printf(" → Run 'goose configure' and set Ollama host to %s\n", remoteOllamaHost) +} else { fmt.Println(" → Run 'goose configure' to set up Ollama provider") +} } else { fmt.Println(" ⚠ Install failed — try: brew install --cask block-goose") } } } else { -fmt.Println(" ✓ Goose installed (local model driver)") -} +fmt.Println(" ✓ Goose installed") } // Show API-based drivers @@ -393,14 +417,22 @@ fmt.Println("║ Setup Complete ║") fmt.Println("╚══════════════════════════════════════╝") fmt.Println() if isServer { +if remoteOllamaHost != "" { +fmt.Println(" Server mode — remote Ollama configured:") +fmt.Printf(" export OLLAMA_HOST=%s\n", remoteOllamaHost) +fmt.Println(" shellforge run goose \"describe this project\"") +fmt.Println(" shellforge serve agents.yaml") +fmt.Println() +fmt.Println(" Or use CLI drivers:") +} else { fmt.Println(" Server mode — use CLI drivers:") +} fmt.Println(" shellforge run claude \"review open PRs\"") fmt.Println(" shellforge run copilot \"update docs\"") fmt.Println(" shellforge run codex \"generate tests\"") fmt.Println() fmt.Println(" Run a swarm:") -fmt.Println(" shellforge swarm # start Dagu dashboard") -fmt.Println(" dagu start dags/multi-driver-swarm.yaml") +fmt.Println(" shellforge swarm # start Octi Pulpo dashboard") } else { fmt.Println(" Quick start:") fmt.Println(" shellforge run goose \"describe this 
project\"") @@ -1060,7 +1092,12 @@ return eng func mustOllama() { if !ollama.IsRunning() { +if strings.HasPrefix(ollama.Host, "http://localhost") || strings.HasPrefix(ollama.Host, "http://127.") { fmt.Fprintln(os.Stderr, "ERROR: Ollama not running. Start: ollama serve") +} else { +fmt.Fprintf(os.Stderr, "ERROR: Ollama not reachable at %s\n", ollama.Host) +fmt.Fprintln(os.Stderr, " Check that the remote Ollama server is running and accessible.") +} os.Exit(1) } } diff --git a/internal/scheduler/scheduler.go b/internal/scheduler/scheduler.go index 9e97bf8..2df1e07 100644 --- a/internal/scheduler/scheduler.go +++ b/internal/scheduler/scheduler.go @@ -35,6 +35,7 @@ type ServeConfig struct { MaxParallel int `yaml:"max_parallel"` // 0 = auto-detect LogDir string `yaml:"log_dir"` ModelRAM int `yaml:"model_ram_gb"` // estimated model RAM in GB, default 19 + Inference string `yaml:"inference"` // "remote" disables RAM-based concurrency detection Agents []AgentConfig `yaml:"agents"` } @@ -132,7 +133,14 @@ func detectTotalRAM() uint64 { func New(cfg *ServeConfig, run RunFunc) *Scheduler { maxP := cfg.MaxParallel if maxP <= 0 { - maxP = DetectMaxParallel(cfg.ModelRAM) + if cfg.Inference == "remote" { + // Remote inference: GPU VRAM is the bottleneck, not local RAM. + // Default to 4 concurrent slots; user can override via max_parallel. 
+ maxP = 4 + fmt.Println("[scheduler] inference=remote — max_parallel=4 (GPU-bound, not RAM-bound)") + } else { + maxP = DetectMaxParallel(cfg.ModelRAM) + } } os.MkdirAll(cfg.LogDir, 0o755) return &Scheduler{ diff --git a/internal/scheduler/scheduler_test.go b/internal/scheduler/scheduler_test.go new file mode 100644 index 0000000..b7e8f31 --- /dev/null +++ b/internal/scheduler/scheduler_test.go @@ -0,0 +1,90 @@ +package scheduler + +import ( + "os" + "testing" + + "gopkg.in/yaml.v3" +) + +func TestServeConfigInferenceField(t *testing.T) { + yamlInput := ` +max_parallel: 0 +log_dir: outputs/logs +model_ram_gb: 8 +inference: remote +agents: [] +` + var cfg ServeConfig + if err := yaml.Unmarshal([]byte(yamlInput), &cfg); err != nil { + t.Fatalf("unmarshal: %v", err) + } + if cfg.Inference != "remote" { + t.Errorf("inference = %q, want %q", cfg.Inference, "remote") + } +} + +func TestServeConfigInferenceDefaultsEmpty(t *testing.T) { + yamlInput := ` +max_parallel: 0 +log_dir: outputs/logs +model_ram_gb: 8 +agents: [] +` + var cfg ServeConfig + if err := yaml.Unmarshal([]byte(yamlInput), &cfg); err != nil { + t.Fatalf("unmarshal: %v", err) + } + if cfg.Inference != "" { + t.Errorf("inference = %q, want empty", cfg.Inference) + } +} + +func TestNewSchedulerRemoteInferenceDefaultsToFour(t *testing.T) { + cfg := &ServeConfig{ + MaxParallel: 0, + Inference: "remote", + LogDir: t.TempDir(), + ModelRAM: 8, + } + var noopRun RunFunc = func(name, system, prompt string, timeoutSec int) error { return nil } + sched := New(cfg, noopRun) + if cap(sched.slots) != 4 { + t.Errorf("remote inference: slots cap = %d, want 4", cap(sched.slots)) + } +} + +func TestNewSchedulerRemoteInferenceRespectsExplicitMaxParallel(t *testing.T) { + cfg := &ServeConfig{ + MaxParallel: 8, + Inference: "remote", + LogDir: t.TempDir(), + ModelRAM: 8, + } + var noopRun RunFunc = func(name, system, prompt string, timeoutSec int) error { return nil } + sched := New(cfg, noopRun) + if cap(sched.slots) != 8 { 
+ t.Errorf("remote inference with explicit max_parallel: slots cap = %d, want 8", cap(sched.slots)) + } +} + +func TestLoadConfigInferenceField(t *testing.T) { + content := ` +max_parallel: 0 +log_dir: outputs/logs +model_ram_gb: 8 +inference: remote +agents: [] +` + path := t.TempDir() + "/agents.yaml" + if err := os.WriteFile(path, []byte(content), 0o644); err != nil { + t.Fatalf("write temp config: %v", err) + } + cfg, err := LoadConfig(path) + if err != nil { + t.Fatalf("LoadConfig: %v", err) + } + if cfg.Inference != "remote" { + t.Errorf("loaded inference = %q, want %q", cfg.Inference, "remote") + } +}