From b7b3cde8f0bf4981ae35685c22b970110e95b7f3 Mon Sep 17 00:00:00 2001 From: Yolean k8s-qa Date: Sun, 21 Jun 2026 14:04:29 +0000 Subject: [PATCH 1/3] feat(lifetime): cost-control auto-expiry for clusters A dev cluster left running after a task is paused or finished is pure cost. A lifetime.maxRun budget makes the cluster expire on its own, taking the cheapest environment-appropriate action. Model: - Trigger is an absolute TTL (maxRun, a Go duration), counted from when the cluster STARTS, re-anchored on every `y-cluster start`. An appliance disk may boot days after it was built, so the countdown must begin at boot, not at provision. - The trigger lives where the cost lives. LOCAL (qemu): a host-side timer fires `y-cluster lifetime reap`, which runs onExpiry (stop by default; pause or teardown opt-in). The host is the cost, so a host timer is correct. CLOUD (GCP appliance): GCP-native --max-run-duration deletes the instance with no dependency on this host or the cluster staying up; the boot=no data disk survives. Surface: - config: lifetime{maxRun,onExpiry} on CommonConfig, validated; schemas regenerated. - qemu state sidecar: additive lifetime/onExpiry/expiresAt (no stateVersion bump); armed at provision, re-anchored on start. - pkg/lifetime: host timer (systemd-run --user, at fallback) and the GCP flag translation; reap re-checks the deadline and re-arms if not due, so extend is safe and a stale timer is harmless. - cmd: `y-cluster lifetime` status/reap/extend/arm/disarm/gcp-flags; provision/start arm the host timer, stop/teardown disarm it. qemu-only for the local provisioner today, matching the rest of the lifecycle surface; docker/multipass keep the "not yet implemented" shape. Idle-based expiry and an in-cluster reaper are deferred. Co-Authored-By: Claude Opus 4.8 (1M context) --- README.md | 22 +- cmd/y-cluster/lifecycle.go | 6 + cmd/y-cluster/lifetime.go | 332 +++++++++++++++++++++ cmd/y-cluster/main.go | 7 + e2e/lifetime_test.go | 132 ++++++++ pkg/lifetime/gcp.go | 33 ++ pkg/lifetime/gcp_test.go | 49 +++ pkg/lifetime/timer.go | 183 ++++++++++++ pkg/lifetime/timer_test.go | 80 +++++ pkg/provision/config/common.go | 118 +++++++- pkg/provision/config/lifetime_test.go | 113 +++++++ pkg/provision/qemu/lifecycle.go | 9 + pkg/provision/qemu/lifetime.go | 51 ++++ pkg/provision/qemu/lifetime_state_test.go | 179 +++++++++++ pkg/provision/qemu/qemu.go | 28 +- pkg/provision/qemu/state.go | 143 ++++++++- pkg/provision/schema/common.schema.json | 24 ++ pkg/provision/schema/docker.schema.json | 24 ++ pkg/provision/schema/multipass.schema.json | 24 ++ pkg/provision/schema/qemu.schema.json | 24 ++ 20 files changed, 1555 insertions(+), 26 deletions(-) create mode 100644 cmd/y-cluster/lifetime.go create mode 100644 e2e/lifetime_test.go create mode 100644 pkg/lifetime/gcp.go create mode 100644 pkg/lifetime/gcp_test.go create mode 100644 pkg/lifetime/timer.go create mode 100644 pkg/lifetime/timer_test.go create mode 100644 pkg/provision/config/lifetime_test.go create mode 100644 pkg/provision/qemu/lifetime.go create mode 100644 pkg/provision/qemu/lifetime_state_test.go diff --git a/README.md b/README.md index 5d94e40..397b6fc 100644 --- a/README.md +++ b/README.md @@ -31,6 +31,13 @@ Subcommand groups: and arbitrary user-built images. - **cache info / purge** — inspect or wipe y-cluster's shared download cache (k3s airgap bundles, image OCI layouts). +- **lifetime status / reap / extend / arm / disarm / gcp-flags** — + cost-control auto-expiry. A `lifetime.maxRun` in the config gives + the cluster a wall-clock budget counted from when it starts; on + expiry a local cluster runs its `onExpiry` action (stop by + default) via a host timer, and a GCP appliance is deleted by GCP + itself (`gcp-flags` emits the `--max-run-duration` flags). See the + "lifetime" idea below. - **serve / serve ensure / serve stop / serve logs** — a lightweight HTTP server that exposes config assets to the cluster: kustomize-built Secrets named @@ -42,7 +49,20 @@ context. The README is intentionally short — when something is discoverable from `y-cluster --help`, that's where it lives. -## Two ideas worth knowing before you start +## Three ideas worth knowing before you start + +**lifetime: the budget is counted from start, and the trigger lives +where the cost is.** `lifetime.maxRun` is a wall-clock budget that +begins when the cluster *starts* (re-anchored on every `y-cluster +start`), not when it was provisioned — an appliance disk may boot +days after it was built. Locally the host *is* the cost, so a host +timer fires `y-cluster lifetime reap`, which stops the cluster (or +the configured `onExpiry` action). On a GCP appliance the host +mustn't be the trigger (it may be offline), so `lifetime gcp-flags` +hands the duration to GCP's native `--max-run-duration`, and GCP +deletes the instance on its own — the attached data disk survives. +`reap` re-checks the persisted deadline and re-arms if it isn't due, +so `lifetime extend 2h` is safe and a stale timer is harmless. **yconverge: ordering vs checks come from different places.** CUE imports in `yconverge.cue` declare ordering — each import is diff --git a/cmd/y-cluster/lifecycle.go b/cmd/y-cluster/lifecycle.go index 736aa40..31f7115 100644 --- a/cmd/y-cluster/lifecycle.go +++ b/cmd/y-cluster/lifecycle.go @@ -55,6 +55,9 @@ func stopCmd() *cobra.Command { } switch lr.Backend { case cluster.BackendQEMU: + // A manual stop ends this run's budget; remove the + // host expiry timer. `start` re-arms a fresh window. + disarmHostTimer(contextName, logger) return qemu.Stop(qemuCacheDir(), lr.ClusterName, logger) case cluster.BackendDocker: return docker.Stop(ctx, lr.ClusterName, logger) @@ -240,6 +243,9 @@ func startCmd() *cobra.Command { if err != nil { return err } + // Start re-anchored the deadline to now; install the host + // timer for the fresh window. + armHostTimerIfLifetime(qemuCacheDir(), clusterName, contextName, logger) logger.Info("cluster started", zap.String("context", c.Context()), ) diff --git a/cmd/y-cluster/lifetime.go b/cmd/y-cluster/lifetime.go new file mode 100644 index 0000000..dd6331c --- /dev/null +++ b/cmd/y-cluster/lifetime.go @@ -0,0 +1,332 @@ +package main + +import ( + "errors" + "fmt" + "os" + "time" + + "github.com/spf13/cobra" + "go.uber.org/zap" + + "github.com/Yolean/y-cluster/pkg/cluster" + "github.com/Yolean/y-cluster/pkg/lifetime" + "github.com/Yolean/y-cluster/pkg/provision/config" + "github.com/Yolean/y-cluster/pkg/provision/qemu" +) + +// lifetimeCmd is the cost-control auto-expiry surface. A dev cluster +// left running after a task is paused or finished is pure cost; a +// lifetime budget makes the cluster expire on its own. +// +// Two enforcement paths, picked by where the cost lives: +// - LOCAL (qemu): a host-side timer (status/reap/extend/arm/disarm +// below) runs the onExpiry action. The host is the cost, so a +// host timer is the right trigger. +// - CLOUD (GCP appliance): `gcp-flags` emits gcloud scheduling +// flags so GCP itself deletes the instance at the deadline -- no +// host or cluster dependency. This is the only correct trigger +// for a paid cloud resource. +// +// The local subcommands are qemu-only today, matching the rest of +// the lifecycle surface; the qemu state sidecar is where the +// deadline lives. +func lifetimeCmd() *cobra.Command { + cmd := &cobra.Command{ + Use: "lifetime", + Short: "Cost-control auto-expiry: stop/decommission a cluster that is no longer needed", + } + cmd.AddCommand( + lifetimeStatusCmd(), + lifetimeReapCmd(), + lifetimeExtendCmd(), + lifetimeArmCmd(), + lifetimeDisarmCmd(), + lifetimeGCPFlagsCmd(), + ) + return cmd +} + +// armHostTimerIfLifetime installs the host-side reap timer when the +// qemu cluster has an armed deadline. Called from provision/start +// after the deadline has been (re)anchored. Best-effort: a failure +// to arm is logged, not fatal -- the deadline is persisted and +// `y-cluster lifetime reap` (by hand or from any scheduler) remains +// the backstop. No-op when no lifetime is configured. +func armHostTimerIfLifetime(cacheDir, name, contextName string, logger *zap.Logger) { + ls, err := qemu.LoadLifetime(cacheDir, name) + if err != nil || !ls.Enabled() || ls.ExpiresAt.IsZero() { + return + } + bin, err := os.Executable() + if err != nil { + logger.Warn("could not resolve binary path to arm lifetime timer", zap.Error(err)) + return + } + if err := lifetime.Arm(bin, contextName, ls.Remaining(), logger); err != nil { + logger.Warn("could not arm lifetime host timer", zap.Error(err)) + } +} + +// disarmHostTimer removes the host-side reap timer for a context. +// Called from stop/teardown. Best-effort by design. +func disarmHostTimer(contextName string, logger *zap.Logger) { + _ = lifetime.Disarm(contextName, logger) +} + +// resolveQemuCluster maps a kubeconfig context to the qemu cache dir +// + cluster name that the lifetime sidecar is keyed on. Works for a +// stopped cluster too (the context survives in kubeconfig), unlike +// cluster.Lookup which needs a running runtime. +func resolveQemuCluster(contextName string) (cacheDir, name string, err error) { + name, err = cluster.ResolveClusterName("", contextName) + if err != nil { + return "", "", err + } + if name == "" { + return "", "", fmt.Errorf("kubeconfig context %q has no associated cluster", contextName) + } + return qemuCacheDir(), name, nil +} + +// lifetimeStateErr renders the missing-sidecar case as a clear +// qemu-only message rather than a raw "no such file" error. +func lifetimeStateErr(name string, err error) error { + if errors.Is(err, os.ErrNotExist) { + return fmt.Errorf("no lifetime state for cluster %q; lifetime is implemented for the qemu provider only", name) + } + return err +} + +func lifetimeStatusCmd() *cobra.Command { + var contextName string + cmd := &cobra.Command{ + Use: "status", + Short: "Show the cluster's lifetime policy and time remaining", + Args: cobra.NoArgs, + RunE: func(cmd *cobra.Command, _ []string) error { + cacheDir, name, err := resolveQemuCluster(contextName) + if err != nil { + return err + } + ls, err := qemu.LoadLifetime(cacheDir, name) + if err != nil { + return lifetimeStateErr(name, err) + } + out := cmd.OutOrStdout() + if !ls.Enabled() { + fmt.Fprintf(out, "lifetime: disabled (no maxRun) for %q\n", name) + return nil + } + fmt.Fprintf(out, "cluster: %s\n", name) + fmt.Fprintf(out, "maxRun: %s\n", ls.MaxRun) + fmt.Fprintf(out, "onExpiry: %s\n", ls.OnExpiry) + if ls.ExpiresAt.IsZero() { + fmt.Fprintln(out, "expiresAt: (not armed; run `y-cluster start` or `y-cluster lifetime arm`)") + return nil + } + fmt.Fprintf(out, "expiresAt: %s\n", ls.ExpiresAt.Format(time.RFC3339)) + rem := ls.Remaining().Round(time.Second) + if rem < 0 { + fmt.Fprintf(out, "remaining: EXPIRED (%s ago)\n", (-rem).String()) + } else { + fmt.Fprintf(out, "remaining: %s\n", rem) + } + return nil + }, + } + cmd.Flags().StringVar(&contextName, "context", cluster.DefaultContext, "kubeconfig context name") + return cmd +} + +func lifetimeReapCmd() *cobra.Command { + var contextName string + cmd := &cobra.Command{ + Use: "reap", + Short: "Run the expiry action if the deadline has passed; otherwise re-arm", + Long: `reap is what the host timer fires at the deadline. It is +idempotent and self-healing: it re-reads the persisted deadline and +acts only if it has truly elapsed. If the deadline was pushed out +(e.g. via ` + "`lifetime extend`" + `) since the timer was set, reap +simply re-arms for the remaining window and exits. Safe to run by +hand or from an external cron.`, + Args: cobra.NoArgs, + RunE: func(cmd *cobra.Command, _ []string) error { + logger := loggerFromContext(cmd.Context()) + cacheDir, name, err := resolveQemuCluster(contextName) + if err != nil { + return err + } + ls, err := qemu.LoadLifetime(cacheDir, name) + if err != nil { + return lifetimeStateErr(name, err) + } + if !ls.Enabled() { + logger.Info("no lifetime configured; nothing to reap", zap.String("cluster", name)) + return nil + } + if ls.ExpiresAt.IsZero() { + logger.Info("lifetime not armed; nothing to reap", zap.String("cluster", name)) + return nil + } + if !ls.Expired() { + rem := ls.Remaining() + if bin, err := os.Executable(); err == nil { + if err := lifetime.Arm(bin, contextName, rem, logger); err != nil { + logger.Warn("could not re-arm host timer", zap.Error(err)) + } + } + logger.Info("not yet expired; re-armed", + zap.String("cluster", name), zap.Duration("remaining", rem.Round(time.Second))) + return nil + } + + logger.Info("lifetime expired; reaping", + zap.String("cluster", name), zap.String("onExpiry", ls.OnExpiry)) + switch ls.OnExpiry { + case config.OnExpiryPause: + err = qemu.Pause(cacheDir, name, logger) + case config.OnExpiryTeardown: + err = qemu.TeardownByName(cacheDir, name, false, logger) + default: // stop is the default and the empty-value behaviour + err = qemu.Stop(cacheDir, name, logger) + } + if err != nil { + return err + } + // Action performed: remove the host timer (best-effort; + // reap's recheck makes a stray timer harmless anyway). + _ = lifetime.Disarm(contextName, logger) + return nil + }, + } + cmd.Flags().StringVar(&contextName, "context", cluster.DefaultContext, "kubeconfig context name") + return cmd +} + +func lifetimeExtendCmd() *cobra.Command { + var contextName string + cmd := &cobra.Command{ + Use: "extend ", + Short: "Push the deadline out by (e.g. 2h) and re-arm", + Args: cobra.ExactArgs(1), + RunE: func(cmd *cobra.Command, args []string) error { + logger := loggerFromContext(cmd.Context()) + d, err := time.ParseDuration(args[0]) + if err != nil { + return fmt.Errorf("invalid duration %q: %w", args[0], err) + } + if d <= 0 { + return fmt.Errorf("extend duration must be positive, got %q", args[0]) + } + cacheDir, name, err := resolveQemuCluster(contextName) + if err != nil { + return err + } + nt, err := qemu.ExtendDeadline(cacheDir, name, d) + if err != nil { + return lifetimeStateErr(name, err) + } + if bin, err := os.Executable(); err == nil { + if err := lifetime.Arm(bin, contextName, time.Until(nt), logger); err != nil { + logger.Warn("could not re-arm host timer", zap.Error(err)) + } + } + fmt.Fprintf(cmd.OutOrStdout(), "extended; expiresAt %s\n", nt.Format(time.RFC3339)) + return nil + }, + } + cmd.Flags().StringVar(&contextName, "context", cluster.DefaultContext, "kubeconfig context name") + return cmd +} + +func lifetimeArmCmd() *cobra.Command { + var contextName string + cmd := &cobra.Command{ + Use: "arm", + Short: "(Re)install the host timer that fires the expiry action", + Args: cobra.NoArgs, + RunE: func(cmd *cobra.Command, _ []string) error { + logger := loggerFromContext(cmd.Context()) + cacheDir, name, err := resolveQemuCluster(contextName) + if err != nil { + return err + } + ls, err := qemu.LoadLifetime(cacheDir, name) + if err != nil { + return lifetimeStateErr(name, err) + } + if !ls.Enabled() { + return fmt.Errorf("no lifetime configured for %q; set lifetime.maxRun and re-provision", name) + } + if ls.ExpiresAt.IsZero() { + return fmt.Errorf("no deadline armed for %q; `y-cluster start` re-anchors it", name) + } + bin, err := os.Executable() + if err != nil { + return err + } + return lifetime.Arm(bin, contextName, ls.Remaining(), logger) + }, + } + cmd.Flags().StringVar(&contextName, "context", cluster.DefaultContext, "kubeconfig context name") + return cmd +} + +func lifetimeDisarmCmd() *cobra.Command { + var contextName string + cmd := &cobra.Command{ + Use: "disarm", + Short: "Remove the host timer (the persisted deadline is left intact)", + Args: cobra.NoArgs, + RunE: func(cmd *cobra.Command, _ []string) error { + logger := loggerFromContext(cmd.Context()) + return lifetime.Disarm(contextName, logger) + }, + } + cmd.Flags().StringVar(&contextName, "context", cluster.DefaultContext, "kubeconfig context name") + return cmd +} + +func lifetimeGCPFlagsCmd() *cobra.Command { + var configDir string + cmd := &cobra.Command{ + Use: "gcp-flags", + Short: "Print gcloud instances-create flags that enforce the lifetime cloud-side", + Long: `Reads lifetime.maxRun from the y-cluster-provision.yaml in -c + and prints the matching +` + "`--max-run-duration=s --instance-termination-action=DELETE`" + ` +flags for ` + "`gcloud compute instances create`" + `. Prints nothing +when no lifetime is configured, so a build script can append the +output unconditionally: + + EXTRA=$(y-cluster lifetime gcp-flags -c "$CONFIG_DIR") + gcloud compute instances create ... $EXTRA`, + Args: cobra.NoArgs, + RunE: func(cmd *cobra.Command, _ []string) error { + loaded, err := loadProvision(configDir) + if err != nil { + return err + } + acc, ok := loaded.(interface { + LifetimePolicy() config.LifetimeConfig + }) + if !ok { + return nil // provider with no lifetime surface: emit nothing + } + flags, err := lifetime.GCPFlags(acc.LifetimePolicy().MaxRun) + if err != nil { + return err + } + if flags != "" { + fmt.Fprintln(cmd.OutOrStdout(), flags) + } + return nil + }, + } + cmd.Flags().StringVarP(&configDir, "config", "c", "", "directory containing y-cluster-provision.yaml") + if err := cmd.MarkFlagRequired("config"); err != nil { + panic(err) + } + return cmd +} diff --git a/cmd/y-cluster/main.go b/cmd/y-cluster/main.go index ee7ab6b..b1c06d5 100644 --- a/cmd/y-cluster/main.go +++ b/cmd/y-cluster/main.go @@ -131,6 +131,7 @@ func rootCmd() *cobra.Command { root.AddCommand(prepareExportCmd()) root.AddCommand(exportCmd()) root.AddCommand(importCmd()) + root.AddCommand(lifetimeCmd()) root.AddCommand(serveCmd()) root.AddCommand(imagesCmd()) root.AddCommand(manifestsCmd()) @@ -361,6 +362,9 @@ message naming what was checked.`, if _, err := qemu.Provision(cmd.Context(), rt, logger); err != nil { return err } + // Provision armed the deadline; install the host-side + // timer that fires the local expiry action. + armHostTimerIfLifetime(rt.CacheDir, rt.Name, rt.Context, logger) logger.Info("cluster ready", zap.String("ssh", fmt.Sprintf("ssh -p %s -i %s ystack@localhost", rt.SSHPort, filepath.Join(rt.CacheDir, rt.Name+"-ssh"))), @@ -411,6 +415,9 @@ func teardownCmd() *cobra.Command { } switch v := loaded.(type) { case *config.QEMUConfig: + // Remove the host expiry timer before the cluster goes; + // the deadline is moot once teardown removes the sidecar. + disarmHostTimer(v.Context, logger) return qemu.TeardownConfig(qemu.FromConfig(v), keepDisk, logger) case *config.DockerConfig: // docker has no persistent disk; keepDisk is diff --git a/e2e/lifetime_test.go b/e2e/lifetime_test.go new file mode 100644 index 0000000..2e309d1 --- /dev/null +++ b/e2e/lifetime_test.go @@ -0,0 +1,132 @@ +//go:build e2e && kvm + +package e2e + +import ( + "context" + "os" + "path/filepath" + "testing" + "time" + + "go.uber.org/zap" + + "github.com/Yolean/y-cluster/pkg/provision/qemu" +) + +// TestQemu_Lifetime exercises the local cost-control auto-expiry path +// against a real qemu boot: +// +// - Provision with a lifetime budget; assert the deadline is armed +// in the sidecar and anchored to roughly now+budget. +// - Push the deadline into the past (the time-travel a real expiry +// would reach naturally) and assert Expired() flips. +// - Run the onExpiry action (stop) and assert the VM is down with +// disk + sidecar preserved -- the cost is gone, the cluster is +// resumable. +// - Start and assert the deadline is re-anchored to this start (the +// "count from when the cluster starts" guarantee), giving a fresh +// budget rather than an already-expired one. +// +// The host timer (systemd-run/at) is unit-tested in pkg/lifetime; +// here we cover the runtime substance: persistence, expiry detection, +// the real stop action, and the start re-anchor. +func TestQemu_Lifetime(t *testing.T) { + if _, err := os.Stat("/dev/kvm"); err != nil { + t.Skip("QEMU tests require /dev/kvm") + } + if err := qemu.CheckPrerequisites(); err != nil { + t.Skip(err) + } + + logger, _ := zap.NewDevelopment() + cfg := e2eQEMURuntime() + cfg.Name = "y-cluster-e2e-lifetime" + cfg.Context = "y-cluster-e2e-lifetime" + cfg.CacheDir = t.TempDir() + cfg.Memory = "4096" + cfg.CPUs = "2" + cfg.SSHPort = "2227" + cfg.PortForwards = e2eUniqueForwards("26447", "28447") + cfg.Kubeconfig = os.Getenv("KUBECONFIG") + if cfg.Kubeconfig == "" { + t.Skip("KUBECONFIG must be set") + } + cfg.Lifetime = "2h" + cfg.OnExpiry = "stop" + t.Setenv("Y_CLUSTER_QEMU_CACHE_DIR", cfg.CacheDir) + + ctx := context.Background() + + cluster, err := qemu.Provision(ctx, cfg, logger) + if err != nil { + t.Fatal(err) + } + t.Cleanup(func() { _ = cluster.Teardown(false) }) + + // Deadline armed at provision, anchored ~now+2h. + ls, err := qemu.LoadLifetime(cfg.CacheDir, cfg.Name) + if err != nil { + t.Fatalf("LoadLifetime after provision: %v", err) + } + if !ls.Enabled() { + t.Fatal("lifetime should be enabled after provision with maxRun set") + } + if ls.ExpiresAt.IsZero() { + t.Fatal("ExpiresAt should be armed after provision") + } + if rem := time.Until(ls.ExpiresAt); rem < 90*time.Minute || rem > 2*time.Hour+5*time.Minute { + t.Fatalf("provision deadline %s out of expected ~2h window (remaining %s)", ls.ExpiresAt, rem) + } + provisionDeadline := ls.ExpiresAt + + // Simulate the deadline elapsing: push it three hours into the + // past so it is unambiguously due. + if _, err := qemu.ExtendDeadline(cfg.CacheDir, cfg.Name, -3*time.Hour); err != nil { + t.Fatalf("ExtendDeadline (negative, to force expiry): %v", err) + } + ls, err = qemu.LoadLifetime(cfg.CacheDir, cfg.Name) + if err != nil { + t.Fatal(err) + } + if !ls.Expired() { + t.Fatalf("deadline %s should be expired after pushing it into the past", ls.ExpiresAt) + } + + // The onExpiry action: stop. Pidfile gone, disk + sidecar kept. + vmPid := readPid(t, cfg.CacheDir, cfg.Name) + if err := qemu.Stop(cfg.CacheDir, cfg.Name, logger); err != nil { + t.Fatalf("Stop (reap action): %v", err) + } + if _, err := os.Stat(filepath.Join(cfg.CacheDir, cfg.Name+".pid")); !os.IsNotExist(err) { + t.Fatalf("pidfile should be gone after expiry stop; stat err=%v", err) + } + if _, err := os.Stat(cluster.DiskPath()); err != nil { + t.Fatalf("disk should be preserved after expiry stop: %v", err) + } + if _, err := os.Stat(filepath.Join(cfg.CacheDir, cfg.Name+".json")); err != nil { + t.Fatalf("state sidecar should be preserved after expiry stop: %v", err) + } + assertPidGone(t, vmPid) + + // Start re-anchors the deadline to now: a stopped-then-started + // cluster gets a fresh budget, not the expired one we forced. + cluster2, err := qemu.Start(ctx, cfg.CacheDir, cfg.Name, logger) + if err != nil { + t.Fatalf("Start: %v", err) + } + _ = cluster2 + assertNodeReady(t, cfg.Context, cfg.Kubeconfig) + + ls, err = qemu.LoadLifetime(cfg.CacheDir, cfg.Name) + if err != nil { + t.Fatal(err) + } + if ls.Expired() { + t.Fatal("deadline should be re-anchored (not expired) after Start") + } + if !ls.ExpiresAt.After(provisionDeadline) { + t.Fatalf("start deadline %s should be later than the original provision deadline %s", + ls.ExpiresAt, provisionDeadline) + } +} diff --git a/pkg/lifetime/gcp.go b/pkg/lifetime/gcp.go new file mode 100644 index 0000000..ca9509a --- /dev/null +++ b/pkg/lifetime/gcp.go @@ -0,0 +1,33 @@ +package lifetime + +import ( + "fmt" + "time" +) + +// GCPFlags renders the `gcloud compute instances create` scheduling +// flags that enforce a cluster lifetime CLOUD-side. GCP measures +// max-run-duration from when the instance STARTS and then performs +// the termination action -- here DELETE -- with no dependency on the +// provisioning host or on the cluster staying up. That is exactly +// the "anchor to start, never host-bound" contract the appliance +// needs: the disk image carries only the duration, never an absolute +// deadline baked in at build time. +// +// Returns "" (no flags) when no budget is configured. The duration +// is normalized to integer seconds so gcloud's duration parser can +// never disagree with Go's time.ParseDuration. +func GCPFlags(maxRun string) (string, error) { + if maxRun == "" || maxRun == "0" { + return "", nil + } + d, err := time.ParseDuration(maxRun) + if err != nil { + return "", fmt.Errorf("lifetime maxRun %q is not a valid Go duration: %w", maxRun, err) + } + if d <= 0 { + return "", fmt.Errorf("lifetime maxRun must be positive, got %q", maxRun) + } + secs := int(d / time.Second) + return fmt.Sprintf("--max-run-duration=%ds --instance-termination-action=DELETE", secs), nil +} diff --git a/pkg/lifetime/gcp_test.go b/pkg/lifetime/gcp_test.go new file mode 100644 index 0000000..ab766d2 --- /dev/null +++ b/pkg/lifetime/gcp_test.go @@ -0,0 +1,49 @@ +package lifetime + +import ( + "strings" + "testing" +) + +func TestGCPFlags(t *testing.T) { + tests := []struct { + in string + want string + wantErr bool + }{ + {"", "", false}, + {"0", "", false}, + {"8h", "--max-run-duration=28800s --instance-termination-action=DELETE", false}, + {"90m", "--max-run-duration=5400s --instance-termination-action=DELETE", false}, + {"banana", "", true}, + {"-5m", "", true}, + } + for _, tt := range tests { + got, err := GCPFlags(tt.in) + if tt.wantErr { + if err == nil { + t.Errorf("GCPFlags(%q): expected error, got %q", tt.in, got) + } + continue + } + if err != nil { + t.Errorf("GCPFlags(%q): unexpected error %v", tt.in, err) + continue + } + if got != tt.want { + t.Errorf("GCPFlags(%q) = %q, want %q", tt.in, got, tt.want) + } + } +} + +// The DELETE action is what preserves the separately-attached data +// disk while stopping compute billing -- guard it explicitly. +func TestGCPFlags_DeletesInstance(t *testing.T) { + got, err := GCPFlags("1h") + if err != nil { + t.Fatal(err) + } + if !strings.Contains(got, "--instance-termination-action=DELETE") { + t.Fatalf("expected DELETE termination action, got %q", got) + } +} diff --git a/pkg/lifetime/timer.go b/pkg/lifetime/timer.go new file mode 100644 index 0000000..9a8dd41 --- /dev/null +++ b/pkg/lifetime/timer.go @@ -0,0 +1,183 @@ +// Package lifetime arms and disarms the host-side timer that fires a +// cluster's auto-expiry action. It is the LOCAL trigger: for a local +// dev cluster the host machine is itself the cost, so a host timer is +// the right place for the trigger (if the host sleeps or logs out, +// the VM is down too, so there is nothing to reap). Paid CLOUD +// resources must NOT be reaped from the provisioning host -- that +// path uses cloud-enforced expiry (GCP max-run-duration) instead and +// never goes through this package. +// +// The timer runs ` lifetime reap --context=` at the +// deadline. reap re-reads the persisted deadline and acts only if it +// has truly elapsed, otherwise it re-arms for the remaining window. +// That idempotency makes a stale timer (e.g. one left behind after an +// `extend`) harmless, which is why Disarm is best-effort. +package lifetime + +import ( + "fmt" + "os/exec" + "strings" + "time" + + "go.uber.org/zap" +) + +// reapInvocation is the argv tail every backend schedules: the +// y-cluster subcommand that performs the expiry check + action. +func reapInvocation(bin, kubeContext string) []string { + return []string{bin, "lifetime", "reap", "--context=" + kubeContext} +} + +// unitName is the transient systemd unit name for a context's timer. +// Sanitized to the systemd unit charset; the context is already +// DNS-label-ish but a kubeconfig context can in principle carry +// characters systemd rejects, so map anything outside [a-z0-9-] to +// '-'. +func unitName(kubeContext string) string { + var b strings.Builder + b.WriteString("y-cluster-lifetime-") + for _, r := range kubeContext { + switch { + case r >= 'a' && r <= 'z', r >= 'A' && r <= 'Z', r >= '0' && r <= '9', r == '-': + b.WriteRune(r) + default: + b.WriteRune('-') + } + } + return b.String() +} + +// remainingSeconds clamps a deadline-relative duration to a minimum +// of one second so the timer is always in the future even if the +// deadline is already (nearly) here -- in which case reap fires +// almost immediately and acts. +func remainingSeconds(remaining time.Duration) int { + s := int(remaining / time.Second) + if s < 1 { + return 1 + } + return s +} + +// systemdRunArgs builds the argv for arming via a transient +// `systemd-run --user` timer. `--on-active` is relative to now, so a +// computed remaining window arms the deadline; `--unit` names it so +// status/disarm can find it. +func systemdRunArgs(bin, kubeContext string, remaining time.Duration) []string { + args := []string{ + "--user", + "--unit=" + unitName(kubeContext), + fmt.Sprintf("--on-active=%ds", remainingSeconds(remaining)), + "--timer-property=AccuracySec=1s", + "--", + } + return append(args, reapInvocation(bin, kubeContext)...) +} + +// atTimeSpec renders the `at` time argument. at granularity is +// minutes, so round up to at least one minute. +func atTimeSpec(remaining time.Duration) string { + mins := int((remaining + time.Minute - 1) / time.Minute) + if mins < 1 { + mins = 1 + } + return fmt.Sprintf("now + %d minutes", mins) +} + +// atScript is the shell line piped to `at`. The trailing comment is a +// stable marker so Disarm can find this job among the user's at queue +// (at has no job naming). +func atScript(bin, kubeContext string) string { + return strings.Join(reapInvocation(bin, kubeContext), " ") + + " # " + unitName(kubeContext) +} + +// Arm schedules the reap for `remaining` from now via systemd-run +// (preferred) or `at` (fallback). It disarms any existing timer for +// the context first so re-arming is idempotent. A nil error means a +// timer is in place; an error means no host timer was armed (the +// persisted deadline still stands, so a manual or external +// `lifetime reap` remains the backstop). +func Arm(bin, kubeContext string, remaining time.Duration, logger *zap.Logger) error { + if logger == nil { + logger = zap.NewNop() + } + _ = Disarm(kubeContext, logger) // best-effort; ignore "nothing to remove" + + if _, err := exec.LookPath("systemd-run"); err == nil { + args := systemdRunArgs(bin, kubeContext, remaining) + out, err := exec.Command("systemd-run", args...).CombinedOutput() + if err == nil { + logger.Info("lifetime timer armed (systemd)", + zap.String("unit", unitName(kubeContext)), + zap.Duration("in", remaining)) + return nil + } + // User bus may be unavailable (e.g. headless without linger); + // fall through to at rather than failing outright. + logger.Debug("systemd-run failed; trying at", + zap.Error(err), zap.ByteString("output", out)) + } + + if _, err := exec.LookPath("at"); err == nil { + cmd := exec.Command("at", strings.Fields(atTimeSpec(remaining))...) + cmd.Stdin = strings.NewReader(atScript(bin, kubeContext) + "\n") + if out, err := cmd.CombinedOutput(); err != nil { + return fmt.Errorf("at scheduling failed: %w: %s", err, strings.TrimSpace(string(out))) + } + logger.Info("lifetime timer armed (at)", zap.Duration("in", remaining)) + return nil + } + + return fmt.Errorf("no host scheduler available (need systemd-run --user or at); " + + "the deadline is persisted but will not fire automatically - " + + "run `y-cluster lifetime reap` from a cron/timer of your choosing") +} + +// Disarm removes the context's host timer. Best-effort by design: +// reap re-checks the persisted deadline, so a leftover timer that +// fires is a no-op. Returns nil when nothing needed removing. +func Disarm(kubeContext string, logger *zap.Logger) error { + if logger == nil { + logger = zap.NewNop() + } + if _, err := exec.LookPath("systemctl"); err == nil { + // Stopping a transient timer unit also cleans it up. + _ = exec.Command("systemctl", "--user", "stop", unitName(kubeContext)+".timer").Run() + } + if _, err := exec.LookPath("atq"); err == nil { + for _, id := range atJobIDsFor(kubeContext) { + if _, err := exec.LookPath("atrm"); err == nil { + _ = exec.Command("atrm", id).Run() + } + } + } + return nil +} + +// atJobIDsFor returns at(1) job ids whose script carries this +// context's marker. Best-effort: any error yields no ids. +func atJobIDsFor(kubeContext string) []string { + out, err := exec.Command("atq").Output() + if err != nil { + return nil + } + marker := unitName(kubeContext) + var ids []string + for _, line := range strings.Split(string(out), "\n") { + fields := strings.Fields(line) + if len(fields) == 0 { + continue + } + id := fields[0] + body, err := exec.Command("at", "-c", id).Output() + if err != nil { + continue + } + if strings.Contains(string(body), marker) { + ids = append(ids, id) + } + } + return ids +} diff --git a/pkg/lifetime/timer_test.go b/pkg/lifetime/timer_test.go new file mode 100644 index 0000000..4fa8480 --- /dev/null +++ b/pkg/lifetime/timer_test.go @@ -0,0 +1,80 @@ +package lifetime + +import ( + "strings" + "testing" + "time" +) + +func TestUnitName_Sanitizes(t *testing.T) { + tests := map[string]string{ + "local": "y-cluster-lifetime-local", + "alice-dev1": "y-cluster-lifetime-alice-dev1", + "weird/ctx @1": "y-cluster-lifetime-weird-ctx--1", + } + for in, want := range tests { + if got := unitName(in); got != want { + t.Errorf("unitName(%q) = %q, want %q", in, got, want) + } + } +} + +func TestRemainingSeconds_FloorIsOne(t *testing.T) { + if got := remainingSeconds(-5 * time.Minute); got != 1 { + t.Errorf("past-due remaining should floor to 1s, got %d", got) + } + if got := remainingSeconds(0); got != 1 { + t.Errorf("zero remaining should floor to 1s, got %d", got) + } + if got := remainingSeconds(90 * time.Second); got != 90 { + t.Errorf("remainingSeconds(90s) = %d, want 90", got) + } +} + +func TestSystemdRunArgs(t *testing.T) { + args := systemdRunArgs("/usr/local/bin/y-cluster", "local", 8*time.Hour) + joined := strings.Join(args, " ") + for _, want := range []string{ + "--user", + "--unit=y-cluster-lifetime-local", + "--on-active=28800s", + "--", + "/usr/local/bin/y-cluster lifetime reap --context=local", + } { + if !strings.Contains(joined, want) { + t.Errorf("systemd-run args missing %q in: %s", want, joined) + } + } +} + +func TestAtTimeSpec_RoundsUpToMinute(t *testing.T) { + tests := map[time.Duration]string{ + 30 * time.Second: "now + 1 minutes", + 90 * time.Second: "now + 2 minutes", + 8 * time.Hour: "now + 480 minutes", + -1 * time.Minute: "now + 1 minutes", + } + for in, want := range tests { + if got := atTimeSpec(in); got != want { + t.Errorf("atTimeSpec(%v) = %q, want %q", in, got, want) + } + } +} + +func TestAtScript_CarriesMarkerAndCommand(t *testing.T) { + s := atScript("/usr/local/bin/y-cluster", "alice-dev1") + if !strings.Contains(s, "/usr/local/bin/y-cluster lifetime reap --context=alice-dev1") { + t.Errorf("at script missing reap command: %s", s) + } + if !strings.Contains(s, "# y-cluster-lifetime-alice-dev1") { + t.Errorf("at script missing disarm marker: %s", s) + } +} + +func TestReapInvocation(t *testing.T) { + got := reapInvocation("y-cluster", "local") + want := []string{"y-cluster", "lifetime", "reap", "--context=local"} + if strings.Join(got, " ") != strings.Join(want, " ") { + t.Errorf("reapInvocation = %v, want %v", got, want) + } +} diff --git a/pkg/provision/config/common.go b/pkg/provision/config/common.go index c6df467..76a8a81 100644 --- a/pkg/provision/config/common.go +++ b/pkg/provision/config/common.go @@ -30,6 +30,8 @@ // keys is portable across providers package config +import "time" + // Provider IDs. Single source of truth for both the per-provider // `Validate()` checks and the `enum` constraint on // CommonConfig.Provider — schemagen reads AllProviders to build @@ -54,16 +56,87 @@ var AllProviders = []string{ProviderDocker, ProviderMultipass, ProviderQEMU} // Per-provider Validate() must call validateCommon to enforce the // shared invariants (provider discriminator, k3s.version present). type CommonConfig struct { - Provider string `yaml:"provider" json:"provider" jsonschema:"description=Provisioner to use. Optional in the common schema -- when omitted at provision time the host is probed (multipass daemon reachable -> multipass; Linux+/dev/kvm+qemu-system-x86_64 -> qemu; else reachable docker daemon -> docker). Per-provider schemas narrow this to a single literal and keep it required."` - Name string `yaml:"name,omitempty" json:"name,omitempty" jsonschema:"default=y-cluster,description=Cluster instance identifier; used as the docker container name / qemu -name / kubeconfig cluster name / prefix for cache files."` - Context string `yaml:"context,omitempty" json:"context,omitempty" jsonschema:"default=local,description=kubeconfig context name to write."` - Memory string `yaml:"memory,omitempty" json:"memory,omitempty" jsonschema:"default=8192,description=Memory in MB. qemu allocates this to the VM; docker passes it to --memory."` - CPUs string `yaml:"cpus,omitempty" json:"cpus,omitempty" jsonschema:"default=4,description=vCPU count. qemu sets -smp; docker passes --cpus."` - K3s K3sConfig `yaml:"k3s,omitempty" json:"k3s,omitempty" jsonschema:"description=k3s install settings. Defaults track pkg/provision/config/k3s.yaml."` - PortForwards []PortForward `yaml:"portForwards,omitempty" json:"portForwards,omitempty" jsonschema:"description=Host->guest TCP port forwards. Defaults to 6443/80/443 when omitted. Must include a guest:6443 entry so the host's kubectl can reach the API server."` - Registries Registries `yaml:"registries,omitempty" json:"registries,omitempty" jsonschema:"description=k3s registries.yaml content. Written to /etc/rancher/k3s/registries.yaml on the node before k3s starts. ${VAR} substitution is supported on credential and endpoint fields."` - Gateway GatewayConfig `yaml:"gateway,omitempty" json:"gateway,omitempty" jsonschema:"description=Bundled Envoy Gateway install. Skip the install entirely (no CRDs, controller, or GatewayClass) by setting skip:true; rename the default GatewayClass via name."` - Storage StorageConfig `yaml:"storage,omitempty" json:"storage,omitempty" jsonschema:"description=Bundled local-path-provisioner install. Defaults give a predictable on-disk layout (/data/yolean/_) and Retain reclaim so PV content survives PVC delete and an appliance upgrade rebinds the same directory by name."` + Provider string `yaml:"provider" json:"provider" jsonschema:"description=Provisioner to use. Optional in the common schema -- when omitted at provision time the host is probed (multipass daemon reachable -> multipass; Linux+/dev/kvm+qemu-system-x86_64 -> qemu; else reachable docker daemon -> docker). Per-provider schemas narrow this to a single literal and keep it required."` + Name string `yaml:"name,omitempty" json:"name,omitempty" jsonschema:"default=y-cluster,description=Cluster instance identifier; used as the docker container name / qemu -name / kubeconfig cluster name / prefix for cache files."` + Context string `yaml:"context,omitempty" json:"context,omitempty" jsonschema:"default=local,description=kubeconfig context name to write."` + Memory string `yaml:"memory,omitempty" json:"memory,omitempty" jsonschema:"default=8192,description=Memory in MB. qemu allocates this to the VM; docker passes it to --memory."` + CPUs string `yaml:"cpus,omitempty" json:"cpus,omitempty" jsonschema:"default=4,description=vCPU count. qemu sets -smp; docker passes --cpus."` + K3s K3sConfig `yaml:"k3s,omitempty" json:"k3s,omitempty" jsonschema:"description=k3s install settings. Defaults track pkg/provision/config/k3s.yaml."` + PortForwards []PortForward `yaml:"portForwards,omitempty" json:"portForwards,omitempty" jsonschema:"description=Host->guest TCP port forwards. Defaults to 6443/80/443 when omitted. Must include a guest:6443 entry so the host's kubectl can reach the API server."` + Registries Registries `yaml:"registries,omitempty" json:"registries,omitempty" jsonschema:"description=k3s registries.yaml content. Written to /etc/rancher/k3s/registries.yaml on the node before k3s starts. ${VAR} substitution is supported on credential and endpoint fields."` + Gateway GatewayConfig `yaml:"gateway,omitempty" json:"gateway,omitempty" jsonschema:"description=Bundled Envoy Gateway install. Skip the install entirely (no CRDs, controller, or GatewayClass) by setting skip:true; rename the default GatewayClass via name."` + Storage StorageConfig `yaml:"storage,omitempty" json:"storage,omitempty" jsonschema:"description=Bundled local-path-provisioner install. Defaults give a predictable on-disk layout (/data/yolean/_) and Retain reclaim so PV content survives PVC delete and an appliance upgrade rebinds the same directory by name."` + Lifetime LifetimeConfig `yaml:"lifetime,omitempty" json:"lifetime,omitempty" jsonschema:"description=Cost-control auto-expiry. maxRun sets a wall-clock budget counted from when the cluster STARTS (not from provision); on expiry a local cluster runs onExpiry (stop by default) and a GCP appliance is deleted by GCP-native max-run-duration. Empty maxRun disables."` +} + +// LifetimeConfig is the cluster-level cost-control policy. A dev +// cluster left running after a task is paused or finished is pure +// cost (host RAM/CPU locally, hourly billing in cloud); a lifetime +// makes the cluster expire on its own. +// +// The budget is always counted from when the cluster STARTS, never +// from provision time. This matters for the appliance flow: a disk +// is provisioned, exported, then imported and booted cloud-side +// possibly days later -- the countdown must begin at that boot. On +// the GCP path this falls out for free because the duration is +// handed to GCP's native max-run-duration, which GCP measures from +// instance start; locally the deadline is recomputed on each +// `y-cluster start`. +// +// MaxRun empty (or "0") disables the whole feature -- a cluster +// with no lifetime runs until manually stopped, the historical +// behaviour. +type LifetimeConfig struct { + // MaxRun is the wall-clock budget as a Go duration string + // (e.g. "8h", "90m", "24h"). Empty disables. Validated to + // parse via time.ParseDuration and be strictly positive. + MaxRun string `yaml:"maxRun,omitempty" json:"maxRun,omitempty" jsonschema:"description=Wall-clock budget as a Go duration such as 8h or 90m. Counted from cluster start. Empty disables auto-expiry."` + + // OnExpiry is the action a LOCAL cluster takes when MaxRun + // elapses: stop (graceful, disk preserved -- the default and + // cheapest reversible action), pause (SIGSTOP; RAM stays + // reserved), or teardown (delete). Ignored on the GCP + // appliance path, which always decommissions via instance + // delete. Defaulted to stop when MaxRun is set. + OnExpiry string `yaml:"onExpiry,omitempty" json:"onExpiry,omitempty" jsonschema:"enum=stop,enum=pause,enum=teardown,default=stop,description=Local action on expiry. Ignored on the GCP appliance path (always deletes the instance)."` +} + +// Lifetime action names. Single source of truth for the OnExpiry +// enum and the reaper's dispatch switch. +const ( + OnExpiryStop = "stop" + OnExpiryPause = "pause" + OnExpiryTeardown = "teardown" +) + +// AllOnExpiry is the canonical OnExpiry value list, used by +// validation error messages. +var AllOnExpiry = []string{OnExpiryStop, OnExpiryPause, OnExpiryTeardown} + +// LifetimePolicy returns the configured lifetime. Promoted to every +// provider config via CommonConfig embedding, so a caller holding an +// `any` from LoadProvision can read the budget without switching on +// the concrete provider type. +func (c CommonConfig) LifetimePolicy() LifetimeConfig { return c.Lifetime } + +// Enabled reports whether a lifetime budget is configured. +func (l LifetimeConfig) Enabled() bool { + return l.MaxRun != "" && l.MaxRun != "0" +} + +// MaxRunDuration parses MaxRun. Returns (0, nil) when disabled so +// callers can treat "no lifetime" and "zero budget" uniformly; a +// non-nil error means MaxRun is set but unparseable, which Validate +// rejects up front. +func (l LifetimeConfig) MaxRunDuration() (time.Duration, error) { + if !l.Enabled() { + return 0, nil + } + d, err := time.ParseDuration(l.MaxRun) + if err != nil { + return 0, errInvalid("lifetime.maxRun %q is not a valid Go duration (e.g. 8h, 90m): %w", l.MaxRun, err) + } + return d, nil } // StorageConfig controls the local-path-provisioner install that @@ -328,9 +401,34 @@ func (c *CommonConfig) validateCommon(expected string) error { if c.K3s.Version == "" { return errInvalid("k3s.version is empty; check pkg/provision/config/k3s.yaml") } + if err := c.Lifetime.validate(); err != nil { + return err + } return nil } +// validate enforces the lifetime invariants. A disabled lifetime +// (empty MaxRun) is always valid. When set, MaxRun must parse to a +// strictly positive duration and OnExpiry must name a known action. +func (l LifetimeConfig) validate() error { + if !l.Enabled() { + return nil + } + d, err := l.MaxRunDuration() + if err != nil { + return err + } + if d <= 0 { + return errInvalid("lifetime.maxRun must be positive, got %q", l.MaxRun) + } + switch l.OnExpiry { + case "", OnExpiryStop, OnExpiryPause, OnExpiryTeardown: + return nil + default: + return errInvalid("lifetime.onExpiry must be one of %v, got %q", AllOnExpiry, l.OnExpiry) + } +} + // requireHostAPIPort enforces the guest:6443 PortForwards invariant // for host-tunneled providers. qemu and docker call this from their // own Validate; multipass does not because the host dials the VM IP diff --git a/pkg/provision/config/lifetime_test.go b/pkg/provision/config/lifetime_test.go new file mode 100644 index 0000000..b574b24 --- /dev/null +++ b/pkg/provision/config/lifetime_test.go @@ -0,0 +1,113 @@ +package config + +import ( + "strings" + "testing" + "time" +) + +func TestLifetime_DisabledByDefault(t *testing.T) { + c := &QEMUConfig{CommonConfig: CommonConfig{Provider: ProviderQEMU}} + c.ApplyDefaults() + if c.Lifetime.Enabled() { + t.Fatalf("lifetime should be disabled when maxRun is unset, got %+v", c.Lifetime) + } + if err := c.Validate(); err != nil { + t.Fatalf("disabled lifetime must validate, got %v", err) + } +} + +func TestLifetime_DefaultsOnExpiryStop(t *testing.T) { + c := &QEMUConfig{CommonConfig: CommonConfig{ + Provider: ProviderQEMU, + Lifetime: LifetimeConfig{MaxRun: "8h"}, + }} + c.ApplyDefaults() + if !c.Lifetime.Enabled() { + t.Fatal("lifetime should be enabled when maxRun set") + } + if c.Lifetime.OnExpiry != OnExpiryStop { + t.Fatalf("OnExpiry default: got %q want %q", c.Lifetime.OnExpiry, OnExpiryStop) + } + if err := c.Validate(); err != nil { + t.Fatalf("valid lifetime rejected: %v", err) + } +} + +func TestLifetime_RespectsExplicitOnExpiry(t *testing.T) { + c := &QEMUConfig{CommonConfig: CommonConfig{ + Provider: ProviderQEMU, + Lifetime: LifetimeConfig{MaxRun: "1h", OnExpiry: OnExpiryTeardown}, + }} + c.ApplyDefaults() + if c.Lifetime.OnExpiry != OnExpiryTeardown { + t.Fatalf("explicit OnExpiry overridden: %q", c.Lifetime.OnExpiry) + } + if err := c.Validate(); err != nil { + t.Fatalf("valid lifetime rejected: %v", err) + } +} + +func TestLifetime_MaxRunDuration(t *testing.T) { + tests := []struct { + in string + want time.Duration + ok bool + }{ + {"", 0, true}, + {"0", 0, true}, + {"8h", 8 * time.Hour, true}, + {"90m", 90 * time.Minute, true}, + {"banana", 0, false}, + } + for _, tt := range tests { + d, err := LifetimeConfig{MaxRun: tt.in}.MaxRunDuration() + if tt.ok && err != nil { + t.Errorf("MaxRunDuration(%q): unexpected error %v", tt.in, err) + continue + } + if !tt.ok && err == nil { + t.Errorf("MaxRunDuration(%q): expected error, got nil", tt.in) + continue + } + if tt.ok && d != tt.want { + t.Errorf("MaxRunDuration(%q) = %v, want %v", tt.in, d, tt.want) + } + } +} + +func TestLifetime_Validate_BadDuration(t *testing.T) { + c := &QEMUConfig{CommonConfig: CommonConfig{ + Provider: ProviderQEMU, + Lifetime: LifetimeConfig{MaxRun: "lol"}, + }} + c.ApplyDefaults() + err := c.Validate() + if err == nil || !strings.Contains(err.Error(), "maxRun") { + t.Fatalf("want maxRun parse error, got %v", err) + } +} + +func TestLifetime_Validate_NegativeDuration(t *testing.T) { + c := &QEMUConfig{CommonConfig: CommonConfig{ + Provider: ProviderQEMU, + Lifetime: LifetimeConfig{MaxRun: "-5m"}, + }} + c.ApplyDefaults() + err := c.Validate() + if err == nil || !strings.Contains(err.Error(), "positive") { + t.Fatalf("want positive-duration error, got %v", err) + } +} + +func TestLifetime_Validate_BadOnExpiry(t *testing.T) { + c := &QEMUConfig{CommonConfig: CommonConfig{ + Provider: ProviderQEMU, + Lifetime: LifetimeConfig{MaxRun: "8h", OnExpiry: "explode"}, + }} + c.ApplyDefaults() + err := c.Validate() + if err == nil || !strings.Contains(err.Error(), "onExpiry") { + t.Fatalf("want onExpiry enum error, got %v", err) + } +} diff --git a/pkg/provision/qemu/lifecycle.go b/pkg/provision/qemu/lifecycle.go index 6a721bf..fdc469e 100644 --- a/pkg/provision/qemu/lifecycle.go +++ b/pkg/provision/qemu/lifecycle.go @@ -173,6 +173,15 @@ func Start(ctx context.Context, cacheDir, name string, logger *zap.Logger) (*Clu if err := c.Kubeconfig.Import(rawKubeconfig); err != nil { return nil, fmt.Errorf("merge kubeconfig: %w", err) } + // Re-anchor the auto-expiry deadline to this start. A + // stopped-then-started cluster gets a fresh budget; this is the + // "count from when the cluster starts" guarantee (no-op when no + // lifetime is configured). + if deadline, err := armLifetime(cacheDir, name); err != nil { + logger.Warn("could not re-arm lifetime deadline on start", zap.Error(err)) + } else if !deadline.IsZero() { + logger.Info("lifetime armed", zap.Time("expiresAt", deadline)) + } logger.Info("k3s ready", zap.String("context", c.cfg.Context)) return c, nil } diff --git a/pkg/provision/qemu/lifetime.go b/pkg/provision/qemu/lifetime.go new file mode 100644 index 0000000..ba29989 --- /dev/null +++ b/pkg/provision/qemu/lifetime.go @@ -0,0 +1,51 @@ +package qemu + +import ( + "fmt" + "time" + + "go.uber.org/zap" +) + +// This file is the exported auto-expiry surface the cmd layer's +// `y-cluster lifetime` verb drives. The deadline math and sidecar +// persistence live in state.go (unexported); these wrappers keep the +// command package free of the sidecar's internals. + +// LoadLifetime returns the persisted auto-expiry state for the named +// cluster (policy + absolute deadline). A missing sidecar surfaces +// as the underlying os error so the caller can render a clear +// "qemu-only / not provisioned" message. +func LoadLifetime(cacheDir, name string) (LifetimeState, error) { + return loadLifetime(cacheDir, name) +} + +// ExtendDeadline pushes the persisted deadline out by d and returns +// the new deadline. It errors when no deadline is armed, since +// "extend" only makes sense against an existing budget. +func ExtendDeadline(cacheDir, name string, d time.Duration) (time.Time, error) { + ls, err := loadLifetime(cacheDir, name) + if err != nil { + return time.Time{}, err + } + if ls.ExpiresAt.IsZero() { + return time.Time{}, fmt.Errorf("no lifetime deadline armed for %q; nothing to extend", name) + } + nt := ls.ExpiresAt.Add(d) + if err := setExpiresAt(cacheDir, name, nt); err != nil { + return time.Time{}, err + } + return nt, nil +} + +// TeardownByName tears down a cluster identified by its cache sidecar +// rather than a freshly loaded config dir. Used by the `onExpiry: +// teardown` reap action, which only knows the cluster name + cache +// dir. keepDisk is forwarded to the provider teardown. +func TeardownByName(cacheDir, name string, keepDisk bool, logger *zap.Logger) error { + cfg, err := loadState(cacheDir, name) + if err != nil { + return fmt.Errorf("load state for teardown of %q: %w", name, err) + } + return TeardownConfig(cfg, keepDisk, logger) +} diff --git a/pkg/provision/qemu/lifetime_state_test.go b/pkg/provision/qemu/lifetime_state_test.go new file mode 100644 index 0000000..ba476ff --- /dev/null +++ b/pkg/provision/qemu/lifetime_state_test.go @@ -0,0 +1,179 @@ +package qemu + +import ( + "testing" + "time" +) + +// pinClock pins nowFunc for the duration of a test and restores it. +func pinClock(t *testing.T, at time.Time) { + t.Helper() + prev := nowFunc + nowFunc = func() time.Time { return at } + t.Cleanup(func() { nowFunc = prev }) +} + +func TestArmLifetime_Disabled(t *testing.T) { + dir := t.TempDir() + cfg := Config{Name: "c", CacheDir: dir} // no Lifetime + if err := saveState(cfg); err != nil { + t.Fatal(err) + } + deadline, err := armLifetime(dir, "c") + if err != nil { + t.Fatal(err) + } + if !deadline.IsZero() { + t.Fatalf("disabled lifetime should arm no deadline, got %v", deadline) + } + ls, err := loadLifetime(dir, "c") + if err != nil { + t.Fatal(err) + } + if ls.Enabled() || !ls.ExpiresAt.IsZero() { + t.Fatalf("expected no lifetime state, got %+v", ls) + } +} + +func TestArmLifetime_AnchorsToNow(t *testing.T) { + dir := t.TempDir() + base := time.Date(2026, 6, 21, 12, 0, 0, 0, time.UTC) + pinClock(t, base) + + cfg := Config{Name: "c", CacheDir: dir, Lifetime: "8h", OnExpiry: "stop"} + if err := saveState(cfg); err != nil { + t.Fatal(err) + } + deadline, err := armLifetime(dir, "c") + if err != nil { + t.Fatal(err) + } + want := base.Add(8 * time.Hour) + if !deadline.Equal(want) { + t.Fatalf("deadline = %v, want %v", deadline, want) + } + + ls, err := loadLifetime(dir, "c") + if err != nil { + t.Fatal(err) + } + if ls.MaxRun != "8h" || ls.OnExpiry != "stop" { + t.Fatalf("policy not persisted: %+v", ls) + } + if !ls.ExpiresAt.Equal(want) { + t.Fatalf("persisted ExpiresAt = %v, want %v", ls.ExpiresAt, want) + } +} + +func TestArmLifetime_ReanchorsOnReArm(t *testing.T) { + dir := t.TempDir() + cfg := Config{Name: "c", CacheDir: dir, Lifetime: "2h", OnExpiry: "stop"} + if err := saveState(cfg); err != nil { + t.Fatal(err) + } + + t0 := time.Date(2026, 6, 21, 10, 0, 0, 0, time.UTC) + pinClock(t, t0) + if _, err := armLifetime(dir, "c"); err != nil { + t.Fatal(err) + } + + // Simulate stop+start three hours later: re-arm gives a fresh + // window from the new "now", not from the original provision. + t1 := t0.Add(3 * time.Hour) + pinClock(t, t1) + deadline, err := armLifetime(dir, "c") + if err != nil { + t.Fatal(err) + } + if want := t1.Add(2 * time.Hour); !deadline.Equal(want) { + t.Fatalf("re-armed deadline = %v, want %v", deadline, want) + } +} + +func TestLifetimeState_ExpiredAndRemaining(t *testing.T) { + dir := t.TempDir() + cfg := Config{Name: "c", CacheDir: dir, Lifetime: "1h", OnExpiry: "stop"} + if err := saveState(cfg); err != nil { + t.Fatal(err) + } + t0 := time.Date(2026, 6, 21, 9, 0, 0, 0, time.UTC) + pinClock(t, t0) + if _, err := armLifetime(dir, "c"); err != nil { + t.Fatal(err) + } + + // 30m in: not expired, ~30m remaining. + pinClock(t, t0.Add(30*time.Minute)) + ls, err := loadLifetime(dir, "c") + if err != nil { + t.Fatal(err) + } + if ls.Expired() { + t.Fatal("should not be expired at 30m of a 1h budget") + } + if r := ls.Remaining(); r != 30*time.Minute { + t.Fatalf("Remaining = %v, want 30m", r) + } + + // 90m in: expired, negative remaining. + pinClock(t, t0.Add(90*time.Minute)) + ls, err = loadLifetime(dir, "c") + if err != nil { + t.Fatal(err) + } + if !ls.Expired() { + t.Fatal("should be expired at 90m of a 1h budget") + } + if r := ls.Remaining(); r >= 0 { + t.Fatalf("Remaining = %v, want negative", r) + } +} + +func TestSetExpiresAt_Extend(t *testing.T) { + dir := t.TempDir() + cfg := Config{Name: "c", CacheDir: dir, Lifetime: "1h", OnExpiry: "stop"} + if err := saveState(cfg); err != nil { + t.Fatal(err) + } + t0 := time.Date(2026, 6, 21, 9, 0, 0, 0, time.UTC) + pinClock(t, t0) + if _, err := armLifetime(dir, "c"); err != nil { + t.Fatal(err) + } + ls, err := loadLifetime(dir, "c") + if err != nil { + t.Fatal(err) + } + extended := ls.ExpiresAt.Add(2 * time.Hour) + if err := setExpiresAt(dir, "c", extended); err != nil { + t.Fatal(err) + } + ls, err = loadLifetime(dir, "c") + if err != nil { + t.Fatal(err) + } + if !ls.ExpiresAt.Equal(extended) { + t.Fatalf("extended ExpiresAt = %v, want %v", ls.ExpiresAt, extended) + } + // Extend must preserve the policy. + if ls.MaxRun != "1h" || ls.OnExpiry != "stop" { + t.Fatalf("extend clobbered policy: %+v", ls) + } +} + +// Old sidecars (no lifetime fields) decode to "no lifetime". +func TestLoadLifetime_LegacySidecar(t *testing.T) { + dir := t.TempDir() + cfg := Config{Name: "c", CacheDir: dir} // no lifetime at all + if err := saveState(cfg); err != nil { + t.Fatal(err) + } + ls, err := loadLifetime(dir, "c") + if err != nil { + t.Fatal(err) + } + if ls.Enabled() || !ls.ExpiresAt.IsZero() { + t.Fatalf("legacy sidecar should yield empty lifetime, got %+v", ls) + } +} diff --git a/pkg/provision/qemu/qemu.go b/pkg/provision/qemu/qemu.go index bfb58c5..c9eb93b 100644 --- a/pkg/provision/qemu/qemu.go +++ b/pkg/provision/qemu/qemu.go @@ -66,6 +66,13 @@ type Config struct { // Provision creates the file if missing; Teardown leaves it. DataDisk string DataDiskSize string + + // Lifetime/OnExpiry are the cost-control auto-expiry policy + // (Go duration string + local action). Persisted to the state + // sidecar; the absolute deadline is armed separately so it + // anchors to start, not provision. Empty Lifetime disables. + Lifetime string + OnExpiry string } // K3s carries the runtime view of K3sConfig: which version to @@ -178,6 +185,8 @@ func FromConfig(c *config.QEMUConfig) Config { Storage: c.Storage, DataDisk: dataDisk, DataDiskSize: dataDiskSize, + Lifetime: c.Lifetime.MaxRun, + OnExpiry: c.Lifetime.OnExpiry, } } @@ -349,6 +358,17 @@ func Provision(ctx context.Context, cfg Config, logger *zap.Logger) (*Cluster, e logger.Warn("could not save state sidecar (start will not work without it)", zap.Error(err)) } + // Arm the auto-expiry deadline (no-op when no lifetime budget). + // Anchored to now: the clock starts when the cluster comes up, + // which for provision is also "now" but for the appliance start + // path is the meaningful anchor (the disk may have been built + // long before this boot). + if deadline, err := armLifetime(cfg.CacheDir, cfg.Name); err != nil { + logger.Warn("could not arm lifetime deadline", zap.Error(err)) + } else if !deadline.IsZero() { + logger.Info("lifetime armed", zap.Time("expiresAt", deadline), zap.String("onExpiry", cfg.OnExpiry)) + } + // Wait for SSH if err := c.waitForSSH(ctx); err != nil { return nil, err @@ -705,11 +725,11 @@ func (c *Cluster) DiskPath() string { // inputPath: // // - .vmdk -> qemu-img convert -f vmdk -O qcow2 (the original -// VMware-import path; vmdk subformat doesn't matter -// because qemu-img -f vmdk auto-detects the variant). +// VMware-import path; vmdk subformat doesn't matter +// because qemu-img -f vmdk auto-detects the variant). // - .qcow2 -> qemu-img convert -f qcow2 -O qcow2 (rewrites the -// qcow2 into the cache layout; usually a quick -// copy + compaction, no format change). +// qcow2 into the cache layout; usually a quick +// copy + compaction, no format change). // // A local-qemu e2e loop that does `y-cluster export // --format=qcow2 ... | y-cluster import` doesn't need any diff --git a/pkg/provision/qemu/state.go b/pkg/provision/qemu/state.go index 784c166..4303b08 100644 --- a/pkg/provision/qemu/state.go +++ b/pkg/provision/qemu/state.go @@ -5,8 +5,13 @@ import ( "fmt" "os" "path/filepath" + "time" ) +// nowFunc is the wall clock used for lifetime deadline math. A +// package var so tests can pin it; production uses time.Now. +var nowFunc = time.Now + // stateVersion guards forward-compat: a newer y-cluster reading an // older sidecar bails out with a clear error rather than guessing. // Bump when the schema changes incompatibly. @@ -33,6 +38,19 @@ type savedState struct { Context string `json:"context"` CacheDir string `json:"cacheDir"` K3s K3s `json:"k3s"` + + // Lifetime fields are additive (added after stateVersion 1) + // and all omitempty, so an old sidecar without them decodes + // cleanly to "no lifetime" and an old binary ignores them. + // They are NOT a reason to bump stateVersion. + // + // Lifetime/OnExpiry are the policy copied from config at + // provision; ExpiresAt is the absolute deadline, anchored to + // the most recent provision/start (not to provision alone -- + // an appliance disk may boot long after it was built). + Lifetime string `json:"lifetime,omitempty"` + OnExpiry string `json:"onExpiry,omitempty"` + ExpiresAt string `json:"expiresAt,omitempty"` // RFC3339; empty = no deadline } // statePath returns the sidecar path for (cacheDir, name). @@ -41,8 +59,10 @@ func statePath(cacheDir, name string) string { } // saveState writes the launch-relevant subset of cfg to the -// sidecar. Atomic via a .tmp+rename so a crash mid-write doesn't -// leave a half-written file Start would later fail to parse. +// sidecar, including the lifetime policy (Lifetime/OnExpiry). The +// ExpiresAt deadline is NOT set here -- it is armed separately by +// armLifetime so the deadline anchors to start, and so `extend` can +// move it without rewriting launch state. Atomic via .tmp+rename. func saveState(cfg Config) error { s := savedState{ Version: stateVersion, @@ -55,12 +75,21 @@ func saveState(cfg Config) error { Context: cfg.Context, CacheDir: cfg.CacheDir, K3s: cfg.K3s, + Lifetime: cfg.Lifetime, + OnExpiry: cfg.OnExpiry, } + return writeSidecar(cfg.CacheDir, cfg.Name, s) +} + +// writeSidecar atomically marshals s to /.json via +// a .tmp+rename so a crash mid-write doesn't leave a half-written +// file Start would later fail to parse. +func writeSidecar(cacheDir, name string, s savedState) error { data, err := json.MarshalIndent(s, "", " ") if err != nil { return err } - path := statePath(cfg.CacheDir, cfg.Name) + path := statePath(cacheDir, name) tmp := path + ".tmp" if err := os.WriteFile(tmp, data, 0o644); err != nil { return fmt.Errorf("write %s: %w", tmp, err) @@ -72,25 +101,35 @@ func saveState(cfg Config) error { return nil } -// loadState reads /.json and rehydrates a runtime -// Config. Kubeconfig is re-resolved from $KUBECONFIG at call time -// rather than persisted -- it's an environmental concern that -// shouldn't bake into the sidecar. -func loadState(cacheDir, name string) (Config, error) { +// readSidecar reads and version-checks the raw sidecar. Shared by +// loadState and the lifetime helpers. +func readSidecar(cacheDir, name string) (savedState, error) { path := statePath(cacheDir, name) data, err := os.ReadFile(path) if err != nil { - return Config{}, err + return savedState{}, err } var s savedState if err := json.Unmarshal(data, &s); err != nil { - return Config{}, fmt.Errorf("parse %s: %w", path, err) + return savedState{}, fmt.Errorf("parse %s: %w", path, err) } if s.Version != stateVersion { - return Config{}, fmt.Errorf( + return savedState{}, fmt.Errorf( "%s: unsupported state version %d (want %d); re-provision to refresh", path, s.Version, stateVersion) } + return s, nil +} + +// loadState reads /.json and rehydrates a runtime +// Config. Kubeconfig is re-resolved from $KUBECONFIG at call time +// rather than persisted -- it's an environmental concern that +// shouldn't bake into the sidecar. +func loadState(cacheDir, name string) (Config, error) { + s, err := readSidecar(cacheDir, name) + if err != nil { + return Config{}, err + } return Config{ Name: s.Name, DiskSize: s.DiskSize, @@ -102,9 +141,91 @@ func loadState(cacheDir, name string) (Config, error) { CacheDir: s.CacheDir, Kubeconfig: os.Getenv("KUBECONFIG"), K3s: s.K3s, + Lifetime: s.Lifetime, + OnExpiry: s.OnExpiry, }, nil } +// LifetimeState is the persisted auto-expiry view of a cluster. +type LifetimeState struct { + // MaxRun is the configured budget (Go duration string), empty + // when no lifetime is set. + MaxRun string + // OnExpiry is the local action at the deadline. + OnExpiry string + // ExpiresAt is the absolute deadline; zero when unset. + ExpiresAt time.Time +} + +// Enabled reports whether a budget is configured. +func (l LifetimeState) Enabled() bool { return l.MaxRun != "" && l.MaxRun != "0" } + +// Remaining is the time left until the deadline; negative when past +// due, zero when no deadline is armed. +func (l LifetimeState) Remaining() time.Duration { + if l.ExpiresAt.IsZero() { + return 0 + } + return l.ExpiresAt.Sub(nowFunc()) +} + +// Expired reports whether an armed deadline is at or past now. +func (l LifetimeState) Expired() bool { + return !l.ExpiresAt.IsZero() && !nowFunc().Before(l.ExpiresAt) +} + +// loadLifetime returns the persisted lifetime policy + deadline. +func loadLifetime(cacheDir, name string) (LifetimeState, error) { + s, err := readSidecar(cacheDir, name) + if err != nil { + return LifetimeState{}, err + } + ls := LifetimeState{MaxRun: s.Lifetime, OnExpiry: s.OnExpiry} + if s.ExpiresAt != "" { + t, err := time.Parse(time.RFC3339, s.ExpiresAt) + if err != nil { + return LifetimeState{}, fmt.Errorf("parse expiresAt %q: %w", s.ExpiresAt, err) + } + ls.ExpiresAt = t + } + return ls, nil +} + +// armLifetime sets ExpiresAt = now + MaxRun, anchoring the deadline +// to the current moment (provision or start). It is a no-op that +// returns a zero deadline when the cluster has no lifetime budget. +// Returns the new deadline so callers can schedule a host timer. +func armLifetime(cacheDir, name string) (time.Time, error) { + s, err := readSidecar(cacheDir, name) + if err != nil { + return time.Time{}, err + } + if s.Lifetime == "" || s.Lifetime == "0" { + return time.Time{}, nil + } + d, err := time.ParseDuration(s.Lifetime) + if err != nil { + return time.Time{}, fmt.Errorf("parse lifetime %q: %w", s.Lifetime, err) + } + deadline := nowFunc().Add(d) + s.ExpiresAt = deadline.Format(time.RFC3339) + if err := writeSidecar(cacheDir, name, s); err != nil { + return time.Time{}, err + } + return deadline, nil +} + +// setExpiresAt persists an explicit deadline, used by `extend` to +// push the deadline out without re-anchoring to now. +func setExpiresAt(cacheDir, name string, t time.Time) error { + s, err := readSidecar(cacheDir, name) + if err != nil { + return err + } + s.ExpiresAt = t.Format(time.RFC3339) + return writeSidecar(cacheDir, name, s) +} + // removeState deletes the sidecar and ignores "not present" // errors so teardown is idempotent. func removeState(cacheDir, name string) error { diff --git a/pkg/provision/schema/common.schema.json b/pkg/provision/schema/common.schema.json index 8b8d0f2..0474a72 100644 --- a/pkg/provision/schema/common.schema.json +++ b/pkg/provision/schema/common.schema.json @@ -21,6 +21,10 @@ "$ref": "#/$defs/K3sConfig", "description": "k3s install settings. Defaults track pkg/provision/config/k3s.yaml." }, + "lifetime": { + "$ref": "#/$defs/LifetimeConfig", + "description": "Cost-control auto-expiry. maxRun sets a wall-clock budget counted from when the cluster STARTS (not from provision); on expiry a local cluster runs onExpiry (stop by default) and a GCP appliance is deleted by GCP-native max-run-duration. Empty maxRun disables." + }, "memory": { "default": "8192", "description": "Memory in MB. qemu allocates this to the VM; docker passes it to --memory.", @@ -111,6 +115,26 @@ }, "type": "object" }, + "LifetimeConfig": { + "additionalProperties": false, + "properties": { + "maxRun": { + "description": "Wall-clock budget as a Go duration such as 8h or 90m. Counted from cluster start. Empty disables auto-expiry.", + "type": "string" + }, + "onExpiry": { + "default": "stop", + "description": "Local action on expiry. Ignored on the GCP appliance path (always deletes the instance).", + "enum": [ + "stop", + "pause", + "teardown" + ], + "type": "string" + } + }, + "type": "object" + }, "PortForward": { "additionalProperties": false, "properties": { diff --git a/pkg/provision/schema/docker.schema.json b/pkg/provision/schema/docker.schema.json index 796b644..ceee6c1 100644 --- a/pkg/provision/schema/docker.schema.json +++ b/pkg/provision/schema/docker.schema.json @@ -21,6 +21,10 @@ "$ref": "#/$defs/K3sConfig", "description": "k3s install settings. Defaults track pkg/provision/config/k3s.yaml." }, + "lifetime": { + "$ref": "#/$defs/LifetimeConfig", + "description": "Cost-control auto-expiry. maxRun sets a wall-clock budget counted from when the cluster STARTS (not from provision); on expiry a local cluster runs onExpiry (stop by default) and a GCP appliance is deleted by GCP-native max-run-duration. Empty maxRun disables." + }, "memory": { "default": "8192", "description": "Memory in MB. qemu allocates this to the VM; docker passes it to --memory.", @@ -110,6 +114,26 @@ }, "type": "object" }, + "LifetimeConfig": { + "additionalProperties": false, + "properties": { + "maxRun": { + "description": "Wall-clock budget as a Go duration such as 8h or 90m. Counted from cluster start. Empty disables auto-expiry.", + "type": "string" + }, + "onExpiry": { + "default": "stop", + "description": "Local action on expiry. Ignored on the GCP appliance path (always deletes the instance).", + "enum": [ + "stop", + "pause", + "teardown" + ], + "type": "string" + } + }, + "type": "object" + }, "PortForward": { "additionalProperties": false, "properties": { diff --git a/pkg/provision/schema/multipass.schema.json b/pkg/provision/schema/multipass.schema.json index df86eba..ee1336b 100644 --- a/pkg/provision/schema/multipass.schema.json +++ b/pkg/provision/schema/multipass.schema.json @@ -53,6 +53,26 @@ }, "type": "object" }, + "LifetimeConfig": { + "additionalProperties": false, + "properties": { + "maxRun": { + "description": "Wall-clock budget as a Go duration such as 8h or 90m. Counted from cluster start. Empty disables auto-expiry.", + "type": "string" + }, + "onExpiry": { + "default": "stop", + "description": "Local action on expiry. Ignored on the GCP appliance path (always deletes the instance).", + "enum": [ + "stop", + "pause", + "teardown" + ], + "type": "string" + } + }, + "type": "object" + }, "MultipassConfig": { "additionalProperties": false, "properties": { @@ -79,6 +99,10 @@ "$ref": "#/$defs/K3sConfig", "description": "k3s install settings. Defaults track pkg/provision/config/k3s.yaml." }, + "lifetime": { + "$ref": "#/$defs/LifetimeConfig", + "description": "Cost-control auto-expiry. maxRun sets a wall-clock budget counted from when the cluster STARTS (not from provision); on expiry a local cluster runs onExpiry (stop by default) and a GCP appliance is deleted by GCP-native max-run-duration. Empty maxRun disables." + }, "memory": { "default": "8192", "description": "Memory in MB. qemu allocates this to the VM; docker passes it to --memory.", diff --git a/pkg/provision/schema/qemu.schema.json b/pkg/provision/schema/qemu.schema.json index 9decf95..f76e77f 100644 --- a/pkg/provision/schema/qemu.schema.json +++ b/pkg/provision/schema/qemu.schema.json @@ -53,6 +53,26 @@ }, "type": "object" }, + "LifetimeConfig": { + "additionalProperties": false, + "properties": { + "maxRun": { + "description": "Wall-clock budget as a Go duration such as 8h or 90m. Counted from cluster start. Empty disables auto-expiry.", + "type": "string" + }, + "onExpiry": { + "default": "stop", + "description": "Local action on expiry. Ignored on the GCP appliance path (always deletes the instance).", + "enum": [ + "stop", + "pause", + "teardown" + ], + "type": "string" + } + }, + "type": "object" + }, "PortForward": { "additionalProperties": false, "properties": { @@ -109,6 +129,10 @@ "$ref": "#/$defs/K3sConfig", "description": "k3s install settings. Defaults track pkg/provision/config/k3s.yaml." }, + "lifetime": { + "$ref": "#/$defs/LifetimeConfig", + "description": "Cost-control auto-expiry. maxRun sets a wall-clock budget counted from when the cluster STARTS (not from provision); on expiry a local cluster runs onExpiry (stop by default) and a GCP appliance is deleted by GCP-native max-run-duration. Empty maxRun disables." + }, "memory": { "default": "8192", "description": "Memory in MB. qemu allocates this to the VM; docker passes it to --memory.", From bfb0fb60f0eb03e51672755b12987f47fb3139cb Mon Sep 17 00:00:00 2001 From: Yolean k8s-qa Date: Mon, 22 Jun 2026 06:38:39 +0000 Subject: [PATCH 2/3] fix(libguestfs): one durable remedy for unreadable host kernel libguestfs (virt-customize / virt-sysprep / virt-tar-out / virt-format) builds a supermin appliance from /boot/vmlinuz-$(uname -r). Ubuntu ships those images mode 0600, so after every kernel upgrade a fresh root-only image lands and the tools fail with the opaque "supermin exited with error status 1". Downstream users were left rediscovering workarounds, and the ones they found do not hold: a one-off `chmod` is lost on the next upgrade, and `dpkg-statoverride` is pinned to one versioned path (and easy to typo, e.g. vmlinux vs vmlinuz) so the next kernel arrives 0600 again. Standardize on one durable remedy and surface it from both places the failure can appear: - pkg/provision/qemu/libguestfs.go: requireReadableHostKernel() checks the running kernel image is readable before the libguestfs call sites (prepare-export's virt-customize/virt-tar-out; the data-disk virt-format preflight) and, if not, returns an actionable error whose fix is a /etc/kernel/postinst.d hook that re-applies 0644 on every future kernel. The binary previously had NO check here. - scripts/_check-host-kernel.sh: one sourced helper carrying the same message, replacing four divergent copy-pasted blocks (which recommended the fragile per-version dpkg-statoverride) across the appliance build / e2e scripts. Kept in sync with the Go message. Detect-and-print only: y-cluster never modifies the host. The remedy is a single copy-pasteable block the user runs once with sudo; the hook then covers all future kernels. Co-Authored-By: Claude Opus 4.8 (1M context) --- pkg/provision/qemu/data_disk.go | 5 ++ pkg/provision/qemu/libguestfs.go | 85 +++++++++++++++++++++++++++ pkg/provision/qemu/libguestfs_test.go | 63 ++++++++++++++++++++ pkg/provision/qemu/prepare_export.go | 6 ++ scripts/_check-host-kernel.sh | 37 ++++++++++++ 5 files changed, 196 insertions(+) create mode 100644 pkg/provision/qemu/libguestfs.go create mode 100644 pkg/provision/qemu/libguestfs_test.go create mode 100644 scripts/_check-host-kernel.sh diff --git a/pkg/provision/qemu/data_disk.go b/pkg/provision/qemu/data_disk.go index c67e096..10e31d7 100644 --- a/pkg/provision/qemu/data_disk.go +++ b/pkg/provision/qemu/data_disk.go @@ -90,5 +90,10 @@ func checkDataDiskTools(path string) error { "DataDisk %s does not exist and virt-format is not on PATH; "+ "install libguestfs-tools to let y-cluster create labeled data disks", path) } + // virt-format builds a supermin appliance from the host kernel; + // bail early with a durable fix if it isn't readable. + if err := requireReadableHostKernel(); err != nil { + return err + } return nil } diff --git a/pkg/provision/qemu/libguestfs.go b/pkg/provision/qemu/libguestfs.go new file mode 100644 index 0000000..32c3b52 --- /dev/null +++ b/pkg/provision/qemu/libguestfs.go @@ -0,0 +1,85 @@ +package qemu + +import ( + "fmt" + "os" + "strings" +) + +// kernelReadableHookName is the /etc/kernel/postinst.d/ filename the +// remediation suggests. Sorted late (zz-) so it runs after the +// distro hooks that lay the image down. +const kernelReadableHookName = "zz-vmlinuz-readable" + +// runningKernelRelease returns the running kernel's release string +// (the `uname -r` value) from /proc, and whether it could be read. +// Used to locate the host kernel image libguestfs needs. +func runningKernelRelease() (string, bool) { + b, err := os.ReadFile("/proc/sys/kernel/osrelease") + if err != nil { + return "", false + } + rel := strings.TrimSpace(string(b)) + if rel == "" { + return "", false + } + return rel, true +} + +// requireReadableHostKernel verifies the running kernel image is +// readable by the current process. libguestfs builds a supermin +// appliance from the host kernel, so virt-customize / virt-sysprep / +// virt-tar-out / virt-format all fail with the opaque "supermin +// exited with error status 1" when /boot/vmlinuz- is not +// readable. Ubuntu ships those images mode 0600, and a fresh 0600 +// image lands on every kernel upgrade -- which is why per-version +// chmod / dpkg-statoverride does not hold. The error surfaces a +// durable, copy-pasteable fix (a kernel postinst.d hook) so a +// downstream user fixes it once instead of rediscovering a +// workaround after every upgrade. +// +// Returns nil when the image is readable, when its path can't be +// determined, when it isn't found (we can't assert it's the +// blocker), or on a non-permission error -- in those cases we let +// libguestfs run and surface its own diagnostics rather than block +// on a false positive. +func requireReadableHostKernel() error { + rel, ok := runningKernelRelease() + if !ok { + return nil + } + return checkKernelReadable("/boot/vmlinuz-" + rel) +} + +// checkKernelReadable is the path-parameterized core of +// requireReadableHostKernel, split out so it can be tested against a +// temp file without depending on the host's real /boot. +func checkKernelReadable(path string) error { + f, err := os.Open(path) + if err == nil { + _ = f.Close() + return nil + } + if !os.IsPermission(err) { + return nil + } + return fmt.Errorf(`host kernel %s is not readable by this user, so libguestfs +(virt-customize / virt-sysprep / virt-tar-out / virt-format) will fail +building its supermin appliance with "supermin exited with error status 1". + +Ubuntu ships /boot/vmlinuz-* mode 0600, and a fresh 0600 image lands on +every kernel upgrade -- a one-off chmod or a per-version dpkg-statoverride +does not survive that. Install a kernel hook once so current and future +kernels stay readable (this makes vmlinuz world-readable): + + sudo tee /etc/kernel/postinst.d/%s >/dev/null <<'HOOK' +#!/bin/sh +# Keep installed kernels readable for libguestfs/supermin. +v="$1"; [ -n "$v" ] && [ -e "/boot/vmlinuz-$v" ] && chmod 0644 "/boot/vmlinuz-$v" +HOOK + sudo chmod 0755 /etc/kernel/postinst.d/%s + sudo chmod 0644 /boot/vmlinuz-* + +The hook re-applies on every future kernel; the chmod fixes the ones +already installed.`, path, kernelReadableHookName, kernelReadableHookName) +} diff --git a/pkg/provision/qemu/libguestfs_test.go b/pkg/provision/qemu/libguestfs_test.go new file mode 100644 index 0000000..772f261 --- /dev/null +++ b/pkg/provision/qemu/libguestfs_test.go @@ -0,0 +1,63 @@ +package qemu + +import ( + "os" + "path/filepath" + "strings" + "testing" +) + +func TestCheckKernelReadable_Readable(t *testing.T) { + dir := t.TempDir() + p := filepath.Join(dir, "vmlinuz-test") + if err := os.WriteFile(p, []byte("kernel"), 0o644); err != nil { + t.Fatal(err) + } + if err := checkKernelReadable(p); err != nil { + t.Fatalf("readable kernel should pass, got: %v", err) + } +} + +func TestCheckKernelReadable_Missing(t *testing.T) { + // A missing image is not treated as the blocker -- we let + // libguestfs surface its own error rather than false-positive. + if err := checkKernelReadable(filepath.Join(t.TempDir(), "nope")); err != nil { + t.Fatalf("missing kernel should not block, got: %v", err) + } +} + +func TestCheckKernelReadable_PermissionDenied(t *testing.T) { + if os.Geteuid() == 0 { + t.Skip("root bypasses DAC; permission-denied path is unobservable as root") + } + dir := t.TempDir() + p := filepath.Join(dir, "vmlinuz-locked") + if err := os.WriteFile(p, []byte("kernel"), 0o600); err != nil { + t.Fatal(err) + } + if err := os.Chmod(p, 0o000); err != nil { + t.Fatal(err) + } + t.Cleanup(func() { _ = os.Chmod(p, 0o644) }) + + err := checkKernelReadable(p) + if err == nil { + t.Fatal("unreadable kernel should return an actionable error") + } + msg := err.Error() + // The remediation must point at the durable postinst.d hook. + for _, want := range []string{p, "/etc/kernel/postinst.d/", "supermin", "chmod 0644 /boot/vmlinuz-*"} { + if !strings.Contains(msg, want) { + t.Errorf("error message missing %q; got:\n%s", want, msg) + } + } +} + +func TestRunningKernelRelease(t *testing.T) { + // On the Linux CI/dev host this should resolve; if /proc is + // unavailable the function returns ok=false and callers no-op. + rel, ok := runningKernelRelease() + if ok && strings.TrimSpace(rel) == "" { + t.Fatal("ok=true but release is empty") + } +} diff --git a/pkg/provision/qemu/prepare_export.go b/pkg/provision/qemu/prepare_export.go index 6d57ac9..5c22584 100644 --- a/pkg/provision/qemu/prepare_export.go +++ b/pkg/provision/qemu/prepare_export.go @@ -78,6 +78,12 @@ func PrepareExport(ctx context.Context, cacheDir, name string, logger *zap.Logge if _, err := exec.LookPath("virt-customize"); err != nil { return fmt.Errorf("virt-customize not found in PATH; install with: sudo apt install libguestfs-tools") } + // virt-customize (offline phase) and virt-tar-out (seed snapshot) + // both build a supermin appliance from the host kernel; bail early + // with a durable fix if it isn't readable. + if err := requireReadableHostKernel(); err != nil { + return err + } if _, err := exec.LookPath("kubectl"); err != nil { return fmt.Errorf("kubectl not found in PATH; install kubectl (prepare-export now snapshots reconciled Gateway state, which needs kubectl)") } diff --git a/scripts/_check-host-kernel.sh b/scripts/_check-host-kernel.sh new file mode 100644 index 0000000..a9567f8 --- /dev/null +++ b/scripts/_check-host-kernel.sh @@ -0,0 +1,37 @@ +# shellcheck shell=bash +# Sourced by the appliance build / e2e scripts before any libguestfs +# (virt-sysprep / virt-customize) work, to fail fast with a DURABLE +# fix when the running kernel image is not readable. libguestfs builds +# a supermin appliance from the host kernel, and Ubuntu ships +# /boot/vmlinuz-* mode 0600, so a fresh 0600 image lands on every +# kernel upgrade. This message is kept in sync with +# requireReadableHostKernel() in pkg/provision/qemu/libguestfs.go (the +# binary enforces the same check at its libguestfs call sites). +__krel="$(uname -r)" +if ! [ -r "/boot/vmlinuz-$__krel" ]; then + { + echo "host kernel /boot/vmlinuz-$__krel is not readable by this user, so" + echo "libguestfs (virt-customize / virt-sysprep) will fail building its" + echo 'supermin appliance with "supermin exited with error status 1".' + cat <<'EOM' + +Ubuntu ships /boot/vmlinuz-* mode 0600, and a fresh 0600 image lands on +every kernel upgrade -- a one-off chmod or a per-version dpkg-statoverride +does not survive that. Install a kernel hook once so current and future +kernels stay readable (this makes vmlinuz world-readable): + + sudo tee /etc/kernel/postinst.d/zz-vmlinuz-readable >/dev/null <<'HOOK' +#!/bin/sh +# Keep installed kernels readable for libguestfs/supermin. +v="$1"; [ -n "$v" ] && [ -e "/boot/vmlinuz-$v" ] && chmod 0644 "/boot/vmlinuz-$v" +HOOK + sudo chmod 0755 /etc/kernel/postinst.d/zz-vmlinuz-readable + sudo chmod 0644 /boot/vmlinuz-* + +The hook re-applies on every future kernel; the chmod fixes the ones +already installed. +EOM + } >&2 + exit 1 +fi +unset __krel From d8f764bb43d5e5610b06e2cf215a388259622596 Mon Sep 17 00:00:00 2001 From: Yolean k8s-qa Date: Mon, 22 Jun 2026 11:26:52 +0000 Subject: [PATCH 3/3] fix(ci): lint + the GHA failure with the libguestfs setup helper Surfaced when PR #29 rebased this work onto main and CI ran on it for the first time. Lint (staticcheck): - ST1005 (pkg/provision/qemu/libguestfs.go): the remediation error string ended with a period. Drop it; the multi-line body is unchanged (only a trailing newline/punctuation trips ST1005). - SA4006 (pkg/lifetime/timer.go): Disarm assigned logger but never used it. Log the disarm at debug. Test (prepare-export ordering): - TestPrepareExport_NoSavedState/VMNotRunning failed on the CI runner because its kernel image is mode 0600. requireReadableHostKernel ran BEFORE the cheap correctness preconditions, so an unreadable kernel masked the actionable "run provision" / "start the cluster" errors. Move the kernel capability check to after loadState/IsRunning/disk and before the live phase mutates anything. requireReadableHostKernel is now a var so a host-independent regression test can force it to fail and assert the precondition error still wins (this gap was invisible on dev hosts whose running kernel happens to be readable). scripts/_check-host-kernel.sh is kept though nothing on this main-based branch sources it yet: the appliance workflow branch (which carries the appliance-*.sh scripts that source it) will be rebased onto this. Co-Authored-By: Claude Opus 4.8 (1M context) --- pkg/lifetime/timer.go | 1 + pkg/provision/qemu/libguestfs.go | 9 ++++++-- pkg/provision/qemu/prepare_export.go | 15 ++++++++----- pkg/provision/qemu/prepare_export_test.go | 27 +++++++++++++++++++++++ 4 files changed, 44 insertions(+), 8 deletions(-) diff --git a/pkg/lifetime/timer.go b/pkg/lifetime/timer.go index 9a8dd41..786d3ce 100644 --- a/pkg/lifetime/timer.go +++ b/pkg/lifetime/timer.go @@ -142,6 +142,7 @@ func Disarm(kubeContext string, logger *zap.Logger) error { if logger == nil { logger = zap.NewNop() } + logger.Debug("disarming lifetime host timer", zap.String("context", kubeContext)) if _, err := exec.LookPath("systemctl"); err == nil { // Stopping a transient timer unit also cleans it up. _ = exec.Command("systemctl", "--user", "stop", unitName(kubeContext)+".timer").Run() diff --git a/pkg/provision/qemu/libguestfs.go b/pkg/provision/qemu/libguestfs.go index 32c3b52..bef2126 100644 --- a/pkg/provision/qemu/libguestfs.go +++ b/pkg/provision/qemu/libguestfs.go @@ -43,7 +43,12 @@ func runningKernelRelease() (string, bool) { // blocker), or on a non-permission error -- in those cases we let // libguestfs run and surface its own diagnostics rather than block // on a false positive. -func requireReadableHostKernel() error { +// +// A var (not a plain func) so a test can force it to fail and assert +// that a call site checks it only after its cheap correctness +// preconditions -- otherwise an unreadable kernel on the build host +// masks the actionable error (it did, on the CI runner). +var requireReadableHostKernel = func() error { rel, ok := runningKernelRelease() if !ok { return nil @@ -81,5 +86,5 @@ HOOK sudo chmod 0644 /boot/vmlinuz-* The hook re-applies on every future kernel; the chmod fixes the ones -already installed.`, path, kernelReadableHookName, kernelReadableHookName) +already installed`, path, kernelReadableHookName, kernelReadableHookName) } diff --git a/pkg/provision/qemu/prepare_export.go b/pkg/provision/qemu/prepare_export.go index 5c22584..a375c3f 100644 --- a/pkg/provision/qemu/prepare_export.go +++ b/pkg/provision/qemu/prepare_export.go @@ -78,12 +78,6 @@ func PrepareExport(ctx context.Context, cacheDir, name string, logger *zap.Logge if _, err := exec.LookPath("virt-customize"); err != nil { return fmt.Errorf("virt-customize not found in PATH; install with: sudo apt install libguestfs-tools") } - // virt-customize (offline phase) and virt-tar-out (seed snapshot) - // both build a supermin appliance from the host kernel; bail early - // with a durable fix if it isn't readable. - if err := requireReadableHostKernel(); err != nil { - return err - } if _, err := exec.LookPath("kubectl"); err != nil { return fmt.Errorf("kubectl not found in PATH; install kubectl (prepare-export now snapshots reconciled Gateway state, which needs kubectl)") } @@ -103,6 +97,15 @@ func PrepareExport(ctx context.Context, cacheDir, name string, logger *zap.Logge return fmt.Errorf("disk image not found at %s: %w", diskPath, err) } + // The offline phase runs virt-customize + virt-tar-out, which + // build a supermin appliance from the host kernel. Gate that here, + // AFTER the cheap correctness preconditions (so a missing/stopped + // cluster reports its own actionable error) but BEFORE the live + // phase mutates anything, so an unreadable kernel fails clean. + if err := requireReadableHostKernel(); err != nil { + return err + } + // --- LIVE phase --- // Clear the per-deploy dns-hint-ip annotation so the snapshot // doesn't ship our LB IP. Then dump reconciled gateway state diff --git a/pkg/provision/qemu/prepare_export_test.go b/pkg/provision/qemu/prepare_export_test.go index 8b987e4..f59b4ad 100644 --- a/pkg/provision/qemu/prepare_export_test.go +++ b/pkg/provision/qemu/prepare_export_test.go @@ -2,6 +2,7 @@ package qemu import ( "context" + "errors" "os" "path/filepath" "runtime" @@ -310,3 +311,29 @@ func TestPrepareExport_MissingVirtCustomize(t *testing.T) { t.Errorf("error should hint at apt install libguestfs-tools: %v", err) } } + +// TestPrepareExport_PreconditionsBeforeKernelCheck guards the +// ordering that broke CI: an unreadable host kernel (libguestfs +// capability) must not mask a cheap correctness precondition like +// "no saved state". On the CI runner the kernel image was 0600, so +// the kernel check -- placed too early -- returned its own error +// instead of the actionable "run provision" hint. Force the kernel +// check to fail regardless of host state and assert the precondition +// still wins. +func TestPrepareExport_PreconditionsBeforeKernelCheck(t *testing.T) { + stubPrepareExportTools(t) + orig := requireReadableHostKernel + requireReadableHostKernel = func() error { return errors.New("host kernel unreadable (forced)") } + t.Cleanup(func() { requireReadableHostKernel = orig }) + + err := PrepareExport(context.Background(), t.TempDir(), "missing", nil) + if err == nil { + t.Fatal("expected error when no saved state exists") + } + if !strings.Contains(err.Error(), "y-cluster provision") { + t.Errorf("no-saved-state precondition must win over the kernel check: %v", err) + } + if strings.Contains(err.Error(), "unreadable") { + t.Errorf("kernel check fired before the precondition: %v", err) + } +}