diff --git a/README.md b/README.md index 5d94e40..397b6fc 100644 --- a/README.md +++ b/README.md @@ -31,6 +31,13 @@ Subcommand groups: and arbitrary user-built images. - **cache info / purge** — inspect or wipe y-cluster's shared download cache (k3s airgap bundles, image OCI layouts). +- **lifetime status / reap / extend / arm / disarm / gcp-flags** — + cost-control auto-expiry. A `lifetime.maxRun` in the config gives + the cluster a wall-clock budget counted from when it starts; on + expiry a local cluster runs its `onExpiry` action (stop by + default) via a host timer, and a GCP appliance is deleted by GCP + itself (`gcp-flags` emits the `--max-run-duration` flags). See the + "lifetime" idea below. - **serve / serve ensure / serve stop / serve logs** — a lightweight HTTP server that exposes config assets to the cluster: kustomize-built Secrets named @@ -42,7 +49,20 @@ context. The README is intentionally short — when something is discoverable from `y-cluster --help`, that's where it lives. -## Two ideas worth knowing before you start +## Three ideas worth knowing before you start + +**lifetime: the budget is counted from start, and the trigger lives +where the cost is.** `lifetime.maxRun` is a wall-clock budget that +begins when the cluster *starts* (re-anchored on every `y-cluster +start`), not when it was provisioned — an appliance disk may boot +days after it was built. Locally the host *is* the cost, so a host +timer fires `y-cluster lifetime reap`, which stops the cluster (or +the configured `onExpiry` action). On a GCP appliance the host +mustn't be the trigger (it may be offline), so `lifetime gcp-flags` +hands the duration to GCP's native `--max-run-duration`, and GCP +deletes the instance on its own — the attached data disk survives. +`reap` re-checks the persisted deadline and re-arms if it isn't due, +so `lifetime extend 2h` is safe and a stale timer is harmless. **yconverge: ordering vs checks come from different places.** CUE imports in `yconverge.cue` declare ordering — each import is diff --git a/cmd/y-cluster/lifecycle.go b/cmd/y-cluster/lifecycle.go index 736aa40..31f7115 100644 --- a/cmd/y-cluster/lifecycle.go +++ b/cmd/y-cluster/lifecycle.go @@ -55,6 +55,9 @@ func stopCmd() *cobra.Command { } switch lr.Backend { case cluster.BackendQEMU: + // A manual stop ends this run's budget; remove the + // host expiry timer. `start` re-arms a fresh window. + disarmHostTimer(contextName, logger) return qemu.Stop(qemuCacheDir(), lr.ClusterName, logger) case cluster.BackendDocker: return docker.Stop(ctx, lr.ClusterName, logger) @@ -240,6 +243,9 @@ func startCmd() *cobra.Command { if err != nil { return err } + // Start re-anchored the deadline to now; install the host + // timer for the fresh window. + armHostTimerIfLifetime(qemuCacheDir(), clusterName, contextName, logger) logger.Info("cluster started", zap.String("context", c.Context()), ) diff --git a/cmd/y-cluster/lifetime.go b/cmd/y-cluster/lifetime.go new file mode 100644 index 0000000..dd6331c --- /dev/null +++ b/cmd/y-cluster/lifetime.go @@ -0,0 +1,332 @@ +package main + +import ( + "errors" + "fmt" + "os" + "time" + + "github.com/spf13/cobra" + "go.uber.org/zap" + + "github.com/Yolean/y-cluster/pkg/cluster" + "github.com/Yolean/y-cluster/pkg/lifetime" + "github.com/Yolean/y-cluster/pkg/provision/config" + "github.com/Yolean/y-cluster/pkg/provision/qemu" +) + +// lifetimeCmd is the cost-control auto-expiry surface. A dev cluster +// left running after a task is paused or finished is pure cost; a +// lifetime budget makes the cluster expire on its own. +// +// Two enforcement paths, picked by where the cost lives: +// - LOCAL (qemu): a host-side timer (status/reap/extend/arm/disarm +// below) runs the onExpiry action. The host is the cost, so a +// host timer is the right trigger. +// - CLOUD (GCP appliance): `gcp-flags` emits gcloud scheduling +// flags so GCP itself deletes the instance at the deadline -- no +// host or cluster dependency. This is the only correct trigger +// for a paid cloud resource. +// +// The local subcommands are qemu-only today, matching the rest of +// the lifecycle surface; the qemu state sidecar is where the +// deadline lives. +func lifetimeCmd() *cobra.Command { + cmd := &cobra.Command{ + Use: "lifetime", + Short: "Cost-control auto-expiry: stop/decommission a cluster that is no longer needed", + } + cmd.AddCommand( + lifetimeStatusCmd(), + lifetimeReapCmd(), + lifetimeExtendCmd(), + lifetimeArmCmd(), + lifetimeDisarmCmd(), + lifetimeGCPFlagsCmd(), + ) + return cmd +} + +// armHostTimerIfLifetime installs the host-side reap timer when the +// qemu cluster has an armed deadline. Called from provision/start +// after the deadline has been (re)anchored. Best-effort: a failure +// to arm is logged, not fatal -- the deadline is persisted and +// `y-cluster lifetime reap` (by hand or from any scheduler) remains +// the backstop. No-op when no lifetime is configured. +func armHostTimerIfLifetime(cacheDir, name, contextName string, logger *zap.Logger) { + ls, err := qemu.LoadLifetime(cacheDir, name) + if err != nil || !ls.Enabled() || ls.ExpiresAt.IsZero() { + return + } + bin, err := os.Executable() + if err != nil { + logger.Warn("could not resolve binary path to arm lifetime timer", zap.Error(err)) + return + } + if err := lifetime.Arm(bin, contextName, ls.Remaining(), logger); err != nil { + logger.Warn("could not arm lifetime host timer", zap.Error(err)) + } +} + +// disarmHostTimer removes the host-side reap timer for a context. +// Called from stop/teardown. Best-effort by design. +func disarmHostTimer(contextName string, logger *zap.Logger) { + _ = lifetime.Disarm(contextName, logger) +} + +// resolveQemuCluster maps a kubeconfig context to the qemu cache dir +// + cluster name that the lifetime sidecar is keyed on. Works for a +// stopped cluster too (the context survives in kubeconfig), unlike +// cluster.Lookup which needs a running runtime. +func resolveQemuCluster(contextName string) (cacheDir, name string, err error) { + name, err = cluster.ResolveClusterName("", contextName) + if err != nil { + return "", "", err + } + if name == "" { + return "", "", fmt.Errorf("kubeconfig context %q has no associated cluster", contextName) + } + return qemuCacheDir(), name, nil +} + +// lifetimeStateErr renders the missing-sidecar case as a clear +// qemu-only message rather than a raw "no such file" error. +func lifetimeStateErr(name string, err error) error { + if errors.Is(err, os.ErrNotExist) { + return fmt.Errorf("no lifetime state for cluster %q; lifetime is implemented for the qemu provider only", name) + } + return err +} + +func lifetimeStatusCmd() *cobra.Command { + var contextName string + cmd := &cobra.Command{ + Use: "status", + Short: "Show the cluster's lifetime policy and time remaining", + Args: cobra.NoArgs, + RunE: func(cmd *cobra.Command, _ []string) error { + cacheDir, name, err := resolveQemuCluster(contextName) + if err != nil { + return err + } + ls, err := qemu.LoadLifetime(cacheDir, name) + if err != nil { + return lifetimeStateErr(name, err) + } + out := cmd.OutOrStdout() + if !ls.Enabled() { + fmt.Fprintf(out, "lifetime: disabled (no maxRun) for %q\n", name) + return nil + } + fmt.Fprintf(out, "cluster: %s\n", name) + fmt.Fprintf(out, "maxRun: %s\n", ls.MaxRun) + fmt.Fprintf(out, "onExpiry: %s\n", ls.OnExpiry) + if ls.ExpiresAt.IsZero() { + fmt.Fprintln(out, "expiresAt: (not armed; run `y-cluster start` or `y-cluster lifetime arm`)") + return nil + } + fmt.Fprintf(out, "expiresAt: %s\n", ls.ExpiresAt.Format(time.RFC3339)) + rem := ls.Remaining().Round(time.Second) + if rem < 0 { + fmt.Fprintf(out, "remaining: EXPIRED (%s ago)\n", (-rem).String()) + } else { + fmt.Fprintf(out, "remaining: %s\n", rem) + } + return nil + }, + } + cmd.Flags().StringVar(&contextName, "context", cluster.DefaultContext, "kubeconfig context name") + return cmd +} + +func lifetimeReapCmd() *cobra.Command { + var contextName string + cmd := &cobra.Command{ + Use: "reap", + Short: "Run the expiry action if the deadline has passed; otherwise re-arm", + Long: `reap is what the host timer fires at the deadline. It is +idempotent and self-healing: it re-reads the persisted deadline and +acts only if it has truly elapsed. If the deadline was pushed out +(e.g. via ` + "`lifetime extend`" + `) since the timer was set, reap +simply re-arms for the remaining window and exits. Safe to run by +hand or from an external cron.`, + Args: cobra.NoArgs, + RunE: func(cmd *cobra.Command, _ []string) error { + logger := loggerFromContext(cmd.Context()) + cacheDir, name, err := resolveQemuCluster(contextName) + if err != nil { + return err + } + ls, err := qemu.LoadLifetime(cacheDir, name) + if err != nil { + return lifetimeStateErr(name, err) + } + if !ls.Enabled() { + logger.Info("no lifetime configured; nothing to reap", zap.String("cluster", name)) + return nil + } + if ls.ExpiresAt.IsZero() { + logger.Info("lifetime not armed; nothing to reap", zap.String("cluster", name)) + return nil + } + if !ls.Expired() { + rem := ls.Remaining() + if bin, err := os.Executable(); err == nil { + if err := lifetime.Arm(bin, contextName, rem, logger); err != nil { + logger.Warn("could not re-arm host timer", zap.Error(err)) + } + } + logger.Info("not yet expired; re-armed", + zap.String("cluster", name), zap.Duration("remaining", rem.Round(time.Second))) + return nil + } + + logger.Info("lifetime expired; reaping", + zap.String("cluster", name), zap.String("onExpiry", ls.OnExpiry)) + switch ls.OnExpiry { + case config.OnExpiryPause: + err = qemu.Pause(cacheDir, name, logger) + case config.OnExpiryTeardown: + err = qemu.TeardownByName(cacheDir, name, false, logger) + default: // stop is the default and the empty-value behaviour + err = qemu.Stop(cacheDir, name, logger) + } + if err != nil { + return err + } + // Action performed: remove the host timer (best-effort; + // reap's recheck makes a stray timer harmless anyway). + _ = lifetime.Disarm(contextName, logger) + return nil + }, + } + cmd.Flags().StringVar(&contextName, "context", cluster.DefaultContext, "kubeconfig context name") + return cmd +} + +func lifetimeExtendCmd() *cobra.Command { + var contextName string + cmd := &cobra.Command{ + Use: "extend ", + Short: "Push the deadline out by (e.g. 2h) and re-arm", + Args: cobra.ExactArgs(1), + RunE: func(cmd *cobra.Command, args []string) error { + logger := loggerFromContext(cmd.Context()) + d, err := time.ParseDuration(args[0]) + if err != nil { + return fmt.Errorf("invalid duration %q: %w", args[0], err) + } + if d <= 0 { + return fmt.Errorf("extend duration must be positive, got %q", args[0]) + } + cacheDir, name, err := resolveQemuCluster(contextName) + if err != nil { + return err + } + nt, err := qemu.ExtendDeadline(cacheDir, name, d) + if err != nil { + return lifetimeStateErr(name, err) + } + if bin, err := os.Executable(); err == nil { + if err := lifetime.Arm(bin, contextName, time.Until(nt), logger); err != nil { + logger.Warn("could not re-arm host timer", zap.Error(err)) + } + } + fmt.Fprintf(cmd.OutOrStdout(), "extended; expiresAt %s\n", nt.Format(time.RFC3339)) + return nil + }, + } + cmd.Flags().StringVar(&contextName, "context", cluster.DefaultContext, "kubeconfig context name") + return cmd +} + +func lifetimeArmCmd() *cobra.Command { + var contextName string + cmd := &cobra.Command{ + Use: "arm", + Short: "(Re)install the host timer that fires the expiry action", + Args: cobra.NoArgs, + RunE: func(cmd *cobra.Command, _ []string) error { + logger := loggerFromContext(cmd.Context()) + cacheDir, name, err := resolveQemuCluster(contextName) + if err != nil { + return err + } + ls, err := qemu.LoadLifetime(cacheDir, name) + if err != nil { + return lifetimeStateErr(name, err) + } + if !ls.Enabled() { + return fmt.Errorf("no lifetime configured for %q; set lifetime.maxRun and re-provision", name) + } + if ls.ExpiresAt.IsZero() { + return fmt.Errorf("no deadline armed for %q; `y-cluster start` re-anchors it", name) + } + bin, err := os.Executable() + if err != nil { + return err + } + return lifetime.Arm(bin, contextName, ls.Remaining(), logger) + }, + } + cmd.Flags().StringVar(&contextName, "context", cluster.DefaultContext, "kubeconfig context name") + return cmd +} + +func lifetimeDisarmCmd() *cobra.Command { + var contextName string + cmd := &cobra.Command{ + Use: "disarm", + Short: "Remove the host timer (the persisted deadline is left intact)", + Args: cobra.NoArgs, + RunE: func(cmd *cobra.Command, _ []string) error { + logger := loggerFromContext(cmd.Context()) + return lifetime.Disarm(contextName, logger) + }, + } + cmd.Flags().StringVar(&contextName, "context", cluster.DefaultContext, "kubeconfig context name") + return cmd +} + +func lifetimeGCPFlagsCmd() *cobra.Command { + var configDir string + cmd := &cobra.Command{ + Use: "gcp-flags", + Short: "Print gcloud instances-create flags that enforce the lifetime cloud-side", + Long: `Reads lifetime.maxRun from the y-cluster-provision.yaml in -c + and prints the matching +` + "`--max-run-duration=s --instance-termination-action=DELETE`" + ` +flags for ` + "`gcloud compute instances create`" + `. Prints nothing +when no lifetime is configured, so a build script can append the +output unconditionally: + + EXTRA=$(y-cluster lifetime gcp-flags -c "$CONFIG_DIR") + gcloud compute instances create ... $EXTRA`, + Args: cobra.NoArgs, + RunE: func(cmd *cobra.Command, _ []string) error { + loaded, err := loadProvision(configDir) + if err != nil { + return err + } + acc, ok := loaded.(interface { + LifetimePolicy() config.LifetimeConfig + }) + if !ok { + return nil // provider with no lifetime surface: emit nothing + } + flags, err := lifetime.GCPFlags(acc.LifetimePolicy().MaxRun) + if err != nil { + return err + } + if flags != "" { + fmt.Fprintln(cmd.OutOrStdout(), flags) + } + return nil + }, + } + cmd.Flags().StringVarP(&configDir, "config", "c", "", "directory containing y-cluster-provision.yaml") + if err := cmd.MarkFlagRequired("config"); err != nil { + panic(err) + } + return cmd +} diff --git a/cmd/y-cluster/main.go b/cmd/y-cluster/main.go index ee7ab6b..b1c06d5 100644 --- a/cmd/y-cluster/main.go +++ b/cmd/y-cluster/main.go @@ -131,6 +131,7 @@ func rootCmd() *cobra.Command { root.AddCommand(prepareExportCmd()) root.AddCommand(exportCmd()) root.AddCommand(importCmd()) + root.AddCommand(lifetimeCmd()) root.AddCommand(serveCmd()) root.AddCommand(imagesCmd()) root.AddCommand(manifestsCmd()) @@ -361,6 +362,9 @@ message naming what was checked.`, if _, err := qemu.Provision(cmd.Context(), rt, logger); err != nil { return err } + // Provision armed the deadline; install the host-side + // timer that fires the local expiry action. + armHostTimerIfLifetime(rt.CacheDir, rt.Name, rt.Context, logger) logger.Info("cluster ready", zap.String("ssh", fmt.Sprintf("ssh -p %s -i %s ystack@localhost", rt.SSHPort, filepath.Join(rt.CacheDir, rt.Name+"-ssh"))), @@ -411,6 +415,9 @@ func teardownCmd() *cobra.Command { } switch v := loaded.(type) { case *config.QEMUConfig: + // Remove the host expiry timer before the cluster goes; + // the deadline is moot once teardown removes the sidecar. + disarmHostTimer(v.Context, logger) return qemu.TeardownConfig(qemu.FromConfig(v), keepDisk, logger) case *config.DockerConfig: // docker has no persistent disk; keepDisk is diff --git a/e2e/lifetime_test.go b/e2e/lifetime_test.go new file mode 100644 index 0000000..2e309d1 --- /dev/null +++ b/e2e/lifetime_test.go @@ -0,0 +1,132 @@ +//go:build e2e && kvm + +package e2e + +import ( + "context" + "os" + "path/filepath" + "testing" + "time" + + "go.uber.org/zap" + + "github.com/Yolean/y-cluster/pkg/provision/qemu" +) + +// TestQemu_Lifetime exercises the local cost-control auto-expiry path +// against a real qemu boot: +// +// - Provision with a lifetime budget; assert the deadline is armed +// in the sidecar and anchored to roughly now+budget. +// - Push the deadline into the past (the time-travel a real expiry +// would reach naturally) and assert Expired() flips. +// - Run the onExpiry action (stop) and assert the VM is down with +// disk + sidecar preserved -- the cost is gone, the cluster is +// resumable. +// - Start and assert the deadline is re-anchored to this start (the +// "count from when the cluster starts" guarantee), giving a fresh +// budget rather than an already-expired one. +// +// The host timer (systemd-run/at) is unit-tested in pkg/lifetime; +// here we cover the runtime substance: persistence, expiry detection, +// the real stop action, and the start re-anchor. +func TestQemu_Lifetime(t *testing.T) { + if _, err := os.Stat("/dev/kvm"); err != nil { + t.Skip("QEMU tests require /dev/kvm") + } + if err := qemu.CheckPrerequisites(); err != nil { + t.Skip(err) + } + + logger, _ := zap.NewDevelopment() + cfg := e2eQEMURuntime() + cfg.Name = "y-cluster-e2e-lifetime" + cfg.Context = "y-cluster-e2e-lifetime" + cfg.CacheDir = t.TempDir() + cfg.Memory = "4096" + cfg.CPUs = "2" + cfg.SSHPort = "2227" + cfg.PortForwards = e2eUniqueForwards("26447", "28447") + cfg.Kubeconfig = os.Getenv("KUBECONFIG") + if cfg.Kubeconfig == "" { + t.Skip("KUBECONFIG must be set") + } + cfg.Lifetime = "2h" + cfg.OnExpiry = "stop" + t.Setenv("Y_CLUSTER_QEMU_CACHE_DIR", cfg.CacheDir) + + ctx := context.Background() + + cluster, err := qemu.Provision(ctx, cfg, logger) + if err != nil { + t.Fatal(err) + } + t.Cleanup(func() { _ = cluster.Teardown(false) }) + + // Deadline armed at provision, anchored ~now+2h. + ls, err := qemu.LoadLifetime(cfg.CacheDir, cfg.Name) + if err != nil { + t.Fatalf("LoadLifetime after provision: %v", err) + } + if !ls.Enabled() { + t.Fatal("lifetime should be enabled after provision with maxRun set") + } + if ls.ExpiresAt.IsZero() { + t.Fatal("ExpiresAt should be armed after provision") + } + if rem := time.Until(ls.ExpiresAt); rem < 90*time.Minute || rem > 2*time.Hour+5*time.Minute { + t.Fatalf("provision deadline %s out of expected ~2h window (remaining %s)", ls.ExpiresAt, rem) + } + provisionDeadline := ls.ExpiresAt + + // Simulate the deadline elapsing: push it three hours into the + // past so it is unambiguously due. + if _, err := qemu.ExtendDeadline(cfg.CacheDir, cfg.Name, -3*time.Hour); err != nil { + t.Fatalf("ExtendDeadline (negative, to force expiry): %v", err) + } + ls, err = qemu.LoadLifetime(cfg.CacheDir, cfg.Name) + if err != nil { + t.Fatal(err) + } + if !ls.Expired() { + t.Fatalf("deadline %s should be expired after pushing it into the past", ls.ExpiresAt) + } + + // The onExpiry action: stop. Pidfile gone, disk + sidecar kept. + vmPid := readPid(t, cfg.CacheDir, cfg.Name) + if err := qemu.Stop(cfg.CacheDir, cfg.Name, logger); err != nil { + t.Fatalf("Stop (reap action): %v", err) + } + if _, err := os.Stat(filepath.Join(cfg.CacheDir, cfg.Name+".pid")); !os.IsNotExist(err) { + t.Fatalf("pidfile should be gone after expiry stop; stat err=%v", err) + } + if _, err := os.Stat(cluster.DiskPath()); err != nil { + t.Fatalf("disk should be preserved after expiry stop: %v", err) + } + if _, err := os.Stat(filepath.Join(cfg.CacheDir, cfg.Name+".json")); err != nil { + t.Fatalf("state sidecar should be preserved after expiry stop: %v", err) + } + assertPidGone(t, vmPid) + + // Start re-anchors the deadline to now: a stopped-then-started + // cluster gets a fresh budget, not the expired one we forced. + cluster2, err := qemu.Start(ctx, cfg.CacheDir, cfg.Name, logger) + if err != nil { + t.Fatalf("Start: %v", err) + } + _ = cluster2 + assertNodeReady(t, cfg.Context, cfg.Kubeconfig) + + ls, err = qemu.LoadLifetime(cfg.CacheDir, cfg.Name) + if err != nil { + t.Fatal(err) + } + if ls.Expired() { + t.Fatal("deadline should be re-anchored (not expired) after Start") + } + if !ls.ExpiresAt.After(provisionDeadline) { + t.Fatalf("start deadline %s should be later than the original provision deadline %s", + ls.ExpiresAt, provisionDeadline) + } +} diff --git a/pkg/lifetime/gcp.go b/pkg/lifetime/gcp.go new file mode 100644 index 0000000..ca9509a --- /dev/null +++ b/pkg/lifetime/gcp.go @@ -0,0 +1,33 @@ +package lifetime + +import ( + "fmt" + "time" +) + +// GCPFlags renders the `gcloud compute instances create` scheduling +// flags that enforce a cluster lifetime CLOUD-side. GCP measures +// max-run-duration from when the instance STARTS and then performs +// the termination action -- here DELETE -- with no dependency on the +// provisioning host or on the cluster staying up. That is exactly +// the "anchor to start, never host-bound" contract the appliance +// needs: the disk image carries only the duration, never an absolute +// deadline baked in at build time. +// +// Returns "" (no flags) when no budget is configured. The duration +// is normalized to integer seconds so gcloud's duration parser can +// never disagree with Go's time.ParseDuration. +func GCPFlags(maxRun string) (string, error) { + if maxRun == "" || maxRun == "0" { + return "", nil + } + d, err := time.ParseDuration(maxRun) + if err != nil { + return "", fmt.Errorf("lifetime maxRun %q is not a valid Go duration: %w", maxRun, err) + } + if d <= 0 { + return "", fmt.Errorf("lifetime maxRun must be positive, got %q", maxRun) + } + secs := int(d / time.Second) + return fmt.Sprintf("--max-run-duration=%ds --instance-termination-action=DELETE", secs), nil +} diff --git a/pkg/lifetime/gcp_test.go b/pkg/lifetime/gcp_test.go new file mode 100644 index 0000000..ab766d2 --- /dev/null +++ b/pkg/lifetime/gcp_test.go @@ -0,0 +1,49 @@ +package lifetime + +import ( + "strings" + "testing" +) + +func TestGCPFlags(t *testing.T) { + tests := []struct { + in string + want string + wantErr bool + }{ + {"", "", false}, + {"0", "", false}, + {"8h", "--max-run-duration=28800s --instance-termination-action=DELETE", false}, + {"90m", "--max-run-duration=5400s --instance-termination-action=DELETE", false}, + {"banana", "", true}, + {"-5m", "", true}, + } + for _, tt := range tests { + got, err := GCPFlags(tt.in) + if tt.wantErr { + if err == nil { + t.Errorf("GCPFlags(%q): expected error, got %q", tt.in, got) + } + continue + } + if err != nil { + t.Errorf("GCPFlags(%q): unexpected error %v", tt.in, err) + continue + } + if got != tt.want { + t.Errorf("GCPFlags(%q) = %q, want %q", tt.in, got, tt.want) + } + } +} + +// The DELETE action is what preserves the separately-attached data +// disk while stopping compute billing -- guard it explicitly. +func TestGCPFlags_DeletesInstance(t *testing.T) { + got, err := GCPFlags("1h") + if err != nil { + t.Fatal(err) + } + if !strings.Contains(got, "--instance-termination-action=DELETE") { + t.Fatalf("expected DELETE termination action, got %q", got) + } +} diff --git a/pkg/lifetime/timer.go b/pkg/lifetime/timer.go new file mode 100644 index 0000000..786d3ce --- /dev/null +++ b/pkg/lifetime/timer.go @@ -0,0 +1,184 @@ +// Package lifetime arms and disarms the host-side timer that fires a +// cluster's auto-expiry action. It is the LOCAL trigger: for a local +// dev cluster the host machine is itself the cost, so a host timer is +// the right place for the trigger (if the host sleeps or logs out, +// the VM is down too, so there is nothing to reap). Paid CLOUD +// resources must NOT be reaped from the provisioning host -- that +// path uses cloud-enforced expiry (GCP max-run-duration) instead and +// never goes through this package. +// +// The timer runs ` lifetime reap --context=` at the +// deadline. reap re-reads the persisted deadline and acts only if it +// has truly elapsed, otherwise it re-arms for the remaining window. +// That idempotency makes a stale timer (e.g. one left behind after an +// `extend`) harmless, which is why Disarm is best-effort. +package lifetime + +import ( + "fmt" + "os/exec" + "strings" + "time" + + "go.uber.org/zap" +) + +// reapInvocation is the argv tail every backend schedules: the +// y-cluster subcommand that performs the expiry check + action. +func reapInvocation(bin, kubeContext string) []string { + return []string{bin, "lifetime", "reap", "--context=" + kubeContext} +} + +// unitName is the transient systemd unit name for a context's timer. +// Sanitized to the systemd unit charset; the context is already +// DNS-label-ish but a kubeconfig context can in principle carry +// characters systemd rejects, so map anything outside [a-z0-9-] to +// '-'. +func unitName(kubeContext string) string { + var b strings.Builder + b.WriteString("y-cluster-lifetime-") + for _, r := range kubeContext { + switch { + case r >= 'a' && r <= 'z', r >= 'A' && r <= 'Z', r >= '0' && r <= '9', r == '-': + b.WriteRune(r) + default: + b.WriteRune('-') + } + } + return b.String() +} + +// remainingSeconds clamps a deadline-relative duration to a minimum +// of one second so the timer is always in the future even if the +// deadline is already (nearly) here -- in which case reap fires +// almost immediately and acts. +func remainingSeconds(remaining time.Duration) int { + s := int(remaining / time.Second) + if s < 1 { + return 1 + } + return s +} + +// systemdRunArgs builds the argv for arming via a transient +// `systemd-run --user` timer. `--on-active` is relative to now, so a +// computed remaining window arms the deadline; `--unit` names it so +// status/disarm can find it. +func systemdRunArgs(bin, kubeContext string, remaining time.Duration) []string { + args := []string{ + "--user", + "--unit=" + unitName(kubeContext), + fmt.Sprintf("--on-active=%ds", remainingSeconds(remaining)), + "--timer-property=AccuracySec=1s", + "--", + } + return append(args, reapInvocation(bin, kubeContext)...) +} + +// atTimeSpec renders the `at` time argument. at granularity is +// minutes, so round up to at least one minute. +func atTimeSpec(remaining time.Duration) string { + mins := int((remaining + time.Minute - 1) / time.Minute) + if mins < 1 { + mins = 1 + } + return fmt.Sprintf("now + %d minutes", mins) +} + +// atScript is the shell line piped to `at`. The trailing comment is a +// stable marker so Disarm can find this job among the user's at queue +// (at has no job naming). +func atScript(bin, kubeContext string) string { + return strings.Join(reapInvocation(bin, kubeContext), " ") + + " # " + unitName(kubeContext) +} + +// Arm schedules the reap for `remaining` from now via systemd-run +// (preferred) or `at` (fallback). It disarms any existing timer for +// the context first so re-arming is idempotent. A nil error means a +// timer is in place; an error means no host timer was armed (the +// persisted deadline still stands, so a manual or external +// `lifetime reap` remains the backstop). +func Arm(bin, kubeContext string, remaining time.Duration, logger *zap.Logger) error { + if logger == nil { + logger = zap.NewNop() + } + _ = Disarm(kubeContext, logger) // best-effort; ignore "nothing to remove" + + if _, err := exec.LookPath("systemd-run"); err == nil { + args := systemdRunArgs(bin, kubeContext, remaining) + out, err := exec.Command("systemd-run", args...).CombinedOutput() + if err == nil { + logger.Info("lifetime timer armed (systemd)", + zap.String("unit", unitName(kubeContext)), + zap.Duration("in", remaining)) + return nil + } + // User bus may be unavailable (e.g. headless without linger); + // fall through to at rather than failing outright. + logger.Debug("systemd-run failed; trying at", + zap.Error(err), zap.ByteString("output", out)) + } + + if _, err := exec.LookPath("at"); err == nil { + cmd := exec.Command("at", strings.Fields(atTimeSpec(remaining))...) + cmd.Stdin = strings.NewReader(atScript(bin, kubeContext) + "\n") + if out, err := cmd.CombinedOutput(); err != nil { + return fmt.Errorf("at scheduling failed: %w: %s", err, strings.TrimSpace(string(out))) + } + logger.Info("lifetime timer armed (at)", zap.Duration("in", remaining)) + return nil + } + + return fmt.Errorf("no host scheduler available (need systemd-run --user or at); " + + "the deadline is persisted but will not fire automatically - " + + "run `y-cluster lifetime reap` from a cron/timer of your choosing") +} + +// Disarm removes the context's host timer. Best-effort by design: +// reap re-checks the persisted deadline, so a leftover timer that +// fires is a no-op. Returns nil when nothing needed removing. +func Disarm(kubeContext string, logger *zap.Logger) error { + if logger == nil { + logger = zap.NewNop() + } + logger.Debug("disarming lifetime host timer", zap.String("context", kubeContext)) + if _, err := exec.LookPath("systemctl"); err == nil { + // Stopping a transient timer unit also cleans it up. + _ = exec.Command("systemctl", "--user", "stop", unitName(kubeContext)+".timer").Run() + } + if _, err := exec.LookPath("atq"); err == nil { + for _, id := range atJobIDsFor(kubeContext) { + if _, err := exec.LookPath("atrm"); err == nil { + _ = exec.Command("atrm", id).Run() + } + } + } + return nil +} + +// atJobIDsFor returns at(1) job ids whose script carries this +// context's marker. Best-effort: any error yields no ids. +func atJobIDsFor(kubeContext string) []string { + out, err := exec.Command("atq").Output() + if err != nil { + return nil + } + marker := unitName(kubeContext) + var ids []string + for _, line := range strings.Split(string(out), "\n") { + fields := strings.Fields(line) + if len(fields) == 0 { + continue + } + id := fields[0] + body, err := exec.Command("at", "-c", id).Output() + if err != nil { + continue + } + if strings.Contains(string(body), marker) { + ids = append(ids, id) + } + } + return ids +} diff --git a/pkg/lifetime/timer_test.go b/pkg/lifetime/timer_test.go new file mode 100644 index 0000000..4fa8480 --- /dev/null +++ b/pkg/lifetime/timer_test.go @@ -0,0 +1,80 @@ +package lifetime + +import ( + "strings" + "testing" + "time" +) + +func TestUnitName_Sanitizes(t *testing.T) { + tests := map[string]string{ + "local": "y-cluster-lifetime-local", + "alice-dev1": "y-cluster-lifetime-alice-dev1", + "weird/ctx @1": "y-cluster-lifetime-weird-ctx--1", + } + for in, want := range tests { + if got := unitName(in); got != want { + t.Errorf("unitName(%q) = %q, want %q", in, got, want) + } + } +} + +func TestRemainingSeconds_FloorIsOne(t *testing.T) { + if got := remainingSeconds(-5 * time.Minute); got != 1 { + t.Errorf("past-due remaining should floor to 1s, got %d", got) + } + if got := remainingSeconds(0); got != 1 { + t.Errorf("zero remaining should floor to 1s, got %d", got) + } + if got := remainingSeconds(90 * time.Second); got != 90 { + t.Errorf("remainingSeconds(90s) = %d, want 90", got) + } +} + +func TestSystemdRunArgs(t *testing.T) { + args := systemdRunArgs("/usr/local/bin/y-cluster", "local", 8*time.Hour) + joined := strings.Join(args, " ") + for _, want := range []string{ + "--user", + "--unit=y-cluster-lifetime-local", + "--on-active=28800s", + "--", + "/usr/local/bin/y-cluster lifetime reap --context=local", + } { + if !strings.Contains(joined, want) { + t.Errorf("systemd-run args missing %q in: %s", want, joined) + } + } +} + +func TestAtTimeSpec_RoundsUpToMinute(t *testing.T) { + tests := map[time.Duration]string{ + 30 * time.Second: "now + 1 minutes", + 90 * time.Second: "now + 2 minutes", + 8 * time.Hour: "now + 480 minutes", + -1 * time.Minute: "now + 1 minutes", + } + for in, want := range tests { + if got := atTimeSpec(in); got != want { + t.Errorf("atTimeSpec(%v) = %q, want %q", in, got, want) + } + } +} + +func TestAtScript_CarriesMarkerAndCommand(t *testing.T) { + s := atScript("/usr/local/bin/y-cluster", "alice-dev1") + if !strings.Contains(s, "/usr/local/bin/y-cluster lifetime reap --context=alice-dev1") { + t.Errorf("at script missing reap command: %s", s) + } + if !strings.Contains(s, "# y-cluster-lifetime-alice-dev1") { + t.Errorf("at script missing disarm marker: %s", s) + } +} + +func TestReapInvocation(t *testing.T) { + got := reapInvocation("y-cluster", "local") + want := []string{"y-cluster", "lifetime", "reap", "--context=local"} + if strings.Join(got, " ") != strings.Join(want, " ") { + t.Errorf("reapInvocation = %v, want %v", got, want) + } +} diff --git a/pkg/provision/config/common.go b/pkg/provision/config/common.go index c6df467..76a8a81 100644 --- a/pkg/provision/config/common.go +++ b/pkg/provision/config/common.go @@ -30,6 +30,8 @@ // keys is portable across providers package config +import "time" + // Provider IDs. Single source of truth for both the per-provider // `Validate()` checks and the `enum` constraint on // CommonConfig.Provider — schemagen reads AllProviders to build @@ -54,16 +56,87 @@ var AllProviders = []string{ProviderDocker, ProviderMultipass, ProviderQEMU} // Per-provider Validate() must call validateCommon to enforce the // shared invariants (provider discriminator, k3s.version present). type CommonConfig struct { - Provider string `yaml:"provider" json:"provider" jsonschema:"description=Provisioner to use. Optional in the common schema -- when omitted at provision time the host is probed (multipass daemon reachable -> multipass; Linux+/dev/kvm+qemu-system-x86_64 -> qemu; else reachable docker daemon -> docker). Per-provider schemas narrow this to a single literal and keep it required."` - Name string `yaml:"name,omitempty" json:"name,omitempty" jsonschema:"default=y-cluster,description=Cluster instance identifier; used as the docker container name / qemu -name / kubeconfig cluster name / prefix for cache files."` - Context string `yaml:"context,omitempty" json:"context,omitempty" jsonschema:"default=local,description=kubeconfig context name to write."` - Memory string `yaml:"memory,omitempty" json:"memory,omitempty" jsonschema:"default=8192,description=Memory in MB. qemu allocates this to the VM; docker passes it to --memory."` - CPUs string `yaml:"cpus,omitempty" json:"cpus,omitempty" jsonschema:"default=4,description=vCPU count. qemu sets -smp; docker passes --cpus."` - K3s K3sConfig `yaml:"k3s,omitempty" json:"k3s,omitempty" jsonschema:"description=k3s install settings. Defaults track pkg/provision/config/k3s.yaml."` - PortForwards []PortForward `yaml:"portForwards,omitempty" json:"portForwards,omitempty" jsonschema:"description=Host->guest TCP port forwards. Defaults to 6443/80/443 when omitted. Must include a guest:6443 entry so the host's kubectl can reach the API server."` - Registries Registries `yaml:"registries,omitempty" json:"registries,omitempty" jsonschema:"description=k3s registries.yaml content. Written to /etc/rancher/k3s/registries.yaml on the node before k3s starts. ${VAR} substitution is supported on credential and endpoint fields."` - Gateway GatewayConfig `yaml:"gateway,omitempty" json:"gateway,omitempty" jsonschema:"description=Bundled Envoy Gateway install. Skip the install entirely (no CRDs, controller, or GatewayClass) by setting skip:true; rename the default GatewayClass via name."` - Storage StorageConfig `yaml:"storage,omitempty" json:"storage,omitempty" jsonschema:"description=Bundled local-path-provisioner install. Defaults give a predictable on-disk layout (/data/yolean/_) and Retain reclaim so PV content survives PVC delete and an appliance upgrade rebinds the same directory by name."` + Provider string `yaml:"provider" json:"provider" jsonschema:"description=Provisioner to use. Optional in the common schema -- when omitted at provision time the host is probed (multipass daemon reachable -> multipass; Linux+/dev/kvm+qemu-system-x86_64 -> qemu; else reachable docker daemon -> docker). Per-provider schemas narrow this to a single literal and keep it required."` + Name string `yaml:"name,omitempty" json:"name,omitempty" jsonschema:"default=y-cluster,description=Cluster instance identifier; used as the docker container name / qemu -name / kubeconfig cluster name / prefix for cache files."` + Context string `yaml:"context,omitempty" json:"context,omitempty" jsonschema:"default=local,description=kubeconfig context name to write."` + Memory string `yaml:"memory,omitempty" json:"memory,omitempty" jsonschema:"default=8192,description=Memory in MB. qemu allocates this to the VM; docker passes it to --memory."` + CPUs string `yaml:"cpus,omitempty" json:"cpus,omitempty" jsonschema:"default=4,description=vCPU count. qemu sets -smp; docker passes --cpus."` + K3s K3sConfig `yaml:"k3s,omitempty" json:"k3s,omitempty" jsonschema:"description=k3s install settings. Defaults track pkg/provision/config/k3s.yaml."` + PortForwards []PortForward `yaml:"portForwards,omitempty" json:"portForwards,omitempty" jsonschema:"description=Host->guest TCP port forwards. Defaults to 6443/80/443 when omitted. Must include a guest:6443 entry so the host's kubectl can reach the API server."` + Registries Registries `yaml:"registries,omitempty" json:"registries,omitempty" jsonschema:"description=k3s registries.yaml content. Written to /etc/rancher/k3s/registries.yaml on the node before k3s starts. ${VAR} substitution is supported on credential and endpoint fields."` + Gateway GatewayConfig `yaml:"gateway,omitempty" json:"gateway,omitempty" jsonschema:"description=Bundled Envoy Gateway install. Skip the install entirely (no CRDs, controller, or GatewayClass) by setting skip:true; rename the default GatewayClass via name."` + Storage StorageConfig `yaml:"storage,omitempty" json:"storage,omitempty" jsonschema:"description=Bundled local-path-provisioner install. Defaults give a predictable on-disk layout (/data/yolean/_) and Retain reclaim so PV content survives PVC delete and an appliance upgrade rebinds the same directory by name."` + Lifetime LifetimeConfig `yaml:"lifetime,omitempty" json:"lifetime,omitempty" jsonschema:"description=Cost-control auto-expiry. maxRun sets a wall-clock budget counted from when the cluster STARTS (not from provision); on expiry a local cluster runs onExpiry (stop by default) and a GCP appliance is deleted by GCP-native max-run-duration. Empty maxRun disables."` +} + +// LifetimeConfig is the cluster-level cost-control policy. A dev +// cluster left running after a task is paused or finished is pure +// cost (host RAM/CPU locally, hourly billing in cloud); a lifetime +// makes the cluster expire on its own. +// +// The budget is always counted from when the cluster STARTS, never +// from provision time. This matters for the appliance flow: a disk +// is provisioned, exported, then imported and booted cloud-side +// possibly days later -- the countdown must begin at that boot. On +// the GCP path this falls out for free because the duration is +// handed to GCP's native max-run-duration, which GCP measures from +// instance start; locally the deadline is recomputed on each +// `y-cluster start`. +// +// MaxRun empty (or "0") disables the whole feature -- a cluster +// with no lifetime runs until manually stopped, the historical +// behaviour. +type LifetimeConfig struct { + // MaxRun is the wall-clock budget as a Go duration string + // (e.g. "8h", "90m", "24h"). Empty disables. Validated to + // parse via time.ParseDuration and be strictly positive. + MaxRun string `yaml:"maxRun,omitempty" json:"maxRun,omitempty" jsonschema:"description=Wall-clock budget as a Go duration such as 8h or 90m. Counted from cluster start. Empty disables auto-expiry."` + + // OnExpiry is the action a LOCAL cluster takes when MaxRun + // elapses: stop (graceful, disk preserved -- the default and + // cheapest reversible action), pause (SIGSTOP; RAM stays + // reserved), or teardown (delete). Ignored on the GCP + // appliance path, which always decommissions via instance + // delete. Defaulted to stop when MaxRun is set. + OnExpiry string `yaml:"onExpiry,omitempty" json:"onExpiry,omitempty" jsonschema:"enum=stop,enum=pause,enum=teardown,default=stop,description=Local action on expiry. Ignored on the GCP appliance path (always deletes the instance)."` +} + +// Lifetime action names. Single source of truth for the OnExpiry +// enum and the reaper's dispatch switch. +const ( + OnExpiryStop = "stop" + OnExpiryPause = "pause" + OnExpiryTeardown = "teardown" +) + +// AllOnExpiry is the canonical OnExpiry value list, used by +// validation error messages. +var AllOnExpiry = []string{OnExpiryStop, OnExpiryPause, OnExpiryTeardown} + +// LifetimePolicy returns the configured lifetime. Promoted to every +// provider config via CommonConfig embedding, so a caller holding an +// `any` from LoadProvision can read the budget without switching on +// the concrete provider type. +func (c CommonConfig) LifetimePolicy() LifetimeConfig { return c.Lifetime } + +// Enabled reports whether a lifetime budget is configured. +func (l LifetimeConfig) Enabled() bool { + return l.MaxRun != "" && l.MaxRun != "0" +} + +// MaxRunDuration parses MaxRun. Returns (0, nil) when disabled so +// callers can treat "no lifetime" and "zero budget" uniformly; a +// non-nil error means MaxRun is set but unparseable, which Validate +// rejects up front. +func (l LifetimeConfig) MaxRunDuration() (time.Duration, error) { + if !l.Enabled() { + return 0, nil + } + d, err := time.ParseDuration(l.MaxRun) + if err != nil { + return 0, errInvalid("lifetime.maxRun %q is not a valid Go duration (e.g. 8h, 90m): %w", l.MaxRun, err) + } + return d, nil } // StorageConfig controls the local-path-provisioner install that @@ -328,9 +401,34 @@ func (c *CommonConfig) validateCommon(expected string) error { if c.K3s.Version == "" { return errInvalid("k3s.version is empty; check pkg/provision/config/k3s.yaml") } + if err := c.Lifetime.validate(); err != nil { + return err + } return nil } +// validate enforces the lifetime invariants. A disabled lifetime +// (empty MaxRun) is always valid. When set, MaxRun must parse to a +// strictly positive duration and OnExpiry must name a known action. +func (l LifetimeConfig) validate() error { + if !l.Enabled() { + return nil + } + d, err := l.MaxRunDuration() + if err != nil { + return err + } + if d <= 0 { + return errInvalid("lifetime.maxRun must be positive, got %q", l.MaxRun) + } + switch l.OnExpiry { + case "", OnExpiryStop, OnExpiryPause, OnExpiryTeardown: + return nil + default: + return errInvalid("lifetime.onExpiry must be one of %v, got %q", AllOnExpiry, l.OnExpiry) + } +} + // requireHostAPIPort enforces the guest:6443 PortForwards invariant // for host-tunneled providers. qemu and docker call this from their // own Validate; multipass does not because the host dials the VM IP diff --git a/pkg/provision/config/lifetime_test.go b/pkg/provision/config/lifetime_test.go new file mode 100644 index 0000000..b574b24 --- /dev/null +++ b/pkg/provision/config/lifetime_test.go @@ -0,0 +1,113 @@ +package config + +import ( + "strings" + "testing" + "time" +) + +func TestLifetime_DisabledByDefault(t *testing.T) { + c := &QEMUConfig{CommonConfig: CommonConfig{Provider: ProviderQEMU}} + c.ApplyDefaults() + if c.Lifetime.Enabled() { + t.Fatalf("lifetime should be disabled when maxRun is unset, got %+v", c.Lifetime) + } + if err := c.Validate(); err != nil { + t.Fatalf("disabled lifetime must validate, got %v", err) + } +} + +func TestLifetime_DefaultsOnExpiryStop(t *testing.T) { + c := &QEMUConfig{CommonConfig: CommonConfig{ + Provider: ProviderQEMU, + Lifetime: LifetimeConfig{MaxRun: "8h"}, + }} + c.ApplyDefaults() + if !c.Lifetime.Enabled() { + t.Fatal("lifetime should be enabled when maxRun set") + } + if c.Lifetime.OnExpiry != OnExpiryStop { + t.Fatalf("OnExpiry default: got %q want %q", c.Lifetime.OnExpiry, OnExpiryStop) + } + if err := c.Validate(); err != nil { + t.Fatalf("valid lifetime rejected: %v", err) + } +} + +func TestLifetime_RespectsExplicitOnExpiry(t *testing.T) { + c := &QEMUConfig{CommonConfig: CommonConfig{ + Provider: ProviderQEMU, + Lifetime: LifetimeConfig{MaxRun: "1h", OnExpiry: OnExpiryTeardown}, + }} + c.ApplyDefaults() + if c.Lifetime.OnExpiry != OnExpiryTeardown { + t.Fatalf("explicit OnExpiry overridden: %q", c.Lifetime.OnExpiry) + } + if err := c.Validate(); err != nil { + t.Fatalf("valid lifetime rejected: %v", err) + } +} + +func TestLifetime_MaxRunDuration(t *testing.T) { + tests := []struct { + in string + want time.Duration + ok bool + }{ + {"", 0, true}, + {"0", 0, true}, + {"8h", 8 * time.Hour, true}, + {"90m", 90 * time.Minute, true}, + {"banana", 0, false}, + } + for _, tt := range tests { + d, err := LifetimeConfig{MaxRun: tt.in}.MaxRunDuration() + if tt.ok && err != nil { + t.Errorf("MaxRunDuration(%q): unexpected error %v", tt.in, err) + continue + } + if !tt.ok && err == nil { + t.Errorf("MaxRunDuration(%q): expected error, got nil", tt.in) + continue + } + if tt.ok && d != tt.want { + t.Errorf("MaxRunDuration(%q) = %v, want %v", tt.in, d, tt.want) + } + } +} + +func TestLifetime_Validate_BadDuration(t *testing.T) { + c := &QEMUConfig{CommonConfig: CommonConfig{ + Provider: ProviderQEMU, + Lifetime: LifetimeConfig{MaxRun: "lol"}, + }} + c.ApplyDefaults() + err := c.Validate() + if err == nil || !strings.Contains(err.Error(), "maxRun") { + t.Fatalf("want maxRun parse error, got %v", err) + } +} + +func TestLifetime_Validate_NegativeDuration(t *testing.T) { + c := &QEMUConfig{CommonConfig: CommonConfig{ + Provider: ProviderQEMU, + Lifetime: LifetimeConfig{MaxRun: "-5m"}, + }} + c.ApplyDefaults() + err := c.Validate() + if err == nil || !strings.Contains(err.Error(), "positive") { + t.Fatalf("want positive-duration error, got %v", err) + } +} + +func TestLifetime_Validate_BadOnExpiry(t *testing.T) { + c := &QEMUConfig{CommonConfig: CommonConfig{ + Provider: ProviderQEMU, + Lifetime: LifetimeConfig{MaxRun: "8h", OnExpiry: "explode"}, + }} + c.ApplyDefaults() + err := c.Validate() + if err == nil || !strings.Contains(err.Error(), "onExpiry") { + t.Fatalf("want onExpiry enum error, got %v", err) + } +} diff --git a/pkg/provision/qemu/data_disk.go b/pkg/provision/qemu/data_disk.go index c67e096..10e31d7 100644 --- a/pkg/provision/qemu/data_disk.go +++ b/pkg/provision/qemu/data_disk.go @@ -90,5 +90,10 @@ func checkDataDiskTools(path string) error { "DataDisk %s does not exist and virt-format is not on PATH; "+ "install libguestfs-tools to let y-cluster create labeled data disks", path) } + // virt-format builds a supermin appliance from the host kernel; + // bail early with a durable fix if it isn't readable. + if err := requireReadableHostKernel(); err != nil { + return err + } return nil } diff --git a/pkg/provision/qemu/libguestfs.go b/pkg/provision/qemu/libguestfs.go new file mode 100644 index 0000000..bef2126 --- /dev/null +++ b/pkg/provision/qemu/libguestfs.go @@ -0,0 +1,90 @@ +package qemu + +import ( + "fmt" + "os" + "strings" +) + +// kernelReadableHookName is the /etc/kernel/postinst.d/ filename the +// remediation suggests. Sorted late (zz-) so it runs after the +// distro hooks that lay the image down. +const kernelReadableHookName = "zz-vmlinuz-readable" + +// runningKernelRelease returns the running kernel's release string +// (the `uname -r` value) from /proc, and whether it could be read. +// Used to locate the host kernel image libguestfs needs. +func runningKernelRelease() (string, bool) { + b, err := os.ReadFile("/proc/sys/kernel/osrelease") + if err != nil { + return "", false + } + rel := strings.TrimSpace(string(b)) + if rel == "" { + return "", false + } + return rel, true +} + +// requireReadableHostKernel verifies the running kernel image is +// readable by the current process. libguestfs builds a supermin +// appliance from the host kernel, so virt-customize / virt-sysprep / +// virt-tar-out / virt-format all fail with the opaque "supermin +// exited with error status 1" when /boot/vmlinuz- is not +// readable. Ubuntu ships those images mode 0600, and a fresh 0600 +// image lands on every kernel upgrade -- which is why per-version +// chmod / dpkg-statoverride does not hold. The error surfaces a +// durable, copy-pasteable fix (a kernel postinst.d hook) so a +// downstream user fixes it once instead of rediscovering a +// workaround after every upgrade. +// +// Returns nil when the image is readable, when its path can't be +// determined, when it isn't found (we can't assert it's the +// blocker), or on a non-permission error -- in those cases we let +// libguestfs run and surface its own diagnostics rather than block +// on a false positive. +// +// A var (not a plain func) so a test can force it to fail and assert +// that a call site checks it only after its cheap correctness +// preconditions -- otherwise an unreadable kernel on the build host +// masks the actionable error (it did, on the CI runner). +var requireReadableHostKernel = func() error { + rel, ok := runningKernelRelease() + if !ok { + return nil + } + return checkKernelReadable("/boot/vmlinuz-" + rel) +} + +// checkKernelReadable is the path-parameterized core of +// requireReadableHostKernel, split out so it can be tested against a +// temp file without depending on the host's real /boot. +func checkKernelReadable(path string) error { + f, err := os.Open(path) + if err == nil { + _ = f.Close() + return nil + } + if !os.IsPermission(err) { + return nil + } + return fmt.Errorf(`host kernel %s is not readable by this user, so libguestfs +(virt-customize / virt-sysprep / virt-tar-out / virt-format) will fail +building its supermin appliance with "supermin exited with error status 1". + +Ubuntu ships /boot/vmlinuz-* mode 0600, and a fresh 0600 image lands on +every kernel upgrade -- a one-off chmod or a per-version dpkg-statoverride +does not survive that. Install a kernel hook once so current and future +kernels stay readable (this makes vmlinuz world-readable): + + sudo tee /etc/kernel/postinst.d/%s >/dev/null <<'HOOK' +#!/bin/sh +# Keep installed kernels readable for libguestfs/supermin. +v="$1"; [ -n "$v" ] && [ -e "/boot/vmlinuz-$v" ] && chmod 0644 "/boot/vmlinuz-$v" +HOOK + sudo chmod 0755 /etc/kernel/postinst.d/%s + sudo chmod 0644 /boot/vmlinuz-* + +The hook re-applies on every future kernel; the chmod fixes the ones +already installed`, path, kernelReadableHookName, kernelReadableHookName) +} diff --git a/pkg/provision/qemu/libguestfs_test.go b/pkg/provision/qemu/libguestfs_test.go new file mode 100644 index 0000000..772f261 --- /dev/null +++ b/pkg/provision/qemu/libguestfs_test.go @@ -0,0 +1,63 @@ +package qemu + +import ( + "os" + "path/filepath" + "strings" + "testing" +) + +func TestCheckKernelReadable_Readable(t *testing.T) { + dir := t.TempDir() + p := filepath.Join(dir, "vmlinuz-test") + if err := os.WriteFile(p, []byte("kernel"), 0o644); err != nil { + t.Fatal(err) + } + if err := checkKernelReadable(p); err != nil { + t.Fatalf("readable kernel should pass, got: %v", err) + } +} + +func TestCheckKernelReadable_Missing(t *testing.T) { + // A missing image is not treated as the blocker -- we let + // libguestfs surface its own error rather than false-positive. + if err := checkKernelReadable(filepath.Join(t.TempDir(), "nope")); err != nil { + t.Fatalf("missing kernel should not block, got: %v", err) + } +} + +func TestCheckKernelReadable_PermissionDenied(t *testing.T) { + if os.Geteuid() == 0 { + t.Skip("root bypasses DAC; permission-denied path is unobservable as root") + } + dir := t.TempDir() + p := filepath.Join(dir, "vmlinuz-locked") + if err := os.WriteFile(p, []byte("kernel"), 0o600); err != nil { + t.Fatal(err) + } + if err := os.Chmod(p, 0o000); err != nil { + t.Fatal(err) + } + t.Cleanup(func() { _ = os.Chmod(p, 0o644) }) + + err := checkKernelReadable(p) + if err == nil { + t.Fatal("unreadable kernel should return an actionable error") + } + msg := err.Error() + // The remediation must point at the durable postinst.d hook. + for _, want := range []string{p, "/etc/kernel/postinst.d/", "supermin", "chmod 0644 /boot/vmlinuz-*"} { + if !strings.Contains(msg, want) { + t.Errorf("error message missing %q; got:\n%s", want, msg) + } + } +} + +func TestRunningKernelRelease(t *testing.T) { + // On the Linux CI/dev host this should resolve; if /proc is + // unavailable the function returns ok=false and callers no-op. + rel, ok := runningKernelRelease() + if ok && strings.TrimSpace(rel) == "" { + t.Fatal("ok=true but release is empty") + } +} diff --git a/pkg/provision/qemu/lifecycle.go b/pkg/provision/qemu/lifecycle.go index 6a721bf..fdc469e 100644 --- a/pkg/provision/qemu/lifecycle.go +++ b/pkg/provision/qemu/lifecycle.go @@ -173,6 +173,15 @@ func Start(ctx context.Context, cacheDir, name string, logger *zap.Logger) (*Clu if err := c.Kubeconfig.Import(rawKubeconfig); err != nil { return nil, fmt.Errorf("merge kubeconfig: %w", err) } + // Re-anchor the auto-expiry deadline to this start. A + // stopped-then-started cluster gets a fresh budget; this is the + // "count from when the cluster starts" guarantee (no-op when no + // lifetime is configured). + if deadline, err := armLifetime(cacheDir, name); err != nil { + logger.Warn("could not re-arm lifetime deadline on start", zap.Error(err)) + } else if !deadline.IsZero() { + logger.Info("lifetime armed", zap.Time("expiresAt", deadline)) + } logger.Info("k3s ready", zap.String("context", c.cfg.Context)) return c, nil } diff --git a/pkg/provision/qemu/lifetime.go b/pkg/provision/qemu/lifetime.go new file mode 100644 index 0000000..ba29989 --- /dev/null +++ b/pkg/provision/qemu/lifetime.go @@ -0,0 +1,51 @@ +package qemu + +import ( + "fmt" + "time" + + "go.uber.org/zap" +) + +// This file is the exported auto-expiry surface the cmd layer's +// `y-cluster lifetime` verb drives. The deadline math and sidecar +// persistence live in state.go (unexported); these wrappers keep the +// command package free of the sidecar's internals. + +// LoadLifetime returns the persisted auto-expiry state for the named +// cluster (policy + absolute deadline). A missing sidecar surfaces +// as the underlying os error so the caller can render a clear +// "qemu-only / not provisioned" message. +func LoadLifetime(cacheDir, name string) (LifetimeState, error) { + return loadLifetime(cacheDir, name) +} + +// ExtendDeadline pushes the persisted deadline out by d and returns +// the new deadline. It errors when no deadline is armed, since +// "extend" only makes sense against an existing budget. +func ExtendDeadline(cacheDir, name string, d time.Duration) (time.Time, error) { + ls, err := loadLifetime(cacheDir, name) + if err != nil { + return time.Time{}, err + } + if ls.ExpiresAt.IsZero() { + return time.Time{}, fmt.Errorf("no lifetime deadline armed for %q; nothing to extend", name) + } + nt := ls.ExpiresAt.Add(d) + if err := setExpiresAt(cacheDir, name, nt); err != nil { + return time.Time{}, err + } + return nt, nil +} + +// TeardownByName tears down a cluster identified by its cache sidecar +// rather than a freshly loaded config dir. Used by the `onExpiry: +// teardown` reap action, which only knows the cluster name + cache +// dir. keepDisk is forwarded to the provider teardown. +func TeardownByName(cacheDir, name string, keepDisk bool, logger *zap.Logger) error { + cfg, err := loadState(cacheDir, name) + if err != nil { + return fmt.Errorf("load state for teardown of %q: %w", name, err) + } + return TeardownConfig(cfg, keepDisk, logger) +} diff --git a/pkg/provision/qemu/lifetime_state_test.go b/pkg/provision/qemu/lifetime_state_test.go new file mode 100644 index 0000000..ba476ff --- /dev/null +++ b/pkg/provision/qemu/lifetime_state_test.go @@ -0,0 +1,179 @@ +package qemu + +import ( + "testing" + "time" +) + +// pinClock pins nowFunc for the duration of a test and restores it. +func pinClock(t *testing.T, at time.Time) { + t.Helper() + prev := nowFunc + nowFunc = func() time.Time { return at } + t.Cleanup(func() { nowFunc = prev }) +} + +func TestArmLifetime_Disabled(t *testing.T) { + dir := t.TempDir() + cfg := Config{Name: "c", CacheDir: dir} // no Lifetime + if err := saveState(cfg); err != nil { + t.Fatal(err) + } + deadline, err := armLifetime(dir, "c") + if err != nil { + t.Fatal(err) + } + if !deadline.IsZero() { + t.Fatalf("disabled lifetime should arm no deadline, got %v", deadline) + } + ls, err := loadLifetime(dir, "c") + if err != nil { + t.Fatal(err) + } + if ls.Enabled() || !ls.ExpiresAt.IsZero() { + t.Fatalf("expected no lifetime state, got %+v", ls) + } +} + +func TestArmLifetime_AnchorsToNow(t *testing.T) { + dir := t.TempDir() + base := time.Date(2026, 6, 21, 12, 0, 0, 0, time.UTC) + pinClock(t, base) + + cfg := Config{Name: "c", CacheDir: dir, Lifetime: "8h", OnExpiry: "stop"} + if err := saveState(cfg); err != nil { + t.Fatal(err) + } + deadline, err := armLifetime(dir, "c") + if err != nil { + t.Fatal(err) + } + want := base.Add(8 * time.Hour) + if !deadline.Equal(want) { + t.Fatalf("deadline = %v, want %v", deadline, want) + } + + ls, err := loadLifetime(dir, "c") + if err != nil { + t.Fatal(err) + } + if ls.MaxRun != "8h" || ls.OnExpiry != "stop" { + t.Fatalf("policy not persisted: %+v", ls) + } + if !ls.ExpiresAt.Equal(want) { + t.Fatalf("persisted ExpiresAt = %v, want %v", ls.ExpiresAt, want) + } +} + +func TestArmLifetime_ReanchorsOnReArm(t *testing.T) { + dir := t.TempDir() + cfg := Config{Name: "c", CacheDir: dir, Lifetime: "2h", OnExpiry: "stop"} + if err := saveState(cfg); err != nil { + t.Fatal(err) + } + + t0 := time.Date(2026, 6, 21, 10, 0, 0, 0, time.UTC) + pinClock(t, t0) + if _, err := armLifetime(dir, "c"); err != nil { + t.Fatal(err) + } + + // Simulate stop+start three hours later: re-arm gives a fresh + // window from the new "now", not from the original provision. + t1 := t0.Add(3 * time.Hour) + pinClock(t, t1) + deadline, err := armLifetime(dir, "c") + if err != nil { + t.Fatal(err) + } + if want := t1.Add(2 * time.Hour); !deadline.Equal(want) { + t.Fatalf("re-armed deadline = %v, want %v", deadline, want) + } +} + +func TestLifetimeState_ExpiredAndRemaining(t *testing.T) { + dir := t.TempDir() + cfg := Config{Name: "c", CacheDir: dir, Lifetime: "1h", OnExpiry: "stop"} + if err := saveState(cfg); err != nil { + t.Fatal(err) + } + t0 := time.Date(2026, 6, 21, 9, 0, 0, 0, time.UTC) + pinClock(t, t0) + if _, err := armLifetime(dir, "c"); err != nil { + t.Fatal(err) + } + + // 30m in: not expired, ~30m remaining. + pinClock(t, t0.Add(30*time.Minute)) + ls, err := loadLifetime(dir, "c") + if err != nil { + t.Fatal(err) + } + if ls.Expired() { + t.Fatal("should not be expired at 30m of a 1h budget") + } + if r := ls.Remaining(); r != 30*time.Minute { + t.Fatalf("Remaining = %v, want 30m", r) + } + + // 90m in: expired, negative remaining. + pinClock(t, t0.Add(90*time.Minute)) + ls, err = loadLifetime(dir, "c") + if err != nil { + t.Fatal(err) + } + if !ls.Expired() { + t.Fatal("should be expired at 90m of a 1h budget") + } + if r := ls.Remaining(); r >= 0 { + t.Fatalf("Remaining = %v, want negative", r) + } +} + +func TestSetExpiresAt_Extend(t *testing.T) { + dir := t.TempDir() + cfg := Config{Name: "c", CacheDir: dir, Lifetime: "1h", OnExpiry: "stop"} + if err := saveState(cfg); err != nil { + t.Fatal(err) + } + t0 := time.Date(2026, 6, 21, 9, 0, 0, 0, time.UTC) + pinClock(t, t0) + if _, err := armLifetime(dir, "c"); err != nil { + t.Fatal(err) + } + ls, err := loadLifetime(dir, "c") + if err != nil { + t.Fatal(err) + } + extended := ls.ExpiresAt.Add(2 * time.Hour) + if err := setExpiresAt(dir, "c", extended); err != nil { + t.Fatal(err) + } + ls, err = loadLifetime(dir, "c") + if err != nil { + t.Fatal(err) + } + if !ls.ExpiresAt.Equal(extended) { + t.Fatalf("extended ExpiresAt = %v, want %v", ls.ExpiresAt, extended) + } + // Extend must preserve the policy. + if ls.MaxRun != "1h" || ls.OnExpiry != "stop" { + t.Fatalf("extend clobbered policy: %+v", ls) + } +} + +// Old sidecars (no lifetime fields) decode to "no lifetime". +func TestLoadLifetime_LegacySidecar(t *testing.T) { + dir := t.TempDir() + cfg := Config{Name: "c", CacheDir: dir} // no lifetime at all + if err := saveState(cfg); err != nil { + t.Fatal(err) + } + ls, err := loadLifetime(dir, "c") + if err != nil { + t.Fatal(err) + } + if ls.Enabled() || !ls.ExpiresAt.IsZero() { + t.Fatalf("legacy sidecar should yield empty lifetime, got %+v", ls) + } +} diff --git a/pkg/provision/qemu/prepare_export.go b/pkg/provision/qemu/prepare_export.go index 6d57ac9..a375c3f 100644 --- a/pkg/provision/qemu/prepare_export.go +++ b/pkg/provision/qemu/prepare_export.go @@ -97,6 +97,15 @@ func PrepareExport(ctx context.Context, cacheDir, name string, logger *zap.Logge return fmt.Errorf("disk image not found at %s: %w", diskPath, err) } + // The offline phase runs virt-customize + virt-tar-out, which + // build a supermin appliance from the host kernel. Gate that here, + // AFTER the cheap correctness preconditions (so a missing/stopped + // cluster reports its own actionable error) but BEFORE the live + // phase mutates anything, so an unreadable kernel fails clean. + if err := requireReadableHostKernel(); err != nil { + return err + } + // --- LIVE phase --- // Clear the per-deploy dns-hint-ip annotation so the snapshot // doesn't ship our LB IP. Then dump reconciled gateway state diff --git a/pkg/provision/qemu/prepare_export_test.go b/pkg/provision/qemu/prepare_export_test.go index 8b987e4..f59b4ad 100644 --- a/pkg/provision/qemu/prepare_export_test.go +++ b/pkg/provision/qemu/prepare_export_test.go @@ -2,6 +2,7 @@ package qemu import ( "context" + "errors" "os" "path/filepath" "runtime" @@ -310,3 +311,29 @@ func TestPrepareExport_MissingVirtCustomize(t *testing.T) { t.Errorf("error should hint at apt install libguestfs-tools: %v", err) } } + +// TestPrepareExport_PreconditionsBeforeKernelCheck guards the +// ordering that broke CI: an unreadable host kernel (libguestfs +// capability) must not mask a cheap correctness precondition like +// "no saved state". On the CI runner the kernel image was 0600, so +// the kernel check -- placed too early -- returned its own error +// instead of the actionable "run provision" hint. Force the kernel +// check to fail regardless of host state and assert the precondition +// still wins. +func TestPrepareExport_PreconditionsBeforeKernelCheck(t *testing.T) { + stubPrepareExportTools(t) + orig := requireReadableHostKernel + requireReadableHostKernel = func() error { return errors.New("host kernel unreadable (forced)") } + t.Cleanup(func() { requireReadableHostKernel = orig }) + + err := PrepareExport(context.Background(), t.TempDir(), "missing", nil) + if err == nil { + t.Fatal("expected error when no saved state exists") + } + if !strings.Contains(err.Error(), "y-cluster provision") { + t.Errorf("no-saved-state precondition must win over the kernel check: %v", err) + } + if strings.Contains(err.Error(), "unreadable") { + t.Errorf("kernel check fired before the precondition: %v", err) + } +} diff --git a/pkg/provision/qemu/qemu.go b/pkg/provision/qemu/qemu.go index bfb58c5..c9eb93b 100644 --- a/pkg/provision/qemu/qemu.go +++ b/pkg/provision/qemu/qemu.go @@ -66,6 +66,13 @@ type Config struct { // Provision creates the file if missing; Teardown leaves it. DataDisk string DataDiskSize string + + // Lifetime/OnExpiry are the cost-control auto-expiry policy + // (Go duration string + local action). Persisted to the state + // sidecar; the absolute deadline is armed separately so it + // anchors to start, not provision. Empty Lifetime disables. + Lifetime string + OnExpiry string } // K3s carries the runtime view of K3sConfig: which version to @@ -178,6 +185,8 @@ func FromConfig(c *config.QEMUConfig) Config { Storage: c.Storage, DataDisk: dataDisk, DataDiskSize: dataDiskSize, + Lifetime: c.Lifetime.MaxRun, + OnExpiry: c.Lifetime.OnExpiry, } } @@ -349,6 +358,17 @@ func Provision(ctx context.Context, cfg Config, logger *zap.Logger) (*Cluster, e logger.Warn("could not save state sidecar (start will not work without it)", zap.Error(err)) } + // Arm the auto-expiry deadline (no-op when no lifetime budget). + // Anchored to now: the clock starts when the cluster comes up, + // which for provision is also "now" but for the appliance start + // path is the meaningful anchor (the disk may have been built + // long before this boot). + if deadline, err := armLifetime(cfg.CacheDir, cfg.Name); err != nil { + logger.Warn("could not arm lifetime deadline", zap.Error(err)) + } else if !deadline.IsZero() { + logger.Info("lifetime armed", zap.Time("expiresAt", deadline), zap.String("onExpiry", cfg.OnExpiry)) + } + // Wait for SSH if err := c.waitForSSH(ctx); err != nil { return nil, err @@ -705,11 +725,11 @@ func (c *Cluster) DiskPath() string { // inputPath: // // - .vmdk -> qemu-img convert -f vmdk -O qcow2 (the original -// VMware-import path; vmdk subformat doesn't matter -// because qemu-img -f vmdk auto-detects the variant). +// VMware-import path; vmdk subformat doesn't matter +// because qemu-img -f vmdk auto-detects the variant). // - .qcow2 -> qemu-img convert -f qcow2 -O qcow2 (rewrites the -// qcow2 into the cache layout; usually a quick -// copy + compaction, no format change). +// qcow2 into the cache layout; usually a quick +// copy + compaction, no format change). // // A local-qemu e2e loop that does `y-cluster export // --format=qcow2 ... | y-cluster import` doesn't need any diff --git a/pkg/provision/qemu/state.go b/pkg/provision/qemu/state.go index 784c166..4303b08 100644 --- a/pkg/provision/qemu/state.go +++ b/pkg/provision/qemu/state.go @@ -5,8 +5,13 @@ import ( "fmt" "os" "path/filepath" + "time" ) +// nowFunc is the wall clock used for lifetime deadline math. A +// package var so tests can pin it; production uses time.Now. +var nowFunc = time.Now + // stateVersion guards forward-compat: a newer y-cluster reading an // older sidecar bails out with a clear error rather than guessing. // Bump when the schema changes incompatibly. @@ -33,6 +38,19 @@ type savedState struct { Context string `json:"context"` CacheDir string `json:"cacheDir"` K3s K3s `json:"k3s"` + + // Lifetime fields are additive (added after stateVersion 1) + // and all omitempty, so an old sidecar without them decodes + // cleanly to "no lifetime" and an old binary ignores them. + // They are NOT a reason to bump stateVersion. + // + // Lifetime/OnExpiry are the policy copied from config at + // provision; ExpiresAt is the absolute deadline, anchored to + // the most recent provision/start (not to provision alone -- + // an appliance disk may boot long after it was built). + Lifetime string `json:"lifetime,omitempty"` + OnExpiry string `json:"onExpiry,omitempty"` + ExpiresAt string `json:"expiresAt,omitempty"` // RFC3339; empty = no deadline } // statePath returns the sidecar path for (cacheDir, name). @@ -41,8 +59,10 @@ func statePath(cacheDir, name string) string { } // saveState writes the launch-relevant subset of cfg to the -// sidecar. Atomic via a .tmp+rename so a crash mid-write doesn't -// leave a half-written file Start would later fail to parse. +// sidecar, including the lifetime policy (Lifetime/OnExpiry). The +// ExpiresAt deadline is NOT set here -- it is armed separately by +// armLifetime so the deadline anchors to start, and so `extend` can +// move it without rewriting launch state. Atomic via .tmp+rename. func saveState(cfg Config) error { s := savedState{ Version: stateVersion, @@ -55,12 +75,21 @@ func saveState(cfg Config) error { Context: cfg.Context, CacheDir: cfg.CacheDir, K3s: cfg.K3s, + Lifetime: cfg.Lifetime, + OnExpiry: cfg.OnExpiry, } + return writeSidecar(cfg.CacheDir, cfg.Name, s) +} + +// writeSidecar atomically marshals s to /.json via +// a .tmp+rename so a crash mid-write doesn't leave a half-written +// file Start would later fail to parse. +func writeSidecar(cacheDir, name string, s savedState) error { data, err := json.MarshalIndent(s, "", " ") if err != nil { return err } - path := statePath(cfg.CacheDir, cfg.Name) + path := statePath(cacheDir, name) tmp := path + ".tmp" if err := os.WriteFile(tmp, data, 0o644); err != nil { return fmt.Errorf("write %s: %w", tmp, err) @@ -72,25 +101,35 @@ func saveState(cfg Config) error { return nil } -// loadState reads /.json and rehydrates a runtime -// Config. Kubeconfig is re-resolved from $KUBECONFIG at call time -// rather than persisted -- it's an environmental concern that -// shouldn't bake into the sidecar. -func loadState(cacheDir, name string) (Config, error) { +// readSidecar reads and version-checks the raw sidecar. Shared by +// loadState and the lifetime helpers. +func readSidecar(cacheDir, name string) (savedState, error) { path := statePath(cacheDir, name) data, err := os.ReadFile(path) if err != nil { - return Config{}, err + return savedState{}, err } var s savedState if err := json.Unmarshal(data, &s); err != nil { - return Config{}, fmt.Errorf("parse %s: %w", path, err) + return savedState{}, fmt.Errorf("parse %s: %w", path, err) } if s.Version != stateVersion { - return Config{}, fmt.Errorf( + return savedState{}, fmt.Errorf( "%s: unsupported state version %d (want %d); re-provision to refresh", path, s.Version, stateVersion) } + return s, nil +} + +// loadState reads /.json and rehydrates a runtime +// Config. Kubeconfig is re-resolved from $KUBECONFIG at call time +// rather than persisted -- it's an environmental concern that +// shouldn't bake into the sidecar. +func loadState(cacheDir, name string) (Config, error) { + s, err := readSidecar(cacheDir, name) + if err != nil { + return Config{}, err + } return Config{ Name: s.Name, DiskSize: s.DiskSize, @@ -102,9 +141,91 @@ func loadState(cacheDir, name string) (Config, error) { CacheDir: s.CacheDir, Kubeconfig: os.Getenv("KUBECONFIG"), K3s: s.K3s, + Lifetime: s.Lifetime, + OnExpiry: s.OnExpiry, }, nil } +// LifetimeState is the persisted auto-expiry view of a cluster. +type LifetimeState struct { + // MaxRun is the configured budget (Go duration string), empty + // when no lifetime is set. + MaxRun string + // OnExpiry is the local action at the deadline. + OnExpiry string + // ExpiresAt is the absolute deadline; zero when unset. + ExpiresAt time.Time +} + +// Enabled reports whether a budget is configured. +func (l LifetimeState) Enabled() bool { return l.MaxRun != "" && l.MaxRun != "0" } + +// Remaining is the time left until the deadline; negative when past +// due, zero when no deadline is armed. +func (l LifetimeState) Remaining() time.Duration { + if l.ExpiresAt.IsZero() { + return 0 + } + return l.ExpiresAt.Sub(nowFunc()) +} + +// Expired reports whether an armed deadline is at or past now. +func (l LifetimeState) Expired() bool { + return !l.ExpiresAt.IsZero() && !nowFunc().Before(l.ExpiresAt) +} + +// loadLifetime returns the persisted lifetime policy + deadline. +func loadLifetime(cacheDir, name string) (LifetimeState, error) { + s, err := readSidecar(cacheDir, name) + if err != nil { + return LifetimeState{}, err + } + ls := LifetimeState{MaxRun: s.Lifetime, OnExpiry: s.OnExpiry} + if s.ExpiresAt != "" { + t, err := time.Parse(time.RFC3339, s.ExpiresAt) + if err != nil { + return LifetimeState{}, fmt.Errorf("parse expiresAt %q: %w", s.ExpiresAt, err) + } + ls.ExpiresAt = t + } + return ls, nil +} + +// armLifetime sets ExpiresAt = now + MaxRun, anchoring the deadline +// to the current moment (provision or start). It is a no-op that +// returns a zero deadline when the cluster has no lifetime budget. +// Returns the new deadline so callers can schedule a host timer. +func armLifetime(cacheDir, name string) (time.Time, error) { + s, err := readSidecar(cacheDir, name) + if err != nil { + return time.Time{}, err + } + if s.Lifetime == "" || s.Lifetime == "0" { + return time.Time{}, nil + } + d, err := time.ParseDuration(s.Lifetime) + if err != nil { + return time.Time{}, fmt.Errorf("parse lifetime %q: %w", s.Lifetime, err) + } + deadline := nowFunc().Add(d) + s.ExpiresAt = deadline.Format(time.RFC3339) + if err := writeSidecar(cacheDir, name, s); err != nil { + return time.Time{}, err + } + return deadline, nil +} + +// setExpiresAt persists an explicit deadline, used by `extend` to +// push the deadline out without re-anchoring to now. +func setExpiresAt(cacheDir, name string, t time.Time) error { + s, err := readSidecar(cacheDir, name) + if err != nil { + return err + } + s.ExpiresAt = t.Format(time.RFC3339) + return writeSidecar(cacheDir, name, s) +} + // removeState deletes the sidecar and ignores "not present" // errors so teardown is idempotent. func removeState(cacheDir, name string) error { diff --git a/pkg/provision/schema/common.schema.json b/pkg/provision/schema/common.schema.json index 8b8d0f2..0474a72 100644 --- a/pkg/provision/schema/common.schema.json +++ b/pkg/provision/schema/common.schema.json @@ -21,6 +21,10 @@ "$ref": "#/$defs/K3sConfig", "description": "k3s install settings. Defaults track pkg/provision/config/k3s.yaml." }, + "lifetime": { + "$ref": "#/$defs/LifetimeConfig", + "description": "Cost-control auto-expiry. maxRun sets a wall-clock budget counted from when the cluster STARTS (not from provision); on expiry a local cluster runs onExpiry (stop by default) and a GCP appliance is deleted by GCP-native max-run-duration. Empty maxRun disables." + }, "memory": { "default": "8192", "description": "Memory in MB. qemu allocates this to the VM; docker passes it to --memory.", @@ -111,6 +115,26 @@ }, "type": "object" }, + "LifetimeConfig": { + "additionalProperties": false, + "properties": { + "maxRun": { + "description": "Wall-clock budget as a Go duration such as 8h or 90m. Counted from cluster start. Empty disables auto-expiry.", + "type": "string" + }, + "onExpiry": { + "default": "stop", + "description": "Local action on expiry. Ignored on the GCP appliance path (always deletes the instance).", + "enum": [ + "stop", + "pause", + "teardown" + ], + "type": "string" + } + }, + "type": "object" + }, "PortForward": { "additionalProperties": false, "properties": { diff --git a/pkg/provision/schema/docker.schema.json b/pkg/provision/schema/docker.schema.json index 796b644..ceee6c1 100644 --- a/pkg/provision/schema/docker.schema.json +++ b/pkg/provision/schema/docker.schema.json @@ -21,6 +21,10 @@ "$ref": "#/$defs/K3sConfig", "description": "k3s install settings. Defaults track pkg/provision/config/k3s.yaml." }, + "lifetime": { + "$ref": "#/$defs/LifetimeConfig", + "description": "Cost-control auto-expiry. maxRun sets a wall-clock budget counted from when the cluster STARTS (not from provision); on expiry a local cluster runs onExpiry (stop by default) and a GCP appliance is deleted by GCP-native max-run-duration. Empty maxRun disables." + }, "memory": { "default": "8192", "description": "Memory in MB. qemu allocates this to the VM; docker passes it to --memory.", @@ -110,6 +114,26 @@ }, "type": "object" }, + "LifetimeConfig": { + "additionalProperties": false, + "properties": { + "maxRun": { + "description": "Wall-clock budget as a Go duration such as 8h or 90m. Counted from cluster start. Empty disables auto-expiry.", + "type": "string" + }, + "onExpiry": { + "default": "stop", + "description": "Local action on expiry. Ignored on the GCP appliance path (always deletes the instance).", + "enum": [ + "stop", + "pause", + "teardown" + ], + "type": "string" + } + }, + "type": "object" + }, "PortForward": { "additionalProperties": false, "properties": { diff --git a/pkg/provision/schema/multipass.schema.json b/pkg/provision/schema/multipass.schema.json index df86eba..ee1336b 100644 --- a/pkg/provision/schema/multipass.schema.json +++ b/pkg/provision/schema/multipass.schema.json @@ -53,6 +53,26 @@ }, "type": "object" }, + "LifetimeConfig": { + "additionalProperties": false, + "properties": { + "maxRun": { + "description": "Wall-clock budget as a Go duration such as 8h or 90m. Counted from cluster start. Empty disables auto-expiry.", + "type": "string" + }, + "onExpiry": { + "default": "stop", + "description": "Local action on expiry. Ignored on the GCP appliance path (always deletes the instance).", + "enum": [ + "stop", + "pause", + "teardown" + ], + "type": "string" + } + }, + "type": "object" + }, "MultipassConfig": { "additionalProperties": false, "properties": { @@ -79,6 +99,10 @@ "$ref": "#/$defs/K3sConfig", "description": "k3s install settings. Defaults track pkg/provision/config/k3s.yaml." }, + "lifetime": { + "$ref": "#/$defs/LifetimeConfig", + "description": "Cost-control auto-expiry. maxRun sets a wall-clock budget counted from when the cluster STARTS (not from provision); on expiry a local cluster runs onExpiry (stop by default) and a GCP appliance is deleted by GCP-native max-run-duration. Empty maxRun disables." + }, "memory": { "default": "8192", "description": "Memory in MB. qemu allocates this to the VM; docker passes it to --memory.", diff --git a/pkg/provision/schema/qemu.schema.json b/pkg/provision/schema/qemu.schema.json index 9decf95..f76e77f 100644 --- a/pkg/provision/schema/qemu.schema.json +++ b/pkg/provision/schema/qemu.schema.json @@ -53,6 +53,26 @@ }, "type": "object" }, + "LifetimeConfig": { + "additionalProperties": false, + "properties": { + "maxRun": { + "description": "Wall-clock budget as a Go duration such as 8h or 90m. Counted from cluster start. Empty disables auto-expiry.", + "type": "string" + }, + "onExpiry": { + "default": "stop", + "description": "Local action on expiry. Ignored on the GCP appliance path (always deletes the instance).", + "enum": [ + "stop", + "pause", + "teardown" + ], + "type": "string" + } + }, + "type": "object" + }, "PortForward": { "additionalProperties": false, "properties": { @@ -109,6 +129,10 @@ "$ref": "#/$defs/K3sConfig", "description": "k3s install settings. Defaults track pkg/provision/config/k3s.yaml." }, + "lifetime": { + "$ref": "#/$defs/LifetimeConfig", + "description": "Cost-control auto-expiry. maxRun sets a wall-clock budget counted from when the cluster STARTS (not from provision); on expiry a local cluster runs onExpiry (stop by default) and a GCP appliance is deleted by GCP-native max-run-duration. Empty maxRun disables." + }, "memory": { "default": "8192", "description": "Memory in MB. qemu allocates this to the VM; docker passes it to --memory.", diff --git a/scripts/_check-host-kernel.sh b/scripts/_check-host-kernel.sh new file mode 100644 index 0000000..a9567f8 --- /dev/null +++ b/scripts/_check-host-kernel.sh @@ -0,0 +1,37 @@ +# shellcheck shell=bash +# Sourced by the appliance build / e2e scripts before any libguestfs +# (virt-sysprep / virt-customize) work, to fail fast with a DURABLE +# fix when the running kernel image is not readable. libguestfs builds +# a supermin appliance from the host kernel, and Ubuntu ships +# /boot/vmlinuz-* mode 0600, so a fresh 0600 image lands on every +# kernel upgrade. This message is kept in sync with +# requireReadableHostKernel() in pkg/provision/qemu/libguestfs.go (the +# binary enforces the same check at its libguestfs call sites). +__krel="$(uname -r)" +if ! [ -r "/boot/vmlinuz-$__krel" ]; then + { + echo "host kernel /boot/vmlinuz-$__krel is not readable by this user, so" + echo "libguestfs (virt-customize / virt-sysprep) will fail building its" + echo 'supermin appliance with "supermin exited with error status 1".' + cat <<'EOM' + +Ubuntu ships /boot/vmlinuz-* mode 0600, and a fresh 0600 image lands on +every kernel upgrade -- a one-off chmod or a per-version dpkg-statoverride +does not survive that. Install a kernel hook once so current and future +kernels stay readable (this makes vmlinuz world-readable): + + sudo tee /etc/kernel/postinst.d/zz-vmlinuz-readable >/dev/null <<'HOOK' +#!/bin/sh +# Keep installed kernels readable for libguestfs/supermin. +v="$1"; [ -n "$v" ] && [ -e "/boot/vmlinuz-$v" ] && chmod 0644 "/boot/vmlinuz-$v" +HOOK + sudo chmod 0755 /etc/kernel/postinst.d/zz-vmlinuz-readable + sudo chmod 0644 /boot/vmlinuz-* + +The hook re-applies on every future kernel; the chmod fixes the ones +already installed. +EOM + } >&2 + exit 1 +fi +unset __krel