diff --git a/README.md b/README.md
index 5d94e40..397b6fc 100644
--- a/README.md
+++ b/README.md
@@ -31,6 +31,13 @@ Subcommand groups:
   and arbitrary user-built images.
 - **cache info / purge** — inspect or wipe y-cluster's shared
   download cache (k3s airgap bundles, image OCI layouts).
+- **lifetime status / reap / extend / arm / disarm / gcp-flags** —
+  cost-control auto-expiry. A `lifetime.maxRun` in the config gives
+  the cluster a wall-clock budget counted from when it starts; on
+  expiry a local cluster runs its `onExpiry` action (stop by
+  default) via a host timer, and a GCP appliance is deleted by GCP
+  itself (`gcp-flags` emits the `--max-run-duration` flags). See the
+  "lifetime" idea below.
 - **serve / serve ensure / serve stop / serve logs** — a
   lightweight HTTP server that exposes config assets to the
   cluster: kustomize-built Secrets named
@@ -42,7 +49,20 @@ context. The README is intentionally short — when something is
 discoverable from `y-cluster <cmd> --help`, that's where it
 lives.
 
-## Two ideas worth knowing before you start
+## Three ideas worth knowing before you start
+
+**lifetime: the budget is counted from start, and the trigger lives
+where the cost is.** `lifetime.maxRun` is a wall-clock budget that
+begins when the cluster *starts* (re-anchored on every `y-cluster
+start`), not when it was provisioned — an appliance disk may boot
+days after it was built. Locally the host *is* the cost, so a host
+timer fires `y-cluster lifetime reap`, which stops the cluster (or
+runs the configured `onExpiry` action). On a GCP appliance the host
+mustn't be the trigger (it may be offline), so `lifetime gcp-flags`
+hands the duration to GCP's native `--max-run-duration`, and GCP
+deletes the instance on its own — the attached data disk survives.
+`reap` re-checks the persisted deadline and re-arms if it isn't due,
+so `lifetime extend 2h` is safe and a stale timer is harmless.
 
 **yconverge: ordering vs checks come from different places.**
 CUE imports in `yconverge.cue` declare ordering — each import is
diff --git a/cmd/y-cluster/lifecycle.go b/cmd/y-cluster/lifecycle.go
index 736aa40..31f7115 100644
--- a/cmd/y-cluster/lifecycle.go
+++ b/cmd/y-cluster/lifecycle.go
@@ -55,6 +55,9 @@ func stopCmd() *cobra.Command {
 			}
 			switch lr.Backend {
 			case cluster.BackendQEMU:
+				// A manual stop ends this run's budget; remove the
+				// host expiry timer. `start` re-arms a fresh window.
+				disarmHostTimer(contextName, logger)
 				return qemu.Stop(qemuCacheDir(), lr.ClusterName, logger)
 			case cluster.BackendDocker:
 				return docker.Stop(ctx, lr.ClusterName, logger)
@@ -240,6 +243,9 @@ func startCmd() *cobra.Command {
 			if err != nil {
 				return err
 			}
+			// Start re-anchored the deadline to now; install the host
+			// timer for the fresh window.
+			armHostTimerIfLifetime(qemuCacheDir(), clusterName, contextName, logger)
 			logger.Info("cluster started",
 				zap.String("context", c.Context()),
 			)
diff --git a/cmd/y-cluster/lifetime.go b/cmd/y-cluster/lifetime.go
new file mode 100644
index 0000000..dd6331c
--- /dev/null
+++ b/cmd/y-cluster/lifetime.go
@@ -0,0 +1,332 @@
+package main
+
+import (
+	"errors"
+	"fmt"
+	"os"
+	"time"
+
+	"github.com/spf13/cobra"
+	"go.uber.org/zap"
+
+	"github.com/Yolean/y-cluster/pkg/cluster"
+	"github.com/Yolean/y-cluster/pkg/lifetime"
+	"github.com/Yolean/y-cluster/pkg/provision/config"
+	"github.com/Yolean/y-cluster/pkg/provision/qemu"
+)
+
+// lifetimeCmd is the cost-control auto-expiry surface. A dev cluster
+// left running after a task is paused or finished is pure cost; a
+// lifetime budget makes the cluster expire on its own.
+//
+// Two enforcement paths, picked by where the cost lives:
+//   - LOCAL (qemu): a host-side timer (status/reap/extend/arm/disarm
+//     below) runs the onExpiry action. The host is the cost, so a
+//     host timer is the right trigger.
+//   - CLOUD (GCP appliance): `gcp-flags` emits gcloud scheduling
+//     flags so GCP itself deletes the instance at the deadline -- no
+//     host or cluster dependency. This is the only correct trigger
+//     for a paid cloud resource.
+//
+// The local subcommands are qemu-only today, matching the rest of
+// the lifecycle surface; the qemu state sidecar is where the
+// deadline lives.
+func lifetimeCmd() *cobra.Command {
+	cmd := &cobra.Command{
+		Use:   "lifetime",
+		Short: "Cost-control auto-expiry: stop/decommission a cluster that is no longer needed",
+	}
+	cmd.AddCommand(
+		lifetimeStatusCmd(),
+		lifetimeReapCmd(),
+		lifetimeExtendCmd(),
+		lifetimeArmCmd(),
+		lifetimeDisarmCmd(),
+		lifetimeGCPFlagsCmd(),
+	)
+	return cmd
+}
+
+// armHostTimerIfLifetime installs the host-side reap timer when the
+// qemu cluster has an armed deadline. Called from provision/start
+// after the deadline has been (re)anchored. Best-effort: a failure
+// to arm is logged, not fatal -- the deadline is persisted and
+// `y-cluster lifetime reap` (by hand or from any scheduler) remains
+// the backstop. No-op when no lifetime is configured.
+func armHostTimerIfLifetime(cacheDir, name, contextName string, logger *zap.Logger) {
+	ls, err := qemu.LoadLifetime(cacheDir, name)
+	if err != nil || !ls.Enabled() || ls.ExpiresAt.IsZero() {
+		return
+	}
+	bin, err := os.Executable()
+	if err != nil {
+		logger.Warn("could not resolve binary path to arm lifetime timer", zap.Error(err))
+		return
+	}
+	if err := lifetime.Arm(bin, contextName, ls.Remaining(), logger); err != nil {
+		logger.Warn("could not arm lifetime host timer", zap.Error(err))
+	}
+}
+
+// disarmHostTimer removes the host-side reap timer for a context.
+// Called from stop/teardown. Best-effort by design.
+func disarmHostTimer(contextName string, logger *zap.Logger) {
+	_ = lifetime.Disarm(contextName, logger)
+}
+
+// resolveQemuCluster maps a kubeconfig context to the qemu cache dir
+// + cluster name that the lifetime sidecar is keyed on. Works for a
+// stopped cluster too (the context survives in kubeconfig), unlike
+// cluster.Lookup which needs a running runtime.
+func resolveQemuCluster(contextName string) (cacheDir, name string, err error) {
+	name, err = cluster.ResolveClusterName("", contextName)
+	if err != nil {
+		return "", "", err
+	}
+	if name == "" {
+		return "", "", fmt.Errorf("kubeconfig context %q has no associated cluster", contextName)
+	}
+	return qemuCacheDir(), name, nil
+}
+
+// lifetimeStateErr renders the missing-sidecar case as a clear
+// qemu-only message rather than a raw "no such file" error.
+func lifetimeStateErr(name string, err error) error {
+	if errors.Is(err, os.ErrNotExist) {
+		return fmt.Errorf("no lifetime state for cluster %q; lifetime is implemented for the qemu provider only", name)
+	}
+	return err
+}
+
+func lifetimeStatusCmd() *cobra.Command {
+	var contextName string
+	cmd := &cobra.Command{
+		Use:   "status",
+		Short: "Show the cluster's lifetime policy and time remaining",
+		Args:  cobra.NoArgs,
+		RunE: func(cmd *cobra.Command, _ []string) error {
+			cacheDir, name, err := resolveQemuCluster(contextName)
+			if err != nil {
+				return err
+			}
+			ls, err := qemu.LoadLifetime(cacheDir, name)
+			if err != nil {
+				return lifetimeStateErr(name, err)
+			}
+			out := cmd.OutOrStdout()
+			if !ls.Enabled() {
+				fmt.Fprintf(out, "lifetime: disabled (no maxRun) for %q\n", name)
+				return nil
+			}
+			fmt.Fprintf(out, "cluster:   %s\n", name)
+			fmt.Fprintf(out, "maxRun:    %s\n", ls.MaxRun)
+			fmt.Fprintf(out, "onExpiry:  %s\n", ls.OnExpiry)
+			if ls.ExpiresAt.IsZero() {
+				fmt.Fprintln(out, "expiresAt: (not armed; run `y-cluster start` or `y-cluster lifetime arm`)")
+				return nil
+			}
+			fmt.Fprintf(out, "expiresAt: %s\n", ls.ExpiresAt.Format(time.RFC3339))
+			rem := ls.Remaining().Round(time.Second)
+			if rem < 0 {
+				fmt.Fprintf(out, "remaining: EXPIRED (%s ago)\n", (-rem).String())
+			} else {
+				fmt.Fprintf(out, "remaining: %s\n", rem)
+			}
+			return nil
+		},
+	}
+	cmd.Flags().StringVar(&contextName, "context", cluster.DefaultContext, "kubeconfig context name")
+	return cmd
+}
+
+// lifetimeReapCmd builds `lifetime reap`: run onExpiry if the deadline has passed, else re-arm the host timer.
+func lifetimeReapCmd() *cobra.Command {
+	var contextName string
+	cmd := &cobra.Command{
+		Use:   "reap",
+		Short: "Run the expiry action if the deadline has passed; otherwise re-arm",
+		Long: `reap is what the host timer fires at the deadline. It is
+idempotent and self-healing: it re-reads the persisted deadline and
+acts only if it has truly elapsed. If the deadline was pushed out
+(e.g. via ` + "`lifetime extend`" + `) since the timer was set, reap
+simply re-arms for the remaining window and exits. Safe to run by
+hand or from an external cron.`,
+		Args: cobra.NoArgs,
+		RunE: func(cmd *cobra.Command, _ []string) error {
+			logger := loggerFromContext(cmd.Context())
+			cacheDir, name, err := resolveQemuCluster(contextName)
+			if err != nil {
+				return err
+			}
+			ls, err := qemu.LoadLifetime(cacheDir, name)
+			if err != nil {
+				return lifetimeStateErr(name, err)
+			}
+			if !ls.Enabled() {
+				logger.Info("no lifetime configured; nothing to reap", zap.String("cluster", name))
+				return nil
+			}
+			if ls.ExpiresAt.IsZero() {
+				logger.Info("lifetime not armed; nothing to reap", zap.String("cluster", name))
+				return nil
+			}
+			if !ls.Expired() {
+				rem := ls.Remaining()
+				bin, err := os.Executable()
+				if err == nil {
+					err = lifetime.Arm(bin, contextName, rem, logger)
+				}
+				if err != nil {
+					logger.Warn("could not re-arm host timer", zap.Error(err))
+				}
+				logger.Info("not yet expired", zap.Bool("rearmed", err == nil), zap.String("cluster", name), zap.Duration("remaining", rem.Round(time.Second)))
+				return nil
+			}
+
+			logger.Info("lifetime expired; reaping", zap.String("cluster", name), zap.String("onExpiry", ls.OnExpiry))
+			switch ls.OnExpiry {
+			case config.OnExpiryPause:
+				err = qemu.Pause(cacheDir, name, logger)
+			case config.OnExpiryTeardown:
+				err = qemu.TeardownByName(cacheDir, name, false, logger)
+			default: // stop is the default and the empty-value behaviour
+				err = qemu.Stop(cacheDir, name, logger)
+			}
+			if err != nil {
+				return err
+			}
+			// Action performed: drop the host timer (best-effort; reap's recheck makes a stray timer harmless).
+			_ = lifetime.Disarm(contextName, logger)
+			return nil
+		},
+	}
+	cmd.Flags().StringVar(&contextName, "context", cluster.DefaultContext, "kubeconfig context name")
+	return cmd
+}
+
+func lifetimeExtendCmd() *cobra.Command {
+	var contextName string
+	cmd := &cobra.Command{
+		Use:   "extend <duration>",
+		Short: "Push the deadline out by <duration> (e.g. 2h) and re-arm",
+		Args:  cobra.ExactArgs(1),
+		RunE: func(cmd *cobra.Command, args []string) error {
+			logger := loggerFromContext(cmd.Context())
+			d, err := time.ParseDuration(args[0])
+			if err != nil {
+				return fmt.Errorf("invalid duration %q: %w", args[0], err)
+			}
+			if d <= 0 {
+				return fmt.Errorf("extend duration must be positive, got %q", args[0])
+			}
+			cacheDir, name, err := resolveQemuCluster(contextName)
+			if err != nil {
+				return err
+			}
+			nt, err := qemu.ExtendDeadline(cacheDir, name, d)
+			if err != nil {
+				return lifetimeStateErr(name, err)
+			}
+			if bin, err := os.Executable(); err == nil {
+				if err := lifetime.Arm(bin, contextName, time.Until(nt), logger); err != nil {
+					logger.Warn("could not re-arm host timer", zap.Error(err))
+				}
+			}
+			fmt.Fprintf(cmd.OutOrStdout(), "extended; expiresAt %s\n", nt.Format(time.RFC3339))
+			return nil
+		},
+	}
+	cmd.Flags().StringVar(&contextName, "context", cluster.DefaultContext, "kubeconfig context name")
+	return cmd
+}
+
+func lifetimeArmCmd() *cobra.Command {
+	var contextName string
+	cmd := &cobra.Command{
+		Use:   "arm",
+		Short: "(Re)install the host timer that fires the expiry action",
+		Args:  cobra.NoArgs,
+		RunE: func(cmd *cobra.Command, _ []string) error {
+			logger := loggerFromContext(cmd.Context())
+			cacheDir, name, err := resolveQemuCluster(contextName)
+			if err != nil {
+				return err
+			}
+			ls, err := qemu.LoadLifetime(cacheDir, name)
+			if err != nil {
+				return lifetimeStateErr(name, err)
+			}
+			if !ls.Enabled() {
+				return fmt.Errorf("no lifetime configured for %q; set lifetime.maxRun and re-provision", name)
+			}
+			if ls.ExpiresAt.IsZero() {
+				return fmt.Errorf("no deadline armed for %q; `y-cluster start` re-anchors it", name)
+			}
+			bin, err := os.Executable()
+			if err != nil {
+				return err
+			}
+			return lifetime.Arm(bin, contextName, ls.Remaining(), logger)
+		},
+	}
+	cmd.Flags().StringVar(&contextName, "context", cluster.DefaultContext, "kubeconfig context name")
+	return cmd
+}
+
+func lifetimeDisarmCmd() *cobra.Command {
+	var contextName string
+	cmd := &cobra.Command{
+		Use:   "disarm",
+		Short: "Remove the host timer (the persisted deadline is left intact)",
+		Args:  cobra.NoArgs,
+		RunE: func(cmd *cobra.Command, _ []string) error {
+			logger := loggerFromContext(cmd.Context())
+			return lifetime.Disarm(contextName, logger)
+		},
+	}
+	cmd.Flags().StringVar(&contextName, "context", cluster.DefaultContext, "kubeconfig context name")
+	return cmd
+}
+
+func lifetimeGCPFlagsCmd() *cobra.Command {
+	var configDir string
+	cmd := &cobra.Command{
+		Use:   "gcp-flags",
+		Short: "Print gcloud instances-create flags that enforce the lifetime cloud-side",
+		Long: `Reads lifetime.maxRun from the y-cluster-provision.yaml in -c
+<dir> and prints the matching
+` + "`--max-run-duration=<secs>s --instance-termination-action=DELETE`" + `
+flags for ` + "`gcloud compute instances create`" + `. Prints nothing
+when no lifetime is configured, so a build script can append the
+output unconditionally:
+
+    EXTRA=$(y-cluster lifetime gcp-flags -c "$CONFIG_DIR")
+    gcloud compute instances create ... $EXTRA`,
+		Args: cobra.NoArgs,
+		RunE: func(cmd *cobra.Command, _ []string) error {
+			loaded, err := loadProvision(configDir)
+			if err != nil {
+				return err
+			}
+			acc, ok := loaded.(interface {
+				LifetimePolicy() config.LifetimeConfig
+			})
+			if !ok {
+				return nil // provider with no lifetime surface: emit nothing
+			}
+			flags, err := lifetime.GCPFlags(acc.LifetimePolicy().MaxRun)
+			if err != nil {
+				return err
+			}
+			if flags != "" {
+				fmt.Fprintln(cmd.OutOrStdout(), flags)
+			}
+			return nil
+		},
+	}
+	cmd.Flags().StringVarP(&configDir, "config", "c", "", "directory containing y-cluster-provision.yaml")
+	if err := cmd.MarkFlagRequired("config"); err != nil {
+		panic(err)
+	}
+	return cmd
+}
diff --git a/cmd/y-cluster/main.go b/cmd/y-cluster/main.go
index ee7ab6b..b1c06d5 100644
--- a/cmd/y-cluster/main.go
+++ b/cmd/y-cluster/main.go
@@ -131,6 +131,7 @@ func rootCmd() *cobra.Command {
 	root.AddCommand(prepareExportCmd())
 	root.AddCommand(exportCmd())
 	root.AddCommand(importCmd())
+	root.AddCommand(lifetimeCmd())
 	root.AddCommand(serveCmd())
 	root.AddCommand(imagesCmd())
 	root.AddCommand(manifestsCmd())
@@ -361,6 +362,9 @@ message naming what was checked.`,
 				if _, err := qemu.Provision(cmd.Context(), rt, logger); err != nil {
 					return err
 				}
+				// Provision armed the deadline; install the host-side
+				// timer that fires the local expiry action.
+				armHostTimerIfLifetime(rt.CacheDir, rt.Name, rt.Context, logger)
 				logger.Info("cluster ready",
 					zap.String("ssh", fmt.Sprintf("ssh -p %s -i %s ystack@localhost",
 						rt.SSHPort, filepath.Join(rt.CacheDir, rt.Name+"-ssh"))),
@@ -411,6 +415,9 @@ func teardownCmd() *cobra.Command {
 			}
 			switch v := loaded.(type) {
 			case *config.QEMUConfig:
+				// Remove the host expiry timer before the cluster goes;
+				// the deadline is moot once teardown removes the sidecar.
+				disarmHostTimer(v.Context, logger)
 				return qemu.TeardownConfig(qemu.FromConfig(v), keepDisk, logger)
 			case *config.DockerConfig:
 				// docker has no persistent disk; keepDisk is
diff --git a/e2e/lifetime_test.go b/e2e/lifetime_test.go
new file mode 100644
index 0000000..2e309d1
--- /dev/null
+++ b/e2e/lifetime_test.go
@@ -0,0 +1,132 @@
+//go:build e2e && kvm
+
+package e2e
+
+import (
+	"context"
+	"os"
+	"path/filepath"
+	"testing"
+	"time"
+
+	"go.uber.org/zap"
+
+	"github.com/Yolean/y-cluster/pkg/provision/qemu"
+)
+
+// TestQemu_Lifetime exercises the local cost-control auto-expiry path
+// against a real qemu boot:
+//
+//   - Provision with a lifetime budget; assert the deadline is armed
+//     in the sidecar and anchored to roughly now+budget.
+//   - Push the deadline into the past (the time-travel a real expiry
+//     would reach naturally) and assert Expired() flips.
+//   - Run the onExpiry action (stop) and assert the VM is down with
+//     disk + sidecar preserved -- the cost is gone, the cluster is
+//     resumable.
+//   - Start and assert the deadline is re-anchored to this start (the
+//     "count from when the cluster starts" guarantee), giving a fresh
+//     budget rather than an already-expired one.
+//
+// The host timer (systemd-run/at) is unit-tested in pkg/lifetime;
+// here we cover the runtime substance: persistence, expiry detection,
+// the real stop action, and the start re-anchor.
+func TestQemu_Lifetime(t *testing.T) {
+	if _, err := os.Stat("/dev/kvm"); err != nil {
+		t.Skip("QEMU tests require /dev/kvm")
+	}
+	if err := qemu.CheckPrerequisites(); err != nil {
+		t.Skip(err)
+	}
+
+	logger, _ := zap.NewDevelopment()
+	cfg := e2eQEMURuntime()
+	cfg.Name = "y-cluster-e2e-lifetime"
+	cfg.Context = "y-cluster-e2e-lifetime"
+	cfg.CacheDir = t.TempDir()
+	cfg.Memory = "4096"
+	cfg.CPUs = "2"
+	cfg.SSHPort = "2227"
+	cfg.PortForwards = e2eUniqueForwards("26447", "28447")
+	cfg.Kubeconfig = os.Getenv("KUBECONFIG")
+	if cfg.Kubeconfig == "" {
+		t.Skip("KUBECONFIG must be set")
+	}
+	cfg.Lifetime = "2h"
+	cfg.OnExpiry = "stop"
+	t.Setenv("Y_CLUSTER_QEMU_CACHE_DIR", cfg.CacheDir)
+
+	ctx := context.Background()
+
+	cluster, err := qemu.Provision(ctx, cfg, logger)
+	if err != nil {
+		t.Fatal(err)
+	}
+	t.Cleanup(func() { _ = cluster.Teardown(false) })
+
+	// Deadline armed at provision, anchored ~now+2h.
+	ls, err := qemu.LoadLifetime(cfg.CacheDir, cfg.Name)
+	if err != nil {
+		t.Fatalf("LoadLifetime after provision: %v", err)
+	}
+	if !ls.Enabled() {
+		t.Fatal("lifetime should be enabled after provision with maxRun set")
+	}
+	if ls.ExpiresAt.IsZero() {
+		t.Fatal("ExpiresAt should be armed after provision")
+	}
+	if rem := time.Until(ls.ExpiresAt); rem < 90*time.Minute || rem > 2*time.Hour+5*time.Minute {
+		t.Fatalf("provision deadline %s out of expected ~2h window (remaining %s)", ls.ExpiresAt, rem)
+	}
+	provisionDeadline := ls.ExpiresAt
+
+	// Simulate the deadline elapsing: push it three hours into the
+	// past so it is unambiguously due.
+	if _, err := qemu.ExtendDeadline(cfg.CacheDir, cfg.Name, -3*time.Hour); err != nil {
+		t.Fatalf("ExtendDeadline (negative, to force expiry): %v", err)
+	}
+	ls, err = qemu.LoadLifetime(cfg.CacheDir, cfg.Name)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if !ls.Expired() {
+		t.Fatalf("deadline %s should be expired after pushing it into the past", ls.ExpiresAt)
+	}
+
+	// The onExpiry action: stop. Pidfile gone, disk + sidecar kept.
+	vmPid := readPid(t, cfg.CacheDir, cfg.Name)
+	if err := qemu.Stop(cfg.CacheDir, cfg.Name, logger); err != nil {
+		t.Fatalf("Stop (reap action): %v", err)
+	}
+	if _, err := os.Stat(filepath.Join(cfg.CacheDir, cfg.Name+".pid")); !os.IsNotExist(err) {
+		t.Fatalf("pidfile should be gone after expiry stop; stat err=%v", err)
+	}
+	if _, err := os.Stat(cluster.DiskPath()); err != nil {
+		t.Fatalf("disk should be preserved after expiry stop: %v", err)
+	}
+	if _, err := os.Stat(filepath.Join(cfg.CacheDir, cfg.Name+".json")); err != nil {
+		t.Fatalf("state sidecar should be preserved after expiry stop: %v", err)
+	}
+	assertPidGone(t, vmPid)
+
+	// Start re-anchors the deadline to now: a stopped-then-started
+	// cluster gets a fresh budget, not the expired one we forced.
+	cluster2, err := qemu.Start(ctx, cfg.CacheDir, cfg.Name, logger)
+	if err != nil {
+		t.Fatalf("Start: %v", err)
+	}
+	_ = cluster2
+	assertNodeReady(t, cfg.Context, cfg.Kubeconfig)
+
+	ls, err = qemu.LoadLifetime(cfg.CacheDir, cfg.Name)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if ls.Expired() {
+		t.Fatal("deadline should be re-anchored (not expired) after Start")
+	}
+	if !ls.ExpiresAt.After(provisionDeadline) {
+		t.Fatalf("start deadline %s should be later than the original provision deadline %s",
+			ls.ExpiresAt, provisionDeadline)
+	}
+}
diff --git a/pkg/lifetime/gcp.go b/pkg/lifetime/gcp.go
new file mode 100644
index 0000000..ca9509a
--- /dev/null
+++ b/pkg/lifetime/gcp.go
@@ -0,0 +1,33 @@
+package lifetime
+
+import (
+	"fmt"
+	"time"
+)
+
+// GCPFlags renders the `gcloud compute instances create` scheduling
+// flags that enforce a cluster lifetime CLOUD-side. GCP measures
+// max-run-duration from when the instance STARTS and then performs
+// the termination action -- here DELETE -- with no dependency on the
+// provisioning host or on the cluster staying up. That is exactly
+// the "anchor to start, never host-bound" contract the appliance
+// needs: the disk image carries only the duration, never an absolute
+// deadline baked in at build time.
+//
+// Returns "" (no flags) when no budget is configured. The duration
+// is emitted as whole seconds (truncating) to suit gcloud's parser;
+// budgets under one second are rejected rather than emitted as "0s".
+func GCPFlags(maxRun string) (string, error) {
+	if maxRun == "" || maxRun == "0" {
+		return "", nil
+	}
+	d, err := time.ParseDuration(maxRun)
+	if err != nil {
+		return "", fmt.Errorf("lifetime maxRun %q is not a valid Go duration: %w", maxRun, err)
+	}
+	if d < time.Second {
+		return "", fmt.Errorf("lifetime maxRun must be at least 1s, got %q", maxRun)
+	}
+	secs := int64(d / time.Second)
+	return fmt.Sprintf("--max-run-duration=%ds --instance-termination-action=DELETE", secs), nil
+}
diff --git a/pkg/lifetime/gcp_test.go b/pkg/lifetime/gcp_test.go
new file mode 100644
index 0000000..ab766d2
--- /dev/null
+++ b/pkg/lifetime/gcp_test.go
@@ -0,0 +1,49 @@
+package lifetime
+
+import (
+	"strings"
+	"testing"
+)
+
+func TestGCPFlags(t *testing.T) {
+	tests := []struct {
+		in      string
+		want    string
+		wantErr bool
+	}{
+		{"", "", false},
+		{"0", "", false},
+		{"8h", "--max-run-duration=28800s --instance-termination-action=DELETE", false},
+		{"90m", "--max-run-duration=5400s --instance-termination-action=DELETE", false},
+		{"banana", "", true},
+		{"-5m", "", true},
+	}
+	for _, tt := range tests {
+		got, err := GCPFlags(tt.in)
+		if tt.wantErr {
+			if err == nil {
+				t.Errorf("GCPFlags(%q): expected error, got %q", tt.in, got)
+			}
+			continue
+		}
+		if err != nil {
+			t.Errorf("GCPFlags(%q): unexpected error %v", tt.in, err)
+			continue
+		}
+		if got != tt.want {
+			t.Errorf("GCPFlags(%q) = %q, want %q", tt.in, got, tt.want)
+		}
+	}
+}
+
+// The DELETE action is what preserves the separately-attached data
+// disk while stopping compute billing -- guard it explicitly.
+func TestGCPFlags_DeletesInstance(t *testing.T) {
+	got, err := GCPFlags("1h")
+	if err != nil {
+		t.Fatal(err)
+	}
+	if !strings.Contains(got, "--instance-termination-action=DELETE") {
+		t.Fatalf("expected DELETE termination action, got %q", got)
+	}
+}
diff --git a/pkg/lifetime/timer.go b/pkg/lifetime/timer.go
new file mode 100644
index 0000000..786d3ce
--- /dev/null
+++ b/pkg/lifetime/timer.go
@@ -0,0 +1,184 @@
+// Package lifetime arms and disarms the host-side timer that fires a
+// cluster's auto-expiry action. It is the LOCAL trigger: for a local
+// dev cluster the host machine is itself the cost, so a host timer is
+// the right place for the trigger (if the host sleeps or logs out,
+// the VM is down too, so there is nothing to reap). Paid CLOUD
+// resources must NOT be reaped from the provisioning host -- that
+// path uses cloud-enforced expiry (GCP max-run-duration) instead and
+// never goes through this package.
+//
+// The timer runs `<y-cluster> lifetime reap --context=<ctx>` at the
+// deadline. reap re-reads the persisted deadline and acts only if it
+// has truly elapsed, otherwise it re-arms for the remaining window.
+// That idempotency makes a stale timer (e.g. one left behind after an
+// `extend`) harmless, which is why Disarm is best-effort.
+package lifetime
+
+import (
+	"fmt"
+	"os/exec"
+	"strings"
+	"time"
+
+	"go.uber.org/zap"
+)
+
+// reapInvocation is the argv tail every backend schedules: the
+// y-cluster subcommand that performs the expiry check + action.
+func reapInvocation(bin, kubeContext string) []string {
+	return []string{bin, "lifetime", "reap", "--context=" + kubeContext}
+}
+
+// unitName is the transient systemd unit name for a context's timer.
+// The context is usually DNS-label-ish already, but a kubeconfig
+// context can carry characters systemd rejects, so every rune outside
+// [A-Za-z0-9-] is mapped to '-'. The mapping is lossy: contexts that
+// differ only in replaced runes (e.g. "a/b" and "a b") share a name.
+func unitName(kubeContext string) string {
+	var b strings.Builder
+	b.WriteString("y-cluster-lifetime-")
+	for _, r := range kubeContext {
+		switch {
+		case r >= 'a' && r <= 'z', r >= 'A' && r <= 'Z', r >= '0' && r <= '9', r == '-':
+			b.WriteRune(r)
+		default:
+			b.WriteRune('-')
+		}
+	}
+	return b.String()
+}
+
+// remainingSeconds clamps a deadline-relative duration to a minimum
+// of one second so the timer is always in the future even if the
+// deadline is already (nearly) here -- in which case reap fires
+// almost immediately and acts.
+func remainingSeconds(remaining time.Duration) int {
+	s := int(remaining / time.Second)
+	if s < 1 {
+		return 1
+	}
+	return s
+}
+
+// systemdRunArgs builds the argv for arming via a transient
+// `systemd-run --user` timer. `--on-active` is relative to now, so a
+// computed remaining window arms the deadline; `--unit` names it so
+// status/disarm can find it.
+func systemdRunArgs(bin, kubeContext string, remaining time.Duration) []string {
+	args := []string{
+		"--user",
+		"--unit=" + unitName(kubeContext),
+		fmt.Sprintf("--on-active=%ds", remainingSeconds(remaining)),
+		"--timer-property=AccuracySec=1s",
+		"--",
+	}
+	return append(args, reapInvocation(bin, kubeContext)...)
+}
+
+// atTimeSpec renders the `at` time argument. at granularity is
+// minutes, so round up to at least one minute.
+func atTimeSpec(remaining time.Duration) string {
+	mins := int((remaining + time.Minute - 1) / time.Minute)
+	if mins < 1 {
+		mins = 1
+	}
+	return fmt.Sprintf("now + %d minutes", mins)
+}
+
+// atScript is the shell line piped to `at`. The trailing comment is a
+// stable marker so Disarm can find this job among the user's at queue
+// (at has no job naming).
+func atScript(bin, kubeContext string) string {
+	return strings.Join(reapInvocation(bin, kubeContext), " ") +
+		" # " + unitName(kubeContext)
+}
+
+// Arm schedules the reap for `remaining` from now via systemd-run
+// (preferred) or `at` (fallback). It disarms any existing timer for
+// the context first so re-arming is idempotent. A nil error means a
+// timer is in place; an error means no host timer was armed (the
+// persisted deadline still stands, so a manual or external
+// `lifetime reap` remains the backstop).
+func Arm(bin, kubeContext string, remaining time.Duration, logger *zap.Logger) error {
+	if logger == nil {
+		logger = zap.NewNop()
+	}
+	_ = Disarm(kubeContext, logger) // best-effort; ignore "nothing to remove"
+
+	if _, err := exec.LookPath("systemd-run"); err == nil {
+		args := systemdRunArgs(bin, kubeContext, remaining)
+		out, err := exec.Command("systemd-run", args...).CombinedOutput()
+		if err == nil {
+			logger.Info("lifetime timer armed (systemd)",
+				zap.String("unit", unitName(kubeContext)),
+				zap.Duration("in", remaining))
+			return nil
+		}
+		// User bus may be unavailable (e.g. headless without linger);
+		// fall through to at rather than failing outright.
+		logger.Debug("systemd-run failed; trying at",
+			zap.Error(err), zap.ByteString("output", out))
+	}
+
+	if _, err := exec.LookPath("at"); err == nil {
+		cmd := exec.Command("at", strings.Fields(atTimeSpec(remaining))...)
+		cmd.Stdin = strings.NewReader(atScript(bin, kubeContext) + "\n")
+		if out, err := cmd.CombinedOutput(); err != nil {
+			return fmt.Errorf("at scheduling failed: %w: %s", err, strings.TrimSpace(string(out)))
+		}
+		logger.Info("lifetime timer armed (at)", zap.Duration("in", remaining))
+		return nil
+	}
+
+	return fmt.Errorf("no host scheduler available (need systemd-run --user or at); " +
+		"the deadline is persisted but will not fire automatically - " +
+		"run `y-cluster lifetime reap` from a cron/timer of your choosing")
+}
+
+// Disarm removes the context's host timer. Best-effort by design:
+// reap re-checks the persisted deadline, so a leftover timer that
+// fires is a no-op. Always returns nil; removal errors are ignored.
+func Disarm(kubeContext string, logger *zap.Logger) error {
+	if logger == nil {
+		logger = zap.NewNop()
+	}
+	logger.Debug("disarming lifetime host timer", zap.String("context", kubeContext))
+	if _, err := exec.LookPath("systemctl"); err == nil {
+		// Stopping a transient timer unit also cleans it up.
+		_ = exec.Command("systemctl", "--user", "stop", unitName(kubeContext)+".timer").Run()
+	}
+	_, atqErr := exec.LookPath("atq")
+	_, atrmErr := exec.LookPath("atrm")
+	if atqErr == nil && atrmErr == nil { // need both at(1) tools to find and remove jobs
+		for _, id := range atJobIDsFor(kubeContext) {
+			_ = exec.Command("atrm", id).Run()
+		}
+	}
+	return nil
+}
+
+// atJobIDsFor returns at(1) job ids whose script ends a line with this
+// context's exact "# <marker>" comment. Best-effort: errors yield no ids.
+func atJobIDsFor(kubeContext string) []string {
+	out, err := exec.Command("atq").Output()
+	if err != nil {
+		return nil
+	}
+	needle := "# " + unitName(kubeContext) + "\n" // "\n" anchors the match so context "a" never claims "ab"'s job
+	var ids []string
+	for _, line := range strings.Split(string(out), "\n") {
+		fields := strings.Fields(line)
+		if len(fields) == 0 {
+			continue
+		}
+		id := fields[0]
+		body, err := exec.Command("at", "-c", id).Output()
+		if err != nil {
+			continue
+		}
+		if strings.Contains(string(body), needle) {
+			ids = append(ids, id)
+		}
+	}
+	return ids
+}
diff --git a/pkg/lifetime/timer_test.go b/pkg/lifetime/timer_test.go
new file mode 100644
index 0000000..4fa8480
--- /dev/null
+++ b/pkg/lifetime/timer_test.go
@@ -0,0 +1,80 @@
+package lifetime
+
+import (
+	"strings"
+	"testing"
+	"time"
+)
+
+func TestUnitName_Sanitizes(t *testing.T) {
+	tests := map[string]string{
+		"local":        "y-cluster-lifetime-local",
+		"alice-dev1":   "y-cluster-lifetime-alice-dev1",
+		"weird/ctx @1": "y-cluster-lifetime-weird-ctx--1",
+	}
+	for in, want := range tests {
+		if got := unitName(in); got != want {
+			t.Errorf("unitName(%q) = %q, want %q", in, got, want)
+		}
+	}
+}
+
+func TestRemainingSeconds_FloorIsOne(t *testing.T) {
+	if got := remainingSeconds(-5 * time.Minute); got != 1 {
+		t.Errorf("past-due remaining should floor to 1s, got %d", got)
+	}
+	if got := remainingSeconds(0); got != 1 {
+		t.Errorf("zero remaining should floor to 1s, got %d", got)
+	}
+	if got := remainingSeconds(90 * time.Second); got != 90 {
+		t.Errorf("remainingSeconds(90s) = %d, want 90", got)
+	}
+}
+
+func TestSystemdRunArgs(t *testing.T) {
+	args := systemdRunArgs("/usr/local/bin/y-cluster", "local", 8*time.Hour)
+	joined := strings.Join(args, " ")
+	for _, want := range []string{
+		"--user",
+		"--unit=y-cluster-lifetime-local",
+		"--on-active=28800s",
+		"--",
+		"/usr/local/bin/y-cluster lifetime reap --context=local",
+	} {
+		if !strings.Contains(joined, want) {
+			t.Errorf("systemd-run args missing %q in: %s", want, joined)
+		}
+	}
+}
+
+func TestAtTimeSpec_RoundsUpToMinute(t *testing.T) {
+	tests := map[time.Duration]string{
+		30 * time.Second: "now + 1 minutes",
+		90 * time.Second: "now + 2 minutes",
+		8 * time.Hour:    "now + 480 minutes",
+		-1 * time.Minute: "now + 1 minutes",
+	}
+	for in, want := range tests {
+		if got := atTimeSpec(in); got != want {
+			t.Errorf("atTimeSpec(%v) = %q, want %q", in, got, want)
+		}
+	}
+}
+
+func TestAtScript_CarriesMarkerAndCommand(t *testing.T) {
+	s := atScript("/usr/local/bin/y-cluster", "alice-dev1")
+	if !strings.Contains(s, "/usr/local/bin/y-cluster lifetime reap --context=alice-dev1") {
+		t.Errorf("at script missing reap command: %s", s)
+	}
+	if !strings.Contains(s, "# y-cluster-lifetime-alice-dev1") {
+		t.Errorf("at script missing disarm marker: %s", s)
+	}
+}
+
+func TestReapInvocation(t *testing.T) {
+	got := reapInvocation("y-cluster", "local")
+	want := []string{"y-cluster", "lifetime", "reap", "--context=local"}
+	if strings.Join(got, " ") != strings.Join(want, " ") {
+		t.Errorf("reapInvocation = %v, want %v", got, want)
+	}
+}
diff --git a/pkg/provision/config/common.go b/pkg/provision/config/common.go
index c6df467..76a8a81 100644
--- a/pkg/provision/config/common.go
+++ b/pkg/provision/config/common.go
@@ -30,6 +30,8 @@
 //     keys is portable across providers
 package config
 
+import "time"
+
 // Provider IDs. Single source of truth for both the per-provider
 // `Validate()` checks and the `enum` constraint on
 // CommonConfig.Provider — schemagen reads AllProviders to build
@@ -54,16 +56,87 @@ var AllProviders = []string{ProviderDocker, ProviderMultipass, ProviderQEMU}
 // Per-provider Validate() must call validateCommon to enforce the
 // shared invariants (provider discriminator, k3s.version present).
 type CommonConfig struct {
-	Provider     string        `yaml:"provider"               json:"provider"               jsonschema:"description=Provisioner to use. Optional in the common schema -- when omitted at provision time the host is probed (multipass daemon reachable -> multipass; Linux+/dev/kvm+qemu-system-x86_64 -> qemu; else reachable docker daemon -> docker). Per-provider schemas narrow this to a single literal and keep it required."`
-	Name         string        `yaml:"name,omitempty"         json:"name,omitempty"         jsonschema:"default=y-cluster,description=Cluster instance identifier; used as the docker container name / qemu -name / kubeconfig cluster name / prefix for cache files."`
-	Context      string        `yaml:"context,omitempty"      json:"context,omitempty"      jsonschema:"default=local,description=kubeconfig context name to write."`
-	Memory       string        `yaml:"memory,omitempty"       json:"memory,omitempty"       jsonschema:"default=8192,description=Memory in MB. qemu allocates this to the VM; docker passes it to --memory."`
-	CPUs         string        `yaml:"cpus,omitempty"         json:"cpus,omitempty"         jsonschema:"default=4,description=vCPU count. qemu sets -smp; docker passes --cpus."`
-	K3s          K3sConfig     `yaml:"k3s,omitempty"          json:"k3s,omitempty"          jsonschema:"description=k3s install settings. Defaults track pkg/provision/config/k3s.yaml."`
-	PortForwards []PortForward `yaml:"portForwards,omitempty" json:"portForwards,omitempty" jsonschema:"description=Host->guest TCP port forwards. Defaults to 6443/80/443 when omitted. Must include a guest:6443 entry so the host's kubectl can reach the API server."`
-	Registries   Registries    `yaml:"registries,omitempty"   json:"registries,omitempty"   jsonschema:"description=k3s registries.yaml content. Written to /etc/rancher/k3s/registries.yaml on the node before k3s starts. ${VAR} substitution is supported on credential and endpoint fields."`
-	Gateway      GatewayConfig `yaml:"gateway,omitempty"      json:"gateway,omitempty"      jsonschema:"description=Bundled Envoy Gateway install. Skip the install entirely (no CRDs, controller, or GatewayClass) by setting skip:true; rename the default GatewayClass via name."`
-	Storage      StorageConfig `yaml:"storage,omitempty"      json:"storage,omitempty"      jsonschema:"description=Bundled local-path-provisioner install. Defaults give a predictable on-disk layout (/data/yolean/<ns>_<pvc>) and Retain reclaim so PV content survives PVC delete and an appliance upgrade rebinds the same directory by name."`
+	Provider     string         `yaml:"provider"               json:"provider"               jsonschema:"description=Provisioner to use. Optional in the common schema -- when omitted at provision time the host is probed (multipass daemon reachable -> multipass; Linux+/dev/kvm+qemu-system-x86_64 -> qemu; else reachable docker daemon -> docker). Per-provider schemas narrow this to a single literal and keep it required."`
+	Name         string         `yaml:"name,omitempty"         json:"name,omitempty"         jsonschema:"default=y-cluster,description=Cluster instance identifier; used as the docker container name / qemu -name / kubeconfig cluster name / prefix for cache files."`
+	Context      string         `yaml:"context,omitempty"      json:"context,omitempty"      jsonschema:"default=local,description=kubeconfig context name to write."`
+	Memory       string         `yaml:"memory,omitempty"       json:"memory,omitempty"       jsonschema:"default=8192,description=Memory in MB. qemu allocates this to the VM; docker passes it to --memory."`
+	CPUs         string         `yaml:"cpus,omitempty"         json:"cpus,omitempty"         jsonschema:"default=4,description=vCPU count. qemu sets -smp; docker passes --cpus."`
+	K3s          K3sConfig      `yaml:"k3s,omitempty"          json:"k3s,omitempty"          jsonschema:"description=k3s install settings. Defaults track pkg/provision/config/k3s.yaml."`
+	PortForwards []PortForward  `yaml:"portForwards,omitempty" json:"portForwards,omitempty" jsonschema:"description=Host->guest TCP port forwards. Defaults to 6443/80/443 when omitted. Must include a guest:6443 entry so the host's kubectl can reach the API server."`
+	Registries   Registries     `yaml:"registries,omitempty"   json:"registries,omitempty"   jsonschema:"description=k3s registries.yaml content. Written to /etc/rancher/k3s/registries.yaml on the node before k3s starts. ${VAR} substitution is supported on credential and endpoint fields."`
+	Gateway      GatewayConfig  `yaml:"gateway,omitempty"      json:"gateway,omitempty"      jsonschema:"description=Bundled Envoy Gateway install. Skip the install entirely (no CRDs, controller, or GatewayClass) by setting skip:true; rename the default GatewayClass via name."`
+	Storage      StorageConfig  `yaml:"storage,omitempty"      json:"storage,omitempty"      jsonschema:"description=Bundled local-path-provisioner install. Defaults give a predictable on-disk layout (/data/yolean/<ns>_<pvc>) and Retain reclaim so PV content survives PVC delete and an appliance upgrade rebinds the same directory by name."`
+	Lifetime     LifetimeConfig `yaml:"lifetime,omitempty"    json:"lifetime,omitempty"     jsonschema:"description=Cost-control auto-expiry. maxRun sets a wall-clock budget counted from when the cluster STARTS (not from provision); on expiry a local cluster runs onExpiry (stop by default) and a GCP appliance is deleted by GCP-native max-run-duration. Empty maxRun disables."`
+}
+
+// LifetimeConfig is the cluster-level cost-control policy. A dev
+// cluster left running after a task is paused or finished is pure
+// cost (host RAM/CPU locally, hourly billing in cloud); a lifetime
+// makes the cluster expire on its own.
+//
+// The budget is always counted from when the cluster STARTS, never
+// from provision time. This matters for the appliance flow: a disk
+// is provisioned, exported, then imported and booted cloud-side
+// possibly days later -- the countdown must begin at that boot. On
+// the GCP path this falls out for free because the duration is
+// handed to GCP's native max-run-duration, which GCP measures from
+// instance start; locally the deadline is recomputed on each
+// `y-cluster start`.
+//
+// MaxRun empty (or "0") disables the whole feature -- a cluster
+// with no lifetime runs until manually stopped, the historical
+// behaviour.
+type LifetimeConfig struct {
+	// MaxRun is the wall-clock budget as a Go duration string
+	// (e.g. "8h", "90m", "24h"). Empty disables. Validated to
+	// parse via time.ParseDuration and be strictly positive.
+	MaxRun string `yaml:"maxRun,omitempty" json:"maxRun,omitempty" jsonschema:"description=Wall-clock budget as a Go duration such as 8h or 90m. Counted from cluster start. Empty disables auto-expiry."`
+
+	// OnExpiry is the action a LOCAL cluster takes when MaxRun
+	// elapses: stop (graceful, disk preserved -- the default and
+	// cheapest reversible action), pause (SIGSTOP; RAM stays
+	// reserved), or teardown (delete). Ignored on the GCP
+	// appliance path, which always decommissions via instance
+	// delete. Defaulted to stop when MaxRun is set.
+	OnExpiry string `yaml:"onExpiry,omitempty" json:"onExpiry,omitempty" jsonschema:"enum=stop,enum=pause,enum=teardown,default=stop,description=Local action on expiry. Ignored on the GCP appliance path (always deletes the instance)."`
+}
+
+// Lifetime action names. Single source of truth for the OnExpiry
+// enum and the reaper's dispatch switch.
+const (
+	OnExpiryStop     = "stop"
+	OnExpiryPause    = "pause"
+	OnExpiryTeardown = "teardown"
+)
+
+// AllOnExpiry is the canonical OnExpiry value list, used by
+// validation error messages.
+var AllOnExpiry = []string{OnExpiryStop, OnExpiryPause, OnExpiryTeardown}
+
+// LifetimePolicy returns the configured lifetime. Promoted to every
+// provider config via CommonConfig embedding, so a caller holding an
+// `any` from LoadProvision can read the budget without switching on
+// the concrete provider type.
+func (c CommonConfig) LifetimePolicy() LifetimeConfig { return c.Lifetime }
+
+// Enabled reports whether a lifetime budget is configured.
+func (l LifetimeConfig) Enabled() bool {
+	return l.MaxRun != "" && l.MaxRun != "0"
+}
+
+// MaxRunDuration parses MaxRun. Returns (0, nil) when disabled so
+// callers can treat "no lifetime" and "zero budget" uniformly; a
+// non-nil error means MaxRun is set but unparseable, which Validate
+// rejects up front.
+func (l LifetimeConfig) MaxRunDuration() (time.Duration, error) {
+	if !l.Enabled() {
+		return 0, nil
+	}
+	d, err := time.ParseDuration(l.MaxRun)
+	if err != nil {
+		return 0, errInvalid("lifetime.maxRun %q is not a valid Go duration (e.g. 8h, 90m): %w", l.MaxRun, err)
+	}
+	return d, nil
 }
 
 // StorageConfig controls the local-path-provisioner install that
@@ -328,9 +401,34 @@ func (c *CommonConfig) validateCommon(expected string) error {
 	if c.K3s.Version == "" {
 		return errInvalid("k3s.version is empty; check pkg/provision/config/k3s.yaml")
 	}
+	if err := c.Lifetime.validate(); err != nil {
+		return err
+	}
 	return nil
 }
 
+// validate enforces the lifetime invariants. A disabled lifetime (MaxRun
+// empty or "0") is always valid and its OnExpiry is not checked. Otherwise
+// MaxRun must parse to a positive duration and OnExpiry be empty or known.
+func (l LifetimeConfig) validate() error {
+	if !l.Enabled() {
+		return nil
+	}
+	d, err := l.MaxRunDuration()
+	if err != nil {
+		return err
+	}
+	if d <= 0 {
+		return errInvalid("lifetime.maxRun must be positive, got %q", l.MaxRun)
+	}
+	switch l.OnExpiry {
+	case "", OnExpiryStop, OnExpiryPause, OnExpiryTeardown:
+		return nil
+	default:
+		return errInvalid("lifetime.onExpiry must be one of %v, got %q", AllOnExpiry, l.OnExpiry)
+	}
+}
+
 // requireHostAPIPort enforces the guest:6443 PortForwards invariant
 // for host-tunneled providers. qemu and docker call this from their
 // own Validate; multipass does not because the host dials the VM IP
diff --git a/pkg/provision/config/lifetime_test.go b/pkg/provision/config/lifetime_test.go
new file mode 100644
index 0000000..b574b24
--- /dev/null
+++ b/pkg/provision/config/lifetime_test.go
@@ -0,0 +1,113 @@
+package config
+
+import (
+	"strings"
+	"testing"
+	"time"
+)
+
+func TestLifetime_DisabledByDefault(t *testing.T) {
+	c := &QEMUConfig{CommonConfig: CommonConfig{Provider: ProviderQEMU}}
+	c.ApplyDefaults()
+	if c.Lifetime.Enabled() {
+		t.Fatalf("lifetime should be disabled when maxRun is unset, got %+v", c.Lifetime)
+	}
+	if err := c.Validate(); err != nil {
+		t.Fatalf("disabled lifetime must validate, got %v", err)
+	}
+}
+
+func TestLifetime_DefaultsOnExpiryStop(t *testing.T) {
+	c := &QEMUConfig{CommonConfig: CommonConfig{
+		Provider: ProviderQEMU,
+		Lifetime: LifetimeConfig{MaxRun: "8h"},
+	}}
+	c.ApplyDefaults()
+	if !c.Lifetime.Enabled() {
+		t.Fatal("lifetime should be enabled when maxRun set")
+	}
+	if c.Lifetime.OnExpiry != OnExpiryStop {
+		t.Fatalf("OnExpiry default: got %q want %q", c.Lifetime.OnExpiry, OnExpiryStop)
+	}
+	if err := c.Validate(); err != nil {
+		t.Fatalf("valid lifetime rejected: %v", err)
+	}
+}
+
+func TestLifetime_RespectsExplicitOnExpiry(t *testing.T) {
+	c := &QEMUConfig{CommonConfig: CommonConfig{
+		Provider: ProviderQEMU,
+		Lifetime: LifetimeConfig{MaxRun: "1h", OnExpiry: OnExpiryTeardown},
+	}}
+	c.ApplyDefaults()
+	if c.Lifetime.OnExpiry != OnExpiryTeardown {
+		t.Fatalf("explicit OnExpiry overridden: %q", c.Lifetime.OnExpiry)
+	}
+	if err := c.Validate(); err != nil {
+		t.Fatalf("valid lifetime rejected: %v", err)
+	}
+}
+
+func TestLifetime_MaxRunDuration(t *testing.T) {
+	tests := []struct {
+		in   string
+		want time.Duration
+		ok   bool
+	}{
+		{"", 0, true},
+		{"0", 0, true},
+		{"8h", 8 * time.Hour, true},
+		{"90m", 90 * time.Minute, true},
+		{"banana", 0, false},
+	}
+	for _, tt := range tests {
+		d, err := LifetimeConfig{MaxRun: tt.in}.MaxRunDuration()
+		if tt.ok && err != nil {
+			t.Errorf("MaxRunDuration(%q): unexpected error %v", tt.in, err)
+			continue
+		}
+		if !tt.ok && err == nil {
+			t.Errorf("MaxRunDuration(%q): expected error, got nil", tt.in)
+			continue
+		}
+		if tt.ok && d != tt.want {
+			t.Errorf("MaxRunDuration(%q) = %v, want %v", tt.in, d, tt.want)
+		}
+	}
+}
+
+func TestLifetime_Validate_BadDuration(t *testing.T) {
+	c := &QEMUConfig{CommonConfig: CommonConfig{
+		Provider: ProviderQEMU,
+		Lifetime: LifetimeConfig{MaxRun: "lol"},
+	}}
+	c.ApplyDefaults()
+	err := c.Validate()
+	if err == nil || !strings.Contains(err.Error(), "maxRun") {
+		t.Fatalf("want maxRun parse error, got %v", err)
+	}
+}
+
+func TestLifetime_Validate_NegativeDuration(t *testing.T) {
+	c := &QEMUConfig{CommonConfig: CommonConfig{
+		Provider: ProviderQEMU,
+		Lifetime: LifetimeConfig{MaxRun: "-5m"},
+	}}
+	c.ApplyDefaults()
+	err := c.Validate()
+	if err == nil || !strings.Contains(err.Error(), "positive") {
+		t.Fatalf("want positive-duration error, got %v", err)
+	}
+}
+
+func TestLifetime_Validate_BadOnExpiry(t *testing.T) {
+	c := &QEMUConfig{CommonConfig: CommonConfig{
+		Provider: ProviderQEMU,
+		Lifetime: LifetimeConfig{MaxRun: "8h", OnExpiry: "explode"},
+	}}
+	c.ApplyDefaults()
+	err := c.Validate()
+	if err == nil || !strings.Contains(err.Error(), "onExpiry") {
+		t.Fatalf("want onExpiry enum error, got %v", err)
+	}
+}
diff --git a/pkg/provision/qemu/data_disk.go b/pkg/provision/qemu/data_disk.go
index c67e096..10e31d7 100644
--- a/pkg/provision/qemu/data_disk.go
+++ b/pkg/provision/qemu/data_disk.go
@@ -90,5 +90,10 @@ func checkDataDiskTools(path string) error {
 			"DataDisk %s does not exist and virt-format is not on PATH; "+
 				"install libguestfs-tools to let y-cluster create labeled data disks", path)
 	}
+	// virt-format builds a supermin appliance from the host kernel;
+	// bail early with a durable fix if it isn't readable.
+	if err := requireReadableHostKernel(); err != nil {
+		return err
+	}
 	return nil
 }
diff --git a/pkg/provision/qemu/libguestfs.go b/pkg/provision/qemu/libguestfs.go
new file mode 100644
index 0000000..bef2126
--- /dev/null
+++ b/pkg/provision/qemu/libguestfs.go
@@ -0,0 +1,90 @@
+package qemu
+
+import (
+	"fmt"
+	"os"
+	"strings"
+)
+
+// kernelReadableHookName is the /etc/kernel/postinst.d/ filename the
+// remediation suggests. Sorted late (zz-) so it runs after the
+// distro hooks that lay the image down.
+const kernelReadableHookName = "zz-vmlinuz-readable"
+
+// runningKernelRelease returns the running kernel's release string
+// (the `uname -r` value) from /proc, and whether it could be read.
+// Used to locate the host kernel image libguestfs needs.
+func runningKernelRelease() (string, bool) {
+	b, err := os.ReadFile("/proc/sys/kernel/osrelease")
+	if err != nil {
+		return "", false
+	}
+	rel := strings.TrimSpace(string(b))
+	if rel == "" {
+		return "", false
+	}
+	return rel, true
+}
+
+// requireReadableHostKernel verifies the running kernel image is
+// readable by the current process. libguestfs builds a supermin
+// appliance from the host kernel, so virt-customize / virt-sysprep /
+// virt-tar-out / virt-format all fail with the opaque "supermin
+// exited with error status 1" when /boot/vmlinuz-<release> is not
+// readable. Ubuntu ships those images mode 0600, and a fresh 0600
+// image lands on every kernel upgrade -- which is why per-version
+// chmod / dpkg-statoverride does not hold. The error surfaces a
+// durable, copy-pasteable fix (a kernel postinst.d hook) so a
+// downstream user fixes it once instead of rediscovering a
+// workaround after every upgrade.
+//
+// Returns nil when the image is readable, when its path can't be
+// determined, when it isn't found (we can't assert it's the
+// blocker), or on a non-permission error -- in those cases we let
+// libguestfs run and surface its own diagnostics rather than block
+// on a false positive.
+//
+// A var (not a plain func) so a test can force it to fail and assert
+// that a call site checks it only after its cheap correctness
+// preconditions -- otherwise an unreadable kernel on the build host
+// masks the actionable error (it did, on the CI runner).
+var requireReadableHostKernel = func() error {
+	rel, ok := runningKernelRelease()
+	if !ok {
+		return nil
+	}
+	return checkKernelReadable("/boot/vmlinuz-" + rel)
+}
+
+// checkKernelReadable is the path-parameterized, testable core of
+// requireReadableHostKernel. Only a permission error is reported. The
+// suggested hook uses `if` so it exits 0 when it has nothing to chmod.
+func checkKernelReadable(path string) error {
+	f, err := os.Open(path)
+	if err == nil {
+		_ = f.Close()
+		return nil
+	}
+	if !os.IsPermission(err) {
+		return nil
+	}
+	return fmt.Errorf(`host kernel %s is not readable by this user, so libguestfs
+(virt-customize / virt-sysprep / virt-tar-out / virt-format) will fail
+building its supermin appliance with "supermin exited with error status 1".
+
+Ubuntu ships /boot/vmlinuz-* mode 0600, and a fresh 0600 image lands on
+every kernel upgrade -- a one-off chmod or a per-version dpkg-statoverride
+does not survive that. Install a kernel hook once so current and future
+kernels stay readable (this makes vmlinuz world-readable):
+
+  sudo tee /etc/kernel/postinst.d/%s >/dev/null <<'HOOK'
+#!/bin/sh
+# Keep installed kernels readable for libguestfs/supermin.
+v="$1"; if [ -n "$v" ] && [ -e "/boot/vmlinuz-$v" ]; then chmod 0644 "/boot/vmlinuz-$v"; fi
+HOOK
+  sudo chmod 0755 /etc/kernel/postinst.d/%s
+  sudo chmod 0644 /boot/vmlinuz-*
+
+The hook re-applies on every future kernel; the chmod fixes the ones
+already installed`, path, kernelReadableHookName, kernelReadableHookName)
+}
diff --git a/pkg/provision/qemu/libguestfs_test.go b/pkg/provision/qemu/libguestfs_test.go
new file mode 100644
index 0000000..772f261
--- /dev/null
+++ b/pkg/provision/qemu/libguestfs_test.go
@@ -0,0 +1,63 @@
+package qemu
+
+import (
+	"os"
+	"path/filepath"
+	"strings"
+	"testing"
+)
+
+func TestCheckKernelReadable_Readable(t *testing.T) {
+	dir := t.TempDir()
+	p := filepath.Join(dir, "vmlinuz-test")
+	if err := os.WriteFile(p, []byte("kernel"), 0o644); err != nil {
+		t.Fatal(err)
+	}
+	if err := checkKernelReadable(p); err != nil {
+		t.Fatalf("readable kernel should pass, got: %v", err)
+	}
+}
+
+func TestCheckKernelReadable_Missing(t *testing.T) {
+	// A missing image is not treated as the blocker -- we let
+	// libguestfs surface its own error rather than false-positive.
+	if err := checkKernelReadable(filepath.Join(t.TempDir(), "nope")); err != nil {
+		t.Fatalf("missing kernel should not block, got: %v", err)
+	}
+}
+
+func TestCheckKernelReadable_PermissionDenied(t *testing.T) {
+	if os.Geteuid() == 0 {
+		t.Skip("root bypasses DAC; permission-denied path is unobservable as root")
+	}
+	dir := t.TempDir()
+	p := filepath.Join(dir, "vmlinuz-locked")
+	if err := os.WriteFile(p, []byte("kernel"), 0o600); err != nil {
+		t.Fatal(err)
+	}
+	if err := os.Chmod(p, 0o000); err != nil {
+		t.Fatal(err)
+	}
+	t.Cleanup(func() { _ = os.Chmod(p, 0o644) })
+
+	err := checkKernelReadable(p)
+	if err == nil {
+		t.Fatal("unreadable kernel should return an actionable error")
+	}
+	msg := err.Error()
+	// The remediation must point at the durable postinst.d hook.
+	for _, want := range []string{p, "/etc/kernel/postinst.d/", "supermin", "chmod 0644 /boot/vmlinuz-*"} {
+		if !strings.Contains(msg, want) {
+			t.Errorf("error message missing %q; got:\n%s", want, msg)
+		}
+	}
+}
+
+func TestRunningKernelRelease(t *testing.T) {
+	// On the Linux CI/dev host this should resolve; if /proc is
+	// unavailable the function returns ok=false and callers no-op.
+	rel, ok := runningKernelRelease()
+	if ok && strings.TrimSpace(rel) == "" {
+		t.Fatal("ok=true but release is empty")
+	}
+}
diff --git a/pkg/provision/qemu/lifecycle.go b/pkg/provision/qemu/lifecycle.go
index 6a721bf..fdc469e 100644
--- a/pkg/provision/qemu/lifecycle.go
+++ b/pkg/provision/qemu/lifecycle.go
@@ -173,6 +173,15 @@ func Start(ctx context.Context, cacheDir, name string, logger *zap.Logger) (*Clu
 	if err := c.Kubeconfig.Import(rawKubeconfig); err != nil {
 		return nil, fmt.Errorf("merge kubeconfig: %w", err)
 	}
+	// Re-anchor the auto-expiry deadline to this start. A
+	// stopped-then-started cluster gets a fresh budget; this is the
+	// "count from when the cluster starts" guarantee (no-op when no
+	// lifetime is configured).
+	if deadline, err := armLifetime(cacheDir, name); err != nil {
+		logger.Warn("could not re-arm lifetime deadline on start", zap.Error(err))
+	} else if !deadline.IsZero() {
+		logger.Info("lifetime armed", zap.Time("expiresAt", deadline))
+	}
 	logger.Info("k3s ready", zap.String("context", c.cfg.Context))
 	return c, nil
 }
diff --git a/pkg/provision/qemu/lifetime.go b/pkg/provision/qemu/lifetime.go
new file mode 100644
index 0000000..ba29989
--- /dev/null
+++ b/pkg/provision/qemu/lifetime.go
@@ -0,0 +1,51 @@
+package qemu
+
+import (
+	"fmt"
+	"time"
+
+	"go.uber.org/zap"
+)
+
+// This file is the exported auto-expiry surface the cmd layer's
+// `y-cluster lifetime` verb drives. The deadline math and sidecar
+// persistence live in state.go (unexported); these wrappers keep the
+// command package free of the sidecar's internals.
+
+// LoadLifetime returns the persisted auto-expiry state for the named
+// cluster (policy + absolute deadline). A missing sidecar surfaces
+// as the underlying os error so the caller can render a clear
+// "qemu-only / not provisioned" message.
+func LoadLifetime(cacheDir, name string) (LifetimeState, error) {
+	return loadLifetime(cacheDir, name)
+}
+
+// ExtendDeadline adds d to the persisted deadline (not to now; d is not
+// range-checked, so a negative d shortens) and returns the new deadline.
+// It errors when no deadline is armed: there is no budget to extend.
+func ExtendDeadline(cacheDir, name string, d time.Duration) (time.Time, error) {
+	ls, err := loadLifetime(cacheDir, name)
+	if err != nil {
+		return time.Time{}, err
+	}
+	if ls.ExpiresAt.IsZero() {
+		return time.Time{}, fmt.Errorf("no lifetime deadline armed for %q; nothing to extend", name)
+	}
+	nt := ls.ExpiresAt.Add(d)
+	if err := setExpiresAt(cacheDir, name, nt); err != nil {
+		return time.Time{}, err
+	}
+	return nt, nil
+}
+
+// TeardownByName tears down a cluster identified by its cache sidecar
+// rather than a freshly loaded config dir. Used by the `onExpiry:
+// teardown` reap action, which only knows the cluster name + cache
+// dir. keepDisk is forwarded to the provider teardown.
+func TeardownByName(cacheDir, name string, keepDisk bool, logger *zap.Logger) error {
+	cfg, err := loadState(cacheDir, name)
+	if err != nil {
+		return fmt.Errorf("load state for teardown of %q: %w", name, err)
+	}
+	return TeardownConfig(cfg, keepDisk, logger)
+}
diff --git a/pkg/provision/qemu/lifetime_state_test.go b/pkg/provision/qemu/lifetime_state_test.go
new file mode 100644
index 0000000..ba476ff
--- /dev/null
+++ b/pkg/provision/qemu/lifetime_state_test.go
@@ -0,0 +1,179 @@
+package qemu
+
+import (
+	"testing"
+	"time"
+)
+
+// pinClock pins nowFunc for the duration of a test and restores it.
+func pinClock(t *testing.T, at time.Time) {
+	t.Helper()
+	prev := nowFunc
+	nowFunc = func() time.Time { return at }
+	t.Cleanup(func() { nowFunc = prev })
+}
+
+func TestArmLifetime_Disabled(t *testing.T) {
+	dir := t.TempDir()
+	cfg := Config{Name: "c", CacheDir: dir} // no Lifetime
+	if err := saveState(cfg); err != nil {
+		t.Fatal(err)
+	}
+	deadline, err := armLifetime(dir, "c")
+	if err != nil {
+		t.Fatal(err)
+	}
+	if !deadline.IsZero() {
+		t.Fatalf("disabled lifetime should arm no deadline, got %v", deadline)
+	}
+	ls, err := loadLifetime(dir, "c")
+	if err != nil {
+		t.Fatal(err)
+	}
+	if ls.Enabled() || !ls.ExpiresAt.IsZero() {
+		t.Fatalf("expected no lifetime state, got %+v", ls)
+	}
+}
+
+func TestArmLifetime_AnchorsToNow(t *testing.T) {
+	dir := t.TempDir()
+	base := time.Date(2026, 6, 21, 12, 0, 0, 0, time.UTC)
+	pinClock(t, base)
+
+	cfg := Config{Name: "c", CacheDir: dir, Lifetime: "8h", OnExpiry: "stop"}
+	if err := saveState(cfg); err != nil {
+		t.Fatal(err)
+	}
+	deadline, err := armLifetime(dir, "c")
+	if err != nil {
+		t.Fatal(err)
+	}
+	want := base.Add(8 * time.Hour)
+	if !deadline.Equal(want) {
+		t.Fatalf("deadline = %v, want %v", deadline, want)
+	}
+
+	ls, err := loadLifetime(dir, "c")
+	if err != nil {
+		t.Fatal(err)
+	}
+	if ls.MaxRun != "8h" || ls.OnExpiry != "stop" {
+		t.Fatalf("policy not persisted: %+v", ls)
+	}
+	if !ls.ExpiresAt.Equal(want) {
+		t.Fatalf("persisted ExpiresAt = %v, want %v", ls.ExpiresAt, want)
+	}
+}
+
+func TestArmLifetime_ReanchorsOnReArm(t *testing.T) {
+	dir := t.TempDir()
+	cfg := Config{Name: "c", CacheDir: dir, Lifetime: "2h", OnExpiry: "stop"}
+	if err := saveState(cfg); err != nil {
+		t.Fatal(err)
+	}
+
+	t0 := time.Date(2026, 6, 21, 10, 0, 0, 0, time.UTC)
+	pinClock(t, t0)
+	if _, err := armLifetime(dir, "c"); err != nil {
+		t.Fatal(err)
+	}
+
+	// Simulate stop+start three hours later: re-arm gives a fresh
+	// window from the new "now", not from the original provision.
+	t1 := t0.Add(3 * time.Hour)
+	pinClock(t, t1)
+	deadline, err := armLifetime(dir, "c")
+	if err != nil {
+		t.Fatal(err)
+	}
+	if want := t1.Add(2 * time.Hour); !deadline.Equal(want) {
+		t.Fatalf("re-armed deadline = %v, want %v", deadline, want)
+	}
+}
+
+func TestLifetimeState_ExpiredAndRemaining(t *testing.T) {
+	dir := t.TempDir()
+	cfg := Config{Name: "c", CacheDir: dir, Lifetime: "1h", OnExpiry: "stop"}
+	if err := saveState(cfg); err != nil {
+		t.Fatal(err)
+	}
+	t0 := time.Date(2026, 6, 21, 9, 0, 0, 0, time.UTC)
+	pinClock(t, t0)
+	if _, err := armLifetime(dir, "c"); err != nil {
+		t.Fatal(err)
+	}
+
+	// 30m in: not expired, ~30m remaining.
+	pinClock(t, t0.Add(30*time.Minute))
+	ls, err := loadLifetime(dir, "c")
+	if err != nil {
+		t.Fatal(err)
+	}
+	if ls.Expired() {
+		t.Fatal("should not be expired at 30m of a 1h budget")
+	}
+	if r := ls.Remaining(); r != 30*time.Minute {
+		t.Fatalf("Remaining = %v, want 30m", r)
+	}
+
+	// 90m in: expired, negative remaining.
+	pinClock(t, t0.Add(90*time.Minute))
+	ls, err = loadLifetime(dir, "c")
+	if err != nil {
+		t.Fatal(err)
+	}
+	if !ls.Expired() {
+		t.Fatal("should be expired at 90m of a 1h budget")
+	}
+	if r := ls.Remaining(); r >= 0 {
+		t.Fatalf("Remaining = %v, want negative", r)
+	}
+}
+
+func TestSetExpiresAt_Extend(t *testing.T) {
+	dir := t.TempDir()
+	cfg := Config{Name: "c", CacheDir: dir, Lifetime: "1h", OnExpiry: "stop"}
+	if err := saveState(cfg); err != nil {
+		t.Fatal(err)
+	}
+	t0 := time.Date(2026, 6, 21, 9, 0, 0, 0, time.UTC)
+	pinClock(t, t0)
+	if _, err := armLifetime(dir, "c"); err != nil {
+		t.Fatal(err)
+	}
+	ls, err := loadLifetime(dir, "c")
+	if err != nil {
+		t.Fatal(err)
+	}
+	extended := ls.ExpiresAt.Add(2 * time.Hour)
+	if err := setExpiresAt(dir, "c", extended); err != nil {
+		t.Fatal(err)
+	}
+	ls, err = loadLifetime(dir, "c")
+	if err != nil {
+		t.Fatal(err)
+	}
+	if !ls.ExpiresAt.Equal(extended) {
+		t.Fatalf("extended ExpiresAt = %v, want %v", ls.ExpiresAt, extended)
+	}
+	// Extend must preserve the policy.
+	if ls.MaxRun != "1h" || ls.OnExpiry != "stop" {
+		t.Fatalf("extend clobbered policy: %+v", ls)
+	}
+}
+
+// Old sidecars (no lifetime fields) decode to "no lifetime".
+func TestLoadLifetime_LegacySidecar(t *testing.T) {
+	dir := t.TempDir()
+	cfg := Config{Name: "c", CacheDir: dir} // no lifetime at all
+	if err := saveState(cfg); err != nil {
+		t.Fatal(err)
+	}
+	ls, err := loadLifetime(dir, "c")
+	if err != nil {
+		t.Fatal(err)
+	}
+	if ls.Enabled() || !ls.ExpiresAt.IsZero() {
+		t.Fatalf("legacy sidecar should yield empty lifetime, got %+v", ls)
+	}
+}
diff --git a/pkg/provision/qemu/prepare_export.go b/pkg/provision/qemu/prepare_export.go
index 6d57ac9..a375c3f 100644
--- a/pkg/provision/qemu/prepare_export.go
+++ b/pkg/provision/qemu/prepare_export.go
@@ -97,6 +97,15 @@ func PrepareExport(ctx context.Context, cacheDir, name string, logger *zap.Logge
 		return fmt.Errorf("disk image not found at %s: %w", diskPath, err)
 	}
 
+	// The offline phase runs virt-customize + virt-tar-out, which
+	// build a supermin appliance from the host kernel. Gate that here,
+	// AFTER the cheap correctness preconditions (so a missing/stopped
+	// cluster reports its own actionable error) but BEFORE the live
+	// phase mutates anything, so an unreadable kernel fails clean.
+	if err := requireReadableHostKernel(); err != nil {
+		return err
+	}
+
 	// --- LIVE phase ---
 	// Clear the per-deploy dns-hint-ip annotation so the snapshot
 	// doesn't ship our LB IP. Then dump reconciled gateway state
diff --git a/pkg/provision/qemu/prepare_export_test.go b/pkg/provision/qemu/prepare_export_test.go
index 8b987e4..f59b4ad 100644
--- a/pkg/provision/qemu/prepare_export_test.go
+++ b/pkg/provision/qemu/prepare_export_test.go
@@ -2,6 +2,7 @@ package qemu
 
 import (
 	"context"
+	"errors"
 	"os"
 	"path/filepath"
 	"runtime"
@@ -310,3 +311,29 @@ func TestPrepareExport_MissingVirtCustomize(t *testing.T) {
 		t.Errorf("error should hint at apt install libguestfs-tools: %v", err)
 	}
 }
+
+// TestPrepareExport_PreconditionsBeforeKernelCheck guards the
+// ordering that broke CI: an unreadable host kernel (libguestfs
+// capability) must not mask a cheap correctness precondition like
+// "no saved state". On the CI runner the kernel image was 0600, so
+// the kernel check -- placed too early -- returned its own error
+// instead of the actionable "run provision" hint. Force the kernel
+// check to fail regardless of host state and assert the precondition
+// still wins.
+func TestPrepareExport_PreconditionsBeforeKernelCheck(t *testing.T) {
+	stubPrepareExportTools(t)
+	orig := requireReadableHostKernel
+	requireReadableHostKernel = func() error { return errors.New("host kernel unreadable (forced)") }
+	t.Cleanup(func() { requireReadableHostKernel = orig })
+
+	err := PrepareExport(context.Background(), t.TempDir(), "missing", nil)
+	if err == nil {
+		t.Fatal("expected error when no saved state exists")
+	}
+	if !strings.Contains(err.Error(), "y-cluster provision") {
+		t.Errorf("no-saved-state precondition must win over the kernel check: %v", err)
+	}
+	if strings.Contains(err.Error(), "unreadable") {
+		t.Errorf("kernel check fired before the precondition: %v", err)
+	}
+}
diff --git a/pkg/provision/qemu/qemu.go b/pkg/provision/qemu/qemu.go
index bfb58c5..c9eb93b 100644
--- a/pkg/provision/qemu/qemu.go
+++ b/pkg/provision/qemu/qemu.go
@@ -66,6 +66,13 @@ type Config struct {
 	// Provision creates the file if missing; Teardown leaves it.
 	DataDisk     string
 	DataDiskSize string
+
+	// Lifetime/OnExpiry are the cost-control auto-expiry policy
+	// (Go duration string + local action). Persisted to the state
+	// sidecar; the absolute deadline is armed separately so it
+	// anchors to start, not provision. Empty Lifetime disables.
+	Lifetime string
+	OnExpiry string
 }
 
 // K3s carries the runtime view of K3sConfig: which version to
@@ -178,6 +185,8 @@ func FromConfig(c *config.QEMUConfig) Config {
 		Storage:      c.Storage,
 		DataDisk:     dataDisk,
 		DataDiskSize: dataDiskSize,
+		Lifetime:     c.Lifetime.MaxRun,
+		OnExpiry:     c.Lifetime.OnExpiry,
 	}
 }
 
@@ -349,6 +358,17 @@ func Provision(ctx context.Context, cfg Config, logger *zap.Logger) (*Cluster, e
 		logger.Warn("could not save state sidecar (start will not work without it)", zap.Error(err))
 	}
 
+	// Arm the auto-expiry deadline (no-op when no lifetime budget).
+	// Anchored to now: the clock starts when the cluster comes up,
+	// which for provision is also "now" but for the appliance start
+	// path is the meaningful anchor (the disk may have been built
+	// long before this boot).
+	if deadline, err := armLifetime(cfg.CacheDir, cfg.Name); err != nil {
+		logger.Warn("could not arm lifetime deadline", zap.Error(err))
+	} else if !deadline.IsZero() {
+		logger.Info("lifetime armed", zap.Time("expiresAt", deadline), zap.String("onExpiry", cfg.OnExpiry))
+	}
+
 	// Wait for SSH
 	if err := c.waitForSSH(ctx); err != nil {
 		return nil, err
@@ -705,11 +725,11 @@ func (c *Cluster) DiskPath() string {
 // inputPath:
 //
 //   - .vmdk  -> qemu-img convert -f vmdk  -O qcow2 (the original
-//              VMware-import path; vmdk subformat doesn't matter
-//              because qemu-img -f vmdk auto-detects the variant).
+//     VMware-import path; vmdk subformat doesn't matter
+//     because qemu-img -f vmdk auto-detects the variant).
 //   - .qcow2 -> qemu-img convert -f qcow2 -O qcow2 (rewrites the
-//              qcow2 into the cache layout; usually a quick
-//              copy + compaction, no format change).
+//     qcow2 into the cache layout; usually a quick
+//     copy + compaction, no format change).
 //
 // A local-qemu e2e loop that does `y-cluster export
 // --format=qcow2 ... | y-cluster import` doesn't need any
diff --git a/pkg/provision/qemu/state.go b/pkg/provision/qemu/state.go
index 784c166..4303b08 100644
--- a/pkg/provision/qemu/state.go
+++ b/pkg/provision/qemu/state.go
@@ -5,8 +5,13 @@ import (
 	"fmt"
 	"os"
 	"path/filepath"
+	"time"
 )
 
+// nowFunc supplies the current time for all lifetime deadline math.
+// It is a package var so tests can pin it; production uses time.Now.
+var nowFunc = time.Now
+
 // stateVersion guards forward-compat: a newer y-cluster reading an
 // older sidecar bails out with a clear error rather than guessing.
 // Bump when the schema changes incompatibly.
@@ -33,6 +38,19 @@ type savedState struct {
 	Context      string        `json:"context"`
 	CacheDir     string        `json:"cacheDir"`
 	K3s          K3s           `json:"k3s"`
+
+	// Lifetime fields are additive (added after stateVersion 1)
+	// and all omitempty, so an old sidecar without them decodes
+	// cleanly to "no lifetime" and an old binary ignores them.
+	// They are NOT a reason to bump stateVersion.
+	//
+	// Lifetime/OnExpiry are the policy copied from config at
+	// provision; ExpiresAt is the absolute deadline, anchored to
+	// the most recent provision/start (not to provision alone --
+	// an appliance disk may boot long after it was built).
+	Lifetime  string `json:"lifetime,omitempty"`
+	OnExpiry  string `json:"onExpiry,omitempty"`
+	ExpiresAt string `json:"expiresAt,omitempty"` // RFC3339; empty = no deadline
 }
 
 // statePath returns the sidecar path for (cacheDir, name).
@@ -41,8 +59,10 @@ func statePath(cacheDir, name string) string {
 }
 
 // saveState writes the launch-relevant subset of cfg to the
-// sidecar. Atomic via a .tmp+rename so a crash mid-write doesn't
-// leave a half-written file Start would later fail to parse.
+// sidecar, including the lifetime policy (Lifetime/OnExpiry). The
+// ExpiresAt deadline is NOT set here -- it is armed separately by
+// armLifetime so the deadline anchors to start, and so `extend` can
+// move it without rewriting launch state. Atomic via .tmp+rename.
 func saveState(cfg Config) error {
 	s := savedState{
 		Version:      stateVersion,
@@ -55,12 +75,21 @@ func saveState(cfg Config) error {
 		Context:      cfg.Context,
 		CacheDir:     cfg.CacheDir,
 		K3s:          cfg.K3s,
+		Lifetime:     cfg.Lifetime,
+		OnExpiry:     cfg.OnExpiry,
 	}
+	return writeSidecar(cfg.CacheDir, cfg.Name, s)
+}
+
+// writeSidecar atomically marshals s to <cacheDir>/<name>.json via
+// a .tmp+rename so a crash mid-write doesn't leave a half-written
+// file Start would later fail to parse.
+func writeSidecar(cacheDir, name string, s savedState) error {
 	data, err := json.MarshalIndent(s, "", "  ")
 	if err != nil {
 		return err
 	}
-	path := statePath(cfg.CacheDir, cfg.Name)
+	path := statePath(cacheDir, name)
 	tmp := path + ".tmp"
 	if err := os.WriteFile(tmp, data, 0o644); err != nil {
 		return fmt.Errorf("write %s: %w", tmp, err)
@@ -72,25 +101,35 @@ func saveState(cfg Config) error {
 	return nil
 }
 
-// loadState reads <cacheDir>/<name>.json and rehydrates a runtime
-// Config. Kubeconfig is re-resolved from $KUBECONFIG at call time
-// rather than persisted -- it's an environmental concern that
-// shouldn't bake into the sidecar.
-func loadState(cacheDir, name string) (Config, error) {
+// readSidecar reads and version-checks the raw sidecar. Shared by
+// loadState and the lifetime helpers.
+func readSidecar(cacheDir, name string) (savedState, error) {
 	path := statePath(cacheDir, name)
 	data, err := os.ReadFile(path)
 	if err != nil {
-		return Config{}, err
+		return savedState{}, err
 	}
 	var s savedState
 	if err := json.Unmarshal(data, &s); err != nil {
-		return Config{}, fmt.Errorf("parse %s: %w", path, err)
+		return savedState{}, fmt.Errorf("parse %s: %w", path, err)
 	}
 	if s.Version != stateVersion {
-		return Config{}, fmt.Errorf(
+		return savedState{}, fmt.Errorf(
 			"%s: unsupported state version %d (want %d); re-provision to refresh",
 			path, s.Version, stateVersion)
 	}
+	return s, nil
+}
+
+// loadState reads <cacheDir>/<name>.json and rehydrates a runtime
+// Config. Kubeconfig is re-resolved from $KUBECONFIG at call time
+// rather than persisted -- it's an environmental concern that
+// shouldn't bake into the sidecar.
+func loadState(cacheDir, name string) (Config, error) {
+	s, err := readSidecar(cacheDir, name)
+	if err != nil {
+		return Config{}, err
+	}
 	return Config{
 		Name:         s.Name,
 		DiskSize:     s.DiskSize,
@@ -102,9 +141,91 @@ func loadState(cacheDir, name string) (Config, error) {
 		CacheDir:     s.CacheDir,
 		Kubeconfig:   os.Getenv("KUBECONFIG"),
 		K3s:          s.K3s,
+		Lifetime:     s.Lifetime,
+		OnExpiry:     s.OnExpiry,
 	}, nil
 }
 
+// LifetimeState is a cluster's auto-expiry policy plus armed deadline.
+type LifetimeState struct {
+	// MaxRun is the budget as a Go duration string (e.g. "8h"); ""
+	// or "0" means no lifetime is set (see Enabled).
+	MaxRun string
+	// OnExpiry names the local action taken at the deadline.
+	OnExpiry string
+	// ExpiresAt is the absolute deadline; the zero Time means unarmed.
+	ExpiresAt time.Time
+}
+
+// Enabled reports whether MaxRun is anything other than "" or "0".
+func (l LifetimeState) Enabled() bool { return !(l.MaxRun == "" || l.MaxRun == "0") }
+
+// Remaining reports ExpiresAt minus the current nowFunc() reading: a
+// negative value once the deadline has passed, 0 when none is armed.
+func (l LifetimeState) Remaining() time.Duration {
+	if armed := !l.ExpiresAt.IsZero(); armed {
+		return l.ExpiresAt.Sub(nowFunc())
+	}
+	return 0
+}
+
+// Expired is true only when a deadline is armed and now has reached it.
+func (l LifetimeState) Expired() bool {
+	return !(l.ExpiresAt.IsZero() || nowFunc().Before(l.ExpiresAt))
+}
+
+// loadLifetime reads the sidecar's lifetime policy and parsed deadline.
+func loadLifetime(cacheDir, name string) (LifetimeState, error) {
+	st, err := readSidecar(cacheDir, name)
+	if err != nil {
+		return LifetimeState{}, err
+	}
+	view := LifetimeState{MaxRun: st.Lifetime, OnExpiry: st.OnExpiry}
+	if st.ExpiresAt == "" {
+		return view, nil
+	}
+	view.ExpiresAt, err = time.Parse(time.RFC3339, st.ExpiresAt)
+	if err != nil {
+		return LifetimeState{}, fmt.Errorf("parse expiresAt %q: %w", st.ExpiresAt, err)
+	}
+	return view, nil
+}
+
+// armLifetime re-anchors the deadline to now (provision or start):
+// it persists ExpiresAt = nowFunc() + Lifetime and returns it for
+// scheduling a host timer. Empty or "0" Lifetime is a no-op (zero
+// time). NOTE(review): "0s"/negative arm an already-due deadline.
+func armLifetime(cacheDir, name string) (time.Time, error) {
+	st, err := readSidecar(cacheDir, name)
+	if err != nil {
+		return time.Time{}, err
+	}
+	if st.Lifetime == "" || st.Lifetime == "0" {
+		return time.Time{}, nil
+	}
+	budget, err := time.ParseDuration(st.Lifetime)
+	if err != nil {
+		return time.Time{}, fmt.Errorf("parse lifetime %q: %w", st.Lifetime, err)
+	}
+	expiry := nowFunc().Add(budget)
+	st.ExpiresAt = expiry.Format(time.RFC3339)
+	if err := writeSidecar(cacheDir, name, st); err != nil {
+		return time.Time{}, err
+	}
+	return expiry, nil
+}
+
+// setExpiresAt overwrites the persisted deadline with t verbatim
+// (RFC3339); `extend` uses it to move the deadline without re-arming.
+func setExpiresAt(cacheDir, name string, t time.Time) error {
+	st, err := readSidecar(cacheDir, name)
+	if err != nil {
+		return err
+	}
+	st.ExpiresAt = t.Format(time.RFC3339)
+	return writeSidecar(cacheDir, name, st)
+}
+
 // removeState deletes the sidecar and ignores "not present"
 // errors so teardown is idempotent.
 func removeState(cacheDir, name string) error {
diff --git a/pkg/provision/schema/common.schema.json b/pkg/provision/schema/common.schema.json
index 8b8d0f2..0474a72 100644
--- a/pkg/provision/schema/common.schema.json
+++ b/pkg/provision/schema/common.schema.json
@@ -21,6 +21,10 @@
           "$ref": "#/$defs/K3sConfig",
           "description": "k3s install settings. Defaults track pkg/provision/config/k3s.yaml."
         },
+        "lifetime": {
+          "$ref": "#/$defs/LifetimeConfig",
+          "description": "Cost-control auto-expiry. maxRun sets a wall-clock budget counted from when the cluster STARTS (not from provision); on expiry a local cluster runs onExpiry (stop by default) and a GCP appliance is deleted by GCP-native max-run-duration. Empty maxRun disables."
+        },
         "memory": {
           "default": "8192",
           "description": "Memory in MB. qemu allocates this to the VM; docker passes it to --memory.",
@@ -111,6 +115,26 @@
       },
       "type": "object"
     },
+    "LifetimeConfig": {
+      "additionalProperties": false,
+      "properties": {
+        "maxRun": {
+          "description": "Wall-clock budget as a Go duration such as 8h or 90m. Counted from cluster start. Empty disables auto-expiry.",
+          "type": "string"
+        },
+        "onExpiry": {
+          "default": "stop",
+          "description": "Local action on expiry. Ignored on the GCP appliance path (always deletes the instance).",
+          "enum": [
+            "stop",
+            "pause",
+            "teardown"
+          ],
+          "type": "string"
+        }
+      },
+      "type": "object"
+    },
     "PortForward": {
       "additionalProperties": false,
       "properties": {
diff --git a/pkg/provision/schema/docker.schema.json b/pkg/provision/schema/docker.schema.json
index 796b644..ceee6c1 100644
--- a/pkg/provision/schema/docker.schema.json
+++ b/pkg/provision/schema/docker.schema.json
@@ -21,6 +21,10 @@
           "$ref": "#/$defs/K3sConfig",
           "description": "k3s install settings. Defaults track pkg/provision/config/k3s.yaml."
         },
+        "lifetime": {
+          "$ref": "#/$defs/LifetimeConfig",
+          "description": "Cost-control auto-expiry. maxRun sets a wall-clock budget counted from when the cluster STARTS (not from provision); on expiry a local cluster runs onExpiry (stop by default) and a GCP appliance is deleted by GCP-native max-run-duration. Empty maxRun disables."
+        },
         "memory": {
           "default": "8192",
           "description": "Memory in MB. qemu allocates this to the VM; docker passes it to --memory.",
@@ -110,6 +114,26 @@
       },
       "type": "object"
     },
+    "LifetimeConfig": {
+      "additionalProperties": false,
+      "properties": {
+        "maxRun": {
+          "description": "Wall-clock budget as a Go duration such as 8h or 90m. Counted from cluster start. Empty disables auto-expiry.",
+          "type": "string"
+        },
+        "onExpiry": {
+          "default": "stop",
+          "description": "Local action on expiry. Ignored on the GCP appliance path (always deletes the instance).",
+          "enum": [
+            "stop",
+            "pause",
+            "teardown"
+          ],
+          "type": "string"
+        }
+      },
+      "type": "object"
+    },
     "PortForward": {
       "additionalProperties": false,
       "properties": {
diff --git a/pkg/provision/schema/multipass.schema.json b/pkg/provision/schema/multipass.schema.json
index df86eba..ee1336b 100644
--- a/pkg/provision/schema/multipass.schema.json
+++ b/pkg/provision/schema/multipass.schema.json
@@ -53,6 +53,26 @@
       },
       "type": "object"
     },
+    "LifetimeConfig": {
+      "additionalProperties": false,
+      "properties": {
+        "maxRun": {
+          "description": "Wall-clock budget as a Go duration such as 8h or 90m. Counted from cluster start. Empty disables auto-expiry.",
+          "type": "string"
+        },
+        "onExpiry": {
+          "default": "stop",
+          "description": "Local action on expiry. Ignored on the GCP appliance path (always deletes the instance).",
+          "enum": [
+            "stop",
+            "pause",
+            "teardown"
+          ],
+          "type": "string"
+        }
+      },
+      "type": "object"
+    },
     "MultipassConfig": {
       "additionalProperties": false,
       "properties": {
@@ -79,6 +99,10 @@
           "$ref": "#/$defs/K3sConfig",
           "description": "k3s install settings. Defaults track pkg/provision/config/k3s.yaml."
         },
+        "lifetime": {
+          "$ref": "#/$defs/LifetimeConfig",
+          "description": "Cost-control auto-expiry. maxRun sets a wall-clock budget counted from when the cluster STARTS (not from provision); on expiry a local cluster runs onExpiry (stop by default) and a GCP appliance is deleted by GCP-native max-run-duration. Empty maxRun disables."
+        },
         "memory": {
           "default": "8192",
           "description": "Memory in MB. qemu allocates this to the VM; docker passes it to --memory.",
diff --git a/pkg/provision/schema/qemu.schema.json b/pkg/provision/schema/qemu.schema.json
index 9decf95..f76e77f 100644
--- a/pkg/provision/schema/qemu.schema.json
+++ b/pkg/provision/schema/qemu.schema.json
@@ -53,6 +53,26 @@
       },
       "type": "object"
     },
+    "LifetimeConfig": {
+      "additionalProperties": false,
+      "properties": {
+        "maxRun": {
+          "description": "Wall-clock budget as a Go duration such as 8h or 90m. Counted from cluster start. Empty disables auto-expiry.",
+          "type": "string"
+        },
+        "onExpiry": {
+          "default": "stop",
+          "description": "Local action on expiry. Ignored on the GCP appliance path (always deletes the instance).",
+          "enum": [
+            "stop",
+            "pause",
+            "teardown"
+          ],
+          "type": "string"
+        }
+      },
+      "type": "object"
+    },
     "PortForward": {
       "additionalProperties": false,
       "properties": {
@@ -109,6 +129,10 @@
           "$ref": "#/$defs/K3sConfig",
           "description": "k3s install settings. Defaults track pkg/provision/config/k3s.yaml."
         },
+        "lifetime": {
+          "$ref": "#/$defs/LifetimeConfig",
+          "description": "Cost-control auto-expiry. maxRun sets a wall-clock budget counted from when the cluster STARTS (not from provision); on expiry a local cluster runs onExpiry (stop by default) and a GCP appliance is deleted by GCP-native max-run-duration. Empty maxRun disables."
+        },
         "memory": {
           "default": "8192",
           "description": "Memory in MB. qemu allocates this to the VM; docker passes it to --memory.",
diff --git a/scripts/_check-host-kernel.sh b/scripts/_check-host-kernel.sh
new file mode 100644
index 0000000..a9567f8
--- /dev/null
+++ b/scripts/_check-host-kernel.sh
@@ -0,0 +1,37 @@
+# shellcheck shell=bash
+# Sourced (not executed) by the appliance build / e2e scripts before
+# any libguestfs (virt-sysprep / virt-customize) work. libguestfs
+# builds a supermin appliance from the host kernel, and Ubuntu ships
+# /boot/vmlinuz-* mode 0600, so every kernel upgrade lands a fresh
+# unreadable image. On failure this prints a DURABLE fix to stderr and
+# `exit 1`s the sourcing script. The message is kept in sync with
+# requireReadableHostKernel() in pkg/provision/qemu/libguestfs.go (the
+# binary enforces the same check at its libguestfs call sites).
+__krel="$(uname -r)"
+if ! [ -r "/boot/vmlinuz-$__krel" ]; then
+    {
+        echo "host kernel /boot/vmlinuz-$__krel is not readable by this user, so"
+        echo "libguestfs (virt-customize / virt-sysprep) will fail building its"
+        echo 'supermin appliance with "supermin exited with error status 1".'
+        cat <<'EOM'
+
+Ubuntu ships /boot/vmlinuz-* mode 0600, and a fresh 0600 image lands on
+every kernel upgrade -- a one-off chmod or a per-version dpkg-statoverride
+does not survive that. Install a kernel hook once so current and future
+kernels stay readable (this makes vmlinuz world-readable):
+
+  sudo tee /etc/kernel/postinst.d/zz-vmlinuz-readable >/dev/null <<'HOOK'
+#!/bin/sh
+# Keep installed kernels readable for libguestfs/supermin.
+v="$1"; [ -n "$v" ] && [ -e "/boot/vmlinuz-$v" ] && chmod 0644 "/boot/vmlinuz-$v"
+HOOK
+  sudo chmod 0755 /etc/kernel/postinst.d/zz-vmlinuz-readable
+  sudo chmod 0644 /boot/vmlinuz-*
+
+The hook re-applies on every future kernel; the chmod fixes the ones
+already installed.
+EOM
+    } >&2
+    exit 1
+fi
+unset __krel