diff --git a/api/v1alpha1/seinodetask_types.go b/api/v1alpha1/seinodetask_types.go index e1b3b04..7b25dc5 100644 --- a/api/v1alpha1/seinodetask_types.go +++ b/api/v1alpha1/seinodetask_types.go @@ -119,6 +119,7 @@ const ( // +kubebuilder:validation:XValidation:rule="self.kind != 'DiscoverPeers' || has(self.discoverPeers)",message="spec.discoverPeers is required when kind=DiscoverPeers" // +kubebuilder:validation:XValidation:rule="self.kind != 'RestartPod' || has(self.restartPod)",message="spec.restartPod is required when kind=RestartPod" // +kubebuilder:validation:XValidation:rule="self.kind != 'RestartPod' || (has(self.restartPod) && size(self.restartPod.podUID) > 0)",message="spec.restartPod.podUID is required when kind=RestartPod" +// +kubebuilder:validation:XValidation:rule="self.kind != 'RestartPod' || self.restartPod.podUID == oldSelf.restartPod.podUID",message="spec.restartPod.podUID is immutable" // +kubebuilder:validation:XValidation:rule="self.kind != 'MarkReady' || has(self.markReady)",message="spec.markReady is required when kind=MarkReady" // +kubebuilder:validation:XValidation:rule="self.kind == oldSelf.kind",message="spec.kind is immutable" type SeiNodeTaskSpec struct { @@ -472,14 +473,6 @@ type SeiNodeTaskExecution struct { // charged against the execution budget. // +optional ExecutionStartedAt *metav1.Time `json:"executionStartedAt,omitempty"` - - // RestartedPodUID is the UID of the pod the kind=RestartPod task deletes, - // copied verbatim from spec.restartPod.podUID at synthesis. Content-addressed - // rather than clock-addressed: the task deletes only this pod and completes - // when an owned Ready pod with a different UID exists, sidestepping the - // same-second creationTimestamp race a time epoch would have. - // +optional - RestartedPodUID string `json:"restartedPodUID,omitempty"` } // SeiNodeTaskOutputs holds typed per-kind results. Exactly one sub-field is diff --git a/config/crd/sei.io_seinodetasks.yaml b/config/crd/sei.io_seinodetasks.yaml index 75b2822..5980e4b 100644 --- a/config/crd/sei.io_seinodetasks.yaml +++ b/config/crd/sei.io_seinodetasks.yaml @@ -371,6 +371,8 @@ spec: - message: spec.restartPod.podUID is required when kind=RestartPod rule: self.kind != 'RestartPod' || (has(self.restartPod) && size(self.restartPod.podUID) > 0) + - message: spec.restartPod.podUID is immutable + rule: self.kind != 'RestartPod' || self.restartPod.podUID == oldSelf.restartPod.podUID - message: spec.markReady is required when kind=MarkReady rule: self.kind != 'MarkReady' || has(self.markReady) - message: spec.kind is immutable @@ -561,14 +563,6 @@ spec: description: ID is the deterministic UUID v5 used as the sidecar task ID. type: string - restartedPodUID: - description: |- - RestartedPodUID is the UID of the pod the kind=RestartPod task deletes, - copied verbatim from spec.restartPod.podUID at synthesis. Content-addressed - rather than clock-addressed: the task deletes only this pod and completes - when an owned Ready pod with a different UID exists, sidestepping the - same-second creationTimestamp race a time epoch would have. - type: string status: description: Status is the lifecycle state of the underlying execution. enum: diff --git a/internal/controller/nodetask/controller.go b/internal/controller/nodetask/controller.go index 6e3dcbd..a0168ce 100644 --- a/internal/controller/nodetask/controller.go +++ b/internal/controller/nodetask/controller.go @@ -146,18 +146,11 @@ func (r *SeiNodeTaskReconciler) Reconcile(ctx context.Context, req ctrl.Request) if _, _, err := taskParamsForKind(cr, nil); err != nil { r.markFailed(cr, now, task.FailureReason(err), err.Error()) } else { - // RestartPod content-addresses the caller-supplied pod UID - // (spec.restartPod.podUID, CEL-required non-empty); copy it verbatim. - var restartedPodUID string - if cr.Spec.Kind == seiv1alpha1.SeiNodeTaskKindRestartPod { - restartedPodUID = cr.Spec.RestartPod.PodUID - } execStart := metav1.NewTime(now) cr.Status.Task = &seiv1alpha1.SeiNodeTaskExecution{ ID: task.DeterministicTaskID(string(cr.UID), string(cr.Spec.Kind), 0), Status: seiv1alpha1.TaskPending, ExecutionStartedAt: &execStart, - RestartedPodUID: restartedPodUID, } r.setPhase(cr, seiv1alpha1.SeiNodeTaskPhaseRunning, now) // Persist synthesis before any side effects — mirrors the diff --git a/internal/controller/nodetask/controller_test.go b/internal/controller/nodetask/controller_test.go index e17846c..ea3e2c9 100644 --- a/internal/controller/nodetask/controller_test.go +++ b/internal/controller/nodetask/controller_test.go @@ -1067,7 +1067,7 @@ func newRestartPodTask() *seiv1alpha1.SeiNodeTask { func TestTaskParamsForKind_RestartPod(t *testing.T) { g := NewWithT(t) cr := newRestartPodTask() - cr.Status.Task = &seiv1alpha1.SeiNodeTaskExecution{RestartedPodUID: "pod-uid-captured"} + cr.Status.Task = &seiv1alpha1.SeiNodeTaskExecution{ID: "task-id"} taskType, raw, err := taskParamsForKind(cr, nil) g.Expect(err).NotTo(HaveOccurred()) g.Expect(taskType).To(Equal(task.TaskTypeRestartPod)) @@ -1076,19 +1076,20 @@ func TestTaskParamsForKind_RestartPod(t *testing.T) { g.Expect(json.Unmarshal(raw, &got)).To(Succeed()) g.Expect(got.NodeName).To(Equal(testNodeName)) g.Expect(got.Namespace).To(Equal(testNS)) - g.Expect(got.RestartedPodUID).To(Equal(types.UID("pod-uid-captured"))) + g.Expect(got.RestartedPodUID).To(Equal(types.UID("pod-uid-old"))) } -// The early-validation path (status.task nil) builds params with an empty -// captured UID — the real value is threaded once status.task is populated. -func TestTaskParamsForKind_RestartPod_NilTaskEmptyUID(t *testing.T) { +// The early-validation path (status.task nil) reads the UID straight from the +// immutable spec.restartPod.podUID — no snapshot dependency, so it yields the +// spec value even before status.task exists. +func TestTaskParamsForKind_RestartPod_NilTaskReadsSpec(t *testing.T) { g := NewWithT(t) cr := newRestartPodTask() _, raw, err := taskParamsForKind(cr, nil) g.Expect(err).NotTo(HaveOccurred()) var got task.RestartPodParams g.Expect(json.Unmarshal(raw, &got)).To(Succeed()) - g.Expect(got.RestartedPodUID).To(BeEmpty()) + g.Expect(got.RestartedPodUID).To(Equal(types.UID("pod-uid-old"))) } func TestReconcile_RestartPod_HappyPath(t *testing.T) { @@ -1104,12 +1105,13 @@ func TestReconcile_RestartPod_HappyPath(t *testing.T) { r, c := newReconciler(t, t0, cr, node, sts, oldPod) - // R1: synthesize task (copies spec.restartPod.podUID to status.task verbatim). + // R1: synthesize task. The restart UID is read from the immutable + // spec.restartPod.podUID each reconcile, not snapshotted to status.task. _, err := r.Reconcile(ctx, req()) g.Expect(err).NotTo(HaveOccurred()) got := getTask(t, ctx, c) g.Expect(got.Status.Phase).To(Equal(seiv1alpha1.SeiNodeTaskPhaseRunning)) - g.Expect(got.Status.Task.RestartedPodUID).To(Equal("pod-uid-old")) + g.Expect(got.Spec.RestartPod.PodUID).To(Equal("pod-uid-old")) // R2: Execute deletes the captured pod; Status sees no pod → still Running. _, err = r.Reconcile(ctx, req()) diff --git a/internal/controller/nodetask/envtest/cel_validation_test.go b/internal/controller/nodetask/envtest/cel_validation_test.go index 8b60816..9f2a93b 100644 --- a/internal/controller/nodetask/envtest/cel_validation_test.go +++ b/internal/controller/nodetask/envtest/cel_validation_test.go @@ -167,3 +167,19 @@ func TestCEL_KindImmutable_DiscoverPeersToRestartPod(t *testing.T) { g.Expect(err).To(HaveOccurred()) g.Expect(err.Error()).To(ContainSubstring("kind is immutable")) } + +// spec.restartPod.podUID is immutable — the task content-addresses the pod to +// delete, so the restart target cannot be re-pointed after creation. +func TestCEL_RestartPodUIDImmutable(t *testing.T) { + g := NewWithT(t) + ns := makeNamespace(t) + snt := baseTask(ns, "poduid-immutable", seiv1alpha1.SeiNodeTaskKindRestartPod) + snt.Spec.RestartPod = &seiv1alpha1.RestartPodPayload{PodUID: "pod-uid-1"} + g.Expect(testCli.Create(testCtx, snt)).To(Succeed()) + + patch := client.MergeFrom(snt.DeepCopy()) + snt.Spec.RestartPod.PodUID = "pod-uid-2" + err := testCli.Patch(testCtx, snt, patch) + g.Expect(err).To(HaveOccurred()) + g.Expect(err.Error()).To(ContainSubstring("podUID is immutable")) +} diff --git a/internal/task/restart_pod.go b/internal/task/restart_pod.go index 5fee6c1..8dd5a50 100644 --- a/internal/task/restart_pod.go +++ b/internal/task/restart_pod.go @@ -18,16 +18,16 @@ const TaskTypeRestartPod = "restart-pod" // RestartPodParams identifies the target SeiNode whose pod is restarted so // seid re-reads config.toml on the next start. // -// RestartedPodUID is the content-addressed restart signal, supplied by the -// caller (spec.restartPod.podUID), copied to status.task at synthesis, and -// threaded through params so it is stable across reconciles. The task deletes -// this pod and completes once a replacement owned pod (a different UID) is -// Ready. Keying on UID rather than a creation-time epoch survives the -// same-second-truncation race: an OnDelete replacement always has a fresh UID. -// CEL requires a non-empty podUID for kind=RestartPod. The caller owns UID -// correctness: the task acts only on a UID that matches a live pod, so a stale -// UID leaves the pod in place and completes as a no-op (the contract is -// documented on the PodUID API field). +// RestartedPodUID is the content-addressed restart signal, read directly from +// the immutable spec.restartPod.podUID each reconcile (not snapshotted to +// status.task), so it is stable across reconciles. The task deletes this pod and +// completes once a replacement owned pod (a different UID) is Ready. Keying on +// UID rather than a creation-time epoch survives the same-second-truncation +// race: an OnDelete replacement always has a fresh UID. CEL requires a non-empty, +// immutable podUID for kind=RestartPod. The caller owns UID correctness: the task +// acts only on a UID that matches a live pod, so a stale UID leaves the pod in +// place and completes as a no-op (the contract is documented on the PodUID API +// field). type RestartPodParams struct { NodeName string `json:"nodeName"` Namespace string `json:"namespace"` diff --git a/internal/task/seinodetask_params.go b/internal/task/seinodetask_params.go index b7fcea8..aa00b38 100644 --- a/internal/task/seinodetask_params.go +++ b/internal/task/seinodetask_params.go @@ -237,21 +237,15 @@ func restartPodParams(cr *seiv1alpha1.SeiNodeTask) (SeiNodeTaskParams, error) { return SeiNodeTaskParams{}, paramsErrorf("spec.restartPod is required for kind=RestartPod") } // The pod UID is caller-supplied verbatim (spec.restartPod.podUID) and CEL - // requires it non-empty for kind=RestartPod; this is the defensive backstop. + // requires it non-empty (and immutable) for kind=RestartPod; this is the + // defensive backstop. if cr.Spec.RestartPod.PodUID == "" { return SeiNodeTaskParams{}, paramsErrorf("spec.restartPod.podUID is required for kind=RestartPod") } - // RestartedPodUID is content-addressed: copied from spec into status.task at - // synthesis, then threaded here. Empty is valid on the early-validation path - // (status.task nil) — the real value is threaded once status.task exists. - var podUID types.UID - if cr.Status.Task != nil { - podUID = types.UID(cr.Status.Task.RestartedPodUID) - } return SeiNodeTaskParams{TaskTypeRestartPod, RestartPodParams{ NodeName: cr.Spec.Target.NodeRef.Name, Namespace: cr.Namespace, - RestartedPodUID: podUID, + RestartedPodUID: types.UID(cr.Spec.RestartPod.PodUID), }}, nil } diff --git a/internal/task/seinodetask_params_test.go b/internal/task/seinodetask_params_test.go index a193a36..1c04467 100644 --- a/internal/task/seinodetask_params_test.go +++ b/internal/task/seinodetask_params_test.go @@ -4,6 +4,8 @@ import ( "errors" "testing" + "k8s.io/apimachinery/pkg/types" + sidecar "github.com/sei-protocol/seictl/sidecar/client" seiv1alpha1 "github.com/sei-protocol/sei-k8s-controller/api/v1alpha1" @@ -90,6 +92,46 @@ func TestSeiNodeTaskParamsFor_MarkReady_NilPayload_ParamsBuildFailed(t *testing. } } +// restartPodParams reads the restart UID straight from the immutable +// spec.restartPod.podUID — independent of status.task. Both the early-validation +// path (status.task nil) and the post-synthesis path (status.task set) yield the +// spec value. +func TestRestartPodParams_ReadsSpecPodUID(t *testing.T) { + base := func() *seiv1alpha1.SeiNodeTask { + return &seiv1alpha1.SeiNodeTask{ + Spec: seiv1alpha1.SeiNodeTaskSpec{ + Kind: seiv1alpha1.SeiNodeTaskKindRestartPod, + RestartPod: &seiv1alpha1.RestartPodPayload{PodUID: "pod-uid-1"}, + }, + } + } + + cases := map[string]*seiv1alpha1.SeiNodeTask{ + "status.task nil": base(), + "status.task set": func() *seiv1alpha1.SeiNodeTask { + cr := base() + cr.Status.Task = &seiv1alpha1.SeiNodeTaskExecution{ID: "task-id"} + return cr + }(), + } + + for name, cr := range cases { + t.Run(name, func(t *testing.T) { + p, err := SeiNodeTaskParamsFor(cr, nil) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + rp, ok := p.Payload.(RestartPodParams) + if !ok { + t.Fatalf("Payload = %T, want RestartPodParams", p.Payload) + } + if rp.RestartedPodUID != types.UID("pod-uid-1") { + t.Errorf("RestartedPodUID = %q, want %q", rp.RestartedPodUID, "pod-uid-1") + } + }) + } +} + // FailureReason carries the reason intrinsically: a wired-kind param failure // reports ParamsBuildFailed, an unwired kind reports UnsupportedKind. Both are // the public condition enum (CLAUDE.md); the controller maps err→reason with no diff --git a/manifests/sei.io_seinodetasks.yaml b/manifests/sei.io_seinodetasks.yaml index 75b2822..5980e4b 100644 --- a/manifests/sei.io_seinodetasks.yaml +++ b/manifests/sei.io_seinodetasks.yaml @@ -371,6 +371,8 @@ spec: - message: spec.restartPod.podUID is required when kind=RestartPod rule: self.kind != 'RestartPod' || (has(self.restartPod) && size(self.restartPod.podUID) > 0) + - message: spec.restartPod.podUID is immutable + rule: self.kind != 'RestartPod' || self.restartPod.podUID == oldSelf.restartPod.podUID - message: spec.markReady is required when kind=MarkReady rule: self.kind != 'MarkReady' || has(self.markReady) - message: spec.kind is immutable @@ -561,14 +563,6 @@ spec: description: ID is the deterministic UUID v5 used as the sidecar task ID. type: string - restartedPodUID: - description: |- - RestartedPodUID is the UID of the pod the kind=RestartPod task deletes, - copied verbatim from spec.restartPod.podUID at synthesis. Content-addressed - rather than clock-addressed: the task deletes only this pod and completes - when an owned Ready pod with a different UID exists, sidestepping the - same-second creationTimestamp race a time epoch would have. - type: string status: description: Status is the lifecycle state of the underlying execution. enum: