From bbd20be82654dc5d84d4a431a42967522dd474f9 Mon Sep 17 00:00:00 2001 From: bdchatham Date: Sat, 6 Jun 2026 20:06:12 -0700 Subject: [PATCH 1/2] nodetask: add RestartSeid kind (sidecar restart-seid), remove RestartPod MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit RestartSeid is the SeiNode-scoped, sidecar-backed successor to RestartPod. It dispatches the seictl restart-seid task (v0.0.56), which restarts the seid process in place — seid re-reads config.toml WITHOUT bouncing the sidecar, so the sidecar's ready flag survives and there is no ~30-40s mark-ready reapproval gap. Empty payload, no caller-supplied pod UID. Poll-to-completion (registry false): the controller polls until restart-seid reports terminal (the sidecar waits for seid's RPC to come back, and fails loud — no SIGKILL — if seid outlives the grace window; that failure surfaces as a Failed SeiNodeTask). Completion means "seid RPC serving again", not caught-up/voting — gate height with a downstream AwaitNodesAtHeight. Named RestartSeid (not RestartNode) to avoid conflating a Kubernetes Node with a SeiNode, and to align the kind with the restart-seid sidecar task. RestartPod is removed entirely (kind, RestartPodPayload/podUID, spec field, the three podUID CEL rules, restartPodParams, restart_pod.go + tests, registry entry). pod_cycle.go stays — replace_pod still uses it. Verified zero live kind:RestartPod CRs on harbor/prod/dev before removal. Requires seictl v0.0.56. Roll out the controller image (built on v0.0.56) before/with the CRD; a new-CRD + old-controller window fails closed (UnsupportedKind), never silently. Co-Authored-By: Claude Opus 4.8 --- api/v1alpha1/seinodetask_types.go | 63 ++-- api/v1alpha1/zz_generated.deepcopy.go | 14 +- config/crd/sei.io_seinodetasks.yaml | 40 +-- go.mod | 2 +- go.sum | 2 + internal/controller/nodetask/controller.go | 11 +- .../controller/nodetask/controller_test.go | 192 +++++------ .../nodetask/envtest/cel_validation_test.go | 77 ++--- internal/task/pod_cycle.go | 2 +- internal/task/pod_cycle_test.go | 107 ++++++- internal/task/restart_pod.go | 154 --------- internal/task/restart_pod_test.go | 302 ------------------ internal/task/seinodetask_params.go | 27 +- internal/task/seinodetask_params_test.go | 78 ++--- internal/task/task.go | 2 +- manifests/sei.io_seinodetasks.yaml | 40 +-- 16 files changed, 325 insertions(+), 788 deletions(-) delete mode 100644 internal/task/restart_pod.go delete mode 100644 internal/task/restart_pod_test.go diff --git a/api/v1alpha1/seinodetask_types.go b/api/v1alpha1/seinodetask_types.go index 7b25dc5..e7e9fa8 100644 --- a/api/v1alpha1/seinodetask_types.go +++ b/api/v1alpha1/seinodetask_types.go @@ -6,7 +6,7 @@ import ( // SeiNodeTaskKind discriminates the SeiNodeTask spec union. Exactly one of // the matching payload sub-structs in SeiNodeTaskSpec must be set. -// +kubebuilder:validation:Enum=GovSoftwareUpgrade;GovVote;AwaitCondition;UpdateNodeImage;AwaitNodesAtHeight;DiscoverPeers;RestartPod;MarkReady +// +kubebuilder:validation:Enum=GovSoftwareUpgrade;GovVote;AwaitCondition;UpdateNodeImage;AwaitNodesAtHeight;DiscoverPeers;RestartSeid;MarkReady type SeiNodeTaskKind string const ( @@ -41,19 +41,22 @@ const ( // SeiNodeTaskKindDiscoverPeers backs the sidecar `discover-peers` task. // Re-resolves the target's spec.peers and writes persistent-peers into the // on-disk config.toml (scalar merge). Disk-only: a running seid reads - // config.toml at startup, so compose with kind=RestartPod to apply. The two - // run independently — a DiscoverPeers success followed by a RestartPod failure + // config.toml at startup, so compose with kind=RestartSeid to apply. The two + // run independently — a DiscoverPeers success followed by a RestartSeid failure // leaves config.toml ahead of the running peer set until the next restart. SeiNodeTaskKindDiscoverPeers SeiNodeTaskKind = "DiscoverPeers" - // SeiNodeTaskKindRestartPod backs the controller-side `restart-pod` task. - // Deletes the target's single pod so the StatefulSet's OnDelete strategy - // recreates it and seid re-reads config.toml. Completes when a distinct new - // pod is Ready. Single-replica stop-then-start gated by the RWO data PVC, so - // double-sign-safe: the new pod cannot bind the PVC until the old terminates. - // The pod to delete is caller-supplied via spec.restartPod.podUID; a UID that - // no longer matches the live pod completes as a no-op (see PodUID). - SeiNodeTaskKindRestartPod SeiNodeTaskKind = "RestartPod" + // SeiNodeTaskKindRestartSeid backs the sidecar `restart-seid` task. Restarts + // seid in place — the sidecar SIGTERMs the co-located seid process and the + // kubelet restarts only that container — so seid re-reads config.toml WITHOUT + // bouncing the sidecar. Because the sidecar process never restarts, its + // in-process readiness flag survives and /v0/healthz stays 200, so there is no + // mark-ready reapproval gap (unlike a full pod restart). Empty payload — the + // target is identified from the SeiNode, no caller-supplied pod UID. + // + // Completion = seid's local RPC serving again, NOT caught-up/voting; gate + // height with a downstream AwaitNodesAtHeight. Supersedes RestartPod. + SeiNodeTaskKindRestartSeid SeiNodeTaskKind = "RestartSeid" // SeiNodeTaskKindMarkReady backs the sidecar `mark-ready` task (fire-and-forget). // Re-marks sidecar readiness so /v0/healthz returns 200, which unblocks the seid @@ -110,16 +113,14 @@ const ( // Field names locked at v1alpha1 — see docs/design/seinode-task-lld.md // (PR sei-protocol/sei-k8s-controller#277). // -// +kubebuilder:validation:XValidation:rule="(has(self.govSoftwareUpgrade) ? 1 : 0) + (has(self.govVote) ? 1 : 0) + (has(self.awaitCondition) ? 1 : 0) + (has(self.updateNodeImage) ? 1 : 0) + (has(self.awaitNodesAtHeight) ? 1 : 0) + (has(self.discoverPeers) ? 1 : 0) + (has(self.restartPod) ? 1 : 0) + (has(self.markReady) ? 1 : 0) == 1",message="exactly one of govSoftwareUpgrade, govVote, awaitCondition, updateNodeImage, awaitNodesAtHeight, discoverPeers, restartPod, or markReady must be set" +// +kubebuilder:validation:XValidation:rule="(has(self.govSoftwareUpgrade) ? 1 : 0) + (has(self.govVote) ? 1 : 0) + (has(self.awaitCondition) ? 1 : 0) + (has(self.updateNodeImage) ? 1 : 0) + (has(self.awaitNodesAtHeight) ? 1 : 0) + (has(self.discoverPeers) ? 1 : 0) + (has(self.restartSeid) ? 1 : 0) + (has(self.markReady) ? 1 : 0) == 1",message="exactly one of govSoftwareUpgrade, govVote, awaitCondition, updateNodeImage, awaitNodesAtHeight, discoverPeers, restartSeid, or markReady must be set" // +kubebuilder:validation:XValidation:rule="self.kind != 'GovSoftwareUpgrade' || has(self.govSoftwareUpgrade)",message="spec.govSoftwareUpgrade is required when kind=GovSoftwareUpgrade" // +kubebuilder:validation:XValidation:rule="self.kind != 'GovVote' || has(self.govVote)",message="spec.govVote is required when kind=GovVote" // +kubebuilder:validation:XValidation:rule="self.kind != 'AwaitCondition' || has(self.awaitCondition)",message="spec.awaitCondition is required when kind=AwaitCondition" // +kubebuilder:validation:XValidation:rule="self.kind != 'UpdateNodeImage' || has(self.updateNodeImage)",message="spec.updateNodeImage is required when kind=UpdateNodeImage" // +kubebuilder:validation:XValidation:rule="self.kind != 'AwaitNodesAtHeight' || has(self.awaitNodesAtHeight)",message="spec.awaitNodesAtHeight is required when kind=AwaitNodesAtHeight" // +kubebuilder:validation:XValidation:rule="self.kind != 'DiscoverPeers' || has(self.discoverPeers)",message="spec.discoverPeers is required when kind=DiscoverPeers" -// +kubebuilder:validation:XValidation:rule="self.kind != 'RestartPod' || has(self.restartPod)",message="spec.restartPod is required when kind=RestartPod" -// +kubebuilder:validation:XValidation:rule="self.kind != 'RestartPod' || (has(self.restartPod) && size(self.restartPod.podUID) > 0)",message="spec.restartPod.podUID is required when kind=RestartPod" -// +kubebuilder:validation:XValidation:rule="self.kind != 'RestartPod' || self.restartPod.podUID == oldSelf.restartPod.podUID",message="spec.restartPod.podUID is immutable" +// +kubebuilder:validation:XValidation:rule="self.kind != 'RestartSeid' || has(self.restartSeid)",message="spec.restartSeid is required when kind=RestartSeid" // +kubebuilder:validation:XValidation:rule="self.kind != 'MarkReady' || has(self.markReady)",message="spec.markReady is required when kind=MarkReady" // +kubebuilder:validation:XValidation:rule="self.kind == oldSelf.kind",message="spec.kind is immutable" type SeiNodeTaskSpec struct { @@ -168,9 +169,9 @@ type SeiNodeTaskSpec struct { // +optional DiscoverPeers *DiscoverPeersPayload `json:"discoverPeers,omitempty"` - // RestartPod is the payload for kind=RestartPod. + // RestartSeid is the payload for kind=RestartSeid. // +optional - RestartPod *RestartPodPayload `json:"restartPod,omitempty"` + RestartSeid *RestartSeidPayload `json:"restartSeid,omitempty"` // MarkReady is the payload for kind=MarkReady. // +optional @@ -367,31 +368,17 @@ type AwaitNodesAtHeightPayload struct { // determined by the target's spec/status, so there is nothing to parameterize. // // Writes config.toml only; the running seid picks up the new peers on its next -// restart. Compose with kind=RestartPod to apply. See the +// restart. Compose with kind=RestartSeid to apply. See the // SeiNodeTaskKindDiscoverPeers doc comment for the sequencing and atomicity // caveats. type DiscoverPeersPayload struct{} -// RestartPodPayload is the payload for kind=RestartPod. The task deletes -// exactly the pod named by PodUID (delete → OnDelete recreate) so seid re-reads -// config.toml on start. See the SeiNodeTaskKindRestartPod doc comment for the -// completion signal and safety properties. -type RestartPodPayload struct { - // PodUID is the UID of the pod to restart, supplied by the caller. Obtain it - // immediately before creating the task; for the single-replica StatefulSet - // the pod is `-0`: - // kubectl get pod -0 -o jsonpath='{.metadata.uid}' - // The task deletes exactly this pod and completes when an owned Ready pod with - // a different UID appears. Content-addressed (UID, not creationTimestamp) so - // the OnDelete replacement is unambiguously distinguished from the original. - // - // The caller owns UID correctness: the controller uses this UID verbatim, so a - // UID that no longer matches the live pod (e.g. the pod was recreated - // out-of-band after it was read) completes immediately as a no-op. Fetch the - // UID as late as possible. - // +kubebuilder:validation:MinLength=1 - PodUID string `json:"podUID"` -} +// RestartSeidPayload is the payload for kind=RestartSeid. It is empty: the +// sidecar restart-seid task (sidecar.RestartSeidTask) takes no inputs — it +// SIGTERMs the co-located seid process so the kubelet restarts that container in +// place and seid re-reads config.toml. See the SeiNodeTaskKindRestartSeid doc +// comment for the completion signal and rationale. +type RestartSeidPayload struct{} // MarkReadyPayload is the payload for kind=MarkReady. It is empty: the sidecar // mark-ready task (sidecar.MarkReadyTask) takes no inputs — it re-marks the diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go index f7b50d2..85860d8 100644 --- a/api/v1alpha1/zz_generated.deepcopy.go +++ b/api/v1alpha1/zz_generated.deepcopy.go @@ -705,16 +705,16 @@ func (in *ReplayerSpec) DeepCopy() *ReplayerSpec { } // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *RestartPodPayload) DeepCopyInto(out *RestartPodPayload) { +func (in *RestartSeidPayload) DeepCopyInto(out *RestartSeidPayload) { *out = *in } -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RestartPodPayload. -func (in *RestartPodPayload) DeepCopy() *RestartPodPayload { +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RestartSeidPayload. +func (in *RestartSeidPayload) DeepCopy() *RestartSeidPayload { if in == nil { return nil } - out := new(RestartPodPayload) + out := new(RestartSeidPayload) in.DeepCopyInto(out) return out } @@ -1321,9 +1321,9 @@ func (in *SeiNodeTaskSpec) DeepCopyInto(out *SeiNodeTaskSpec) { *out = new(DiscoverPeersPayload) **out = **in } - if in.RestartPod != nil { - in, out := &in.RestartPod, &out.RestartPod - *out = new(RestartPodPayload) + if in.RestartSeid != nil { + in, out := &in.RestartSeid, &out.RestartSeid + *out = new(RestartSeidPayload) **out = **in } if in.MarkReady != nil { diff --git a/config/crd/sei.io_seinodetasks.yaml b/config/crd/sei.io_seinodetasks.yaml index 5980e4b..53377fa 100644 --- a/config/crd/sei.io_seinodetasks.yaml +++ b/config/crd/sei.io_seinodetasks.yaml @@ -249,33 +249,14 @@ spec: - UpdateNodeImage - AwaitNodesAtHeight - DiscoverPeers - - RestartPod + - RestartSeid - MarkReady type: string markReady: description: MarkReady is the payload for kind=MarkReady. type: object - restartPod: - description: RestartPod is the payload for kind=RestartPod. - properties: - podUID: - description: |- - PodUID is the UID of the pod to restart, supplied by the caller. Obtain it - immediately before creating the task; for the single-replica StatefulSet - the pod is `-0`: - kubectl get pod -0 -o jsonpath='{.metadata.uid}' - The task deletes exactly this pod and completes when an owned Ready pod with - a different UID appears. Content-addressed (UID, not creationTimestamp) so - the OnDelete replacement is unambiguously distinguished from the original. - - The caller owns UID correctness: the controller uses this UID verbatim, so a - UID that no longer matches the live pod (e.g. the pod was recreated - out-of-band after it was read) completes immediately as a no-op. Fetch the - UID as late as possible. - minLength: 1 - type: string - required: - - podUID + restartSeid: + description: RestartSeid is the payload for kind=RestartSeid. type: object target: description: |- @@ -347,13 +328,13 @@ spec: type: object x-kubernetes-validations: - message: exactly one of govSoftwareUpgrade, govVote, awaitCondition, - updateNodeImage, awaitNodesAtHeight, discoverPeers, restartPod, or + updateNodeImage, awaitNodesAtHeight, discoverPeers, restartSeid, or markReady must be set rule: '(has(self.govSoftwareUpgrade) ? 1 : 0) + (has(self.govVote) ? 1 : 0) + (has(self.awaitCondition) ? 1 : 0) + (has(self.updateNodeImage) ? 1 : 0) + (has(self.awaitNodesAtHeight) ? 1 : 0) + (has(self.discoverPeers) - ? 1 : 0) + (has(self.restartPod) ? 1 : 0) + (has(self.markReady) ? - 1 : 0) == 1' + ? 1 : 0) + (has(self.restartSeid) ? 1 : 0) + (has(self.markReady) + ? 1 : 0) == 1' - message: spec.govSoftwareUpgrade is required when kind=GovSoftwareUpgrade rule: self.kind != 'GovSoftwareUpgrade' || has(self.govSoftwareUpgrade) - message: spec.govVote is required when kind=GovVote @@ -366,13 +347,8 @@ spec: rule: self.kind != 'AwaitNodesAtHeight' || has(self.awaitNodesAtHeight) - message: spec.discoverPeers is required when kind=DiscoverPeers rule: self.kind != 'DiscoverPeers' || has(self.discoverPeers) - - message: spec.restartPod is required when kind=RestartPod - rule: self.kind != 'RestartPod' || has(self.restartPod) - - message: spec.restartPod.podUID is required when kind=RestartPod - rule: self.kind != 'RestartPod' || (has(self.restartPod) && size(self.restartPod.podUID) - > 0) - - message: spec.restartPod.podUID is immutable - rule: self.kind != 'RestartPod' || self.restartPod.podUID == oldSelf.restartPod.podUID + - message: spec.restartSeid is required when kind=RestartSeid + rule: self.kind != 'RestartSeid' || has(self.restartSeid) - message: spec.markReady is required when kind=MarkReady rule: self.kind != 'MarkReady' || has(self.markReady) - message: spec.kind is immutable diff --git a/go.mod b/go.mod index a3cfe8c..a535bae 100644 --- a/go.mod +++ b/go.mod @@ -13,7 +13,7 @@ require ( github.com/google/uuid v1.6.0 github.com/onsi/gomega v1.39.1 github.com/sei-protocol/sei-config v0.0.19 - github.com/sei-protocol/seictl v0.0.55 + github.com/sei-protocol/seictl v0.0.56 github.com/urfave/cli/v3 v3.6.1 go.opentelemetry.io/otel v1.43.0 go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.43.0 diff --git a/go.sum b/go.sum index c83af5b..d750601 100644 --- a/go.sum +++ b/go.sum @@ -1786,6 +1786,8 @@ github.com/sei-protocol/seictl v0.0.50 h1:zBOLIPI/G0oPsLV0DLlGnjCgckkyihOZ03llkF github.com/sei-protocol/seictl v0.0.50/go.mod h1:yNPLcFKRTbKvsdKFuQseMHkkXTol7FXidnGKJa/bUXQ= github.com/sei-protocol/seictl v0.0.55 h1:JZ15hoAS7ft3LL85SeYtkP3Gr/oMlEQnBjhefbDdiZ4= github.com/sei-protocol/seictl v0.0.55/go.mod h1:sDWY/llzQPnblG/WS6uQ7vqDtshNQ0WJTJzRUgmfFpg= +github.com/sei-protocol/seictl v0.0.56 h1:zRCZdGiRrvP+zv/DYhmfP570MOR9nO6o9VWncABkJSo= +github.com/sei-protocol/seictl v0.0.56/go.mod h1:sDWY/llzQPnblG/WS6uQ7vqDtshNQ0WJTJzRUgmfFpg= github.com/sei-protocol/seilog v0.0.3 h1:Zi7oWXdX5jv92dY8n482xH032LtNebC89Y+qYZlBn0Y= github.com/sei-protocol/seilog v0.0.3/go.mod h1:CKg58wraWnB3gRxWQ0v1rIVr0gmDHjkfP1bM2giKFFU= github.com/shirou/gopsutil v2.20.5+incompatible/go.mod h1:5b4v6he4MtMOwMlS0TUMTu2PcXUg8+E1lC7eC3UO/RA= diff --git a/internal/controller/nodetask/controller.go b/internal/controller/nodetask/controller.go index a0168ce..5cc50ec 100644 --- a/internal/controller/nodetask/controller.go +++ b/internal/controller/nodetask/controller.go @@ -56,10 +56,11 @@ const ( sidecarStatusTimeout = 15 * time.Second // Per-kind execution-timeout defaults applied when spec.timeoutSeconds is 0. - // These bound kinds whose completion depends on a pod becoming Ready or a + // These bound kinds whose completion depends on seid coming back up or a // quick disk write; sidecar-backed gov/await kinds stay unbounded (an - // operator sets spec.timeoutSeconds). - defaultRestartPodTimeout = 10 * time.Minute + // operator sets spec.timeoutSeconds). RestartSeid gets a generous 10m: the + // sidecar SIGTERMs seid (up to ~90s graceful), then polls seid's RPC back up. + defaultRestartSeidTimeout = 10 * time.Minute defaultDiscoverPeersTimeout = 2 * time.Minute defaultMarkReadyTimeout = 2 * time.Minute ) @@ -320,8 +321,8 @@ func effectiveTimeout(cr *seiv1alpha1.SeiNodeTask) time.Duration { return time.Duration(cr.Spec.TimeoutSeconds) * time.Second } switch cr.Spec.Kind { - case seiv1alpha1.SeiNodeTaskKindRestartPod: - return defaultRestartPodTimeout + case seiv1alpha1.SeiNodeTaskKindRestartSeid: + return defaultRestartSeidTimeout case seiv1alpha1.SeiNodeTaskKindDiscoverPeers: return defaultDiscoverPeersTimeout case seiv1alpha1.SeiNodeTaskKindMarkReady: diff --git a/internal/controller/nodetask/controller_test.go b/internal/controller/nodetask/controller_test.go index ea3e2c9..b50cf1c 100644 --- a/internal/controller/nodetask/controller_test.go +++ b/internal/controller/nodetask/controller_test.go @@ -10,9 +10,6 @@ import ( "github.com/google/uuid" . "github.com/onsi/gomega" sidecar "github.com/sei-protocol/seictl/sidecar/client" - appsv1 "k8s.io/api/apps/v1" - corev1 "k8s.io/api/core/v1" - apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" k8sruntime "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" @@ -37,7 +34,6 @@ const ( testKeyName = "operator" testFees = "2000usei" testPeerAddr = "abc@10.0.0.1:26656" - testRev = "rev-1" testPeerRegion = "us-east-1" testPeerTagKey = "role" @@ -1047,115 +1043,99 @@ func TestReconcile_DiscoverPeers_PreUpgradeNilExecutionStart_StampsAndTimesOut(t } // --------------------------------------------------------------------------- -// RestartPod +// RestartSeid // --------------------------------------------------------------------------- -func newRestartPodTask() *seiv1alpha1.SeiNodeTask { +func newRestartSeidTask() *seiv1alpha1.SeiNodeTask { return &seiv1alpha1.SeiNodeTask{ ObjectMeta: metav1.ObjectMeta{Name: testTaskName, Namespace: testNS, UID: "task-uid-restart", Generation: 1}, Spec: seiv1alpha1.SeiNodeTaskSpec{ - Kind: seiv1alpha1.SeiNodeTaskKindRestartPod, + Kind: seiv1alpha1.SeiNodeTaskKindRestartSeid, Target: seiv1alpha1.SeiNodeTaskTarget{ NodeRef: seiv1alpha1.SeiNodeTaskNodeRef{Name: testNodeName}, RequirePhase: seiv1alpha1.PhaseRunning, }, - RestartPod: &seiv1alpha1.RestartPodPayload{PodUID: "pod-uid-old"}, + RestartSeid: &seiv1alpha1.RestartSeidPayload{}, }, } } -func TestTaskParamsForKind_RestartPod(t *testing.T) { +func TestTaskParamsForKind_RestartSeid(t *testing.T) { g := NewWithT(t) - cr := newRestartPodTask() - cr.Status.Task = &seiv1alpha1.SeiNodeTaskExecution{ID: "task-id"} + cr := newRestartSeidTask() taskType, raw, err := taskParamsForKind(cr, nil) g.Expect(err).NotTo(HaveOccurred()) - g.Expect(taskType).To(Equal(task.TaskTypeRestartPod)) + g.Expect(taskType).To(Equal(sidecar.TaskTypeRestartSeid)) - var got task.RestartPodParams + var got sidecar.RestartSeidTask g.Expect(json.Unmarshal(raw, &got)).To(Succeed()) - g.Expect(got.NodeName).To(Equal(testNodeName)) - g.Expect(got.Namespace).To(Equal(testNS)) - g.Expect(got.RestartedPodUID).To(Equal(types.UID("pod-uid-old"))) + g.Expect(got).To(Equal(sidecar.RestartSeidTask{})) } -// The early-validation path (status.task nil) reads the UID straight from the -// immutable spec.restartPod.podUID — no snapshot dependency, so it yields the -// spec value even before status.task exists. -func TestTaskParamsForKind_RestartPod_NilTaskReadsSpec(t *testing.T) { - g := NewWithT(t) - cr := newRestartPodTask() - _, raw, err := taskParamsForKind(cr, nil) - g.Expect(err).NotTo(HaveOccurred()) - var got task.RestartPodParams - g.Expect(json.Unmarshal(raw, &got)).To(Succeed()) - g.Expect(got.RestartedPodUID).To(Equal(types.UID("pod-uid-old"))) -} - -func TestReconcile_RestartPod_HappyPath(t *testing.T) { +// RestartSeid is poll-to-completion (registered sidecarTask[...](false)): unlike +// MarkReady's fire-and-forget ack, the controller polls GetTask until the +// restart-seid task reports terminal (seid's RPC back up). Mirrors the +// DiscoverPeers poll shape. +func TestReconcile_RestartSeid_EndToEnd(t *testing.T) { g := NewWithT(t) ctx := context.Background() - // t0 is the restart epoch (status.startedAt). The pre-existing pod is - // created before it; the OnDelete replacement after it. t0 := time.Now() - cr := newRestartPodTask() + cr := newRestartSeidTask() node := newRunningNode() - sts := restartTestSTS() - oldPod := restartTestPod("pod-uid-old", metav1.NewTime(t0.Add(-time.Hour)), true) + fakeSC := newFakeSidecarClient() - r, c := newReconciler(t, t0, cr, node, sts, oldPod) + r, c := newReconcilerWithSidecar(t, t0, fakeSC, cr, node) - // R1: synthesize task. The restart UID is read from the immutable - // spec.restartPod.podUID each reconcile, not snapshotted to status.task. + // R1: synthesize task. _, err := r.Reconcile(ctx, req()) g.Expect(err).NotTo(HaveOccurred()) got := getTask(t, ctx, c) g.Expect(got.Status.Phase).To(Equal(seiv1alpha1.SeiNodeTaskPhaseRunning)) - g.Expect(got.Spec.RestartPod.PodUID).To(Equal("pod-uid-old")) + taskID, perr := uuid.Parse(got.Status.Task.ID) + g.Expect(perr).NotTo(HaveOccurred()) - // R2: Execute deletes the captured pod; Status sees no pod → still Running. + // R2: Execute submits restart-seid to the sidecar; not yet terminal → Running. _, err = r.Reconcile(ctx, req()) g.Expect(err).NotTo(HaveOccurred()) - deleted := &corev1.Pod{} - derr := c.Get(ctx, types.NamespacedName{Name: oldPod.Name, Namespace: testNS}, deleted) - g.Expect(apierrors.IsNotFound(derr)).To(BeTrue()) - got = getTask(t, ctx, c) - g.Expect(got.Status.Phase).To(Equal(seiv1alpha1.SeiNodeTaskPhaseRunning)) - - // Simulate OnDelete recreation with a Ready replacement (different UID). - newPod := restartTestPod("pod-uid-new", metav1.NewTime(t0.Add(time.Minute)), true) - g.Expect(c.Create(ctx, newPod)).To(Succeed()) + fakeSC.mu.Lock() + g.Expect(fakeSC.submitted).To(HaveLen(1)) + g.Expect(fakeSC.submitted[0].Type).To(Equal(sidecar.TaskTypeRestartSeid)) + fakeSC.mu.Unlock() + g.Expect(getTask(t, ctx, c).Status.Phase).To(Equal(seiv1alpha1.SeiNodeTaskPhaseRunning)) + // Poll contract: the controller queried GetTask (not served from a submit-time + // cache like a fire-and-forget task would). + fakeSC.mu.Lock() + g.Expect(fakeSC.getCalls).To(BeNumerically(">", 0)) + fakeSC.mu.Unlock() - // R3: Status sees the fresh Ready pod → Complete. + // Sidecar reports the restart-seid task complete (seid RPC back up). + fakeSC.setResult(taskID, sidecar.Completed, "") _, err = r.Reconcile(ctx, req()) g.Expect(err).NotTo(HaveOccurred()) got = getTask(t, ctx, c) g.Expect(got.Status.Phase).To(Equal(seiv1alpha1.SeiNodeTaskPhaseComplete)) } -// A RestartPod whose pod never becomes Ready must transition to Failed at the -// per-kind default timeout (spec.timeoutSeconds=0), not requeue forever. -func TestReconcile_RestartPod_NeverReady_TimesOut(t *testing.T) { +// A RestartSeid whose sidecar restart-seid never completes must transition to +// Failed at the per-kind default timeout (spec.timeoutSeconds=0), not requeue +// forever. +func TestReconcile_RestartSeid_NeverCompletes_TimesOut(t *testing.T) { g := NewWithT(t) ctx := context.Background() t0 := time.Now() - cr := newRestartPodTask() // timeoutSeconds unset → default applies + cr := newRestartSeidTask() // timeoutSeconds unset → default applies node := newRunningNode() - sts := restartTestSTS() - // Pod present but never Ready; Execute deletes it and the replacement - // (recreated by OnDelete in prod) never appears in this test. - oldPod := restartTestPod("pod-uid-old", metav1.NewTime(t0.Add(-time.Hour)), false) + fakeSC := newFakeSidecarClient() - r, c := newReconciler(t, t0, cr, node, sts, oldPod) + r, c := newReconcilerWithSidecar(t, t0, fakeSC, cr, node) - _, err := r.Reconcile(ctx, req()) // R1 synthesize (stamps startedAt=t0) + _, err := r.Reconcile(ctx, req()) // R1 synthesize (executionStartedAt=t0) g.Expect(err).NotTo(HaveOccurred()) - _, err = r.Reconcile(ctx, req()) // R2 Execute + Status → still Running + _, err = r.Reconcile(ctx, req()) // R2 Execute + Status → Running (never completes) g.Expect(err).NotTo(HaveOccurred()) g.Expect(getTask(t, ctx, c).Status.Phase).To(Equal(seiv1alpha1.SeiNodeTaskPhaseRunning)) - // Advance past the RestartPod default timeout. - r.Now = func() time.Time { return t0.Add(defaultRestartPodTimeout + time.Second) } + r.Now = func() time.Time { return t0.Add(defaultRestartSeidTimeout + time.Second) } _, err = r.Reconcile(ctx, req()) g.Expect(err).NotTo(HaveOccurred()) @@ -1198,51 +1178,6 @@ func TestReconcile_DiscoverPeers_LabelEmptyResolved_Fails(t *testing.T) { g.Expect(failed.Reason).To(Equal("ParamsBuildFailed")) } -func restartTestSTS() *appsv1.StatefulSet { - one := int32(1) - return &appsv1.StatefulSet{ - ObjectMeta: metav1.ObjectMeta{ - Name: testNodeName, Namespace: testNS, UID: "sts-uid-restart", Generation: 1, - }, - Spec: appsv1.StatefulSetSpec{ - Replicas: &one, - Selector: &metav1.LabelSelector{MatchLabels: map[string]string{"sei.io/node": testNodeName}}, - }, - Status: appsv1.StatefulSetStatus{ - ObservedGeneration: 1, - CurrentRevision: testRev, - UpdateRevision: testRev, - }, - } -} - -func restartTestPod(uid types.UID, created metav1.Time, ready bool) *corev1.Pod { - controller := true - pod := &corev1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: testNodeName + "-0", - Namespace: testNS, - UID: uid, - CreationTimestamp: created, - Labels: map[string]string{ - "sei.io/node": testNodeName, - appsv1.ControllerRevisionHashLabelKey: testRev, - }, - OwnerReferences: []metav1.OwnerReference{{ - APIVersion: "apps/v1", - Kind: "StatefulSet", - Name: testNodeName, - UID: "sts-uid-restart", - Controller: &controller, - }}, - }, - } - if ready { - pod.Status.Conditions = []corev1.PodCondition{{Type: corev1.PodReady, Status: corev1.ConditionTrue}} - } - return pod -} - // An unwired kind fails fast at synthesis with reason=UnsupportedKind — the // reason travels with the error (task.FailureReason), distinct from the // ParamsBuildFailed reason used for wired-kind payload errors, with no @@ -1308,3 +1243,44 @@ func TestReconcile_GovVote_SidecarFailure(t *testing.T) { g.Expect(got.Status.Task.Status).To(Equal(seiv1alpha1.TaskFailed)) g.Expect(got.Status.Task.Err).To(ContainSubstring(sidecarErr)) } + +// The headline correctness property of restart-seid: the sidecar task FAILS LOUD +// (never SIGKILLs) when seid does not exit in the grace window, and that failure +// must surface as a Failed SeiNodeTask carrying the sidecar's error — not a +// silent success or an indefinite requeue. Mirrors TestReconcile_GovVote_SidecarFailure. +func TestReconcile_RestartSeid_SidecarFailLoud_Fails(t *testing.T) { + g := NewWithT(t) + ctx := context.Background() + t0 := time.Now() + cr := newRestartSeidTask() + node := newRunningNode() + fakeSC := newFakeSidecarClient() + + r, c := newReconcilerWithSidecar(t, t0, fakeSC, cr, node) + + // R1: synthesize. + _, err := r.Reconcile(ctx, req()) + g.Expect(err).NotTo(HaveOccurred()) + got := getTask(t, ctx, c) + taskID, perr := uuid.Parse(got.Status.Task.ID) + g.Expect(perr).NotTo(HaveOccurred()) + + // Stage the fail-loud result BEFORE R2 so the first Status poll after Execute + // sees Failed. + const sidecarErr = "seid pid still alive after SIGTERM; leaving it running" + fakeSC.setResult(taskID, sidecar.Failed, sidecarErr) + + // R2: Execute submits restart-seid; Status polls and sees Failed → CR Failed. + _, err = r.Reconcile(ctx, req()) + g.Expect(err).NotTo(HaveOccurred()) + got = getTask(t, ctx, c) + g.Expect(got.Status.Phase).To(Equal(seiv1alpha1.SeiNodeTaskPhaseFailed)) + g.Expect(got.Status.Task.Status).To(Equal(seiv1alpha1.TaskFailed)) + g.Expect(got.Status.Task.Err).To(ContainSubstring(sidecarErr)) + + failed := findFailedCond(got) + g.Expect(failed).NotTo(BeNil()) + g.Expect(failed.Status).To(Equal(metav1.ConditionTrue)) + g.Expect(failed.Reason).To(Equal("TaskFailed")) + g.Expect(failed.Message).To(ContainSubstring(sidecarErr)) +} diff --git a/internal/controller/nodetask/envtest/cel_validation_test.go b/internal/controller/nodetask/envtest/cel_validation_test.go index 9f2a93b..fc235f3 100644 --- a/internal/controller/nodetask/envtest/cel_validation_test.go +++ b/internal/controller/nodetask/envtest/cel_validation_test.go @@ -44,15 +44,26 @@ func TestCEL_DiscoverPeers_Accepted(t *testing.T) { g.Expect(testCli.Create(testCtx, snt)).To(Succeed()) } -// RestartPod with a payload carrying a non-empty podUID is accepted. -func TestCEL_RestartPod_Accepted(t *testing.T) { +// RestartSeid with its matching empty payload is accepted. +func TestCEL_RestartSeid_Accepted(t *testing.T) { g := NewWithT(t) ns := makeNamespace(t) - snt := baseTask(ns, "restart-ok", seiv1alpha1.SeiNodeTaskKindRestartPod) - snt.Spec.RestartPod = &seiv1alpha1.RestartPodPayload{PodUID: "pod-uid-1"} + snt := baseTask(ns, "restart-ok", seiv1alpha1.SeiNodeTaskKindRestartSeid) + snt.Spec.RestartSeid = &seiv1alpha1.RestartSeidPayload{} g.Expect(testCli.Create(testCtx, snt)).To(Succeed()) } +// The removed RestartPod kind is no longer in the enum, so a CR with +// kind=RestartPod is rejected at the schema layer. +func TestCEL_RestartPod_Removed_Rejected(t *testing.T) { + g := NewWithT(t) + ns := makeNamespace(t) + snt := baseTask(ns, "restartpod-gone", seiv1alpha1.SeiNodeTaskKind("RestartPod")) + err := testCli.Create(testCtx, snt) + g.Expect(err).To(HaveOccurred()) + g.Expect(err.Error()).To(ContainSubstring("Unsupported value")) +} + // MarkReady with its matching empty payload is accepted. func TestCEL_MarkReady_Accepted(t *testing.T) { g := NewWithT(t) @@ -75,14 +86,14 @@ func TestCEL_MarkReady_NoPayload_Rejected(t *testing.T) { )) } -// kind=MarkReady with a second payload (markReady + restartPod) is rejected by +// kind=MarkReady with a second payload (markReady + restartSeid) is rejected by // the exactly-one union rule. func TestCEL_MarkReady_MultiplePayloads_Rejected(t *testing.T) { g := NewWithT(t) ns := makeNamespace(t) snt := baseTask(ns, "markready-two-payloads", seiv1alpha1.SeiNodeTaskKindMarkReady) snt.Spec.MarkReady = &seiv1alpha1.MarkReadyPayload{} - snt.Spec.RestartPod = &seiv1alpha1.RestartPodPayload{PodUID: "pod-uid-1"} + snt.Spec.RestartSeid = &seiv1alpha1.RestartSeidPayload{} err := testCli.Create(testCtx, snt) g.Expect(err).To(HaveOccurred()) g.Expect(err.Error()).To(ContainSubstring("exactly one")) @@ -101,58 +112,46 @@ func TestCEL_DiscoverPeers_NoPayload_Rejected(t *testing.T) { )) } -// kind=RestartPod with NO payload is rejected. -func TestCEL_RestartPod_NoPayload_Rejected(t *testing.T) { - g := NewWithT(t) - ns := makeNamespace(t) - snt := baseTask(ns, "restart-nopayload", seiv1alpha1.SeiNodeTaskKindRestartPod) - err := testCli.Create(testCtx, snt) - g.Expect(err).To(HaveOccurred()) -} - -// kind=RestartPod with a payload but empty podUID is rejected — the controller -// content-addresses the pod to delete, so an empty UID is not a valid restart -// target. -func TestCEL_RestartPod_EmptyPodUID_Rejected(t *testing.T) { +// kind=RestartSeid with NO payload is rejected. +func TestCEL_RestartSeid_NoPayload_Rejected(t *testing.T) { g := NewWithT(t) ns := makeNamespace(t) - snt := baseTask(ns, "restart-emptyuid", seiv1alpha1.SeiNodeTaskKindRestartPod) - snt.Spec.RestartPod = &seiv1alpha1.RestartPodPayload{} + snt := baseTask(ns, "restart-nopayload", seiv1alpha1.SeiNodeTaskKindRestartSeid) err := testCli.Create(testCtx, snt) g.Expect(err).To(HaveOccurred()) g.Expect(err.Error()).To(Or( - ContainSubstring("podUID is required"), - ContainSubstring("should be at least 1 chars long"), + ContainSubstring("exactly one"), + ContainSubstring("restartSeid is required"), )) } -// kind=DiscoverPeers with TWO payloads (discoverPeers + restartPod) is +// kind=DiscoverPeers with TWO payloads (discoverPeers + restartSeid) is // rejected by the exactly-one union rule. func TestCEL_MultiplePayloads_Rejected(t *testing.T) { g := NewWithT(t) ns := makeNamespace(t) snt := baseTask(ns, "two-payloads", seiv1alpha1.SeiNodeTaskKindDiscoverPeers) snt.Spec.DiscoverPeers = &seiv1alpha1.DiscoverPeersPayload{} - snt.Spec.RestartPod = &seiv1alpha1.RestartPodPayload{} + snt.Spec.RestartSeid = &seiv1alpha1.RestartSeidPayload{} err := testCli.Create(testCtx, snt) g.Expect(err).To(HaveOccurred()) g.Expect(err.Error()).To(ContainSubstring("exactly one")) } -// kind=DiscoverPeers carrying a mismatched payload (restartPod) is rejected by +// kind=DiscoverPeers carrying a mismatched payload (restartSeid) is rejected by // the kind/payload agreement rule. func TestCEL_KindPayloadMismatch_Rejected(t *testing.T) { g := NewWithT(t) ns := makeNamespace(t) snt := baseTask(ns, "kind-mismatch", seiv1alpha1.SeiNodeTaskKindDiscoverPeers) - snt.Spec.RestartPod = &seiv1alpha1.RestartPodPayload{} + snt.Spec.RestartSeid = &seiv1alpha1.RestartSeidPayload{} err := testCli.Create(testCtx, snt) g.Expect(err).To(HaveOccurred()) g.Expect(err.Error()).To(ContainSubstring("discoverPeers is required")) } -// spec.kind is immutable — a DiscoverPeers task cannot be flipped to RestartPod. -func TestCEL_KindImmutable_DiscoverPeersToRestartPod(t *testing.T) { +// spec.kind is immutable — a DiscoverPeers task cannot be flipped to RestartSeid. +func TestCEL_KindImmutable_DiscoverPeersToRestartSeid(t *testing.T) { g := NewWithT(t) ns := makeNamespace(t) snt := baseTask(ns, "kind-immutable", seiv1alpha1.SeiNodeTaskKindDiscoverPeers) @@ -160,26 +159,10 @@ func TestCEL_KindImmutable_DiscoverPeersToRestartPod(t *testing.T) { g.Expect(testCli.Create(testCtx, snt)).To(Succeed()) patch := client.MergeFrom(snt.DeepCopy()) - snt.Spec.Kind = seiv1alpha1.SeiNodeTaskKindRestartPod + snt.Spec.Kind = seiv1alpha1.SeiNodeTaskKindRestartSeid snt.Spec.DiscoverPeers = nil - snt.Spec.RestartPod = &seiv1alpha1.RestartPodPayload{PodUID: "pod-uid-1"} + snt.Spec.RestartSeid = &seiv1alpha1.RestartSeidPayload{} err := testCli.Patch(testCtx, snt, patch) g.Expect(err).To(HaveOccurred()) g.Expect(err.Error()).To(ContainSubstring("kind is immutable")) } - -// spec.restartPod.podUID is immutable — the task content-addresses the pod to -// delete, so the restart target cannot be re-pointed after creation. -func TestCEL_RestartPodUIDImmutable(t *testing.T) { - g := NewWithT(t) - ns := makeNamespace(t) - snt := baseTask(ns, "poduid-immutable", seiv1alpha1.SeiNodeTaskKindRestartPod) - snt.Spec.RestartPod = &seiv1alpha1.RestartPodPayload{PodUID: "pod-uid-1"} - g.Expect(testCli.Create(testCtx, snt)).To(Succeed()) - - patch := client.MergeFrom(snt.DeepCopy()) - snt.Spec.RestartPod.PodUID = "pod-uid-2" - err := testCli.Patch(testCtx, snt, patch) - g.Expect(err).To(HaveOccurred()) - g.Expect(err.Error()).To(ContainSubstring("podUID is immutable")) -} diff --git a/internal/task/pod_cycle.go b/internal/task/pod_cycle.go index 5a58226..96efeee 100644 --- a/internal/task/pod_cycle.go +++ b/internal/task/pod_cycle.go @@ -14,7 +14,7 @@ import ( ) // podCycle is the shared plumbing for the controller-side pod-lifecycle tasks -// (replace-pod, restart-pod). Both delete StatefulSet-owned pods to drive the +// (currently replace-pod). It deletes StatefulSet-owned pods to drive the // StatefulSet's OnDelete update strategy — pod lifecycle is the SeiNode // controller's responsibility, not the StatefulSet controller's. The common // core is: fetch the StatefulSet (cache-bypassing), guard against an absent diff --git a/internal/task/pod_cycle_test.go b/internal/task/pod_cycle_test.go index f7a05aa..4c1f096 100644 --- a/internal/task/pod_cycle_test.go +++ b/internal/task/pod_cycle_test.go @@ -3,12 +3,113 @@ package task import ( "context" "testing" + "time" . "github.com/onsi/gomega" + appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" + clientgoscheme "k8s.io/client-go/kubernetes/scheme" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/client/fake" + + seiv1alpha1 "github.com/sei-protocol/sei-k8s-controller/api/v1alpha1" +) + +const ( + restartNS = "default" + restartSTS = "node-1" + restartRev = "rev-1" + kindSTS = "StatefulSet" + + restartedUID = types.UID("pod-uid-original") + replacementUID = types.UID("pod-uid-replacement") ) +var someTime = metav1.NewTime(time.Date(2026, 6, 6, 12, 0, 0, 0, time.UTC)) + +func restartPodNode() *seiv1alpha1.SeiNode { + return &seiv1alpha1.SeiNode{ + ObjectMeta: metav1.ObjectMeta{Name: restartSTS, Namespace: restartNS}, + Spec: seiv1alpha1.SeiNodeSpec{ + ChainID: "test-chain", + Image: "test:v1", + FullNode: &seiv1alpha1.FullNodeSpec{}, + }, + Status: seiv1alpha1.SeiNodeStatus{Phase: seiv1alpha1.PhaseRunning}, + } +} + +func stsForRestart() *appsv1.StatefulSet { + one := int32(1) + return &appsv1.StatefulSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: restartSTS, Namespace: restartNS, UID: stsUID, Generation: 1, + }, + Spec: appsv1.StatefulSetSpec{ + Replicas: &one, + Selector: &metav1.LabelSelector{MatchLabels: map[string]string{ + "sei.io/node": restartSTS, + }}, + }, + Status: appsv1.StatefulSetStatus{ + ObservedGeneration: 1, + CurrentRevision: restartRev, + UpdateRevision: restartRev, + }, + } +} + +// podForRestart builds a pod owned by the restart STS. uid identifies the pod; +// created sets the creationTimestamp; ready/terminating set the corresponding +// pod state. +func podForRestart(uid types.UID, created metav1.Time, ready, terminating bool) *corev1.Pod { + controller := true + pod := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: restartSTS + "-0", + Namespace: restartNS, + UID: uid, + CreationTimestamp: created, + Labels: map[string]string{ + "sei.io/node": restartSTS, + appsv1.ControllerRevisionHashLabelKey: restartRev, + }, + OwnerReferences: []metav1.OwnerReference{{ + APIVersion: "apps/v1", + Kind: kindSTS, + Name: restartSTS, + UID: stsUID, + Controller: &controller, + }}, + }, + } + if ready { + pod.Status.Conditions = []corev1.PodCondition{{Type: corev1.PodReady, Status: corev1.ConditionTrue}} + } + if terminating { + now := metav1.Now() + pod.DeletionTimestamp = &now + pod.Finalizers = []string{"test/keep-around"} + } + return pod +} + +func restartPodCfg(t *testing.T, node *seiv1alpha1.SeiNode, objs ...client.Object) ExecutionConfig { + t.Helper() + s := runtime.NewScheme() + if err := clientgoscheme.AddToScheme(s); err != nil { + t.Fatal(err) + } + if err := seiv1alpha1.AddToScheme(s); err != nil { + t.Fatal(err) + } + c := fake.NewClientBuilder().WithScheme(s).WithObjects(objs...).Build() + return ExecutionConfig{KubeClient: c, APIReader: c, Scheme: s, Resource: node} +} + // fetchStatefulSet signals a missing StatefulSet as (node, nil, nil) so callers // treat it as a transient wait rather than an error. func TestPodCycle_FetchStatefulSet_MissingIsTransient(t *testing.T) { @@ -30,7 +131,7 @@ func TestPodCycle_GuardSelectorAndReplicas(t *testing.T) { g := NewWithT(t) sts := stsForRestart() sts.Spec.Selector = nil - err := guardSelectorAndReplicas(node, sts, TaskTypeRestartPod) + err := guardSelectorAndReplicas(node, sts, TaskTypeReplacePod) var termErr *TerminalError g.Expect(err).To(BeAssignableToTypeOf(termErr)) g.Expect(err.Error()).To(ContainSubstring("no selector")) @@ -40,7 +141,7 @@ func TestPodCycle_GuardSelectorAndReplicas(t *testing.T) { g := NewWithT(t) sts := stsForRestart() sts.Spec.Selector = &metav1.LabelSelector{} - err := guardSelectorAndReplicas(node, sts, TaskTypeRestartPod) + err := guardSelectorAndReplicas(node, sts, TaskTypeReplacePod) var termErr *TerminalError g.Expect(err).To(BeAssignableToTypeOf(termErr)) }) @@ -59,7 +160,7 @@ func TestPodCycle_GuardSelectorAndReplicas(t *testing.T) { t.Run("single-replica with selector passes", func(t *testing.T) { g := NewWithT(t) - g.Expect(guardSelectorAndReplicas(node, stsForRestart(), TaskTypeRestartPod)).To(Succeed()) + g.Expect(guardSelectorAndReplicas(node, stsForRestart(), TaskTypeReplacePod)).To(Succeed()) }) } diff --git a/internal/task/restart_pod.go b/internal/task/restart_pod.go deleted file mode 100644 index 8dd5a50..0000000 --- a/internal/task/restart_pod.go +++ /dev/null @@ -1,154 +0,0 @@ -package task - -import ( - "context" - "encoding/json" - "errors" - "fmt" - - appsv1 "k8s.io/api/apps/v1" - corev1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/types" - - seiv1alpha1 "github.com/sei-protocol/sei-k8s-controller/api/v1alpha1" -) - -const TaskTypeRestartPod = "restart-pod" - -// RestartPodParams identifies the target SeiNode whose pod is restarted so -// seid re-reads config.toml on the next start. -// -// RestartedPodUID is the content-addressed restart signal, read directly from -// the immutable spec.restartPod.podUID each reconcile (not snapshotted to -// status.task), so it is stable across reconciles. The task deletes this pod and -// completes once a replacement owned pod (a different UID) is Ready. Keying on -// UID rather than a creation-time epoch survives the same-second-truncation -// race: an OnDelete replacement always has a fresh UID. CEL requires a non-empty, -// immutable podUID for kind=RestartPod. The caller owns UID correctness: the task -// acts only on a UID that matches a live pod, so a stale UID leaves the pod in -// place and completes as a no-op (the contract is documented on the PodUID API -// field). -type RestartPodParams struct { - NodeName string `json:"nodeName"` - Namespace string `json:"namespace"` - RestartedPodUID types.UID `json:"restartedPodUID,omitempty"` -} - -type restartPodExecution struct { - taskBase - podCycle - params RestartPodParams -} - -func deserializeRestartPod(id string, params json.RawMessage, cfg ExecutionConfig) (TaskExecution, error) { - var p RestartPodParams - if len(params) > 0 { - if err := json.Unmarshal(params, &p); err != nil { - return nil, fmt.Errorf("deserializing restart-pod params: %w", err) - } - } - return &restartPodExecution{ - taskBase: taskBase{id: id, status: ExecutionRunning}, - podCycle: podCycle{cfg: cfg}, - params: p, - }, nil -} - -// Execute deletes the target pod identified by RestartedPodUID. Pairs with the -// StatefulSet's OnDelete update strategy: pod lifecycle is the SeiNode -// controller's responsibility, and OnDelete recreates the pod at the same -// (unchanged) revision so seid re-reads config.toml on start. -// -// Idempotent and stateless across reconciles: Execute deletes the pod whose UID -// matches RestartedPodUID. Any other observed state — the replacement pod (a -// different UID), an empty UID, or a missing pod — leaves the cluster untouched. -// A missing StatefulSet/pod is a transient wait (apply-statefulset or scheduling -// may lag) and reports Running for the next reconcile. -func (e *restartPodExecution) Execute(ctx context.Context) error { - node, sts, err := e.fetchStatefulSet(ctx) - if err != nil { - return err - } - if sts == nil { - return nil - } - if err := guardSelectorAndReplicas(node, sts, TaskTypeRestartPod); err != nil { - return err - } - - // Defense-in-depth: CEL and synthesis guarantee a non-empty UID, so this is a - // backstop. An empty UID short-circuits to a Running Status, leaving the - // execution timeout to fail the task — a restart that deleted nothing stays - // out of the Complete state. - if e.params.RestartedPodUID == "" { - return nil - } - - pod, err := e.ownedPod(ctx, node, sts) - if err != nil { - return err - } - // Delete only the supplied pod. A matching live pod is the one to restart; a - // different UID is already the OnDelete replacement (idempotent across - // reconciles) or a stale caller-supplied UID — both leave the cluster as-is. - if pod == nil || pod.UID != e.params.RestartedPodUID || pod.DeletionTimestamp != nil { - return nil - } - return e.deletePod(ctx, pod) -} - -// Status completes when the replacement pod — a Ready owned pod whose UID -// differs from RestartedPodUID — exists. An empty RestartedPodUID holds the task -// at Running until the execution timeout fails it, so a restart that deleted -// nothing stays out of the Complete state. The original pod (matching UID), a -// terminating pod, or a missing pod also hold at Running so the executor re-polls. -func (e *restartPodExecution) Status(ctx context.Context) ExecutionStatus { - if s, done := e.isTerminal(); done { - return s - } - if e.params.RestartedPodUID == "" { - return ExecutionRunning - } - - node, sts, err := e.fetchStatefulSet(ctx) - if err != nil { - // Terminal type-assertion failure surfaces as a failed task; a missing - // StatefulSet is a transient wait. - var termErr *TerminalError - if errors.As(err, &termErr) { - e.setFailed(err) - return ExecutionFailed - } - return ExecutionRunning - } - if sts == nil { - return ExecutionRunning - } - - pod, err := e.ownedPod(ctx, node, sts) - if err != nil || pod == nil { - return ExecutionRunning - } - if pod.DeletionTimestamp != nil || pod.UID == e.params.RestartedPodUID { - return ExecutionRunning - } - if !podReady(pod) { - return ExecutionRunning - } - - e.complete() - return ExecutionComplete -} - -// ownedPod returns the single pod owned by the StatefulSet (matching its -// selector and ownerReference), or nil if none is found. -func (e *restartPodExecution) ownedPod(ctx context.Context, node *seiv1alpha1.SeiNode, sts *appsv1.StatefulSet) (*corev1.Pod, error) { - pods, err := e.ownedPods(ctx, node, sts) - if err != nil { - return nil, err - } - if len(pods) == 0 { - return nil, nil - } - return &pods[0], nil -} diff --git a/internal/task/restart_pod_test.go b/internal/task/restart_pod_test.go deleted file mode 100644 index 70a2841..0000000 --- a/internal/task/restart_pod_test.go +++ /dev/null @@ -1,302 +0,0 @@ -package task - -import ( - "context" - "encoding/json" - "testing" - "time" - - . "github.com/onsi/gomega" - appsv1 "k8s.io/api/apps/v1" - corev1 "k8s.io/api/core/v1" - apierrors "k8s.io/apimachinery/pkg/api/errors" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/runtime" - "k8s.io/apimachinery/pkg/types" - clientgoscheme "k8s.io/client-go/kubernetes/scheme" - "sigs.k8s.io/controller-runtime/pkg/client" - "sigs.k8s.io/controller-runtime/pkg/client/fake" - - seiv1alpha1 "github.com/sei-protocol/sei-k8s-controller/api/v1alpha1" -) - -const ( - restartNS = "default" - restartSTS = "node-1" - restartRev = "rev-1" - kindSTS = "StatefulSet" - - // restartedUID is the UID of the pod the restart targets (captured at - // synthesis). The replacement pod gets replacementUID. - restartedUID = types.UID("pod-uid-original") - replacementUID = types.UID("pod-uid-replacement") -) - -func restartPodNode() *seiv1alpha1.SeiNode { - return &seiv1alpha1.SeiNode{ - ObjectMeta: metav1.ObjectMeta{Name: restartSTS, Namespace: restartNS}, - Spec: seiv1alpha1.SeiNodeSpec{ - ChainID: "test-chain", - Image: "test:v1", - FullNode: &seiv1alpha1.FullNodeSpec{}, - }, - Status: seiv1alpha1.SeiNodeStatus{Phase: seiv1alpha1.PhaseRunning}, - } -} - -func stsForRestart() *appsv1.StatefulSet { - one := int32(1) - return &appsv1.StatefulSet{ - ObjectMeta: metav1.ObjectMeta{ - Name: restartSTS, Namespace: restartNS, UID: stsUID, Generation: 1, - }, - Spec: appsv1.StatefulSetSpec{ - Replicas: &one, - Selector: &metav1.LabelSelector{MatchLabels: map[string]string{ - "sei.io/node": restartSTS, - }}, - }, - Status: appsv1.StatefulSetStatus{ - ObservedGeneration: 1, - CurrentRevision: restartRev, - UpdateRevision: restartRev, - }, - } -} - -// podForRestart builds a pod owned by the restart STS. uid identifies the pod -// (the task keys on UID, not creationTimestamp); created sets the -// creationTimestamp; ready/terminating set the corresponding pod state. -func podForRestart(uid types.UID, created metav1.Time, ready, terminating bool) *corev1.Pod { - controller := true - pod := &corev1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: restartSTS + "-0", - Namespace: restartNS, - UID: uid, - CreationTimestamp: created, - Labels: map[string]string{ - "sei.io/node": restartSTS, - appsv1.ControllerRevisionHashLabelKey: restartRev, - }, - OwnerReferences: []metav1.OwnerReference{{ - APIVersion: "apps/v1", - Kind: kindSTS, - Name: restartSTS, - UID: stsUID, - Controller: &controller, - }}, - }, - } - if ready { - pod.Status.Conditions = []corev1.PodCondition{{Type: corev1.PodReady, Status: corev1.ConditionTrue}} - } - if terminating { - now := metav1.Now() - pod.DeletionTimestamp = &now - pod.Finalizers = []string{"test/keep-around"} - } - return pod -} - -func restartPodCfg(t *testing.T, node *seiv1alpha1.SeiNode, objs ...client.Object) ExecutionConfig { - t.Helper() - s := runtime.NewScheme() - if err := clientgoscheme.AddToScheme(s); err != nil { - t.Fatal(err) - } - if err := seiv1alpha1.AddToScheme(s); err != nil { - t.Fatal(err) - } - c := fake.NewClientBuilder().WithScheme(s).WithObjects(objs...).Build() - return ExecutionConfig{KubeClient: c, APIReader: c, Scheme: s, Resource: node} -} - -// newRestartPodExec builds an execution that targets restartedUID (the pod -// captured at synthesis). -func newRestartPodExec(t *testing.T, cfg ExecutionConfig) TaskExecution { - t.Helper() - return newRestartPodExecWithUID(t, cfg, restartedUID) -} - -func newRestartPodExecWithUID(t *testing.T, cfg ExecutionConfig, uid types.UID) TaskExecution { - t.Helper() - raw, _ := json.Marshal(RestartPodParams{NodeName: restartSTS, Namespace: restartNS, RestartedPodUID: uid}) - exec, err := deserializeRestartPod("rp-test", raw, cfg) - if err != nil { - t.Fatal(err) - } - return exec -} - -var ( - someTime = metav1.NewTime(time.Date(2026, 6, 6, 12, 0, 0, 0, time.UTC)) - laterTime = metav1.NewTime(someTime.Add(time.Minute)) -) - -// Happy path: the captured pod (restartedUID) is deleted; the task completes -// once the replacement (different UID) is Ready. -func TestRestartPod_DeletesTargetAndCompletesWhenReplacementReady(t *testing.T) { - g := NewWithT(t) - ctx := context.Background() - node := restartPodNode() - sts := stsForRestart() - original := podForRestart(restartedUID, someTime, true, false) - - cfg := restartPodCfg(t, node, sts, original) - exec := newRestartPodExec(t, cfg) - - g.Expect(exec.Execute(ctx)).To(Succeed()) - got := &corev1.Pod{} - err := cfg.KubeClient.Get(ctx, types.NamespacedName{Name: original.Name, Namespace: restartNS}, got) - g.Expect(apierrors.IsNotFound(err)).To(BeTrue(), "expected target pod deleted, got err=%v", err) - - g.Expect(exec.Status(ctx)).To(Equal(ExecutionRunning)) - - // Replacement (different UID) appears but not Ready → still running. - fresh := podForRestart(replacementUID, laterTime, false, false) - g.Expect(cfg.KubeClient.Create(ctx, fresh)).To(Succeed()) - g.Expect(exec.Status(ctx)).To(Equal(ExecutionRunning)) - - // Becomes Ready → complete. - fresh.Status.Conditions = []corev1.PodCondition{{Type: corev1.PodReady, Status: corev1.ConditionTrue}} - g.Expect(cfg.KubeClient.Status().Update(ctx, fresh)).To(Succeed()) - g.Expect(exec.Status(ctx)).To(Equal(ExecutionComplete)) -} - -// Same-second race guard: the OnDelete replacement's creationTimestamp truncates -// to the EXACT instant the original was created. A clock-based epoch would -// re-delete it (created <= epoch) and loop forever; the UID-keyed task must NOT -// delete it and MUST complete because its UID differs. -func TestRestartPod_ReplacementSameSecond_NotReDeletedAndCompletes(t *testing.T) { - g := NewWithT(t) - ctx := context.Background() - node := restartPodNode() - sts := stsForRestart() - // Replacement created at the identical timestamp the original had, but a - // fresh UID — exactly the case the OnDelete recreation produces within 1s. - replacement := podForRestart(replacementUID, someTime, true, false) - - cfg := restartPodCfg(t, node, sts, replacement) - exec := newRestartPodExec(t, cfg) - - g.Expect(exec.Execute(ctx)).To(Succeed()) - got := &corev1.Pod{} - g.Expect(cfg.KubeClient.Get(ctx, types.NamespacedName{Name: replacement.Name, Namespace: restartNS}, got)).To(Succeed()) - g.Expect(got.DeletionTimestamp).To(BeNil(), "same-second replacement (different UID) must not be re-deleted") - g.Expect(exec.Status(ctx)).To(Equal(ExecutionComplete)) -} - -// Stateless across reconciles: a replacement pod (different UID) must NOT be -// deleted by a freshly reconstructed execution. Execute is a no-op and a Ready -// replacement completes immediately. -func TestRestartPod_Replacement_NotReDeleted(t *testing.T) { - g := NewWithT(t) - ctx := context.Background() - node := restartPodNode() - sts := stsForRestart() - fresh := podForRestart(replacementUID, laterTime, true, false) - - cfg := restartPodCfg(t, node, sts, fresh) - exec := newRestartPodExec(t, cfg) - - g.Expect(exec.Execute(ctx)).To(Succeed()) - got := &corev1.Pod{} - g.Expect(cfg.KubeClient.Get(ctx, types.NamespacedName{Name: fresh.Name, Namespace: restartNS}, got)).To(Succeed()) - g.Expect(got.DeletionTimestamp).To(BeNil(), "replacement pod must not be re-deleted") - g.Expect(exec.Status(ctx)).To(Equal(ExecutionComplete)) -} - -// Defense-in-depth: an empty RestartedPodUID must NEVER complete-on-first-Ready -// (the synthesis site never dispatches one, but a stray empty UID can't be -// allowed to masquerade as a successful restart). Execute deletes nothing and -// Status stays Running even with an owned Ready pod present, so the controller's -// execution timeout fails the task rather than silent-completing. -func TestRestartPod_EmptyUID_NeverCompletes(t *testing.T) { - g := NewWithT(t) - ctx := context.Background() - node := restartPodNode() - sts := stsForRestart() - existing := podForRestart(restartedUID, someTime, true, false) // owned + Ready - - cfg := restartPodCfg(t, node, sts, existing) - exec := newRestartPodExecWithUID(t, cfg, "") - - g.Expect(exec.Execute(ctx)).To(Succeed()) - got := &corev1.Pod{} - g.Expect(cfg.KubeClient.Get(ctx, types.NamespacedName{Name: existing.Name, Namespace: restartNS}, got)).To(Succeed()) - g.Expect(got.DeletionTimestamp).To(BeNil(), "empty UID must not delete any pod") - g.Expect(exec.Status(ctx)).To(Equal(ExecutionRunning), "empty UID must not complete on a Ready pod") -} - -// No pod exists at Execute time: Execute is a no-op; the task waits for a Ready -// replacement. -func TestRestartPod_NoPodYet_WaitsForReady(t *testing.T) { - g := NewWithT(t) - ctx := context.Background() - node := restartPodNode() - sts := stsForRestart() - - cfg := restartPodCfg(t, node, sts) - exec := newRestartPodExec(t, cfg) - - g.Expect(exec.Execute(ctx)).To(Succeed()) - g.Expect(exec.Status(ctx)).To(Equal(ExecutionRunning)) - - pod := podForRestart(replacementUID, laterTime, true, false) - g.Expect(cfg.KubeClient.Create(ctx, pod)).To(Succeed()) - g.Expect(exec.Status(ctx)).To(Equal(ExecutionComplete)) -} - -// StatefulSet missing entirely → Execute is a transient no-op, Status waits. -func TestRestartPod_StatefulSetMissing_TransientWait(t *testing.T) { - g := NewWithT(t) - ctx := context.Background() - node := restartPodNode() - - cfg := restartPodCfg(t, node) - exec := newRestartPodExec(t, cfg) - - g.Expect(exec.Execute(ctx)).To(Succeed()) - g.Expect(exec.Status(ctx)).To(Equal(ExecutionRunning)) -} - -// Multi-replica StatefulSets are rejected: restart-pod (like replace-pod) only -// supports single-replica nodes. -func TestRestartPod_MultiReplica_TerminalError(t *testing.T) { - g := NewWithT(t) - ctx := context.Background() - node := restartPodNode() - sts := stsForRestart() - three := int32(3) - sts.Spec.Replicas = &three - - cfg := restartPodCfg(t, node, sts) - exec := newRestartPodExec(t, cfg) - - err := exec.Execute(ctx) - g.Expect(err).To(HaveOccurred()) - var termErr *TerminalError - g.Expect(err).To(BeAssignableToTypeOf(termErr)) - g.Expect(err.Error()).To(ContainSubstring("multi-replica")) -} - -// The captured pod already terminating is not re-deleted, and the task waits -// for the replacement. -func TestRestartPod_TargetTerminating_Skipped(t *testing.T) { - g := NewWithT(t) - ctx := context.Background() - node := restartPodNode() - sts := stsForRestart() - termPod := podForRestart(restartedUID, someTime, true, true) - - cfg := restartPodCfg(t, node, sts, termPod) - exec := newRestartPodExec(t, cfg) - - g.Expect(exec.Execute(ctx)).To(Succeed()) - g.Expect(exec.Status(ctx)).To(Equal(ExecutionRunning)) - - got := &corev1.Pod{} - g.Expect(cfg.KubeClient.Get(ctx, types.NamespacedName{Name: termPod.Name, Namespace: restartNS}, got)).To(Succeed()) - g.Expect(got.DeletionTimestamp).NotTo(BeNil(), "terminating pod should still exist (finalizer holds it)") -} diff --git a/internal/task/seinodetask_params.go b/internal/task/seinodetask_params.go index aa00b38..c083333 100644 --- a/internal/task/seinodetask_params.go +++ b/internal/task/seinodetask_params.go @@ -4,8 +4,6 @@ import ( "errors" "fmt" - "k8s.io/apimachinery/pkg/types" - sidecar "github.com/sei-protocol/seictl/sidecar/client" seiv1alpha1 "github.com/sei-protocol/sei-k8s-controller/api/v1alpha1" @@ -92,8 +90,8 @@ func SeiNodeTaskParamsFor(cr *seiv1alpha1.SeiNodeTask, target *seiv1alpha1.SeiNo return awaitNodesAtHeightParams(cr) case seiv1alpha1.SeiNodeTaskKindDiscoverPeers: return discoverPeersParams(cr, target) - case seiv1alpha1.SeiNodeTaskKindRestartPod: - return restartPodParams(cr) + case seiv1alpha1.SeiNodeTaskKindRestartSeid: + return restartSeidParams(cr) case seiv1alpha1.SeiNodeTaskKindMarkReady: return markReadyParams(cr) default: @@ -232,21 +230,14 @@ func discoverPeersParams(cr *seiv1alpha1.SeiNodeTask, target *seiv1alpha1.SeiNod return SeiNodeTaskParams{sidecar.TaskTypeDiscoverPeers, sidecar.DiscoverPeersTask{Sources: sources}}, nil } -func restartPodParams(cr *seiv1alpha1.SeiNodeTask) (SeiNodeTaskParams, error) { - if cr.Spec.RestartPod == nil { - return SeiNodeTaskParams{}, paramsErrorf("spec.restartPod is required for kind=RestartPod") - } - // The pod UID is caller-supplied verbatim (spec.restartPod.podUID) and CEL - // requires it non-empty (and immutable) for kind=RestartPod; this is the - // defensive backstop. - if cr.Spec.RestartPod.PodUID == "" { - return SeiNodeTaskParams{}, paramsErrorf("spec.restartPod.podUID is required for kind=RestartPod") +// restartSeidParams builds the empty sidecar restart-seid payload. CEL requires +// spec.restartSeid for kind=RestartSeid; this guard covers the early-validation +// path (taskParamsForKind runs before the spec is admission-checked in tests). +func restartSeidParams(cr *seiv1alpha1.SeiNodeTask) (SeiNodeTaskParams, error) { + if cr.Spec.RestartSeid == nil { + return SeiNodeTaskParams{}, paramsErrorf("spec.restartSeid is required for kind=RestartSeid") } - return SeiNodeTaskParams{TaskTypeRestartPod, RestartPodParams{ - NodeName: cr.Spec.Target.NodeRef.Name, - Namespace: cr.Namespace, - RestartedPodUID: types.UID(cr.Spec.RestartPod.PodUID), - }}, nil + return SeiNodeTaskParams{sidecar.TaskTypeRestartSeid, sidecar.RestartSeidTask{}}, nil } // markReadyParams builds the empty sidecar mark-ready payload. CEL requires diff --git a/internal/task/seinodetask_params_test.go b/internal/task/seinodetask_params_test.go index 1c04467..ad44cb1 100644 --- a/internal/task/seinodetask_params_test.go +++ b/internal/task/seinodetask_params_test.go @@ -4,8 +4,6 @@ import ( "errors" "testing" - "k8s.io/apimachinery/pkg/types" - sidecar "github.com/sei-protocol/seictl/sidecar/client" seiv1alpha1 "github.com/sei-protocol/sei-k8s-controller/api/v1alpha1" @@ -92,43 +90,45 @@ func TestSeiNodeTaskParamsFor_MarkReady_NilPayload_ParamsBuildFailed(t *testing. } } -// restartPodParams reads the restart UID straight from the immutable -// spec.restartPod.podUID — independent of status.task. Both the early-validation -// path (status.task nil) and the post-synthesis path (status.task set) yield the -// spec value. -func TestRestartPodParams_ReadsSpecPodUID(t *testing.T) { - base := func() *seiv1alpha1.SeiNodeTask { - return &seiv1alpha1.SeiNodeTask{ - Spec: seiv1alpha1.SeiNodeTaskSpec{ - Kind: seiv1alpha1.SeiNodeTaskKindRestartPod, - RestartPod: &seiv1alpha1.RestartPodPayload{PodUID: "pod-uid-1"}, - }, - } - } - - cases := map[string]*seiv1alpha1.SeiNodeTask{ - "status.task nil": base(), - "status.task set": func() *seiv1alpha1.SeiNodeTask { - cr := base() - cr.Status.Task = &seiv1alpha1.SeiNodeTaskExecution{ID: "task-id"} - return cr - }(), - } - - for name, cr := range cases { - t.Run(name, func(t *testing.T) { - p, err := SeiNodeTaskParamsFor(cr, nil) - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - rp, ok := p.Payload.(RestartPodParams) - if !ok { - t.Fatalf("Payload = %T, want RestartPodParams", p.Payload) - } - if rp.RestartedPodUID != types.UID("pod-uid-1") { - t.Errorf("RestartedPodUID = %q, want %q", rp.RestartedPodUID, "pod-uid-1") - } - }) +// kind=RestartSeid maps to the sidecar restart-seid task with an empty payload — +// no target needed, no source building (mirrors MarkReady). +func TestSeiNodeTaskParamsFor_RestartSeid(t *testing.T) { + cr := &seiv1alpha1.SeiNodeTask{ + Spec: seiv1alpha1.SeiNodeTaskSpec{ + Kind: seiv1alpha1.SeiNodeTaskKindRestartSeid, + RestartSeid: &seiv1alpha1.RestartSeidPayload{}, + }, + } + + p, err := SeiNodeTaskParamsFor(cr, nil) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if p.Type != sidecar.TaskTypeRestartSeid { + t.Errorf("Type = %q, want %q", p.Type, sidecar.TaskTypeRestartSeid) + } + if _, ok := p.Payload.(sidecar.RestartSeidTask); !ok { + t.Errorf("Payload = %T, want sidecar.RestartSeidTask", p.Payload) + } +} + +// kind=RestartSeid with a nil payload is a param-build failure (ParamsBuildFailed), +// not an unsupported kind. CEL normally blocks this; the guard is the backstop. +func TestSeiNodeTaskParamsFor_RestartSeid_NilPayload_ParamsBuildFailed(t *testing.T) { + cr := &seiv1alpha1.SeiNodeTask{ + Spec: seiv1alpha1.SeiNodeTaskSpec{Kind: seiv1alpha1.SeiNodeTaskKindRestartSeid}, + } + + _, err := SeiNodeTaskParamsFor(cr, nil) + if err == nil { + t.Fatal("expected error for missing payload, got nil") + } + var unsupported *ErrUnsupportedKind + if errors.As(err, &unsupported) { + t.Error("missing-payload error must not be *ErrUnsupportedKind") + } + if got := FailureReason(err); got != ReasonParamsBuildFailed { + t.Errorf("FailureReason = %q, want %q", got, ReasonParamsBuildFailed) } } diff --git a/internal/task/task.go b/internal/task/task.go index 1c184c9..666a07e 100644 --- a/internal/task/task.go +++ b/internal/task/task.go @@ -208,6 +208,7 @@ var registry = map[string]taskDeserializer{ sidecar.TaskTypeConfigValidate: sidecarTask[sidecar.ConfigValidateTask](true), sidecar.TaskTypeConfigureGenesis: sidecarTask[sidecar.ConfigureGenesisTask](false), sidecar.TaskTypeDiscoverPeers: sidecarTask[sidecar.DiscoverPeersTask](false), + sidecar.TaskTypeRestartSeid: sidecarTask[sidecar.RestartSeidTask](false), sidecar.TaskTypeMarkReady: sidecarTask[sidecar.MarkReadyTask](true), sidecar.TaskTypeSnapshotUpload: sidecarTask[sidecar.SnapshotUploadTask](true), sidecar.TaskTypeGenerateIdentity: sidecarTask[sidecar.GenerateIdentityTask](false), @@ -228,7 +229,6 @@ var registry = map[string]taskDeserializer{ TaskTypeApplyService: deserializeApplyService, TaskTypeApplyRBACProxyConfig: deserializeApplyRBACProxyConfig, TaskTypeReplacePod: deserializeReplacePod, - TaskTypeRestartPod: deserializeRestartPod, TaskTypeObserveImage: deserializeObserveImage, TaskTypeUpdateNodeImage: deserializeUpdateNodeImage, TaskTypeValidateSigningKey: deserializeValidateSigningKey, diff --git a/manifests/sei.io_seinodetasks.yaml b/manifests/sei.io_seinodetasks.yaml index 5980e4b..53377fa 100644 --- a/manifests/sei.io_seinodetasks.yaml +++ b/manifests/sei.io_seinodetasks.yaml @@ -249,33 +249,14 @@ spec: - UpdateNodeImage - AwaitNodesAtHeight - DiscoverPeers - - RestartPod + - RestartSeid - MarkReady type: string markReady: description: MarkReady is the payload for kind=MarkReady. type: object - restartPod: - description: RestartPod is the payload for kind=RestartPod. - properties: - podUID: - description: |- - PodUID is the UID of the pod to restart, supplied by the caller. Obtain it - immediately before creating the task; for the single-replica StatefulSet - the pod is `-0`: - kubectl get pod -0 -o jsonpath='{.metadata.uid}' - The task deletes exactly this pod and completes when an owned Ready pod with - a different UID appears. Content-addressed (UID, not creationTimestamp) so - the OnDelete replacement is unambiguously distinguished from the original. - - The caller owns UID correctness: the controller uses this UID verbatim, so a - UID that no longer matches the live pod (e.g. the pod was recreated - out-of-band after it was read) completes immediately as a no-op. Fetch the - UID as late as possible. - minLength: 1 - type: string - required: - - podUID + restartSeid: + description: RestartSeid is the payload for kind=RestartSeid. type: object target: description: |- @@ -347,13 +328,13 @@ spec: type: object x-kubernetes-validations: - message: exactly one of govSoftwareUpgrade, govVote, awaitCondition, - updateNodeImage, awaitNodesAtHeight, discoverPeers, restartPod, or + updateNodeImage, awaitNodesAtHeight, discoverPeers, restartSeid, or markReady must be set rule: '(has(self.govSoftwareUpgrade) ? 1 : 0) + (has(self.govVote) ? 1 : 0) + (has(self.awaitCondition) ? 1 : 0) + (has(self.updateNodeImage) ? 1 : 0) + (has(self.awaitNodesAtHeight) ? 1 : 0) + (has(self.discoverPeers) - ? 1 : 0) + (has(self.restartPod) ? 1 : 0) + (has(self.markReady) ? - 1 : 0) == 1' + ? 1 : 0) + (has(self.restartSeid) ? 1 : 0) + (has(self.markReady) + ? 1 : 0) == 1' - message: spec.govSoftwareUpgrade is required when kind=GovSoftwareUpgrade rule: self.kind != 'GovSoftwareUpgrade' || has(self.govSoftwareUpgrade) - message: spec.govVote is required when kind=GovVote @@ -366,13 +347,8 @@ spec: rule: self.kind != 'AwaitNodesAtHeight' || has(self.awaitNodesAtHeight) - message: spec.discoverPeers is required when kind=DiscoverPeers rule: self.kind != 'DiscoverPeers' || has(self.discoverPeers) - - message: spec.restartPod is required when kind=RestartPod - rule: self.kind != 'RestartPod' || has(self.restartPod) - - message: spec.restartPod.podUID is required when kind=RestartPod - rule: self.kind != 'RestartPod' || (has(self.restartPod) && size(self.restartPod.podUID) - > 0) - - message: spec.restartPod.podUID is immutable - rule: self.kind != 'RestartPod' || self.restartPod.podUID == oldSelf.restartPod.podUID + - message: spec.restartSeid is required when kind=RestartSeid + rule: self.kind != 'RestartSeid' || has(self.restartSeid) - message: spec.markReady is required when kind=MarkReady rule: self.kind != 'MarkReady' || has(self.markReady) - message: spec.kind is immutable From 6f7fba7c927d95e7db40f8e832ede9efd3f2e5c3 Mon Sep 17 00:00:00 2001 From: bdchatham Date: Sat, 6 Jun 2026 20:34:58 -0700 Subject: [PATCH 2/2] nodetask: consolidate podCycle back into replace-pod (its only user) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit podCycle was extracted to share the StatefulSet-fetch / owned-pod / delete helpers between replace-pod and restart-pod. With RestartPod removed, replace-pod is the sole user, so the shared abstraction is no longer justified (CLAUDE.md: no premature helpers). Dissolve it: - fetchStatefulSet/ownedPods/deletePod become methods on replacePodExecution (which now holds cfg ExecutionConfig directly, no embedded podCycle); guardSelectorAndReplicas/ownedByStatefulSet are plain unexported funcs. - Delete pod_cycle.go / pod_cycle_test.go; the still-relevant unit tests move into replace_pod_test.go and the leftover restart-named fixtures are renamed to replace-pod equivalents. - Drop podReady (dead — only restart-pod used it). Pure internal refactor: replace-pod's revision-gated, readiness-blind behavior is byte-for-byte preserved; no API/CRD change. Co-Authored-By: Claude Opus 4.8 --- internal/task/pod_cycle.go | 113 ----------------- internal/task/pod_cycle_test.go | 198 ------------------------------ internal/task/replace_pod.go | 88 ++++++++++++- internal/task/replace_pod_test.go | 97 +++++++++++++++ 4 files changed, 182 insertions(+), 314 deletions(-) delete mode 100644 internal/task/pod_cycle.go delete mode 100644 internal/task/pod_cycle_test.go diff --git a/internal/task/pod_cycle.go b/internal/task/pod_cycle.go deleted file mode 100644 index 96efeee..0000000 --- a/internal/task/pod_cycle.go +++ /dev/null @@ -1,113 +0,0 @@ -package task - -import ( - "context" - "fmt" - - appsv1 "k8s.io/api/apps/v1" - corev1 "k8s.io/api/core/v1" - apierrors "k8s.io/apimachinery/pkg/api/errors" - "k8s.io/apimachinery/pkg/types" - "sigs.k8s.io/controller-runtime/pkg/client" - - seiv1alpha1 "github.com/sei-protocol/sei-k8s-controller/api/v1alpha1" -) - -// podCycle is the shared plumbing for the controller-side pod-lifecycle tasks -// (currently replace-pod). It deletes StatefulSet-owned pods to drive the -// StatefulSet's OnDelete update strategy — pod lifecycle is the SeiNode -// controller's responsibility, not the StatefulSet controller's. The common -// core is: fetch the StatefulSet (cache-bypassing), guard against an absent -// selector and multi-replica sets, list owned pods, and delete them -// NotFound-tolerantly. Each task supplies only its distinct bits (which -// revisions/UIDs to delete, and its completion/readiness semantics). -type podCycle struct { - cfg ExecutionConfig -} - -// fetchStatefulSet reads the SeiNode's StatefulSet via the APIReader (bypassing -// the controller-runtime cache, since apply-statefulset may have written it in -// the same reconcile). A missing StatefulSet is a transient wait, signalled by -// (nil, nil): apply-statefulset precedes these tasks and the controller retries -// on the next reconcile. -func (p podCycle) fetchStatefulSet(ctx context.Context) (*seiv1alpha1.SeiNode, *appsv1.StatefulSet, error) { - node, err := ResourceAs[*seiv1alpha1.SeiNode](p.cfg) - if err != nil { - return nil, nil, Terminal(err) - } - - sts := &appsv1.StatefulSet{} - key := types.NamespacedName{Name: node.Name, Namespace: node.Namespace} - if err := p.cfg.APIReader.Get(ctx, key, sts); err != nil { - if apierrors.IsNotFound(err) { - return node, nil, nil - } - return node, nil, fmt.Errorf("getting statefulset: %w", err) - } - return node, sts, nil -} - -// guardSelectorAndReplicas rejects StatefulSets these tasks cannot safely -// operate on: one with no selector (owned pods are unidentifiable) and one with -// more than a single replica (reverse-ordinal deletion is unimplemented). Both -// are Terminal — failing loud beats silently violating rolling-update -// semantics. taskName is interpolated into the multi-replica error. -func guardSelectorAndReplicas(node *seiv1alpha1.SeiNode, sts *appsv1.StatefulSet, taskName string) error { - if sts.Spec.Selector == nil || len(sts.Spec.Selector.MatchLabels) == 0 { - return Terminal(fmt.Errorf("statefulset %q has no selector; cannot identify owned pods", node.Name)) - } - if sts.Spec.Replicas != nil && *sts.Spec.Replicas > 1 { - return Terminal(fmt.Errorf( - "%s does not support multi-replica StatefulSets (got replicas=%d); "+ - "add ordinal-aware deletion before scaling up", taskName, *sts.Spec.Replicas)) - } - return nil -} - -// ownedPods lists the pods that match the StatefulSet's selector AND carry an -// ownerReference back to it. The label match alone is insufficient: a -// manually-applied pod can share the selector labels without being owned. -func (p podCycle) ownedPods(ctx context.Context, node *seiv1alpha1.SeiNode, sts *appsv1.StatefulSet) ([]corev1.Pod, error) { - pods := &corev1.PodList{} - if err := p.cfg.KubeClient.List(ctx, pods, - client.InNamespace(node.Namespace), - client.MatchingLabels(sts.Spec.Selector.MatchLabels), - ); err != nil { - return nil, fmt.Errorf("listing pods for statefulset %q: %w", node.Name, err) - } - owned := pods.Items[:0] - for i := range pods.Items { - if ownedByStatefulSet(&pods.Items[i], sts) { - owned = append(owned, pods.Items[i]) - } - } - return owned, nil -} - -// deletePod deletes the pod, tolerating an already-gone pod (NotFound) so the -// task is idempotent across reconciles. -func (p podCycle) deletePod(ctx context.Context, pod *corev1.Pod) error { - if err := p.cfg.KubeClient.Delete(ctx, pod); err != nil && !apierrors.IsNotFound(err) { - return fmt.Errorf("deleting pod %q: %w", pod.Name, err) - } - return nil -} - -func ownedByStatefulSet(pod *corev1.Pod, sts *appsv1.StatefulSet) bool { - for _, ref := range pod.OwnerReferences { - if ref.Kind == "StatefulSet" && ref.UID == sts.UID { - return true - } - } - return false -} - -// podReady reports whether the pod's Ready condition is True. -func podReady(pod *corev1.Pod) bool { - for _, c := range pod.Status.Conditions { - if c.Type == corev1.PodReady { - return c.Status == corev1.ConditionTrue - } - } - return false -} diff --git a/internal/task/pod_cycle_test.go b/internal/task/pod_cycle_test.go deleted file mode 100644 index 4c1f096..0000000 --- a/internal/task/pod_cycle_test.go +++ /dev/null @@ -1,198 +0,0 @@ -package task - -import ( - "context" - "testing" - "time" - - . "github.com/onsi/gomega" - appsv1 "k8s.io/api/apps/v1" - corev1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/runtime" - "k8s.io/apimachinery/pkg/types" - clientgoscheme "k8s.io/client-go/kubernetes/scheme" - "sigs.k8s.io/controller-runtime/pkg/client" - "sigs.k8s.io/controller-runtime/pkg/client/fake" - - seiv1alpha1 "github.com/sei-protocol/sei-k8s-controller/api/v1alpha1" -) - -const ( - restartNS = "default" - restartSTS = "node-1" - restartRev = "rev-1" - kindSTS = "StatefulSet" - - restartedUID = types.UID("pod-uid-original") - replacementUID = types.UID("pod-uid-replacement") -) - -var someTime = metav1.NewTime(time.Date(2026, 6, 6, 12, 0, 0, 0, time.UTC)) - -func restartPodNode() *seiv1alpha1.SeiNode { - return &seiv1alpha1.SeiNode{ - ObjectMeta: metav1.ObjectMeta{Name: restartSTS, Namespace: restartNS}, - Spec: seiv1alpha1.SeiNodeSpec{ - ChainID: "test-chain", - Image: "test:v1", - FullNode: &seiv1alpha1.FullNodeSpec{}, - }, - Status: seiv1alpha1.SeiNodeStatus{Phase: seiv1alpha1.PhaseRunning}, - } -} - -func stsForRestart() *appsv1.StatefulSet { - one := int32(1) - return &appsv1.StatefulSet{ - ObjectMeta: metav1.ObjectMeta{ - Name: restartSTS, Namespace: restartNS, UID: stsUID, Generation: 1, - }, - Spec: appsv1.StatefulSetSpec{ - Replicas: &one, - Selector: &metav1.LabelSelector{MatchLabels: map[string]string{ - "sei.io/node": restartSTS, - }}, - }, - Status: appsv1.StatefulSetStatus{ - ObservedGeneration: 1, - CurrentRevision: restartRev, - UpdateRevision: restartRev, - }, - } -} - -// podForRestart builds a pod owned by the restart STS. uid identifies the pod; -// created sets the creationTimestamp; ready/terminating set the corresponding -// pod state. -func podForRestart(uid types.UID, created metav1.Time, ready, terminating bool) *corev1.Pod { - controller := true - pod := &corev1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: restartSTS + "-0", - Namespace: restartNS, - UID: uid, - CreationTimestamp: created, - Labels: map[string]string{ - "sei.io/node": restartSTS, - appsv1.ControllerRevisionHashLabelKey: restartRev, - }, - OwnerReferences: []metav1.OwnerReference{{ - APIVersion: "apps/v1", - Kind: kindSTS, - Name: restartSTS, - UID: stsUID, - Controller: &controller, - }}, - }, - } - if ready { - pod.Status.Conditions = []corev1.PodCondition{{Type: corev1.PodReady, Status: corev1.ConditionTrue}} - } - if terminating { - now := metav1.Now() - pod.DeletionTimestamp = &now - pod.Finalizers = []string{"test/keep-around"} - } - return pod -} - -func restartPodCfg(t *testing.T, node *seiv1alpha1.SeiNode, objs ...client.Object) ExecutionConfig { - t.Helper() - s := runtime.NewScheme() - if err := clientgoscheme.AddToScheme(s); err != nil { - t.Fatal(err) - } - if err := seiv1alpha1.AddToScheme(s); err != nil { - t.Fatal(err) - } - c := fake.NewClientBuilder().WithScheme(s).WithObjects(objs...).Build() - return ExecutionConfig{KubeClient: c, APIReader: c, Scheme: s, Resource: node} -} - -// fetchStatefulSet signals a missing StatefulSet as (node, nil, nil) so callers -// treat it as a transient wait rather than an error. -func TestPodCycle_FetchStatefulSet_MissingIsTransient(t *testing.T) { - g := NewWithT(t) - node := restartPodNode() - cfg := restartPodCfg(t, node) - - pc := podCycle{cfg: cfg} - gotNode, sts, err := pc.fetchStatefulSet(context.Background()) - g.Expect(err).NotTo(HaveOccurred()) - g.Expect(sts).To(BeNil()) - g.Expect(gotNode.Name).To(Equal(node.Name)) -} - -func TestPodCycle_GuardSelectorAndReplicas(t *testing.T) { - node := restartPodNode() - - t.Run("nil selector is terminal", func(t *testing.T) { - g := NewWithT(t) - sts := stsForRestart() - sts.Spec.Selector = nil - err := guardSelectorAndReplicas(node, sts, TaskTypeReplacePod) - var termErr *TerminalError - g.Expect(err).To(BeAssignableToTypeOf(termErr)) - g.Expect(err.Error()).To(ContainSubstring("no selector")) - }) - - t.Run("empty selector is terminal", func(t *testing.T) { - g := NewWithT(t) - sts := stsForRestart() - sts.Spec.Selector = &metav1.LabelSelector{} - err := guardSelectorAndReplicas(node, sts, TaskTypeReplacePod) - var termErr *TerminalError - g.Expect(err).To(BeAssignableToTypeOf(termErr)) - }) - - t.Run("multi-replica is terminal and names the task", func(t *testing.T) { - g := NewWithT(t) - sts := stsForRestart() - three := int32(3) - sts.Spec.Replicas = &three - err := guardSelectorAndReplicas(node, sts, TaskTypeReplacePod) - var termErr *TerminalError - g.Expect(err).To(BeAssignableToTypeOf(termErr)) - g.Expect(err.Error()).To(ContainSubstring("multi-replica")) - g.Expect(err.Error()).To(ContainSubstring(TaskTypeReplacePod)) - }) - - t.Run("single-replica with selector passes", func(t *testing.T) { - g := NewWithT(t) - g.Expect(guardSelectorAndReplicas(node, stsForRestart(), TaskTypeReplacePod)).To(Succeed()) - }) -} - -// ownedPods returns only pods that carry an ownerReference to the StatefulSet, -// even when a label-matching but unowned pod shares the selector. -func TestPodCycle_OwnedPods_FiltersByOwnership(t *testing.T) { - g := NewWithT(t) - node := restartPodNode() - sts := stsForRestart() - - owned := podForRestart(restartedUID, someTime, false, false) - unowned := podForRestart(replacementUID, someTime, false, false) - unowned.Name = restartSTS + "-imposter" - unowned.OwnerReferences = nil // matches selector labels but not owned - - cfg := restartPodCfg(t, node, sts, owned, unowned) - pc := podCycle{cfg: cfg} - - pods, err := pc.ownedPods(context.Background(), node, sts) - g.Expect(err).NotTo(HaveOccurred()) - g.Expect(pods).To(HaveLen(1)) - g.Expect(pods[0].UID).To(Equal(restartedUID)) -} - -// deletePod tolerates an already-absent pod (NotFound) so the task is -// idempotent across reconciles. -func TestPodCycle_DeletePod_NotFoundTolerant(t *testing.T) { - g := NewWithT(t) - node := restartPodNode() - cfg := restartPodCfg(t, node) - pc := podCycle{cfg: cfg} - - ghost := &corev1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "gone", Namespace: restartNS}} - g.Expect(pc.deletePod(context.Background(), ghost)).To(Succeed()) -} diff --git a/internal/task/replace_pod.go b/internal/task/replace_pod.go index 714c148..51d2e63 100644 --- a/internal/task/replace_pod.go +++ b/internal/task/replace_pod.go @@ -6,6 +6,12 @@ import ( "fmt" appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/types" + "sigs.k8s.io/controller-runtime/pkg/client" + + seiv1alpha1 "github.com/sei-protocol/sei-k8s-controller/api/v1alpha1" ) const TaskTypeReplacePod = "replace-pod" @@ -17,7 +23,7 @@ type ReplacePodParams struct { type replacePodExecution struct { taskBase - podCycle + cfg ExecutionConfig params ReplacePodParams } @@ -30,7 +36,7 @@ func deserializeReplacePod(id string, params json.RawMessage, cfg ExecutionConfi } return &replacePodExecution{ taskBase: taskBase{id: id, status: ExecutionRunning}, - podCycle: podCycle{cfg: cfg}, + cfg: cfg, params: p, }, nil } @@ -66,7 +72,7 @@ func (e *replacePodExecution) Execute(ctx context.Context) error { return nil } - if err := guardSelectorAndReplicas(node, sts, TaskTypeReplacePod); err != nil { + if err := guardSelectorAndReplicas(node, sts); err != nil { return err } @@ -102,3 +108,79 @@ func (e *replacePodExecution) Execute(ctx context.Context) error { func (e *replacePodExecution) Status(_ context.Context) ExecutionStatus { return e.DefaultStatus() } + +// fetchStatefulSet reads the SeiNode's StatefulSet via the APIReader (bypassing +// the controller-runtime cache, since apply-statefulset may have written it in +// the same reconcile). A missing StatefulSet is a transient wait, signalled by +// (nil, nil): apply-statefulset precedes this task and the controller retries +// on the next reconcile. +func (e *replacePodExecution) fetchStatefulSet(ctx context.Context) (*seiv1alpha1.SeiNode, *appsv1.StatefulSet, error) { + node, err := ResourceAs[*seiv1alpha1.SeiNode](e.cfg) + if err != nil { + return nil, nil, Terminal(err) + } + + sts := &appsv1.StatefulSet{} + key := types.NamespacedName{Name: node.Name, Namespace: node.Namespace} + if err := e.cfg.APIReader.Get(ctx, key, sts); err != nil { + if apierrors.IsNotFound(err) { + return node, nil, nil + } + return node, nil, fmt.Errorf("getting statefulset: %w", err) + } + return node, sts, nil +} + +// ownedPods lists the pods that match the StatefulSet's selector AND carry an +// ownerReference back to it. The label match alone is insufficient: a +// manually-applied pod can share the selector labels without being owned. +func (e *replacePodExecution) ownedPods(ctx context.Context, node *seiv1alpha1.SeiNode, sts *appsv1.StatefulSet) ([]corev1.Pod, error) { + pods := &corev1.PodList{} + if err := e.cfg.KubeClient.List(ctx, pods, + client.InNamespace(node.Namespace), + client.MatchingLabels(sts.Spec.Selector.MatchLabels), + ); err != nil { + return nil, fmt.Errorf("listing pods for statefulset %q: %w", node.Name, err) + } + owned := pods.Items[:0] + for i := range pods.Items { + if ownedByStatefulSet(&pods.Items[i], sts) { + owned = append(owned, pods.Items[i]) + } + } + return owned, nil +} + +// deletePod deletes the pod, tolerating an already-gone pod (NotFound) so the +// task is idempotent across reconciles. +func (e *replacePodExecution) deletePod(ctx context.Context, pod *corev1.Pod) error { + if err := e.cfg.KubeClient.Delete(ctx, pod); err != nil && !apierrors.IsNotFound(err) { + return fmt.Errorf("deleting pod %q: %w", pod.Name, err) + } + return nil +} + +// guardSelectorAndReplicas rejects StatefulSets this task cannot safely operate +// on: one with no selector (owned pods are unidentifiable) and one with more +// than a single replica (reverse-ordinal deletion is unimplemented). Both are +// Terminal — failing loud beats silently violating rolling-update semantics. +func guardSelectorAndReplicas(node *seiv1alpha1.SeiNode, sts *appsv1.StatefulSet) error { + if sts.Spec.Selector == nil || len(sts.Spec.Selector.MatchLabels) == 0 { + return Terminal(fmt.Errorf("statefulset %q has no selector; cannot identify owned pods", node.Name)) + } + if sts.Spec.Replicas != nil && *sts.Spec.Replicas > 1 { + return Terminal(fmt.Errorf( + "%s does not support multi-replica StatefulSets (got replicas=%d); "+ + "add ordinal-aware deletion before scaling up", TaskTypeReplacePod, *sts.Spec.Replicas)) + } + return nil +} + +func ownedByStatefulSet(pod *corev1.Pod, sts *appsv1.StatefulSet) bool { + for _, ref := range pod.OwnerReferences { + if ref.Kind == "StatefulSet" && ref.UID == sts.UID { + return true + } + } + return false +} diff --git a/internal/task/replace_pod_test.go b/internal/task/replace_pod_test.go index 144fd28..58d202a 100644 --- a/internal/task/replace_pod_test.go +++ b/internal/task/replace_pod_test.go @@ -23,6 +23,9 @@ const ( stsUID = types.UID("sts-uid-1") testReplaceNs = "default" testReplaceSTS = "node-1" + + ownedPodUID = types.UID("pod-uid-owned") + unownedPodUID = types.UID("pod-uid-unowned") ) func replacePodNode() *seiv1alpha1.SeiNode { @@ -109,6 +112,100 @@ func newReplacePodExec(t *testing.T, cfg ExecutionConfig) TaskExecution { return exec } +func newReplacePodExecRaw(t *testing.T, cfg ExecutionConfig) *replacePodExecution { + t.Helper() + return newReplacePodExec(t, cfg).(*replacePodExecution) +} + +// fetchStatefulSet signals a missing StatefulSet as (node, nil, nil) so callers +// treat it as a transient wait rather than an error. +func TestReplacePod_FetchStatefulSet_MissingIsTransient(t *testing.T) { + g := NewWithT(t) + node := replacePodNode() + cfg := replacePodCfg(t, node) + + exec := newReplacePodExecRaw(t, cfg) + gotNode, sts, err := exec.fetchStatefulSet(context.Background()) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(sts).To(BeNil()) + g.Expect(gotNode.Name).To(Equal(node.Name)) +} + +func TestReplacePod_GuardSelectorAndReplicas(t *testing.T) { + node := replacePodNode() + + t.Run("nil selector is terminal", func(t *testing.T) { + g := NewWithT(t) + sts := stsForReplace("rev-1", "rev-1") + sts.Spec.Selector = nil + err := guardSelectorAndReplicas(node, sts) + var termErr *TerminalError + g.Expect(err).To(BeAssignableToTypeOf(termErr)) + g.Expect(err.Error()).To(ContainSubstring("no selector")) + }) + + t.Run("empty selector is terminal", func(t *testing.T) { + g := NewWithT(t) + sts := stsForReplace("rev-1", "rev-1") + sts.Spec.Selector = &metav1.LabelSelector{} + err := guardSelectorAndReplicas(node, sts) + var termErr *TerminalError + g.Expect(err).To(BeAssignableToTypeOf(termErr)) + }) + + t.Run("multi-replica is terminal and names the task", func(t *testing.T) { + g := NewWithT(t) + sts := stsForReplace("rev-1", "rev-1") + three := int32(3) + sts.Spec.Replicas = &three + err := guardSelectorAndReplicas(node, sts) + var termErr *TerminalError + g.Expect(err).To(BeAssignableToTypeOf(termErr)) + g.Expect(err.Error()).To(ContainSubstring("multi-replica")) + g.Expect(err.Error()).To(ContainSubstring(TaskTypeReplacePod)) + }) + + t.Run("single-replica with selector passes", func(t *testing.T) { + g := NewWithT(t) + g.Expect(guardSelectorAndReplicas(node, stsForReplace("rev-1", "rev-1"))).To(Succeed()) + }) +} + +// ownedPods returns only pods that carry an ownerReference to the StatefulSet, +// even when a label-matching but unowned pod shares the selector. +func TestReplacePod_OwnedPods_FiltersByOwnership(t *testing.T) { + g := NewWithT(t) + node := replacePodNode() + sts := stsForReplace("old-rev", "new-rev") + + owned := podForReplace("old-rev", false) + owned.UID = ownedPodUID + unowned := podForReplace("old-rev", false) + unowned.Name = testReplaceSTS + "-imposter" + unowned.UID = unownedPodUID + unowned.OwnerReferences = nil // matches selector labels but not owned + + cfg := replacePodCfg(t, node, sts, owned, unowned) + exec := newReplacePodExecRaw(t, cfg) + + pods, err := exec.ownedPods(context.Background(), node, sts) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(pods).To(HaveLen(1)) + g.Expect(pods[0].UID).To(Equal(ownedPodUID)) +} + +// deletePod tolerates an already-absent pod (NotFound) so the task is +// idempotent across reconciles. +func TestReplacePod_DeletePod_NotFoundTolerant(t *testing.T) { + g := NewWithT(t) + node := replacePodNode() + cfg := replacePodCfg(t, node) + exec := newReplacePodExecRaw(t, cfg) + + ghost := &corev1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "gone", Namespace: testReplaceNs}} + g.Expect(exec.deletePod(context.Background(), ghost)).To(Succeed()) +} + // Stale-revision pod present → task deletes it and completes. func TestReplacePod_StalePod_DeletesAndCompletes(t *testing.T) { g := NewWithT(t)