Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 35 additions & 15 deletions api/v1alpha1/seinodetask_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ import (

// SeiNodeTaskKind discriminates the SeiNodeTask spec union. Exactly one of
// the matching payload sub-structs in SeiNodeTaskSpec must be set.
// +kubebuilder:validation:Enum=GovSoftwareUpgrade;GovVote;AwaitCondition;UpdateNodeImage;AwaitNodesAtHeight;DiscoverPeers;RestartPod
// +kubebuilder:validation:Enum=GovSoftwareUpgrade;GovVote;AwaitCondition;UpdateNodeImage;AwaitNodesAtHeight;DiscoverPeers;RestartPod;MarkReady
type SeiNodeTaskKind string

const (
Expand Down Expand Up @@ -40,9 +40,9 @@ const (

// SeiNodeTaskKindDiscoverPeers backs the sidecar `discover-peers` task.
// Re-resolves the target's spec.peers and writes persistent-peers into the
// on-disk config.toml (scalar merge). Disk-only: a running seid does not
// re-read config.toml, so compose with kind=RestartPod to apply. The two are
// not atomic — a DiscoverPeers success followed by a RestartPod failure
// on-disk config.toml (scalar merge). Disk-only: a running seid reads
// config.toml at startup, so compose with kind=RestartPod to apply. The two
// run independently — a DiscoverPeers success followed by a RestartPod failure
// leaves config.toml ahead of the running peer set until the next restart.
SeiNodeTaskKindDiscoverPeers SeiNodeTaskKind = "DiscoverPeers"

Expand All @@ -54,6 +54,17 @@ const (
// The pod to delete is caller-supplied via spec.restartPod.podUID; a UID that
// no longer matches the live pod completes as a no-op (see PodUID).
SeiNodeTaskKindRestartPod SeiNodeTaskKind = "RestartPod"

// SeiNodeTaskKindMarkReady backs the sidecar `mark-ready` task (fire-and-forget).
// Re-marks sidecar readiness so /v0/healthz returns 200, which unblocks the seid
// start-gate and the proxy readiness probe. Useful after a readiness-blind restart
// or image rollout to promote a parked node promptly, rather than waiting for the
// node controller's reapproval poll. Empty payload — nothing to parameterize.
//
// Completion is the submit ack: the task reports Complete once the sidecar
// accepts the request, a beat before /v0/healthz serves 200. Gate on the node
// actually serving with a following AwaitCondition/AwaitNodesAtHeight step.
SeiNodeTaskKindMarkReady SeiNodeTaskKind = "MarkReady"
)

// SeiNodeTaskPhase is the high-level lifecycle state of a SeiNodeTask.
Expand Down Expand Up @@ -99,7 +110,7 @@ const (
// Field names locked at v1alpha1 — see docs/design/seinode-task-lld.md
// (PR sei-protocol/sei-k8s-controller#277).
//
// +kubebuilder:validation:XValidation:rule="(has(self.govSoftwareUpgrade) ? 1 : 0) + (has(self.govVote) ? 1 : 0) + (has(self.awaitCondition) ? 1 : 0) + (has(self.updateNodeImage) ? 1 : 0) + (has(self.awaitNodesAtHeight) ? 1 : 0) + (has(self.discoverPeers) ? 1 : 0) + (has(self.restartPod) ? 1 : 0) == 1",message="exactly one of govSoftwareUpgrade, govVote, awaitCondition, updateNodeImage, awaitNodesAtHeight, discoverPeers, or restartPod must be set"
// +kubebuilder:validation:XValidation:rule="(has(self.govSoftwareUpgrade) ? 1 : 0) + (has(self.govVote) ? 1 : 0) + (has(self.awaitCondition) ? 1 : 0) + (has(self.updateNodeImage) ? 1 : 0) + (has(self.awaitNodesAtHeight) ? 1 : 0) + (has(self.discoverPeers) ? 1 : 0) + (has(self.restartPod) ? 1 : 0) + (has(self.markReady) ? 1 : 0) == 1",message="exactly one of govSoftwareUpgrade, govVote, awaitCondition, updateNodeImage, awaitNodesAtHeight, discoverPeers, restartPod, or markReady must be set"
// +kubebuilder:validation:XValidation:rule="self.kind != 'GovSoftwareUpgrade' || has(self.govSoftwareUpgrade)",message="spec.govSoftwareUpgrade is required when kind=GovSoftwareUpgrade"
// +kubebuilder:validation:XValidation:rule="self.kind != 'GovVote' || has(self.govVote)",message="spec.govVote is required when kind=GovVote"
// +kubebuilder:validation:XValidation:rule="self.kind != 'AwaitCondition' || has(self.awaitCondition)",message="spec.awaitCondition is required when kind=AwaitCondition"
Expand All @@ -108,6 +119,7 @@ const (
// +kubebuilder:validation:XValidation:rule="self.kind != 'DiscoverPeers' || has(self.discoverPeers)",message="spec.discoverPeers is required when kind=DiscoverPeers"
// +kubebuilder:validation:XValidation:rule="self.kind != 'RestartPod' || has(self.restartPod)",message="spec.restartPod is required when kind=RestartPod"
// +kubebuilder:validation:XValidation:rule="self.kind != 'RestartPod' || (has(self.restartPod) && size(self.restartPod.podUID) > 0)",message="spec.restartPod.podUID is required when kind=RestartPod"
// +kubebuilder:validation:XValidation:rule="self.kind != 'MarkReady' || has(self.markReady)",message="spec.markReady is required when kind=MarkReady"
// +kubebuilder:validation:XValidation:rule="self.kind == oldSelf.kind",message="spec.kind is immutable"
type SeiNodeTaskSpec struct {
// Kind selects the task implementation. Immutable after creation.
Expand Down Expand Up @@ -158,6 +170,10 @@ type SeiNodeTaskSpec struct {
// RestartPod is the payload for kind=RestartPod.
// +optional
RestartPod *RestartPodPayload `json:"restartPod,omitempty"`

// MarkReady is the payload for kind=MarkReady.
// +optional
MarkReady *MarkReadyPayload `json:"markReady,omitempty"`
}

// SeiNodeTaskTarget identifies the single SeiNode this task operates on.
Expand Down Expand Up @@ -346,13 +362,11 @@ type AwaitNodesAtHeightPayload struct {
// DiscoverPeersPayload is the payload for kind=DiscoverPeers. It is empty: the
// task re-resolves the target SeiNode's current spec.peers (ec2Tags, static,
// and label sources) and writes persistent-peers into the on-disk config.toml
// via the sidecar discover-peers task. There is nothing to parameterize — the
// peer sources are fully determined by the target's spec/status. Fields would
// only be added here if a future feature needs to override the target's
// declared peers (not in scope).
// via the sidecar discover-peers task. The peer sources are fully determined by
// the target's spec/status, so there is nothing to parameterize.
//
// Writes config.toml only; the running seid does not pick up the new peers
// until a restart. Compose with kind=RestartPod to apply. See the
// Writes config.toml only; the running seid picks up the new peers on its next
// restart. Compose with kind=RestartPod to apply. See the
// SeiNodeTaskKindDiscoverPeers doc comment for the sequencing and atomicity
// caveats.
type DiscoverPeersPayload struct{}
Expand All @@ -370,14 +384,20 @@ type RestartPodPayload struct {
// a different UID appears. Content-addressed (UID, not creationTimestamp) so
// the OnDelete replacement is unambiguously distinguished from the original.
//
// The caller owns UID correctness: a non-empty UID that no longer matches the
// live pod (e.g. the pod was recreated out-of-band after it was read) deletes
// nothing and completes immediately as a no-op. Fetch the UID as late as
// possible — the controller does not re-validate it against the live pod.
// The caller owns UID correctness: the controller uses this UID verbatim, so a
// UID that no longer matches the live pod (e.g. the pod was recreated
// out-of-band after it was read) completes immediately as a no-op. Fetch the
// UID as late as possible.
// +kubebuilder:validation:MinLength=1
PodUID string `json:"podUID"`
}

// MarkReadyPayload is the payload for kind=MarkReady. It carries no fields: the
// sidecar mark-ready task (sidecar.MarkReadyTask) takes no inputs — it re-marks
// the target sidecar's in-process readiness flag so /v0/healthz serves 200.
//
// The empty object must still be present (`markReady: {}`) when kind=MarkReady:
// the validation rules on SeiNodeTaskSpec require exactly one payload to be set
// and require spec.markReady for this kind. See the SeiNodeTaskKindMarkReady
// doc comment for the use case and the completion semantics.
type MarkReadyPayload struct{}

// ---------------------------------------------------------------------------
// Status
// ---------------------------------------------------------------------------
Expand Down
20 changes: 20 additions & 0 deletions api/v1alpha1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

21 changes: 14 additions & 7 deletions config/crd/sei.io_seinodetasks.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -250,7 +250,11 @@ spec:
- AwaitNodesAtHeight
- DiscoverPeers
- RestartPod
- MarkReady
type: string
markReady:
description: MarkReady is the payload for kind=MarkReady.
type: object
restartPod:
description: RestartPod is the payload for kind=RestartPod.
properties:
Expand All @@ -264,10 +268,10 @@ spec:
a different UID appears. Content-addressed (UID, not creationTimestamp) so
the OnDelete replacement is unambiguously distinguished from the original.

The caller owns UID correctness: a non-empty UID that no longer matches the
live pod (e.g. the pod was recreated out-of-band after it was read) deletes
nothing and completes immediately as a no-op. Fetch the UID as late as
possible — the controller does not re-validate it against the live pod.
The caller owns UID correctness: the controller uses this UID verbatim, so a
UID that no longer matches the live pod (e.g. the pod was recreated
out-of-band after it was read) completes immediately as a no-op. Fetch the
UID as late as possible.
minLength: 1
type: string
required:
Expand Down Expand Up @@ -343,12 +347,13 @@ spec:
type: object
x-kubernetes-validations:
- message: exactly one of govSoftwareUpgrade, govVote, awaitCondition,
updateNodeImage, awaitNodesAtHeight, discoverPeers, or restartPod
must be set
updateNodeImage, awaitNodesAtHeight, discoverPeers, restartPod, or
markReady must be set
rule: '(has(self.govSoftwareUpgrade) ? 1 : 0) + (has(self.govVote) ?
1 : 0) + (has(self.awaitCondition) ? 1 : 0) + (has(self.updateNodeImage)
? 1 : 0) + (has(self.awaitNodesAtHeight) ? 1 : 0) + (has(self.discoverPeers)
? 1 : 0) + (has(self.restartPod) ? 1 : 0) == 1'
? 1 : 0) + (has(self.restartPod) ? 1 : 0) + (has(self.markReady) ?
1 : 0) == 1'
- message: spec.govSoftwareUpgrade is required when kind=GovSoftwareUpgrade
rule: self.kind != 'GovSoftwareUpgrade' || has(self.govSoftwareUpgrade)
- message: spec.govVote is required when kind=GovVote
Expand All @@ -366,6 +371,8 @@ spec:
- message: spec.restartPod.podUID is required when kind=RestartPod
rule: self.kind != 'RestartPod' || (has(self.restartPod) && size(self.restartPod.podUID)
> 0)
- message: spec.markReady is required when kind=MarkReady
rule: self.kind != 'MarkReady' || has(self.markReady)
- message: spec.kind is immutable
rule: self.kind == oldSelf.kind
status:
Expand Down
15 changes: 8 additions & 7 deletions internal/controller/nodetask/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ const (
// operator sets spec.timeoutSeconds).
defaultRestartPodTimeout = 10 * time.Minute
defaultDiscoverPeersTimeout = 2 * time.Minute
defaultMarkReadyTimeout = 2 * time.Minute
)

// resultRequeueImmediate mirrors planner.ResultRequeueImmediate without
Expand Down Expand Up @@ -330,6 +331,8 @@ func effectiveTimeout(cr *seiv1alpha1.SeiNodeTask) time.Duration {
return defaultRestartPodTimeout
case seiv1alpha1.SeiNodeTaskKindDiscoverPeers:
return defaultDiscoverPeersTimeout
case seiv1alpha1.SeiNodeTaskKindMarkReady:
return defaultMarkReadyTimeout
default:
return 0
}
Expand Down Expand Up @@ -359,13 +362,11 @@ func taskParamsForKind(cr *seiv1alpha1.SeiNodeTask, target *seiv1alpha1.SeiNode)
// populateOutputs stamps the typed per-kind outputs on Complete.
//
// Sidecar-backed kinds (GovVote, GovSoftwareUpgrade, AwaitCondition,
// AwaitNodesAtHeight) intentionally do NOT populate status.outputs in this
// PR. The sidecar's TaskResult shape carries the values, but extracting
// them would require structural changes on the sidecar side (typed
// per-task result payloads). We defer that work and leave the typed
// output fields on the CRD unset and forward-compatible. Downstream
// consumers coordinate via chain queries (chain-as-medium), not
// task-to-task currying. See conversation history / PR 3 scope notes.
// AwaitNodesAtHeight) leave their typed output fields unset for now: surfacing
// the values the sidecar's TaskResult carries needs typed per-task result
// payloads on the sidecar side, which is deferred. The CRD fields stay
// forward-compatible, and downstream consumers coordinate via chain queries
// (chain-as-medium) rather than task-to-task currying.
func populateOutputs(cr *seiv1alpha1.SeiNodeTask, target *seiv1alpha1.SeiNode) {
switch cr.Spec.Kind {
case seiv1alpha1.SeiNodeTaskKindUpdateNodeImage:
Expand Down
56 changes: 56 additions & 0 deletions internal/controller/nodetask/controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,7 @@ type fakeSidecarClient struct {
mu sync.Mutex
submitted []sidecar.TaskRequest
results map[uuid.UUID]*sidecar.TaskResult
getCalls int
}

func newFakeSidecarClient() *fakeSidecarClient {
Expand All @@ -254,6 +255,7 @@ func (f *fakeSidecarClient) SubmitTask(_ context.Context, req sidecar.TaskReques
func (f *fakeSidecarClient) GetTask(_ context.Context, id uuid.UUID) (*sidecar.TaskResult, error) {
f.mu.Lock()
defer f.mu.Unlock()
f.getCalls++
if r, ok := f.results[id]; ok {
return r, nil
}
Expand Down Expand Up @@ -907,6 +909,60 @@ func TestReconcile_DiscoverPeers_LongTargetWait_DoesNotImmediatelyTimeOut(t *tes
g.Expect(getTask(t, ctx, c).Status.Phase).To(Equal(seiv1alpha1.SeiNodeTaskPhaseComplete))
}

// ---------------------------------------------------------------------------
// MarkReady
// ---------------------------------------------------------------------------

// newMarkReadyTask returns a SeiNodeTask fixture of kind MarkReady that targets
// the shared test node, requires it to be in PhaseRunning, and carries the
// (empty) MarkReady payload the spec union expects for this kind.
func newMarkReadyTask() *seiv1alpha1.SeiNodeTask {
	meta := metav1.ObjectMeta{
		Name:       testTaskName,
		Namespace:  testNS,
		UID:        "task-uid-markready",
		Generation: 1,
	}
	target := seiv1alpha1.SeiNodeTaskTarget{
		NodeRef:      seiv1alpha1.SeiNodeTaskNodeRef{Name: testNodeName},
		RequirePhase: seiv1alpha1.PhaseRunning,
	}
	return &seiv1alpha1.SeiNodeTask{
		ObjectMeta: meta,
		Spec: seiv1alpha1.SeiNodeTaskSpec{
			Kind:      seiv1alpha1.SeiNodeTaskKindMarkReady,
			Target:    target,
			MarkReady: &seiv1alpha1.MarkReadyPayload{},
		},
	}
}

// TestReconcile_MarkReady_EndToEnd pins the fire-and-forget contract of the
// MarkReady kind (registered as sidecarTask[...](true)): Execute completes the
// task as soon as SubmitTask is acked, so Complete means "the mark-ready
// request was accepted" and is reached on the submit reconcile itself.
//
// Expected sequence:
//   - reconcile 1 synthesizes the task and leaves it Running;
//   - reconcile 2 submits mark-ready and lands on Complete with no staged
//     sidecar result and no further reconcile.
//
// The fake sidecar must have seen exactly one mark-ready submission and zero
// GetTask calls — completion is read back from the cached terminal state.
func TestReconcile_MarkReady_EndToEnd(t *testing.T) {
	g := NewWithT(t)
	ctx := context.Background()
	start := time.Now()
	task := newMarkReadyTask()
	target := newRunningNode()
	sc := newFakeSidecarClient()

	reconciler, cli := newReconcilerWithSidecar(t, start, sc, task, target)

	// One reconcile pass followed by a check of the persisted phase.
	reconcileExpectingPhase := func(want any) {
		_, err := reconciler.Reconcile(ctx, req())
		g.Expect(err).NotTo(HaveOccurred())
		g.Expect(getTask(t, ctx, cli).Status.Phase).To(Equal(want))
	}

	// R1: synthesize task.
	reconcileExpectingPhase(seiv1alpha1.SeiNodeTaskPhaseRunning)

	// R2: Execute submits mark-ready and completes immediately on the ack.
	reconcileExpectingPhase(seiv1alpha1.SeiNodeTaskPhaseComplete)

	sc.mu.Lock()
	defer sc.mu.Unlock()

	g.Expect(sc.submitted).To(HaveLen(1))
	g.Expect(sc.submitted[0].Type).To(Equal(sidecar.TaskTypeMarkReady))

	// A sidecarTask[...](false) regression would poll GetTask until terminal
	// and trip this: getCalls must stay at zero for a fire-and-forget task.
	g.Expect(sc.getCalls).To(Equal(0))
}

// Execution-start timeout still fires: a DiscoverPeers task whose sidecar never
// completes Fails(Timeout) at executionStartedAt + default, confirming the
// budget is enforced (just from the right reference point).
Expand Down
35 changes: 35 additions & 0 deletions internal/controller/nodetask/envtest/cel_validation_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,41 @@ func TestCEL_RestartPod_Accepted(t *testing.T) {
g.Expect(testCli.Create(testCtx, snt)).To(Succeed())
}

// TestCEL_MarkReady_Accepted verifies the happy path of the CRD validation:
// a task with kind=MarkReady and only the (empty) markReady payload set is
// admitted by the API server.
func TestCEL_MarkReady_Accepted(t *testing.T) {
	g := NewWithT(t)
	task := baseTask(makeNamespace(t), "markready-ok", seiv1alpha1.SeiNodeTaskKindMarkReady)
	task.Spec.MarkReady = &seiv1alpha1.MarkReadyPayload{}

	createErr := testCli.Create(testCtx, task)
	g.Expect(createErr).To(Succeed())
}

// TestCEL_MarkReady_NoPayload_Rejected verifies that kind=MarkReady without
// any payload is refused. Either validation rule may be the one reported — the
// exactly-one union rule or the kind-specific "markReady is required" rule —
// so the assertion accepts both messages.
func TestCEL_MarkReady_NoPayload_Rejected(t *testing.T) {
	g := NewWithT(t)
	task := baseTask(makeNamespace(t), "markready-nopayload", seiv1alpha1.SeiNodeTaskKindMarkReady)

	createErr := testCli.Create(testCtx, task)
	g.Expect(createErr).To(HaveOccurred())

	rejection := Or(
		ContainSubstring("exactly one"),
		ContainSubstring("markReady is required"),
	)
	g.Expect(createErr.Error()).To(rejection)
}

// TestCEL_MarkReady_MultiplePayloads_Rejected verifies that setting a second
// payload next to markReady (here restartPod) is refused by the exactly-one
// union rule, even though the payload matching the kind is present.
func TestCEL_MarkReady_MultiplePayloads_Rejected(t *testing.T) {
	g := NewWithT(t)
	task := baseTask(makeNamespace(t), "markready-two-payloads", seiv1alpha1.SeiNodeTaskKindMarkReady)

	// Two members of the union populated at once.
	task.Spec.MarkReady = &seiv1alpha1.MarkReadyPayload{}
	task.Spec.RestartPod = &seiv1alpha1.RestartPodPayload{PodUID: "pod-uid-1"}

	createErr := testCli.Create(testCtx, task)
	g.Expect(createErr).To(HaveOccurred())
	g.Expect(createErr.Error()).To(ContainSubstring("exactly one"))
}

// kind=DiscoverPeers with NO payload is rejected (zero payloads).
func TestCEL_DiscoverPeers_NoPayload_Rejected(t *testing.T) {
g := NewWithT(t)
Expand Down
Loading
Loading