Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 25 additions & 38 deletions api/v1alpha1/seinodetask_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ import (

// SeiNodeTaskKind discriminates the SeiNodeTask spec union. Exactly one of
// the matching payload sub-structs in SeiNodeTaskSpec must be set.
// +kubebuilder:validation:Enum=GovSoftwareUpgrade;GovVote;AwaitCondition;UpdateNodeImage;AwaitNodesAtHeight;DiscoverPeers;RestartPod;MarkReady
// +kubebuilder:validation:Enum=GovSoftwareUpgrade;GovVote;AwaitCondition;UpdateNodeImage;AwaitNodesAtHeight;DiscoverPeers;RestartSeid;MarkReady
type SeiNodeTaskKind string

const (
Expand Down Expand Up @@ -41,19 +41,22 @@ const (
// SeiNodeTaskKindDiscoverPeers backs the sidecar `discover-peers` task.
// Re-resolves the target's spec.peers and writes persistent-peers into the
// on-disk config.toml (scalar merge). Disk-only: a running seid reads
// config.toml at startup, so compose with kind=RestartPod to apply. The two
// run independently — a DiscoverPeers success followed by a RestartPod failure
// config.toml at startup, so compose with kind=RestartSeid to apply. The two
// run independently — a DiscoverPeers success followed by a RestartSeid failure
// leaves config.toml ahead of the running peer set until the next restart.
SeiNodeTaskKindDiscoverPeers SeiNodeTaskKind = "DiscoverPeers"

// SeiNodeTaskKindRestartPod backs the controller-side `restart-pod` task.
// Deletes the target's single pod so the StatefulSet's OnDelete strategy
// recreates it and seid re-reads config.toml. Completes when a distinct new
// pod is Ready. Single-replica stop-then-start gated by the RWO data PVC, so
// double-sign-safe: the new pod cannot bind the PVC until the old terminates.
// The pod to delete is caller-supplied via spec.restartPod.podUID; a UID that
// no longer matches the live pod completes as a no-op (see PodUID).
SeiNodeTaskKindRestartPod SeiNodeTaskKind = "RestartPod"
// SeiNodeTaskKindRestartSeid backs the sidecar `restart-seid` task. Restarts
// seid in place — the sidecar SIGTERMs the co-located seid process and the
// kubelet restarts only that container — so seid re-reads config.toml WITHOUT
// bouncing the sidecar. Because the sidecar process never restarts, its
// in-process readiness flag survives and /v0/healthz stays 200, so there is no
// mark-ready reapproval gap (unlike a full pod restart). Empty payload — the
// target is identified from the SeiNode, no caller-supplied pod UID.
//
// Completion means seid's local RPC is serving again, NOT that the node is
// caught up or voting; gate on height with a downstream AwaitNodesAtHeight
// task. Supersedes RestartPod.
SeiNodeTaskKindRestartSeid SeiNodeTaskKind = "RestartSeid"

// SeiNodeTaskKindMarkReady backs the sidecar `mark-ready` task (fire-and-forget).
// Re-marks sidecar readiness so /v0/healthz returns 200, which unblocks the seid
Expand Down Expand Up @@ -110,16 +113,14 @@ const (
// Field names locked at v1alpha1 — see docs/design/seinode-task-lld.md
// (PR sei-protocol/sei-k8s-controller#277).
//
// +kubebuilder:validation:XValidation:rule="(has(self.govSoftwareUpgrade) ? 1 : 0) + (has(self.govVote) ? 1 : 0) + (has(self.awaitCondition) ? 1 : 0) + (has(self.updateNodeImage) ? 1 : 0) + (has(self.awaitNodesAtHeight) ? 1 : 0) + (has(self.discoverPeers) ? 1 : 0) + (has(self.restartPod) ? 1 : 0) + (has(self.markReady) ? 1 : 0) == 1",message="exactly one of govSoftwareUpgrade, govVote, awaitCondition, updateNodeImage, awaitNodesAtHeight, discoverPeers, restartPod, or markReady must be set"
// +kubebuilder:validation:XValidation:rule="(has(self.govSoftwareUpgrade) ? 1 : 0) + (has(self.govVote) ? 1 : 0) + (has(self.awaitCondition) ? 1 : 0) + (has(self.updateNodeImage) ? 1 : 0) + (has(self.awaitNodesAtHeight) ? 1 : 0) + (has(self.discoverPeers) ? 1 : 0) + (has(self.restartSeid) ? 1 : 0) + (has(self.markReady) ? 1 : 0) == 1",message="exactly one of govSoftwareUpgrade, govVote, awaitCondition, updateNodeImage, awaitNodesAtHeight, discoverPeers, restartSeid, or markReady must be set"
// +kubebuilder:validation:XValidation:rule="self.kind != 'GovSoftwareUpgrade' || has(self.govSoftwareUpgrade)",message="spec.govSoftwareUpgrade is required when kind=GovSoftwareUpgrade"
// +kubebuilder:validation:XValidation:rule="self.kind != 'GovVote' || has(self.govVote)",message="spec.govVote is required when kind=GovVote"
// +kubebuilder:validation:XValidation:rule="self.kind != 'AwaitCondition' || has(self.awaitCondition)",message="spec.awaitCondition is required when kind=AwaitCondition"
// +kubebuilder:validation:XValidation:rule="self.kind != 'UpdateNodeImage' || has(self.updateNodeImage)",message="spec.updateNodeImage is required when kind=UpdateNodeImage"
// +kubebuilder:validation:XValidation:rule="self.kind != 'AwaitNodesAtHeight' || has(self.awaitNodesAtHeight)",message="spec.awaitNodesAtHeight is required when kind=AwaitNodesAtHeight"
// +kubebuilder:validation:XValidation:rule="self.kind != 'DiscoverPeers' || has(self.discoverPeers)",message="spec.discoverPeers is required when kind=DiscoverPeers"
// +kubebuilder:validation:XValidation:rule="self.kind != 'RestartPod' || has(self.restartPod)",message="spec.restartPod is required when kind=RestartPod"
// +kubebuilder:validation:XValidation:rule="self.kind != 'RestartPod' || (has(self.restartPod) && size(self.restartPod.podUID) > 0)",message="spec.restartPod.podUID is required when kind=RestartPod"
// +kubebuilder:validation:XValidation:rule="self.kind != 'RestartPod' || self.restartPod.podUID == oldSelf.restartPod.podUID",message="spec.restartPod.podUID is immutable"
// +kubebuilder:validation:XValidation:rule="self.kind != 'RestartSeid' || has(self.restartSeid)",message="spec.restartSeid is required when kind=RestartSeid"
// +kubebuilder:validation:XValidation:rule="self.kind != 'MarkReady' || has(self.markReady)",message="spec.markReady is required when kind=MarkReady"
// +kubebuilder:validation:XValidation:rule="self.kind == oldSelf.kind",message="spec.kind is immutable"
type SeiNodeTaskSpec struct {
Expand Down Expand Up @@ -168,9 +169,9 @@ type SeiNodeTaskSpec struct {
// +optional
DiscoverPeers *DiscoverPeersPayload `json:"discoverPeers,omitempty"`

// RestartPod is the payload for kind=RestartPod.
// RestartSeid is the payload for kind=RestartSeid.
// +optional
RestartPod *RestartPodPayload `json:"restartPod,omitempty"`
RestartSeid *RestartSeidPayload `json:"restartSeid,omitempty"`

// MarkReady is the payload for kind=MarkReady.
// +optional
Expand Down Expand Up @@ -367,31 +368,17 @@ type AwaitNodesAtHeightPayload struct {
// determined by the target's spec/status, so there is nothing to parameterize.
//
// Writes config.toml only; the running seid picks up the new peers on its next
// restart. Compose with kind=RestartPod to apply. See the
// restart. Compose with kind=RestartSeid to apply. See the
// SeiNodeTaskKindDiscoverPeers doc comment for the sequencing and atomicity
// caveats.
type DiscoverPeersPayload struct{}

// RestartPodPayload is the payload for kind=RestartPod. The task deletes
// exactly the pod named by PodUID (delete → OnDelete recreate) so seid re-reads
// config.toml on start. See the SeiNodeTaskKindRestartPod doc comment for the
// completion signal and safety properties.
type RestartPodPayload struct {
// PodUID is the UID of the pod to restart, supplied by the caller. Obtain it
// immediately before creating the task; for the single-replica StatefulSet
// the pod is `<target.nodeRef.Name>-0`:
// kubectl get pod <node>-0 -o jsonpath='{.metadata.uid}'
// The task deletes exactly this pod and completes when an owned Ready pod with
// a different UID appears. Content-addressed (UID, not creationTimestamp) so
// the OnDelete replacement is unambiguously distinguished from the original.
//
// The caller owns UID correctness: the controller uses this UID verbatim, so a
// UID that no longer matches the live pod (e.g. the pod was recreated
// out-of-band after it was read) completes immediately as a no-op. Fetch the
// UID as late as possible.
// +kubebuilder:validation:MinLength=1
PodUID string `json:"podUID"`
}
// RestartSeidPayload is the payload for kind=RestartSeid. It is intentionally
// empty: the sidecar restart-seid task (sidecar.RestartSeidTask) takes no
// inputs — it SIGTERMs the co-located seid process so the kubelet restarts that
// container in place and seid re-reads config.toml. The target is identified
// from the SeiNode, so there is no caller-supplied pod UID.
//
// Although it carries no fields, the payload must still be present (an empty
// object) when kind=RestartSeid: the spec validation rules require
// has(self.restartSeid) for that kind.
//
// See the SeiNodeTaskKindRestartSeid doc comment for the completion signal and
// rationale.
type RestartSeidPayload struct{}

// MarkReadyPayload is the payload for kind=MarkReady. It is empty: the sidecar
// mark-ready task (sidecar.MarkReadyTask) takes no inputs — it re-marks the
Expand Down
14 changes: 7 additions & 7 deletions api/v1alpha1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

40 changes: 8 additions & 32 deletions config/crd/sei.io_seinodetasks.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -249,33 +249,14 @@ spec:
- UpdateNodeImage
- AwaitNodesAtHeight
- DiscoverPeers
- RestartPod
- RestartSeid
- MarkReady
type: string
markReady:
description: MarkReady is the payload for kind=MarkReady.
type: object
restartPod:
description: RestartPod is the payload for kind=RestartPod.
properties:
podUID:
description: |-
PodUID is the UID of the pod to restart, supplied by the caller. Obtain it
immediately before creating the task; for the single-replica StatefulSet
the pod is `<target.nodeRef.Name>-0`:
kubectl get pod <node>-0 -o jsonpath='{.metadata.uid}'
The task deletes exactly this pod and completes when an owned Ready pod with
a different UID appears. Content-addressed (UID, not creationTimestamp) so
the OnDelete replacement is unambiguously distinguished from the original.

The caller owns UID correctness: the controller uses this UID verbatim, so a
UID that no longer matches the live pod (e.g. the pod was recreated
out-of-band after it was read) completes immediately as a no-op. Fetch the
UID as late as possible.
minLength: 1
type: string
required:
- podUID
restartSeid:
description: RestartSeid is the payload for kind=RestartSeid.
type: object
target:
description: |-
Expand Down Expand Up @@ -347,13 +328,13 @@ spec:
type: object
x-kubernetes-validations:
- message: exactly one of govSoftwareUpgrade, govVote, awaitCondition,
updateNodeImage, awaitNodesAtHeight, discoverPeers, restartPod, or
updateNodeImage, awaitNodesAtHeight, discoverPeers, restartSeid, or
markReady must be set
rule: '(has(self.govSoftwareUpgrade) ? 1 : 0) + (has(self.govVote) ?
1 : 0) + (has(self.awaitCondition) ? 1 : 0) + (has(self.updateNodeImage)
? 1 : 0) + (has(self.awaitNodesAtHeight) ? 1 : 0) + (has(self.discoverPeers)
? 1 : 0) + (has(self.restartPod) ? 1 : 0) + (has(self.markReady) ?
1 : 0) == 1'
? 1 : 0) + (has(self.restartSeid) ? 1 : 0) + (has(self.markReady)
? 1 : 0) == 1'
- message: spec.govSoftwareUpgrade is required when kind=GovSoftwareUpgrade
rule: self.kind != 'GovSoftwareUpgrade' || has(self.govSoftwareUpgrade)
- message: spec.govVote is required when kind=GovVote
Expand All @@ -366,13 +347,8 @@ spec:
rule: self.kind != 'AwaitNodesAtHeight' || has(self.awaitNodesAtHeight)
- message: spec.discoverPeers is required when kind=DiscoverPeers
rule: self.kind != 'DiscoverPeers' || has(self.discoverPeers)
- message: spec.restartPod is required when kind=RestartPod
rule: self.kind != 'RestartPod' || has(self.restartPod)
- message: spec.restartPod.podUID is required when kind=RestartPod
rule: self.kind != 'RestartPod' || (has(self.restartPod) && size(self.restartPod.podUID)
> 0)
- message: spec.restartPod.podUID is immutable
rule: self.kind != 'RestartPod' || self.restartPod.podUID == oldSelf.restartPod.podUID
- message: spec.restartSeid is required when kind=RestartSeid
rule: self.kind != 'RestartSeid' || has(self.restartSeid)
- message: spec.markReady is required when kind=MarkReady
rule: self.kind != 'MarkReady' || has(self.markReady)
- message: spec.kind is immutable
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ require (
github.com/google/uuid v1.6.0
github.com/onsi/gomega v1.39.1
github.com/sei-protocol/sei-config v0.0.19
github.com/sei-protocol/seictl v0.0.55
github.com/sei-protocol/seictl v0.0.56
github.com/urfave/cli/v3 v3.6.1
go.opentelemetry.io/otel v1.43.0
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.43.0
Expand Down
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -1786,6 +1786,8 @@ github.com/sei-protocol/seictl v0.0.50 h1:zBOLIPI/G0oPsLV0DLlGnjCgckkyihOZ03llkF
github.com/sei-protocol/seictl v0.0.50/go.mod h1:yNPLcFKRTbKvsdKFuQseMHkkXTol7FXidnGKJa/bUXQ=
github.com/sei-protocol/seictl v0.0.55 h1:JZ15hoAS7ft3LL85SeYtkP3Gr/oMlEQnBjhefbDdiZ4=
github.com/sei-protocol/seictl v0.0.55/go.mod h1:sDWY/llzQPnblG/WS6uQ7vqDtshNQ0WJTJzRUgmfFpg=
github.com/sei-protocol/seictl v0.0.56 h1:zRCZdGiRrvP+zv/DYhmfP570MOR9nO6o9VWncABkJSo=
github.com/sei-protocol/seictl v0.0.56/go.mod h1:sDWY/llzQPnblG/WS6uQ7vqDtshNQ0WJTJzRUgmfFpg=
github.com/sei-protocol/seilog v0.0.3 h1:Zi7oWXdX5jv92dY8n482xH032LtNebC89Y+qYZlBn0Y=
github.com/sei-protocol/seilog v0.0.3/go.mod h1:CKg58wraWnB3gRxWQ0v1rIVr0gmDHjkfP1bM2giKFFU=
github.com/shirou/gopsutil v2.20.5+incompatible/go.mod h1:5b4v6he4MtMOwMlS0TUMTu2PcXUg8+E1lC7eC3UO/RA=
Expand Down
11 changes: 6 additions & 5 deletions internal/controller/nodetask/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,10 +56,11 @@ const (
sidecarStatusTimeout = 15 * time.Second

// Per-kind execution-timeout defaults applied when spec.timeoutSeconds is 0.
// These bound kinds whose completion depends on a pod becoming Ready or a
// These bound kinds whose completion depends on seid coming back up or a
// quick disk write; sidecar-backed gov/await kinds stay unbounded (an
// operator sets spec.timeoutSeconds).
defaultRestartPodTimeout = 10 * time.Minute
// operator sets spec.timeoutSeconds). RestartSeid gets a generous 10m: the
// sidecar SIGTERMs seid (up to ~90s graceful shutdown), then polls until
// seid's RPC is back up.
defaultRestartSeidTimeout = 10 * time.Minute
defaultDiscoverPeersTimeout = 2 * time.Minute
defaultMarkReadyTimeout = 2 * time.Minute
)
Expand Down Expand Up @@ -320,8 +321,8 @@ func effectiveTimeout(cr *seiv1alpha1.SeiNodeTask) time.Duration {
return time.Duration(cr.Spec.TimeoutSeconds) * time.Second
}
switch cr.Spec.Kind {
case seiv1alpha1.SeiNodeTaskKindRestartPod:
return defaultRestartPodTimeout
case seiv1alpha1.SeiNodeTaskKindRestartSeid:
return defaultRestartSeidTimeout
case seiv1alpha1.SeiNodeTaskKindDiscoverPeers:
return defaultDiscoverPeersTimeout
case seiv1alpha1.SeiNodeTaskKindMarkReady:
Expand Down
Loading
Loading