Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion api/v2/types_firewall.go
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,7 @@ const (
FirewallDistanceConfigured ConditionType = "Distance"
// FirewallProvisioned indicates that all health conditions have been met at least once.
// Once set to true, it stays true and is used to detect condition degradation.
FirewallHealthy ConditionType = "Healthy"
FirewallProvisioned ConditionType = "Provisioned"
)

// ShootAccess contains secret references to construct a shoot client in the firewall-controller to update its firewall monitor.
Expand Down
5 changes: 0 additions & 5 deletions controllers/firewall/reconcile.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,11 +30,6 @@ func (c *controller) Reconcile(r *controllers.Ctx[*v2.Firewall]) error {
}

SetFirewallStatusFromMonitor(r.Target, mon)

if isAllConditionsMet(r.Target) {
cond := v2.NewCondition(v2.FirewallHealthy, v2.ConditionTrue, "Healthy", "All firewall conditions have been met.")
r.Target.Status.Conditions.Set(cond)
}
}()

fws, err := c.firewallCache.Get(r.Ctx, r.Target)
Expand Down
12 changes: 11 additions & 1 deletion controllers/firewall/status.go
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,11 @@ func SetFirewallStatusFromMonitor(fw *v2.Firewall, mon *v2.FirewallMonitor) {
cond = v2.NewCondition(v2.FirewallDistanceConfigured, v2.ConditionTrue, "NotChecking", "Not checking distance due to firewall annotation.")
fw.Status.Conditions.Set(cond)

if isProvisioned(fw) {
cond := v2.NewCondition(v2.FirewallProvisioned, v2.ConditionTrue, "Provisioned", "All firewall conditions have been met.")
fw.Status.Conditions.Set(cond)
}

return
}

Expand Down Expand Up @@ -191,9 +196,14 @@ func SetFirewallStatusFromMonitor(fw *v2.Firewall, mon *v2.FirewallMonitor) {
cond := v2.NewCondition(v2.FirewallDistanceConfigured, v2.ConditionFalse, "NotConfigured", fmt.Sprintf("Controller has configured distance %d, but %d is specified.", connection.ActualDistance, fw.Distance))
fw.Status.Conditions.Set(cond)
}

if isProvisioned(fw) {
cond := v2.NewCondition(v2.FirewallProvisioned, v2.ConditionTrue, "Provisioned", "All firewall conditions have been met.")
fw.Status.Conditions.Set(cond)
}
}

func isAllConditionsMet(fw *v2.Firewall) bool {
func isProvisioned(fw *v2.Firewall) bool {
for _, ct := range []v2.ConditionType{
v2.FirewallCreated,
v2.FirewallReady,
Expand Down
8 changes: 3 additions & 5 deletions controllers/set/delete.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,15 +46,13 @@ func (c *controller) deleteFirewalls(r *controllers.Ctx[*v2.FirewallSet], fws ..
}
func (c *controller) deleteIfUnhealthyOrTimeout(r *controllers.Ctx[*v2.FirewallSet], fws ...*v2.Firewall) ([]*v2.Firewall, error) {
var result []*v2.Firewall
createTimeout := c.c.GetCreateTimeout()
healthTimeout := c.c.GetFirewallHealthTimeout()

for _, fw := range fws {
status := c.evaluateFirewallConditions(fw)

switch {
case (createTimeout > 0 && status.CreateTimeout) || (healthTimeout > 0 && status.HealthTimeout):
r.Log.Info("firewall health or creation timeout exceeded, deleting from set", "firewall-name", fw.Name)
switch status {
case statusCreateTimeout, statusHealthTimeout:
r.Log.Info("firewall creation or health timeout exceeded, deleting from set", "firewall-name", fw.Name)

err := c.deleteFirewalls(r, fw)
if err != nil {
Expand Down
148 changes: 92 additions & 56 deletions controllers/set/status.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,59 +8,83 @@ import (
"github.com/metal-stack/metal-lib/pkg/pointer"
)

type firewallConditionStatus struct {
IsReady bool
CreateTimeout bool
HealthTimeout bool
}
// status is the result of evaluating the conditions of a single firewall
// (see evaluateFirewallConditions); its possible values are the status* constants.
type status string

func (c *controller) evaluateFirewallConditions(fw *v2.Firewall) firewallConditionStatus {
var (
unhealthyTimeout = c.c.GetFirewallHealthTimeout()
allocationTimeout = c.c.GetCreateTimeout()

created = pointer.SafeDeref(fw.Status.Conditions.Get(v2.FirewallCreated)).Status == v2.ConditionTrue
ready = pointer.SafeDeref(fw.Status.Conditions.Get(v2.FirewallReady)).Status == v2.ConditionTrue
connected = pointer.SafeDeref(fw.Status.Conditions.Get(v2.FirewallControllerConnected)).Status == v2.ConditionTrue
seedConnected = pointer.SafeDeref(fw.Status.Conditions.Get(v2.FirewallControllerSeedConnected)).Status == v2.ConditionTrue
distanceConfigured = pointer.SafeDeref(fw.Status.Conditions.Get(v2.FirewallDistanceConfigured)).Status == v2.ConditionTrue
allConditionsMet = created && ready && connected && seedConnected && distanceConfigured

seedUpdatedTime = pointer.SafeDeref(fw.Status.ControllerStatus).SeedUpdated.Time
timeSinceReconcile = time.Since(seedUpdatedTime)
allocationTime = pointer.SafeDeref(fw.Status.MachineStatus).AllocationTimestamp.Time
)

if allConditionsMet {
return firewallConditionStatus{IsReady: true}
}
// Possible evaluation results for a firewall that is owned by a firewall set.
const (
// statusReady is counted as a ready replica in the set status.
statusReady status = "ready"
// statusProgressing is counted as a progressing replica in the set status.
statusProgressing status = "progressing"
// statusUnhealthy is counted as an unhealthy replica in the set status,
// without a timeout having been exceeded.
statusUnhealthy status = "unhealthy"
// statusHealthTimeout is counted as an unhealthy replica; the health timeout
// was exceeded for one of the checked conditions.
statusHealthTimeout status = "health-timeout"
// statusCreateTimeout is counted as an unhealthy replica; the create timeout
// was exceeded before the firewall became provisioned.
statusCreateTimeout status = "create-timeout"
)

func (c *controller) evaluateFirewallConditions(fw *v2.Firewall) status {
switch fw.Status.Phase {
case v2.FirewallPhaseCreating, v2.FirewallPhaseCrashing:
var (
createTimeout = c.c.GetCreateTimeout()
provisioned = pointer.SafeDeref(fw.Status.Conditions.Get(v2.FirewallProvisioned)).Status == v2.ConditionTrue
)

// duration after which a firewall in the creation phase will be recreated, exceeded
if allocationTimeout > 0 && fw.Status.Phase == v2.FirewallPhaseCreating && !allocationTime.IsZero() {
if time.Since(allocationTime) > allocationTimeout {
c.log.Info("create timeout exceeded", "firewall-name", fw.Name, "allocated-at", allocationTime.String(), "timeout-after", allocationTimeout.String())
return firewallConditionStatus{CreateTimeout: true}
if provisioned {
return statusReady
}
}
// Only apply health timeout once we have a non-zero seed reconcile timestamp.
if (!ready || !seedConnected || !connected) && unhealthyTimeout > 0 && created && !seedUpdatedTime.IsZero() && timeSinceReconcile > unhealthyTimeout {
c.log.Info("health timeout exceeded", "firewall-name", fw.Name, "last-reconciled-at", seedUpdatedTime.String(), "timeout-after", unhealthyTimeout.String())
return firewallConditionStatus{HealthTimeout: true}
}
// Firewall was healthy at one point (all conditions were met), but then one of the monitor conditions
// degraded so the firewall is unhealthy. Only check monitor conditions (connected, seedConnected, distanceConfigured)
// because the ready condition degradation is already handled by the time-based health timeout above.
wasHealthy := pointer.SafeDeref(fw.Status.Conditions.Get(v2.FirewallHealthy)).Status == v2.ConditionTrue
monitorConditionsDegraded := !connected || !seedConnected || !distanceConfigured
if monitorConditionsDegraded && wasHealthy && unhealthyTimeout > 0 {
c.log.Info("firewall monitor conditions degraded", "firewall-name", fw.Name)
return firewallConditionStatus{HealthTimeout: true}
}
//if everything returns false, it is progressing
return firewallConditionStatus{
IsReady: allConditionsMet,
CreateTimeout: false,
HealthTimeout: false,

if createTimeout > 0 {
createTimeout := c.c.GetCreateTimeout()

if ok := checkForTimeout(fw, v2.FirewallReady, createTimeout); ok {
c.log.Info("create timeout exceeded, firewall not provisioned in time", "firewall-name", fw.Name, "timeout-after", createTimeout.String())
return statusCreateTimeout
}
}

return statusProgressing

case v2.FirewallPhaseRunning:
fallthrough

default:
var (
created = pointer.SafeDeref(fw.Status.Conditions.Get(v2.FirewallCreated)).Status == v2.ConditionTrue
ready = pointer.SafeDeref(fw.Status.Conditions.Get(v2.FirewallReady)).Status == v2.ConditionTrue
provisioned = pointer.SafeDeref(fw.Status.Conditions.Get(v2.FirewallProvisioned)).Status == v2.ConditionTrue
connected = pointer.SafeDeref(fw.Status.Conditions.Get(v2.FirewallControllerConnected)).Status == v2.ConditionTrue
seedConnected = pointer.SafeDeref(fw.Status.Conditions.Get(v2.FirewallControllerSeedConnected)).Status == v2.ConditionTrue
distanceConfigured = pointer.SafeDeref(fw.Status.Conditions.Get(v2.FirewallDistanceConfigured)).Status == v2.ConditionTrue

allConditionsMet = created && ready && provisioned && connected && seedConnected && distanceConfigured
)

if allConditionsMet {
return statusReady
}

if provisioned {
healthTimeout := c.c.GetFirewallHealthTimeout()

switch {
case !seedConnected:
if ok := checkForTimeout(fw, v2.FirewallControllerSeedConnected, healthTimeout); ok {
c.log.Info("health timeout exceeded, seed connection lost", "firewall-name", fw.Name, "timeout-after", healthTimeout.String())
return statusHealthTimeout
}

case !connected:
if ok := checkForTimeout(fw, v2.FirewallControllerConnected, healthTimeout); ok {
c.log.Info("health timeout exceeded, firewall monitor not reconciled anymore by controller", "firewall-name", fw.Name, "timeout-after", healthTimeout.String())
return statusHealthTimeout
}

case !ready:
if ok := checkForTimeout(fw, v2.FirewallReady, healthTimeout); ok {
c.log.Info("health timeout exceeded, firewall is not ready from perspective of the metal-api", "firewall-name", fw.Name, "timeout-after", healthTimeout.String())
return statusHealthTimeout
}
}
}

return statusUnhealthy
}
}

Expand All @@ -73,17 +97,19 @@ func (c *controller) setStatus(r *controllers.Ctx[*v2.FirewallSet], ownedFirewal
for _, fw := range ownedFirewalls {
statusReport := c.evaluateFirewallConditions(fw)

switch {
case statusReport.IsReady:
switch statusReport {
case statusReady:
r.Target.Status.ReadyReplicas++
continue

case statusReport.CreateTimeout || statusReport.HealthTimeout:
case statusProgressing:
r.Target.Status.ProgressingReplicas++
continue
case statusUnhealthy, statusCreateTimeout, statusHealthTimeout:
fallthrough
default:
r.Target.Status.UnhealthyReplicas++
continue
}

r.Target.Status.ProgressingReplicas++
}

revision, err := controllers.Revision(r.Target)
Expand All @@ -94,3 +120,13 @@ func (c *controller) setStatus(r *controllers.Ctx[*v2.FirewallSet], ownedFirewal

return nil
}

// checkForTimeout reports whether the given condition of the firewall has been in
// its current state for longer than timeout, measured from the condition's
// LastTransitionTime.
//
// It returns false when:
//   - timeout is zero or negative (timeout checking is disabled), or
//   - no transition time is recorded for the condition, e.g. because the condition
//     has not been set on the firewall yet. Without this guard the zero time would
//     be treated as "transitioned ages ago" and every firewall lacking the
//     condition would be reported as timed out immediately.
func checkForTimeout(fw *v2.Firewall, condition v2.ConditionType, timeout time.Duration) bool {
	if timeout <= 0 {
		return false
	}

	// NOTE(review): relies on pointer.SafeDeref yielding the zero value for a
	// missing (nil) condition — consistent with how it is used elsewhere in this file.
	cond := pointer.SafeDeref(fw.Status.Conditions.Get(condition))

	lastTransition := cond.LastTransitionTime.Time
	if lastTransition.IsZero() {
		// Nothing to measure against; do not treat an unset timestamp as a timeout.
		return false
	}

	return time.Since(lastTransition) > timeout
}
Loading