Skip to content

Commit a05d39a

Browse files
committed
mma: fix todos related to is-leaseholder assertions
The existing assertions were valid, but needed better commentary. Also added a new assertion. Epic: CRDB-55052 Release note: None
1 parent ff23cba commit a05d39a

File tree

1 file changed

+25
-20
lines changed

1 file changed

+25
-20
lines changed

pkg/kv/kvserver/allocator/mmaprototype/cluster_state_rebalance_stores.go

Lines changed: 25 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -524,8 +524,12 @@ func (re *rebalanceEnv) rebalanceLeasesFromLocalStoreID(
524524
store.StoreID, store.dimSummary[CPURate], overloadSlow)
525525
// This store is local, and cpu overloaded. Shed leases first.
526526
//
527-
// NB: any ranges at this store that don't have pending changes must
528-
// have this local store as the leaseholder.
527+
// NB: due to REQUIREMENT(change-computation), the top-k ranges for ss
528+
// reflect the latest adjusted state, including pending changes. Thus,
529+
// this store must be a replica and the leaseholder, which is asserted
530+
// below. The code below additionally ignores the range if it has pending
531+
// changes, which while not necessary for the is-leaseholder assertion,
532+
// makes the case where we assert even narrower.
529533
topKRanges := ss.adjusted.topKRanges[localStoreID]
530534
var leaseTransferCount int
531535
n := topKRanges.len()
@@ -548,37 +552,38 @@ func (re *rebalanceEnv) rebalanceLeasesFromLocalStoreID(
548552
log.KvDistribution.VEventf(ctx, 2, "skipping r%d: has pending changes", rangeID)
549553
continue
550554
}
555+
foundLocalReplica := false
551556
for _, repl := range rstate.replicas {
552557
if repl.StoreID != localStoreID { // NB: localStoreID == ss.StoreID == store.StoreID
553558
continue
554559
}
555560
if !repl.IsLeaseholder {
556-
// TODO(tbg): is this true? Can't there be ranges with replicas on
557-
// multiple local stores, and wouldn't this assertion fire in that
558-
// case once rebalanceStores is invoked on whichever of the two
559-
// stores doesn't hold the lease?
560-
//
561-
// TODO(tbg): see also the other assertion below (leaseholderID !=
562-
// store.StoreID) which seems similar to this one.
563-
log.KvDistribution.Fatalf(ctx, "internal state inconsistency: replica considered for lease shedding has no pending"+
564-
" changes but is not leaseholder: %+v", rstate)
561+
// NB: due to REQUIREMENT(change-computation), the top-k
562+
// ranges for ss reflect the latest adjusted state, including
563+
// pending changes. Thus, this store must be a replica and the
564+
// leaseholder, hence this assertion, and other assertions
565+
// below. Additionally, the code above ignored the range if it
566+
// has pending changes, which while not necessary for the
567+
// is-leaseholder assertion, makes the case where we assert
568+
// even narrower.
569+
log.KvDistribution.Fatalf(ctx,
570+
"internal state inconsistency: replica considered for lease shedding has no pending"+
571+
" changes but is not leaseholder: %+v", rstate)
565572
}
573+
foundLocalReplica = true
574+
break
575+
}
576+
if !foundLocalReplica {
577+
log.KvDistribution.Fatalf(
578+
ctx, "internal state inconsistency: local store is not a replica: %+v", rstate)
566579
}
567580
if re.now.Sub(rstate.lastFailedChange) < re.lastFailedChangeDelayDuration {
568581
log.KvDistribution.VEventf(ctx, 2, "skipping r%d: too soon after failed change", rangeID)
569582
continue
570583
}
571584
re.ensureAnalyzedConstraints(rstate)
572585
if rstate.constraints.leaseholderID != store.StoreID {
573-
// We should not panic here since the leaseQueue may have shed the
574-
// lease and informed MMA, since the last time MMA computed the
575-
// top-k ranges. This is useful for debugging in the prototype, due
576-
// to the lack of unit tests.
577-
//
578-
// TODO(tbg): can the above scenario currently happen? ComputeChanges
579-
// first processes the leaseholder message and then, still under the
580-
// lock, immediately calls into rebalanceStores (i.e. this store).
581-
// Doesn't this mean that the leaseholder view is up to date?
586+
// See the earlier comment about assertions.
582587
panic(fmt.Sprintf("internal state inconsistency: "+
583588
"store=%v range_id=%v should be leaseholder but isn't",
584589
store.StoreID, rangeID))

0 commit comments

Comments
 (0)