Skip to content

Commit 1f2576b

Browse files
committed
mmaprototype: clean up comments
1 parent 3ec86c8 commit 1f2576b

File tree

2 files changed

+109
-101
lines changed

2 files changed

+109
-101
lines changed

pkg/kv/kvserver/allocator/mmaprototype/allocator_state.go

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -703,14 +703,11 @@ func sortTargetCandidateSetAndPick(
703703
return cands.candidates[j].StoreID
704704
}
705705

706-
// ensureAnalyzedConstraints populates the constraints for the given range.
707-
// It should be called when computing lease-transfers / rebalancing candidates
708-
// for a range.
706+
// ensureAnalyzedConstraints ensures that the constraints field of rangeState is
707+
// populated. It uses rangeState.{replicas,conf} as inputs to the computation.
709708
//
710-
// NB: given rstate.conf should already be normalized using
711-
// makeNormalizedSpanConfig. The caller is responsible for calling
712-
// clearAnalyzedConstraints when rstate or the rstate.constraints is no longer
713-
// needed.
709+
// NB: Caller is responsible for calling clearAnalyzedConstraints when rstate or
710+
// the rstate.constraints is no longer needed.
714711
func (cs *clusterState) ensureAnalyzedConstraints(rstate *rangeState) {
715712
if rstate.constraints != nil {
716713
return

pkg/kv/kvserver/allocator/mmaprototype/constraint.go

Lines changed: 105 additions & 94 deletions
Original file line numberDiff line numberDiff line change
@@ -669,6 +669,20 @@ type analyzedConstraints struct {
669669
// slices are also empty.
670670
constraints []internedConstraintsConjunction
671671

672+
// satisfiedByReplica[kind][i] contains the set of storeIDs that satisfy
673+
// constraints[i]. These are for stores that satisfy at least one
674+
// constraint. Each store should appear exactly once in the 2D slice
675+
// ac.satisfiedByReplica[kind]. For example,
676+
// satisfiedByReplica[voterIndex][0] = [1, 2, 3], means that the first
677+
// constraint (constraints[0]) is satisfied by storeIDs 1, 2, and 3.
678+
//
679+
// NB: satisfiedByReplica[nonVoterIndex][i] is populated even if this
680+
// analyzedConstraints represents a voter constraint. This is done because
681+
// satisfiedByReplica[voterIndex][i] may not sufficiently satisfy a
682+
// constraint (or even if it does, at a later point one could be
683+
// decommissioning one of the voters), and populating the nonVoterIndex
684+
// allows MMA to decide to promote a non-voter.
685+
//
672686
// Overlapping conjunctions: There is nothing preventing overlapping
673687
// ConstraintsConjunctions such that the same store can satisfy multiple,
674688
// though we expect this to be uncommon. This is algorithmically painful
@@ -691,25 +705,30 @@ type analyzedConstraints struct {
691705
// greedy manner instead of considering all possibilities. So all the
692706
// satisfiedBy slices represent sets that are non-intersecting.
693707
//
694-
// satisfiedByReplica[kind][i] contains the set of storeIDs that satisfy
695-
// constraints[i]. Populated by initialize and used later by mmma to
696-
// determine candidates to satisfy constraints such as
697-
// candidatesToReplaceVoterForRebalance.
698-
// For example, satisfiedByReplica[voterIndex][0] = [1, 2, 3], means that
699-
// the first constraint (constraints[0]) is satisfied by storeIDs 1, 2, and
700-
// 3.
701-
//
702-
// NB: this does not mean the current replica set satisfies constraints[i]
703-
// since we may populate satisfiedByReplica[nonVoterIndex][i] for voter
704-
// constraints as well. This just means that the store can satisfy
705-
// constraints[i] regardless of the replica type.
708+
// Example on why ordering from most strict to least strict is important:
709+
// If the constraints were [region=a,zone=a1:1],[region=a:1] (which is
710+
// sorted in decreasing strictness), a voter store in (region=a,zone=a1)
711+
// would satisfy both, but it would be present only in
712+
// satisfiedByReplica[voterIndex][i] for i=0, not i=1. If the user specified
713+
// the constraints in irregular order, [region=a]:1,[region=a,zone=a1]:1,
714+
// the voter would only be present in satisfiedByReplica[voterIndex][0] as
715+
// well (which now refers to the relaxed constraint), and as a consequence
716+
// we would consider (region=a,zone=a1) unfulfilled by this voter. If the
717+
// full constraints conjunction had been [region=a]:1, [region=a,zone=a1]:1,
718+
// [region=a,zone=a2]:1, and there were voters in that region in zones a1,
719+
// a2, and a3, we could end up satisfying [region=a]:1 with the voter in a1,
720+
// but then wouldn't consider it for the stricter constraint
721+
// [region=a,zone=a1]:1 which no other voter can satisfy. The constraints
722+
// would then appear unsatisfiable to the allocator.
723+
706724
satisfiedByReplica [numReplicaKinds][][]roachpb.StoreID
707725

708-
// These are stores that satisfy no constraint. Even though we are strict
709-
// about constraint satisfaction, this can happen if the SpanConfig changed
710-
// or the attributes of a store changed. Additionally, if these
711-
// analyzedConstraints correspond to voterConstraints, there can be
712-
// non-voters here (which is harmless).
726+
// ac.satisfiedNoConstraintReplica[kind] contains the set of storeIDs that
727+
// satisfy no constraint. Even though we are strict about constraint
728+
// satisfaction, this can happen if the SpanConfig changed or the attributes
729+
// of a store changed. Note that if these analyzedConstraints correspond to
730+
// voterConstraints, there can be non-voters here which is never used but
731+
// harmless.
713732
satisfiedNoConstraintReplica [numReplicaKinds][]roachpb.StoreID
714733
}
715734

@@ -744,8 +763,12 @@ func clear2DSlice[T any](v [][]T) [][]T {
744763
return v
745764
}
746765

747-
// rangeAnalyzedConstraints is a function of the spanConfig and the current
748-
// stores that have replicas for that range (including the ReplicaType).
766+
// rangeAnalyzedConstraints represents the analyzed constraint state for a
767+
// range, derived from the range's span config and current replica placement
768+
// (including ReplicaType). It contains information used to handle constraint
769+
// satisfaction as part of allocation decisions (e.g. helping identify candidate
770+
// stores for range rebalancing, lease transfers, up-replication,
771+
// down-replication, validating whether constraints are satisfied).
749772
//
750773
// LEARNER and VOTER_DEMOTING_LEARNER replicas are ignored.
751774
type rangeAnalyzedConstraints struct {
@@ -842,68 +865,39 @@ type storeMatchesConstraintInterface interface {
842865
storeMatches(storeID roachpb.StoreID, constraintConj constraintsConj) bool
843866
}
844867

845-
// initialize analyzes the current replica set and determines which constraints
846-
// replicas satisfy, populating ac.constraints, ac.satisfiedByReplica and
847-
// ac.satisfiedNoConstraintReplica. They are later used by mma to compute
848-
// lease-transfer and rebalancing candidates by functions like
849-
// candidatesToReplaceVoterForRebalance. The given buf.replicas is already
850-
// populated with the current replica set from buf.tryAddingStore.
851-
//
852-
// For stores that satisfy no constraint,
853-
// ac.satisfiedNoConstraintReplica[kind][i] will be populated with the replica
854-
// storeIDs that satisfy no constraint.
855-
//
856-
// For stores that satisfy at least one constraint,
857-
// ac.satisfiedByReplica[kind][i] is filled with the storeIDs of replicas that
858-
// satisfy ac.constraints[i]. Each store should appear exactly once in the 2D
859-
// slice ac.satisfiedByReplica[kind]. Phase 2 below attempts to be optimal by
860-
// prioritizing under-satisfied constraints and assigning stores to those first.
861-
// A constraint may end up being over-satisfied
862-
// (len(ac.satisfiedByReplica[kind][i]) > ac.constraints[i].numReplicas) in
863-
// phase 3. A constraint may remain under-satisfied. If a store is
864-
// under-satisfied in phase 2, it cannot be corrected in phase 3 and will
865-
// continue to be under-satisfied. (TODO: Is this right during review)
866-
//
867-
// NB: the given ac.constraints can represent either replica or voter
868-
// constraints, but we still populate both voter and non-voter indices
869-
// regardless because we need to know which constraints are satisfied by all
870-
// replicas to determine when promotions or demotions should occur.
868+
// initialize takes in the constraints, current replica set, and a constraint
869+
// matcher. It populates ac.constraints, ac.satisfiedByReplica and
870+
// ac.satisfiedNoConstraintReplica by analyzing the current replica set and the
871+
// constraints replicas satisfy. The given buf.replicas is already populated
872+
// with the current replica set from buf.tryAddingStore.
871873
//
872-
// It has three phases: 1. For each replica (voter and non-voter), determine
873-
// which constraint indices in ac.constraints its store satisfies. We record
874-
// these matches in buf.replicaConstraintIndices by iterating over all replicas,
875-
// all constraint conjunctions, and checking store satisfaction for each using
876-
// constraintMatcher.storeMatches.
874+
// The algorithm proceeds in 3 phases, with the description of each phase
875+
// outlined below at the start of each phase.
877876
//
878-
// buf.replicaConstraintIndices[kind][i] will be populated with the constraint
879-
// indices that the i+1-th voter/non-voter satisfies. This serves as a scratch
880-
// space. Note that as we assign constraint to stores that satisfy it, we will
881-
// be clearing the constraint indices for that store in
882-
// buf.replicaConstraintIndices[kind][i] to indicate that we cannot reuse the
883-
// store to satisfy a different constraint.
877+
// NB: ac.initialize guarantees to assign exactly one constraint in
878+
// ac.satisfiedByReplica to each store that satisfies at least one constraint.
879+
// But constraints may remain under-satisfied. If a constraint remains
880+
// under-satisfied after phase 2, it cannot be corrected in phase 3 and remain
881+
// to be under-satisfied. This is not essential to correctness but just happens
882+
// to be true of the algorithm.
884883
//
885-
// During this phase, it also populates ac.satisfiedByReplica[kind][i] for
886-
// stores that satisfy exactly one constraint and populates
887-
// ac.satisfiedNoConstraintReplica[kind][i] for stores that satisfy no
888-
// constraint. Since we will be assigning at least one constraint to each store,
889-
// these stores are unambiguous.
890-
//
891-
// 2. For each constraint, iterate over all replicas that satisfy it and assign
892-
// replicas to the constraint in a round-robin manner, skipping once the
893-
// constraint is satisfied. This phase does not allow over-satisfaction.
894884
// TODO(wenyihu6): Add an example for phase 2 - if s1 satisfies c1(num=1) and s2
895885
// satisfies both c1(num=1) and c2(num=1), we should prefer assigning s2 to c2
896-
// rather than consuming it again for c1. TODO(wenyihu6): Verify behavior in
897-
// cases like: s1 satisfies c1(num=1) and c2(num=1), s2 satisfies only c1, and
898-
// s1 is used for c1 and c2 cannot be satisfied although we could have.
886+
// rather than consuming it again for c1.
899887
//
900-
// 3. For any remaining stores that satisfy multiple constraints, assign each to
901-
// the first constraint it satisfies. This phase allows over-satisfaction, and
902-
// some constraints may still end up under- or over-filled.
903-
904-
// TODO(wenyihu6): document the lifecycle of scratch space
905-
// replicaConstraintIndices once understood. TODO(wenyihu6): add more tests +
906-
// examples here
888+
// NB: Note that buf.replicaConstraintIndices serves as a scratch space to
889+
// reduce memory allocation and is expected to be empty at the start of every
890+
// analyzedConstraints.initialize call and cleared at the end of the call. For
891+
// every store that satisfies a constraint, we will be clearing the constraint
892+
// indices for that store in buf.replicaConstraintIndices[kind][i] once we have
893+
// assigned it to a constraint in ac.satisfiedByReplica[kind][i]. Since we
894+
// guarantee that each store that satisfies a constraint is assigned to exactly
895+
// one constraint, buf.replicaConstraintIndices will be an empty 2D slice as
896+
// part of the algorithm. In addition, clearing the constraint indices for that
897+
// store in buf.replicaConstraintIndices is also important to help us indicates
898+
// that the store cannot be reused to satisfy a different constraint.
899+
//
900+
// TODO(wenyihu6): add more tests + examples here
907901
func (ac *analyzedConstraints) initialize(
908902
constraints []internedConstraintsConjunction,
909903
buf *analyzeConstraintsBuf,
@@ -918,6 +912,16 @@ func (ac *analyzedConstraints) initialize(
918912
ac.satisfiedByReplica[voterIndex] = extend2DSlice(ac.satisfiedByReplica[voterIndex])
919913
ac.satisfiedByReplica[nonVoterIndex] = extend2DSlice(ac.satisfiedByReplica[nonVoterIndex])
920914
}
915+
// In phase 1, for each replica (voter and non-voter), determine which
916+
// constraint indices in ac.constraints its store satisfies. We record these
917+
// matches in buf.replicaConstraintIndices by iterating over all replicas,
918+
// all constraint conjunctions, and checking store satisfaction for each
919+
// using constraintMatcher.storeMatches. In addition, it also populates
920+
// ac.satisfiedNoConstraintReplica[kind][i] for stores that satisfy no
921+
// constraint and ac.satisfiedByReplica[kind][i] for stores that satisfy
922+
// exactly one constraint. Since we will be assigning at least one
923+
// constraint to each store, these stores are unambiguous.
924+
//
921925
// Compute the list of all constraints satisfied by each store.
922926
for kind := voterIndex; kind < numReplicaKinds; kind++ {
923927
for i, store := range buf.replicas[kind] {
@@ -952,6 +956,12 @@ func (ac *analyzedConstraints) initialize(
952956
len(ac.satisfiedByReplica[nonVoterIndex][constraintIndex]) >= int(ac.constraints[constraintIndex].numReplicas)
953957
}
954958

959+
// In phase 2, for each under-satisfied constraint, iterate through all replicas
960+
// and assign them to the first store that meets the constraint. Continue until
961+
// the constraint becomes satisfied, then move on to the next one. Skip further
962+
// matching once a constraint is fulfilled. Note that this phase does not allow
963+
// over-satisfaction.
964+
//
955965
// The only stores not yet in ac are the ones that satisfy multiple
956966
// constraints. For each store, the constraint indices it satisfies are in
957967
// increasing order. Satisfy constraints in order, while not
@@ -975,11 +985,6 @@ func (ac *analyzedConstraints) initialize(
975985
}
976986
}
977987

978-
// NB: We check the `satisfied` flag directly instead of calling
979-
// `isConstraintSatisfied` again, since this value should be set
980-
// in the previous loop correctly when the constraint becomes
981-
// satisfied.
982-
//
983988
// Check if constraint is now satisfied to avoid
984989
// over-satisfaction.
985990
if satisfied {
@@ -992,6 +997,12 @@ func (ac *analyzedConstraints) initialize(
992997
}
993998
}
994999
}
1000+
// In phase 3, for each remaining store that satisfies >1 constraints,
1001+
// assign each to the first constraint it satisfies. This phase allows
1002+
// over-satisfaction
1003+
// (len(ac.satisfiedByReplica[voterIndex][i]+len(ac.satisfiedByReplica[nonVoterIndex][i]))
1004+
// > ac.constraints[i].numReplicas).
1005+
//
9951006
// Nothing over-satisfied. Go and greedily assign.
9961007
for kind := voterIndex; kind < numReplicaKinds; kind++ {
9971008
for i := range buf.replicaConstraintIndices[kind] {
@@ -1036,20 +1047,16 @@ func diversityOfTwoStoreSets(
10361047
return sumScore, numSamples
10371048
}
10381049

1039-
// diversityScore returns the voter and replica diversity scores of the given replicas sets.
1040-
//
1041-
// voterDiversityScore: sum of diversity scores over all pairs from V×V(voter-voter pairs).
1042-
// replicaDiversityScore: sum of diversity scores over all pairs from:
1043-
// V×V(voter–voter pairs), N×N(non-voter–non-voter pairs), V×N(voter–non-voter
1044-
// pairs).
1045-
//
1046-
// diversity score of a single pair is computed using
1047-
// localityTiers.diversityScore which finds the longest common prefix of two
1048-
// localities.
1050+
// diversityScore measures how geographically spread out replicas are across the
1051+
// cluster. Higher scores indicate better fault tolerance because replicas are
1052+
// placed further apart in the locality hierarchy. Returns voterDiversityScore
1053+
// and replicaDiversityScore which are the average pairwise distance across all
1054+
// voter-voter pairs and replica pairs respectively.
10491055
func diversityScore(
10501056
replicas [numReplicaKinds][]storeAndLocality,
10511057
) (voterDiversityScore, replicaDiversityScore float64) {
1052-
// Helper to calculate average, or a max-value if no samples (represents lowest possible diversity)
1058+
// Helper to calculate average, or a max-value if no samples (represents
1059+
// lowest possible diversity).
10531060
scoreFromSumAndSamples := func(sumScore float64, numSamples int) float64 {
10541061
if numSamples == 0 {
10551062
return roachpb.MaxDiversityScore
@@ -1071,9 +1078,10 @@ func diversityScore(
10711078
return scoreFromSumAndSamples(voterSum, voterSamples), scoreFromSumAndSamples(totalSum, totalSamples)
10721079
}
10731080

1074-
// finishInit analyzes the span config and the range’s current replica set. It
1075-
// prepares the MMA to compute lease-transfer and rebalancing candidates while
1076-
// satisfying both constraint requirements and leaseholder preferences.
1081+
// finishInit completes initialization for the rangeAnalyzedConstraints, It
1082+
// initializes the constraints and voterConstraints, determines the leaseholder
1083+
// preference index for all voter replicas, builds locality tier information,
1084+
// and computes diversity scores.
10771085
func (rac *rangeAnalyzedConstraints) finishInit(
10781086
spanConfig *normalizedSpanConfig,
10791087
constraintMatcher storeMatchesConstraintInterface,
@@ -1479,6 +1487,9 @@ type analyzeConstraintsBuf struct {
14791487
// constraints. We still populate both voter and non-voter indexes because
14801488
// we need to know which constraints are satisfied by all replicas to
14811489
// determine when promotions or demotions should occur.
1490+
//
1491+
// NB: expected to be empty at the start of every
1492+
// analyzedConstraints.initialize call.
14821493
replicaConstraintIndices [numReplicaKinds][][]int32
14831494
}
14841495

0 commit comments

Comments
 (0)