@@ -1123,11 +1123,16 @@ func (ac *analyzedConstraints) initialize(
11231123 }
11241124}
11251125
1126- // diversityOfTwoStoreSets computes the diversity score between two sets of
1127- // stores .
1126+ // diversityOfTwoStoreSets performs a pairwise diversity score computation between
1127+ // the two sets of localities and returns their sum as well as the number of samples .
11281128//
1129- // When sameStores is false, intersection between this and other is empty and no
1130- // de-duplication is needed. For example, given two sets of stores [1, 2, 3] and
1129+ // To simplify the implementation, `this` and `other` must either be disjoint or
1130+ // the same set. If they are the same set (sameStores is true), de-duplication
1131+ // is performed (to avoid double counting). Otherwise, double-counting will
1132+ // occur if there are elements present in both sets, so the sets should be
1133+ // disjoint.
1134+ //
1135+ // For example, when sameStores is false, given two sets of stores [1, 2, 3] and
11311136// [4, 5, 6], diversity score is computed among all pairs (1, 4), (1, 5), (1,
11321137// 6), (2, 4), (2, 5), (2, 6), (3, 4), (3, 5), (3, 6).
11331138//
@@ -1154,10 +1159,16 @@ func diversityOfTwoStoreSets(
11541159}
11551160
11561161// diversityScore measures how geographically spread out replicas are across the
1157- // cluster. Higher scores indicate better fault tolerance because replicas are
1158- // placed further apart in the locality hierarchy. Returns voterDiversityScore
1159- // and replicaDiversityScore which are the average pairwise distance across all
1160- // voter-voter pairs and replica pairs respectively.
1162+ // cluster for a range. Higher scores indicate better fault tolerance because
1163+ // replicas are placed further apart in the locality hierarchy.
1164+ //
1165+ // Returns the average pairwise diversity score between all distinct voter-voter
1166+ // pairs and all distinct replica-replica pairs, respectively.
1167+ //
1168+ // For example, for a range consisting of one voter and one non-voter only,
1169+ // voterDiversityScore is zero (there are no distinct voter pairs), but the
1170+ // replicaDiversityScore equals the diversity score between the voter and the
1171+ // non-voter (there are no distinct non-voter pairs either).
11611172func diversityScore (
11621173 replicas [numReplicaKinds ][]storeAndLocality ,
11631174) (voterDiversityScore , replicaDiversityScore float64 ) {
@@ -1777,20 +1788,38 @@ type localityTiers struct {
17771788 str string
17781789}
17791790
1791+ // diversityScore returns a score comparing the two localities which ranges from
1792+ // 1, meaning completely diverse, to 0 which means not diverse at all (that
1793+ // their localities match). This function ignores the locality tier key names
1794+ // and only considers differences in their values.
1795+ //
1796+ // All localities are sorted from most global to most local so any localities
1797+ // after any differing values are irrelevant.
1798+ //
1799+ // While we recommend that all nodes have the same locality keys and same
1800+ // total number of keys, there's nothing wrong with having different locality
1801+ // keys as long as the immediately next keys are all the same for each value.
1802+ // For example:
1803+ // region:USA -> state:NY -> ...
1804+ // region:USA -> state:WA -> ...
1805+ // region:EUR -> country:UK -> ...
1806+ // region:EUR -> country:France -> ...
1807+ // is perfectly fine. This holds true at each level lower as well.
1808+ //
1809+ // There is also a need to consider the cases where the localities have
1810+ // different lengths. For these cases, we pessimistically treat the missing keys
1811+ // on one side as being identical.
17801812func (l localityTiers ) diversityScore (other localityTiers ) float64 {
1781- length := len (l .tiers )
1782- lengthOther := len (other .tiers )
1783- if lengthOther < length {
1784- length = lengthOther
1785- }
1786- for i := 0 ; i < length ; i ++ {
1813+ minLen := min (len (l .tiers ), len (other .tiers ))
1814+
1815+ for i := 0 ; i < minLen ; i ++ {
17871816 if l .tiers [i ] != other .tiers [i ] {
1788- return float64 (length - i ) / float64 (length )
1817+ return float64 (minLen - i ) / float64 (minLen )
17891818 }
17901819 }
1791- if length != lengthOther {
1792- return roachpb . MaxDiversityScore / float64 ( length + 1 )
1793- }
1820+
1821+ // The localities are the same up to the min length; pessimistically treat
1822+ // the missing keys on one side as being identical.
17941823 return 0
17951824}
17961825
0 commit comments