Skip to content

Commit 9d535db

Browse files
craig[bot]spilchen
andcommitted
Merge #158625
158625: ccl/multiregionccl: stabilize global_tables follower-read checks r=spilchen a=spilchen Follower read assertions in global_tables tests were flaky due to relying on AS OF SYSTEM TIME now(), where the local replica's closed timestamp hadn't yet advanced enough. This caused transactions to exceed their uncertainty window when closed timestamp hadn't been bumped recently. The intent of the test is to ensure that queries run as of now can use follower reads. Since timing can interphere, this added retry logic to try the query again if the trace didn't match expectations. Closes #152099 Release note: None Epic: none Co-authored-by: Matt Spilchen <matt.spilchen@cockroachlabs.com>
2 parents c7813f5 + 934cee4 commit 9d535db

File tree

2 files changed

+43
-20
lines changed

2 files changed

+43
-20
lines changed

pkg/ccl/multiregionccl/datadriven_test.go

Lines changed: 38 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -261,10 +261,13 @@ SET CLUSTER SETTING kv.allocator.min_lease_transfer_interval = '5m'
261261

262262
case "trace-sql":
263263
mustHaveArgOrFatal(t, d, serverIdx)
264+
var idx int
265+
d.ScanArgs(t, serverIdx, &idx)
266+
shouldRetry := d.HasArg(traceSQLRetryArg)
267+
retryTimeout := testutils.DefaultSucceedsSoonDuration
268+
264269
var rec tracingpb.Recording
265270
queryFunc := func() (localRead bool, followerRead bool, err error) {
266-
var idx int
267-
d.ScanArgs(t, serverIdx, &idx)
268271
sqlDB, err := ds.getSQLConn(idx)
269272
if err != nil {
270273
return false, false, err
@@ -291,22 +294,41 @@ SET CLUSTER SETTING kv.allocator.min_lease_transfer_interval = '5m'
291294
}
292295
return localRead, followerRead, nil
293296
}
294-
localRead, followerRead, err := queryFunc()
295-
if err != nil {
296-
return err.Error()
297-
}
298-
var output strings.Builder
299-
output.WriteString(
300-
fmt.Sprintf("served locally: %s\n", strconv.FormatBool(localRead)))
301-
// Only print follower read information if the query was served locally.
302-
if localRead {
297+
298+
runOnce := func() (string, error) {
299+
localRead, followerRead, err := queryFunc()
300+
if err != nil {
301+
return "", err
302+
}
303+
var output strings.Builder
303304
output.WriteString(
304-
fmt.Sprintf("served via follower read: %s\n", strconv.FormatBool(followerRead)))
305+
fmt.Sprintf("served locally: %s\n", strconv.FormatBool(localRead)))
306+
// Only print follower read information if the query was served locally.
307+
if localRead {
308+
output.WriteString(
309+
fmt.Sprintf("served via follower read: %s\n", strconv.FormatBool(followerRead)))
310+
}
311+
if d.Expected != output.String() {
312+
return "", errors.AssertionFailedf("not a match, trace:\n%s\n", rec)
313+
}
314+
return output.String(), nil
315+
}
316+
317+
var output string
318+
var err error
319+
if shouldRetry {
320+
err = testutils.SucceedsWithinError(func() error {
321+
var attemptErr error
322+
output, attemptErr = runOnce()
323+
return attemptErr
324+
}, retryTimeout)
325+
} else {
326+
output, err = runOnce()
305327
}
306-
if d.Expected != output.String() {
307-
return errors.AssertionFailedf("not a match, trace:\n%s\n", rec).Error()
328+
if err != nil {
329+
return err.Error()
308330
}
309-
return output.String()
331+
return output
310332

311333
case "refresh-range-descriptor-cache":
312334
mustHaveArgOrFatal(t, d, tableName)
@@ -484,6 +506,7 @@ const (
484506
partitionName = "partition-name"
485507
numVoters = "num-voters"
486508
numNonVoters = "num-non-voters"
509+
traceSQLRetryArg = "retry"
487510
)
488511

489512
type replicaType int

pkg/ccl/multiregionccl/testdata/global_tables

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ SELECT * FROM db.global WHERE k = 2
3232
----
3333
LEAD_FOR_GLOBAL_READS
3434

35-
trace-sql idx=1
35+
trace-sql idx=1 retry
3636
SELECT * FROM db.global WHERE k = 1
3737
----
3838
served locally: true
@@ -43,7 +43,7 @@ SELECT * FROM db.global WHERE k = 2
4343
----
4444
LEAD_FOR_GLOBAL_READS
4545

46-
trace-sql idx=2
46+
trace-sql idx=2 retry
4747
SELECT * FROM db.global WHERE k = 1
4848
----
4949
served locally: true
@@ -54,7 +54,7 @@ SELECT * FROM db.global WHERE k = 2
5454
----
5555
LEAD_FOR_GLOBAL_READS
5656

57-
trace-sql idx=3
57+
trace-sql idx=3 retry
5858
SELECT * FROM db.global WHERE k = 1
5959
----
6060
served locally: true
@@ -65,7 +65,7 @@ SELECT * FROM db.global WHERE k = 2
6565
----
6666
LEAD_FOR_GLOBAL_READS
6767

68-
trace-sql idx=4
68+
trace-sql idx=4 retry
6969
SELECT * FROM db.global WHERE k = 1
7070
----
7171
served locally: true
@@ -95,7 +95,7 @@ SELECT * FROM db.global WHERE k = 2
9595
----
9696
LEAD_FOR_GLOBAL_READS
9797

98-
trace-sql idx=5
98+
trace-sql idx=5 retry
9999
SELECT * FROM db.global WHERE k = 1
100100
----
101101
served locally: true

0 commit comments

Comments
 (0)