Skip to content

Commit 8f079ba

Browse files
craig[bot]alyshanjahani-crlrafiss
committed
158398: ui, sql: Record KV CPU Time in statement and transaction statistics r=alyshanjahani-crl a=alyshanjahani-crl This commit starts recording the newly added kvCPUTime field from topLevelQueryStats and exposes it in the UI. Fixes: https://cockroachlabs.atlassian.net/browse/CRDB-57265 Release note (ui change): KV CPU Time is now recorded to statement_statistics and transaction_statistics and is displayed in the SQL Activity page. 158643: rttanalysis: use SetupEx/ResetEx for multi-database benchmark r=rafiss a=rafiss Update buildNDatabasesWithMTables to return slices for SetupEx and ResetEx, with each database having its own element. This should make it less likely to encounter txn retry errors during cleanup. Fixes: #156226 Release note: None Co-authored-by: alyshanjahani-crl <alyshan@cockroachlabs.com> Co-authored-by: Rafi Shamim <rafi@cockroachlabs.com>
3 parents 6038f1b + 3cc7a2d + edf0aa4 commit 8f079ba

File tree

24 files changed

+162
-21
lines changed

24 files changed

+162
-21
lines changed

pkg/bench/rttanalysis/orm_queries_bench_test.go

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -946,8 +946,8 @@ ORDER BY
946946
{
947947
Name: `liquibase migrations on multiple dbs`,
948948
// 15 databases, each with 40 tables.
949-
Setup: liquibaseSetup,
950-
Reset: liquibaseReset,
949+
SetupEx: liquibaseSetup,
950+
ResetEx: liquibaseReset,
951951
Stmt: `SELECT
952952
NULL AS table_cat,
953953
n.nspname AS table_schem,
@@ -1067,16 +1067,18 @@ func buildNTypes(n int) string {
10671067
return b.String()
10681068
}
10691069

1070-
func buildNDatabasesWithMTables(amtDbs int, amtTbls int) (string, string) {
1071-
b := strings.Builder{}
1072-
reset := strings.Builder{}
1070+
func buildNDatabasesWithMTables(amtDbs int, amtTbls int) ([]string, []string) {
1071+
setupEx := make([]string, amtDbs)
1072+
resetEx := make([]string, amtDbs)
10731073
tbls := buildNTables(amtTbls)
10741074
for i := 0; i < amtDbs; i++ {
10751075
db := fmt.Sprintf("d%d", i)
1076+
b := strings.Builder{}
10761077
b.WriteString(fmt.Sprintf("CREATE DATABASE IF NOT EXISTS %s;\n", db))
1077-
reset.WriteString(fmt.Sprintf("DROP DATABASE %s;\n", db))
10781078
b.WriteString(fmt.Sprintf("USE %s;\n", db))
10791079
b.WriteString(tbls)
1080+
setupEx[i] = b.String()
1081+
resetEx[i] = fmt.Sprintf("DROP DATABASE %s", db)
10801082
}
1081-
return b.String(), reset.String()
1083+
return setupEx, resetEx
10821084
}

pkg/sql/appstatspb/app_stats.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,7 @@ func (t *TransactionStatistics) Add(other *TransactionStatistics) {
131131
t.RowsRead.Add(other.RowsRead, t.Count, other.Count)
132132
t.BytesRead.Add(other.BytesRead, t.Count, other.Count)
133133
t.RowsWritten.Add(other.RowsWritten, t.Count, other.Count)
134+
t.KVCPUTimeNanos.Add(other.KVCPUTimeNanos, t.Count, other.Count)
134135

135136
t.ExecStats.Add(other.ExecStats)
136137

@@ -181,6 +182,7 @@ func (s *StatementStatistics) Add(other *StatementStatistics) {
181182
s.BytesRead.Add(other.BytesRead, s.Count, other.Count)
182183
s.RowsRead.Add(other.RowsRead, s.Count, other.Count)
183184
s.RowsWritten.Add(other.RowsWritten, s.Count, other.Count)
185+
s.KVCPUTimeNanos.Add(other.KVCPUTimeNanos, s.Count, other.Count)
184186
s.Nodes = util.CombineUnique(s.Nodes, other.Nodes)
185187
s.KVNodeIDs = util.CombineUnique(s.KVNodeIDs, other.KVNodeIDs)
186188
s.Regions = util.CombineUnique(s.Regions, other.Regions)

pkg/sql/appstatspb/app_stats.proto

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,11 @@ message StatementStatistics {
142142
// system.statement_hints table.
143143
optional int64 stmt_hints_count = 37 [(gogoproto.nullable) = false];
144144

145+
// kv_cpu_time_nanos is the CPU time in nanoseconds spent by KV. This is
146+
// representative of the "synchronous" portion of work performed by KV
147+
// (ex/ not replication related work).
148+
optional NumericStat kv_cpu_time_nanos = 38 [(gogoproto.nullable) = false, (gogoproto.customname) = "KVCPUTimeNanos"];
149+
145150
// Note: be sure to update `sql/app_stats.go` when adding/removing fields here!
146151

147152
reserved 13, 14, 17, 18, 19, 20;
@@ -188,6 +193,11 @@ message TransactionStatistics {
188193

189194
// RowsWritten collects the number of rows written to disk.
190195
optional NumericStat rows_written = 10 [(gogoproto.nullable) = false];
196+
197+
// kv_cpu_time_nanos is the CPU time in nanoseconds spent by KV. This is
198+
// representative of the "synchronous" portion of work performed by KV
199+
// (ex/ not replication related work).
200+
optional NumericStat kv_cpu_time_nanos = 12 [(gogoproto.nullable) = false, (gogoproto.customname) = "KVCPUTimeNanos"];
191201
}
192202

193203

pkg/sql/conn_executor.go

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1757,15 +1757,14 @@ type connExecutor struct {
17571757
// client to send statements while holding the transaction open.
17581758
idleLatency time.Duration
17591759

1760-
// rowsRead and bytesRead are separate from QueryLevelStats because they are
1761-
// accumulated independently since they are always collected, as opposed to
1762-
// QueryLevelStats which are sampled.
1760+
// rowsRead, bytesRead, kvCPUTimeNanos, and rowsWritten are separate from accumulatedStats
1761+
// since they are always collected as opposed to QueryLevelStats which are sampled.
17631762
rowsRead int64
17641763
bytesRead int64
1765-
17661764
// rowsWritten tracks the number of rows written (modified) by all
17671765
// statements in this txn so far.
1768-
rowsWritten int64
1766+
rowsWritten int64
1767+
kvCPUTimeNanos time.Duration
17691768

17701769
// rowsWrittenLogged and rowsReadLogged indicates whether we have
17711770
// already logged an event about reaching written/read rows setting,

pkg/sql/conn_executor_exec.go

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2953,6 +2953,7 @@ func (ex *connExecutor) dispatchToExecutionEngine(
29532953
ex.extraTxnState.rowsRead += stats.rowsRead
29542954
ex.extraTxnState.bytesRead += stats.bytesRead
29552955
ex.extraTxnState.rowsWritten += stats.rowsWritten
2956+
ex.extraTxnState.kvCPUTimeNanos += stats.kvCPUTimeNanos
29562957

29572958
if ppInfo := getPausablePortalInfo(planner); ppInfo != nil && !ppInfo.dispatchToExecutionEngine.cleanup.isComplete {
29582959
// We need to ensure that we're using the planner bound to the first-time
@@ -3341,8 +3342,8 @@ type topLevelQueryStats struct {
33413342
// client receiving the PGWire protocol messages (as well as construcing
33423343
// those messages).
33433344
clientTime time.Duration
3344-
// kvCPUTime is the CPU time consumed by KV operations during query execution.
3345-
kvCPUTime time.Duration
3345+
// kvCPUTimeNanos is the CPU time consumed by KV operations during query execution.
3346+
kvCPUTimeNanos time.Duration
33463347
// NB: when adding another field here, consider whether
33473348
// forwardInnerQueryStats method needs an adjustment.
33483349
}
@@ -3355,7 +3356,7 @@ func (s *topLevelQueryStats) add(other *topLevelQueryStats) {
33553356
s.indexRowsWritten += other.indexRowsWritten
33563357
s.networkEgressEstimate += other.networkEgressEstimate
33573358
s.clientTime += other.clientTime
3358-
s.kvCPUTime += other.kvCPUTime
3359+
s.kvCPUTimeNanos += other.kvCPUTimeNanos
33593360
}
33603361

33613362
// execWithDistSQLEngine converts a plan to a distributed SQL physical plan and
@@ -4201,6 +4202,7 @@ func (ex *connExecutor) onTxnRestart(ctx context.Context) {
42014202
ex.extraTxnState.rowsRead = 0
42024203
ex.extraTxnState.bytesRead = 0
42034204
ex.extraTxnState.rowsWritten = 0
4205+
ex.extraTxnState.kvCPUTimeNanos = 0
42044206

42054207
if ex.server.cfg.TestingKnobs.BeforeRestart != nil {
42064208
ex.server.cfg.TestingKnobs.BeforeRestart(ctx, ex.state.mu.autoRetryReason)
@@ -4232,6 +4234,7 @@ func (ex *connExecutor) recordTransactionStart(txnID uuid.UUID) {
42324234
ex.extraTxnState.accumulatedStats = execstats.QueryLevelStats{}
42334235
ex.extraTxnState.idleLatency = 0
42344236
ex.extraTxnState.rowsRead = 0
4237+
ex.extraTxnState.kvCPUTimeNanos = 0
42354238
ex.extraTxnState.bytesRead = 0
42364239
ex.extraTxnState.rowsWritten = 0
42374240
ex.extraTxnState.rowsWrittenLogged = false
@@ -4344,6 +4347,7 @@ func (ex *connExecutor) recordTransactionFinish(
43444347
RowsRead: ex.extraTxnState.rowsRead,
43454348
RowsWritten: ex.extraTxnState.rowsWritten,
43464349
BytesRead: ex.extraTxnState.bytesRead,
4350+
KVCPUTimeNanos: ex.extraTxnState.kvCPUTimeNanos,
43474351
Priority: ex.state.mu.priority,
43484352
// TODO(107318): add isolation level
43494353
// TODO(107318): add qos

pkg/sql/distsql_running.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1625,7 +1625,7 @@ func forwardInnerQueryStats(f metadataForwarder, stats topLevelQueryStats) {
16251625
meta.Metrics.RowsWritten = stats.rowsWritten
16261626
meta.Metrics.IndexRowsWritten = stats.indexRowsWritten
16271627
meta.Metrics.IndexBytesWritten = stats.indexBytesWritten
1628-
meta.Metrics.KVCPUTime = int64(stats.kvCPUTime)
1628+
meta.Metrics.KVCPUTime = int64(stats.kvCPUTimeNanos)
16291629
// stats.networkEgressEstimate and stats.clientTime are ignored since they
16301630
// only matter at the "true" top-level query (and actually should be zero
16311631
// here anyway).
@@ -1676,7 +1676,7 @@ func (r *DistSQLReceiver) pushMeta(meta *execinfrapb.ProducerMetadata) execinfra
16761676
r.stats.rowsWritten += meta.Metrics.RowsWritten
16771677
r.stats.indexRowsWritten += meta.Metrics.IndexRowsWritten
16781678
r.stats.indexBytesWritten += meta.Metrics.IndexBytesWritten
1679-
r.stats.kvCPUTime += time.Duration(meta.Metrics.KVCPUTime)
1679+
r.stats.kvCPUTimeNanos += time.Duration(meta.Metrics.KVCPUTime)
16801680

16811681
if sm, ok := r.scanStageEstimateMap[meta.Metrics.StageID]; ok {
16821682
sm.rowsRead += uint64(meta.Metrics.RowsRead)

pkg/sql/executor_statement_metrics.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -208,7 +208,7 @@ func (ex *connExecutor) recordStatementSummary(
208208
).
209209
PlanGist(planner.instrumentation.planGist.String(), planner.instrumentation.planGist.Hash()).
210210
LatencyRecorder(ex.statsCollector).
211-
QueryLevelStats(stats.bytesRead, stats.rowsRead, stats.rowsWritten).
211+
QueryLevelStats(stats.bytesRead, stats.rowsRead, stats.rowsWritten, stats.kvCPUTimeNanos.Nanoseconds()).
212212
ExecStats(queryLevelStats).
213213
// TODO(mgartner): Use a slice of struct{uint64, uint64} instead of
214214
// converting to strings.

pkg/sql/instrumentation.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -903,7 +903,7 @@ func (ih *instrumentationHelper) emitExplainAnalyzePlanToOutputBuilder(
903903
}
904904

905905
if grunning.Supported {
906-
ob.AddKVCPUTime(ih.topLevelStats.kvCPUTime)
906+
ob.AddKVCPUTime(ih.topLevelStats.kvCPUTimeNanos)
907907
}
908908
if !ih.containsMutation && ih.vectorized && grunning.Supported {
909909
// Currently we cannot separate SQL CPU time from local KV CPU time for

pkg/sql/routine.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -415,7 +415,7 @@ func (g *routineGenerator) startInternal(ctx context.Context, txn *kv.Txn) (err
415415
statsBuilder.StatementError(err)
416416
return err
417417
}
418-
statsBuilder.QueryLevelStats(queryStats.bytesRead, queryStats.rowsRead, queryStats.rowsWritten)
418+
statsBuilder.QueryLevelStats(queryStats.bytesRead, queryStats.rowsRead, queryStats.rowsWritten, queryStats.kvCPUTimeNanos.Nanoseconds())
419419
forwardInnerQueryStats(g.p.routineMetadataForwarder, queryStats)
420420
if openCursor {
421421
return cursorHelper.createCursor(g.p)

pkg/sql/sqlstats/persistedsqlstats/sqlstatsutil/json_encoding_test.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,10 @@ func TestSQLStatsJsonEncoding(t *testing.T) {
9797
"mean": {{.Float}},
9898
"sqDiff": {{.Float}}
9999
},
100+
"kvCPUTimeNanos": {
101+
"mean": {{.Float}},
102+
"sqDiff": {{.Float}}
103+
},
100104
"nodes": [{{joinInts .IntArray}}],
101105
"kvNodeIds": [{{joinInt32s .Int32Array}}],
102106
"regions": [{{joinStrings .StringArray}}],
@@ -478,6 +482,10 @@ func TestSQLStatsJsonEncoding(t *testing.T) {
478482
"rowsWritten": {
479483
"mean": {{.Float}},
480484
"sqDiff": {{.Float}}
485+
},
486+
"kvCPUTimeNanos": {
487+
"mean": {{.Float}},
488+
"sqDiff": {{.Float}}
481489
}
482490
},
483491
"execution_statistics": {

0 commit comments

Comments
 (0)