Skip to content

Commit 8ebc521

Browse files
craig[bot]yuzefovich
andcommitted
Merge #158226
158226: sql/stats: miscellaneous minor cleanup r=yuzefovich a=yuzefovich **sql/opt: unify search for latest full stat** This commit extracts the logic for iterating over all statistics on a table to find the most recent full stat that can be utilized (i.e. while paying attention to session variables that might disallow usage of forecasts and merged partial). In particular, this allows us to fix the bug where `OptimizerUseMergedPartialStatistics` was being ignored when building the scan - the impact of that is pretty minor though, so there is no release note (the variable is `true` by default, so only non-default configs would be affected, and only that `statsCreatedTime` would be reported newer than it should - only used in logs - as well as in the telemetry we'd have the wrong row count. **sql/stats: miscellaneous minor cleanup** This commit contains a bunch of minor cleanups I noticed while reading the stats related code: - remove no longer used `ImportStatsName` - remove redundant disabling of auto stats in logic tests (the framework itself does it already) - fix the pre-allocation slice for out types - update TODOs with Faizaan's name to the corresponding issue numbers - wrap some comments, remove a couple stale / redundant ones, collapse some lines. Epic: None Release note: None Co-authored-by: Yahor Yuzefovich <yahor@cockroachlabs.com>
2 parents 70010fb + 0ca5102 commit 8ebc521

File tree

11 files changed

+94
-134
lines changed

11 files changed

+94
-134
lines changed

pkg/jobs/jobspb/wrap.go

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -151,10 +151,6 @@ const AutoStatsName = "__auto__"
151151
// automatically.
152152
const AutoPartialStatsName = "__auto_partial__"
153153

154-
// ImportStatsName is the name to use for statistics created automatically
155-
// during import.
156-
const ImportStatsName = "__import__"
157-
158154
// ForecastStatsName is the name to use for statistic forecasts.
159155
const ForecastStatsName = "__forecast__"
160156

pkg/sql/create_stats.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -294,7 +294,7 @@ func (n *createStatsNode) makeJobRecord(ctx context.Context) (*jobs.Record, erro
294294
virtColEnabled := statsOnVirtualCols.Get(n.p.ExecCfg().SV())
295295
// Disable multi-column stats and deleting stats if partial statistics at
296296
// the extremes are requested.
297-
// TODO(faizaanmadhani): Add support for multi-column stats.
297+
// TODO(#94076): add support for creating multi-column stats.
298298
var multiColEnabled bool
299299
if !n.Options.UsingExtremes {
300300
multiColEnabled = stats.MultiColumnStatisticsClusterMode.Get(n.p.ExecCfg().SV())

pkg/sql/distsql_plan_stats.go

Lines changed: 36 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -137,14 +137,10 @@ func computeNumberSamples(ctx context.Context, numRows uint64, st *cluster.Setti
137137
minSampleSize = minAutoHistogramSamples.Default()
138138
}
139139

140-
numSamples := math.Max(
141-
math.Min(
142-
582.0*math.Pow(float64(numRows), 0.29),
143-
float64(maxSampleSize),
144-
),
140+
return uint32(max(
141+
min(582.0*math.Pow(float64(numRows), 0.29), float64(maxSampleSize)),
145142
float64(minSampleSize),
146-
)
147-
return uint32(numSamples)
143+
))
148144
}
149145

150146
func (dsp *DistSQLPlanner) createAndAttachSamplers(
@@ -167,7 +163,6 @@ func (dsp *DistSQLPlanner) createAndAttachSamplers(
167163
if autoStatsFractionStaleRowsForTable, ok := desc.AutoStatsFractionStaleRows(); ok {
168164
overhead = autoStatsFractionStaleRowsForTable
169165
}
170-
// Convert to a signed integer first to make the linter happy.
171166
if details.UsingExtremes {
172167
rowsExpected = uint64(int64(
173168
// The total expected number of rows is the estimated number of stale
@@ -188,23 +183,19 @@ func (dsp *DistSQLPlanner) createAndAttachSamplers(
188183
sampler := &execinfrapb.SamplerSpec{
189184
Sketches: sketchSpec,
190185
InvertedSketches: invSketchSpec,
186+
MaxFractionIdle: details.MaxFractionIdle,
191187
}
192-
sampler.MaxFractionIdle = details.MaxFractionIdle
193-
// For partial statistics this loop should only iterate once
194-
// since we only support one reqStat at a time.
188+
// For partial statistics this loop should only iterate once since we only
189+
// support one reqStat at a time.
195190
for _, s := range reqStats {
196191
if s.histogram {
197192
var histogramSamplesCount uint32
198193
if tableSampleCount, ok := desc.HistogramSamplesCount(); ok {
199194
histogramSamplesCount = tableSampleCount
200-
} else if clusterSampleCount := histogramSamples.Get(&dsp.st.SV); clusterSampleCount != histogramSamples.Default() {
195+
} else if clusterSampleCount := histogramSamples.Get(&dsp.st.SV); clusterSampleCount != 0 {
201196
histogramSamplesCount = uint32(clusterSampleCount)
202197
} else {
203-
histogramSamplesCount = computeNumberSamples(
204-
ctx,
205-
rowsExpected,
206-
dsp.st,
207-
)
198+
histogramSamplesCount = computeNumberSamples(ctx, rowsExpected, dsp.st)
208199
log.Dev.Infof(ctx, "using computed sample size of %d for histogram construction", histogramSamplesCount)
209200
}
210201
sampler.SampleSize = histogramSamplesCount
@@ -219,7 +210,7 @@ func (dsp *DistSQLPlanner) createAndAttachSamplers(
219210

220211
// The sampler outputs the original columns plus a rank column, five
221212
// sketch columns, and two inverted histogram columns.
222-
outTypes := make([]*types.T, 0, len(p.GetResultTypes())+5)
213+
outTypes := make([]*types.T, 0, len(p.GetResultTypes())+8)
223214
outTypes = append(outTypes, p.GetResultTypes()...)
224215
// An INT column for the rank of each row.
225216
outTypes = append(outTypes, types.Int)
@@ -292,18 +283,15 @@ func (dsp *DistSQLPlanner) createPartialStatsPlan(
292283
// so we only support one requested stat at a time here.
293284
if len(reqStats) > 1 {
294285
return nil, unimplemented.NewWithIssue(
295-
128904,
296-
"cannot process multiple partial statistics requests at once",
286+
128904, "cannot process multiple partial statistics requests at once",
297287
)
298288
}
299289

300290
reqStat := reqStats[0]
301-
302291
if len(reqStat.columns) > 1 {
303-
// TODO (faizaanmadhani): Add support for creating multi-column stats
292+
// TODO(#94076): add support for creating multi-column stats.
304293
return nil, pgerror.Newf(pgcode.FeatureNotSupported, "multi-column partial statistics are not currently supported")
305294
}
306-
307295
if !reqStat.histogram {
308296
return nil, pgerror.Newf(pgcode.FeatureNotSupported, "partial statistics without histograms are not supported")
309297
}
@@ -324,9 +312,9 @@ func (dsp *DistSQLPlanner) createPartialStatsPlan(
324312
return nil, err
325313
}
326314

327-
// Calculate the column we need to scan
328-
// TODO (faizaanmadhani): Iterate through all columns in a requested stat when
329-
// when we add support for multi-column statistics.
315+
// Calculate the column we need to scan.
316+
// TODO(#94076): iterate through all columns in a requested stat when we add
317+
// support for multi-column statistics.
330318
var colCfg scanColumnsConfig
331319
colCfg.wantedColumns = append(colCfg.wantedColumns, column.GetID())
332320

@@ -341,19 +329,18 @@ func (dsp *DistSQLPlanner) createPartialStatsPlan(
341329
if err != nil {
342330
return nil, err
343331
}
344-
// Map the ColumnIDs to their ordinals in scan.cols
345-
// This loop should only iterate once, since we only
346-
// handle single column partial statistics.
347-
// TODO(faizaanmadhani): Add support for multi-column partial stats next
332+
// Map the ColumnIDs to their ordinals in scan.cols. This loop should only
333+
// iterate once, since we only handle single column partial statistics.
334+
// TODO(#94076): add support for creating multi-column stats.
348335
var colIdxMap catalog.TableColMap
349336
for i, c := range scan.catalogCols {
350337
colIdxMap.Set(c.GetID(), i)
351338
}
352339

353340
var stat *stats.TableStatistic
354341
// Find the statistic from the newest table statistic for our column that is
355-
// not partial and not forecasted. The first one we find will be the latest
356-
// due to the newest to oldest ordering property of the cache.
342+
// not partial, not merged, and not forecasted. The first one we find will
343+
// be the latest due to the newest to oldest ordering property of the cache.
357344
for _, t := range tableStats {
358345
if len(t.ColumnIDs) == 1 && column.GetID() == t.ColumnIDs[0] &&
359346
!t.IsPartial() && !t.IsMerged() && !t.IsForecast() {
@@ -380,7 +367,8 @@ func (dsp *DistSQLPlanner) createPartialStatsPlan(
380367
return nil, pgerror.Newf(
381368
pgcode.ObjectNotInPrerequisiteState,
382369
"column %s does not have a prior statistic",
383-
column.GetName())
370+
column.GetName(),
371+
)
384372
}
385373
if len(stat.Histogram) == 1 && stat.Histogram[0].UpperBound == tree.DNull {
386374
return nil, pgerror.Newf(
@@ -398,8 +386,9 @@ func (dsp *DistSQLPlanner) createPartialStatsPlan(
398386
planCtx.EvalContext(), planCtx.ExtendedEvalCtx.Codec, desc, scan.index,
399387
)
400388

401-
lowerBound, upperBound, err := bounds.GetUsingExtremesBounds(ctx,
402-
planCtx.EvalContext(), stat.Histogram)
389+
lowerBound, upperBound, err := bounds.GetUsingExtremesBounds(
390+
ctx, planCtx.EvalContext(), stat.Histogram,
391+
)
403392
if err != nil {
404393
return nil, err
405394
}
@@ -413,13 +402,13 @@ func (dsp *DistSQLPlanner) createPartialStatsPlan(
413402
}
414403
prevLowerBound = lowerBound
415404

416-
extremesSpans, err := bounds.ConstructUsingExtremesSpans(lowerBound,
417-
upperBound, scan.index)
405+
extremesSpans, err := bounds.ConstructUsingExtremesSpans(
406+
lowerBound, upperBound, scan.index,
407+
)
418408
if err != nil {
419409
return nil, err
420410
}
421411
predicate = bounds.ConstructUsingExtremesPredicate(lowerBound, upperBound, column.GetName())
422-
// Get roachpb.Spans from constraint.Spans
423412
scan.spans, err = sb.SpansFromConstraintSpan(&extremesSpans, span.NoopSplitter())
424413
if err != nil {
425414
return nil, err
@@ -456,9 +445,9 @@ func (dsp *DistSQLPlanner) createPartialStatsPlan(
456445
spec.FullStatisticID = stat.StatisticID
457446
}
458447

459-
// For now, this loop should iterate only once, as we only
460-
// handle single-column partial statistics.
461-
// TODO(faizaanmadhani): Add support for multi-column partial stats next
448+
// For now, this loop should iterate only once, as we only handle
449+
// single-column partial statistics.
450+
// TODO(#94076): add support for creating multi-column stats.
462451
for i, colID := range reqStat.columns {
463452
colIdx, ok := colIdxMap.Get(colID)
464453
if !ok {
@@ -497,16 +486,9 @@ func (dsp *DistSQLPlanner) createPartialStatsPlan(
497486
sketchSpec = append(sketchSpec, spec)
498487
}
499488
return dsp.createAndAttachSamplers(
500-
ctx,
501-
p,
502-
desc,
503-
tableStats,
504-
details,
505-
sampledColumnIDs,
506-
jobID,
507-
reqStats,
508-
sketchSpec, invSketchSpec,
509-
numIndexes, curIndex), nil
489+
ctx, p, desc, tableStats, details, sampledColumnIDs, jobID, reqStats,
490+
sketchSpec, invSketchSpec, numIndexes, curIndex,
491+
), nil
510492
}
511493

512494
func (dsp *DistSQLPlanner) createStatsPlan(
@@ -748,16 +730,9 @@ func (dsp *DistSQLPlanner) createStatsPlan(
748730
}
749731

750732
return dsp.createAndAttachSamplers(
751-
ctx,
752-
p,
753-
desc,
754-
tableStats,
755-
details,
756-
sampledColumnIDs,
757-
jobID,
758-
reqStats,
759-
sketchSpecs, invSketchSpecs,
760-
numIndexes, curIndex), nil
733+
ctx, p, desc, tableStats, details, sampledColumnIDs, jobID, reqStats,
734+
sketchSpecs, invSketchSpecs, numIndexes, curIndex,
735+
), nil
761736
}
762737

763738
// createPlanForCreateStats creates the DistSQL plan to perform the table stats

pkg/sql/logictest/testdata/logic_test/constrained_stats

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,6 @@
22

33
# Tests for creating partial table statistics with a WHERE clause.
44

5-
statement ok
6-
SET CLUSTER SETTING sql.stats.automatic_collection.enabled = false
7-
85
statement ok
96
CREATE TABLE products (
107
id INT PRIMARY KEY,

pkg/sql/logictest/testdata/logic_test/distsql_automatic_partial_stats

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,6 @@
22

33
# Test a simple update and insert case for partial statistics
44

5-
# Disable automatic stats
6-
statement ok
7-
SET CLUSTER SETTING sql.stats.automatic_collection.enabled = false
8-
95
statement ok
106
SET CLUSTER SETTING sql.stats.automatic_partial_collection.min_stale_rows = 5
117

pkg/sql/opt/cat/utils.go

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ import (
1313
"github.com/cockroachdb/cockroach/pkg/sql/catalog/descpb"
1414
"github.com/cockroachdb/cockroach/pkg/sql/sem/idxtype"
1515
"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
16+
"github.com/cockroachdb/cockroach/pkg/sql/sessiondata"
1617
"github.com/cockroachdb/cockroach/pkg/util/encoding"
1718
"github.com/cockroachdb/cockroach/pkg/util/treeprinter"
1819
"github.com/cockroachdb/errors"
@@ -340,3 +341,20 @@ func MaybeMarkRedactable(unsafe string, markRedactable bool) string {
340341
}
341342
return unsafe
342343
}
344+
345+
// FindLatestFullStat finds the most recent full statistic that can be used for
346+
// planning and returns the index to be used with tab.Statistic(). If such
347+
// doesn't exist (meaning that either there are no full stats altogether or that
348+
// the present ones cannot be used based on the session variables), then
349+
// tab.StatisticCount() is returned.
350+
func FindLatestFullStat(tab Table, sd *sessiondata.SessionData) int {
351+
// Stats are ordered with most recent first.
352+
var first int
353+
for first < tab.StatisticCount() &&
354+
(tab.Statistic(first).IsPartial() ||
355+
(tab.Statistic(first).IsMerged() && !sd.OptimizerUseMergedPartialStatistics) ||
356+
(tab.Statistic(first).IsForecast() && !sd.OptimizerUseForecasts)) {
357+
first++
358+
}
359+
return first
360+
}

pkg/sql/opt/exec/execbuilder/relational.go

Lines changed: 34 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -410,34 +410,24 @@ func (b *Builder) maybeAnnotateWithEstimates(node exec.Node, e memo.RelExpr) {
410410
}
411411
if scan, ok := e.(*memo.ScanExpr); ok {
412412
tab := b.mem.Metadata().Table(scan.Table)
413-
if tab.StatisticCount() > 0 {
414-
// The first stat is the most recent full one.
415-
var first int
416-
for first < tab.StatisticCount() &&
417-
(tab.Statistic(first).IsPartial() ||
418-
(tab.Statistic(first).IsMerged() && !b.evalCtx.SessionData().OptimizerUseMergedPartialStatistics) ||
419-
(tab.Statistic(first).IsForecast() && !b.evalCtx.SessionData().OptimizerUseForecasts)) {
420-
first++
413+
first := cat.FindLatestFullStat(tab, b.evalCtx.SessionData())
414+
if first < tab.StatisticCount() {
415+
stat := tab.Statistic(first)
416+
val.TableStatsRowCount = stat.RowCount()
417+
if val.TableStatsRowCount == 0 {
418+
val.TableStatsRowCount = 1
421419
}
422-
423-
if first < tab.StatisticCount() {
424-
stat := tab.Statistic(first)
425-
val.TableStatsRowCount = stat.RowCount()
426-
if val.TableStatsRowCount == 0 {
427-
val.TableStatsRowCount = 1
428-
}
429-
val.TableStatsCreatedAt = stat.CreatedAt()
430-
val.LimitHint = scan.RequiredPhysical().LimitHint
431-
val.Forecast = stat.IsForecast()
432-
if val.Forecast {
433-
val.ForecastAt = stat.CreatedAt()
434-
// Find the first non-forecast full stat.
435-
for i := first + 1; i < tab.StatisticCount(); i++ {
436-
nextStat := tab.Statistic(i)
437-
if !nextStat.IsPartial() && !nextStat.IsForecast() {
438-
val.TableStatsCreatedAt = nextStat.CreatedAt()
439-
break
440-
}
420+
val.TableStatsCreatedAt = stat.CreatedAt()
421+
val.LimitHint = scan.RequiredPhysical().LimitHint
422+
val.Forecast = stat.IsForecast()
423+
if val.Forecast {
424+
val.ForecastAt = stat.CreatedAt()
425+
// Find the first non-forecast full stat.
426+
for i := first + 1; i < tab.StatisticCount(); i++ {
427+
nextStat := tab.Statistic(i)
428+
if !nextStat.IsPartial() && !nextStat.IsForecast() {
429+
val.TableStatsCreatedAt = nextStat.CreatedAt()
430+
break
441431
}
442432
}
443433
}
@@ -898,26 +888,23 @@ func (b *Builder) buildScan(scan *memo.ScanExpr) (_ execPlan, outputCols colOrdM
898888
b.TotalScanRows += stats.RowCount
899889
b.ScanCounts[exec.ScanWithStatsCount]++
900890

901-
// The first stat is the most recent full one. Check if it was a forecast.
902-
var first int
903-
for first < tab.StatisticCount() && tab.Statistic(first).IsPartial() {
904-
first++
905-
}
891+
sd := b.evalCtx.SessionData()
892+
first := cat.FindLatestFullStat(tab, sd)
906893
if first < tab.StatisticCount() && tab.Statistic(first).IsForecast() {
907-
if b.evalCtx.SessionData().OptimizerUseForecasts {
908-
b.ScanCounts[exec.ScanWithStatsForecastCount]++
894+
b.ScanCounts[exec.ScanWithStatsForecastCount]++
909895

910-
// Calculate time since the forecast (or negative time until the forecast).
911-
nanosSinceStatsForecasted := timeutil.Since(tab.Statistic(first).CreatedAt())
912-
if nanosSinceStatsForecasted.Abs() > b.NanosSinceStatsForecasted.Abs() {
913-
b.NanosSinceStatsForecasted = nanosSinceStatsForecasted
914-
}
915-
}
916-
// Find the first non-forecast full stat.
917-
for first < tab.StatisticCount() &&
918-
(tab.Statistic(first).IsPartial() || tab.Statistic(first).IsForecast()) {
919-
first++
896+
// Calculate time since the forecast (or negative time until the forecast).
897+
nanosSinceStatsForecasted := timeutil.Since(tab.Statistic(first).CreatedAt())
898+
if nanosSinceStatsForecasted.Abs() > b.NanosSinceStatsForecasted.Abs() {
899+
b.NanosSinceStatsForecasted = nanosSinceStatsForecasted
920900
}
901+
902+
// Since currently 'first' points at the forecast, then usage of the
903+
// forecasts must be enabled, so in order to find the first full
904+
// non-forecast stat, we'll temporarily disable their usage.
905+
sd.OptimizerUseForecasts = false
906+
first = cat.FindLatestFullStat(tab, sd)
907+
sd.OptimizerUseForecasts = true
921908
}
922909

923910
if first < tab.StatisticCount() {
@@ -945,8 +932,9 @@ func (b *Builder) buildScan(scan *memo.ScanExpr) (_ execPlan, outputCols colOrdM
945932
}
946933

947934
var params exec.ScanParams
948-
params, outputCols, err = b.scanParams(tab, &scan.ScanPrivate,
949-
scan.Relational(), scan.RequiredPhysical(), statsCreatedAt)
935+
params, outputCols, err = b.scanParams(
936+
tab, &scan.ScanPrivate, scan.Relational(), scan.RequiredPhysical(), statsCreatedAt,
937+
)
950938
if err != nil {
951939
return execPlan{}, colOrdMap{}, err
952940
}

0 commit comments

Comments
 (0)