Skip to content

Commit 0ca5102

Browse files
committed
sql/stats: miscellaneous minor cleanup
This commit contains a bunch of minor cleanups I noticed while reading the stats related code: - remove no longer used `ImportStatsName` - remove redundant disabling of auto stats in logic tests (the framework itself does it already) - fix the pre-allocation slice for out types - update TODOs with Faizaan's name to the corresponding issue numbers - wrap some comments, remove a couple stale / redundant ones, collapse some lines. Release note: None
1 parent 9901daf commit 0ca5102

File tree

8 files changed

+41
-79
lines changed

8 files changed

+41
-79
lines changed

pkg/jobs/jobspb/wrap.go

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -151,10 +151,6 @@ const AutoStatsName = "__auto__"
151151
// automatically.
152152
const AutoPartialStatsName = "__auto_partial__"
153153

154-
// ImportStatsName is the name to use for statistics created automatically
155-
// during import.
156-
const ImportStatsName = "__import__"
157-
158154
// ForecastStatsName is the name to use for statistic forecasts.
159155
const ForecastStatsName = "__forecast__"
160156

pkg/sql/create_stats.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -294,7 +294,7 @@ func (n *createStatsNode) makeJobRecord(ctx context.Context) (*jobs.Record, erro
294294
virtColEnabled := statsOnVirtualCols.Get(n.p.ExecCfg().SV())
295295
// Disable multi-column stats and deleting stats if partial statistics at
296296
// the extremes are requested.
297-
// TODO(faizaanmadhani): Add support for multi-column stats.
297+
// TODO(#94076): add support for creating multi-column stats.
298298
var multiColEnabled bool
299299
if !n.Options.UsingExtremes {
300300
multiColEnabled = stats.MultiColumnStatisticsClusterMode.Get(n.p.ExecCfg().SV())

pkg/sql/distsql_plan_stats.go

Lines changed: 36 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -137,14 +137,10 @@ func computeNumberSamples(ctx context.Context, numRows uint64, st *cluster.Setti
137137
minSampleSize = minAutoHistogramSamples.Default()
138138
}
139139

140-
numSamples := math.Max(
141-
math.Min(
142-
582.0*math.Pow(float64(numRows), 0.29),
143-
float64(maxSampleSize),
144-
),
140+
return uint32(max(
141+
min(582.0*math.Pow(float64(numRows), 0.29), float64(maxSampleSize)),
145142
float64(minSampleSize),
146-
)
147-
return uint32(numSamples)
143+
))
148144
}
149145

150146
func (dsp *DistSQLPlanner) createAndAttachSamplers(
@@ -167,7 +163,6 @@ func (dsp *DistSQLPlanner) createAndAttachSamplers(
167163
if autoStatsFractionStaleRowsForTable, ok := desc.AutoStatsFractionStaleRows(); ok {
168164
overhead = autoStatsFractionStaleRowsForTable
169165
}
170-
// Convert to a signed integer first to make the linter happy.
171166
if details.UsingExtremes {
172167
rowsExpected = uint64(int64(
173168
// The total expected number of rows is the estimated number of stale
@@ -188,23 +183,19 @@ func (dsp *DistSQLPlanner) createAndAttachSamplers(
188183
sampler := &execinfrapb.SamplerSpec{
189184
Sketches: sketchSpec,
190185
InvertedSketches: invSketchSpec,
186+
MaxFractionIdle: details.MaxFractionIdle,
191187
}
192-
sampler.MaxFractionIdle = details.MaxFractionIdle
193-
// For partial statistics this loop should only iterate once
194-
// since we only support one reqStat at a time.
188+
// For partial statistics this loop should only iterate once since we only
189+
// support one reqStat at a time.
195190
for _, s := range reqStats {
196191
if s.histogram {
197192
var histogramSamplesCount uint32
198193
if tableSampleCount, ok := desc.HistogramSamplesCount(); ok {
199194
histogramSamplesCount = tableSampleCount
200-
} else if clusterSampleCount := histogramSamples.Get(&dsp.st.SV); clusterSampleCount != histogramSamples.Default() {
195+
} else if clusterSampleCount := histogramSamples.Get(&dsp.st.SV); clusterSampleCount != 0 {
201196
histogramSamplesCount = uint32(clusterSampleCount)
202197
} else {
203-
histogramSamplesCount = computeNumberSamples(
204-
ctx,
205-
rowsExpected,
206-
dsp.st,
207-
)
198+
histogramSamplesCount = computeNumberSamples(ctx, rowsExpected, dsp.st)
208199
log.Dev.Infof(ctx, "using computed sample size of %d for histogram construction", histogramSamplesCount)
209200
}
210201
sampler.SampleSize = histogramSamplesCount
@@ -219,7 +210,7 @@ func (dsp *DistSQLPlanner) createAndAttachSamplers(
219210

220211
// The sampler outputs the original columns plus a rank column, five
221212
// sketch columns, and two inverted histogram columns.
222-
outTypes := make([]*types.T, 0, len(p.GetResultTypes())+5)
213+
outTypes := make([]*types.T, 0, len(p.GetResultTypes())+8)
223214
outTypes = append(outTypes, p.GetResultTypes()...)
224215
// An INT column for the rank of each row.
225216
outTypes = append(outTypes, types.Int)
@@ -292,18 +283,15 @@ func (dsp *DistSQLPlanner) createPartialStatsPlan(
292283
// so we only support one requested stat at a time here.
293284
if len(reqStats) > 1 {
294285
return nil, unimplemented.NewWithIssue(
295-
128904,
296-
"cannot process multiple partial statistics requests at once",
286+
128904, "cannot process multiple partial statistics requests at once",
297287
)
298288
}
299289

300290
reqStat := reqStats[0]
301-
302291
if len(reqStat.columns) > 1 {
303-
// TODO (faizaanmadhani): Add support for creating multi-column stats
292+
// TODO(#94076): add support for creating multi-column stats.
304293
return nil, pgerror.Newf(pgcode.FeatureNotSupported, "multi-column partial statistics are not currently supported")
305294
}
306-
307295
if !reqStat.histogram {
308296
return nil, pgerror.Newf(pgcode.FeatureNotSupported, "partial statistics without histograms are not supported")
309297
}
@@ -324,9 +312,9 @@ func (dsp *DistSQLPlanner) createPartialStatsPlan(
324312
return nil, err
325313
}
326314

327-
// Calculate the column we need to scan
328-
// TODO (faizaanmadhani): Iterate through all columns in a requested stat when
329-
// when we add support for multi-column statistics.
315+
// Calculate the column we need to scan.
316+
// TODO(#94076): iterate through all columns in a requested stat when we add
317+
// support for multi-column statistics.
330318
var colCfg scanColumnsConfig
331319
colCfg.wantedColumns = append(colCfg.wantedColumns, column.GetID())
332320

@@ -341,19 +329,18 @@ func (dsp *DistSQLPlanner) createPartialStatsPlan(
341329
if err != nil {
342330
return nil, err
343331
}
344-
// Map the ColumnIDs to their ordinals in scan.cols
345-
// This loop should only iterate once, since we only
346-
// handle single column partial statistics.
347-
// TODO(faizaanmadhani): Add support for multi-column partial stats next
332+
// Map the ColumnIDs to their ordinals in scan.cols. This loop should only
333+
// iterate once, since we only handle single column partial statistics.
334+
// TODO(#94076): add support for creating multi-column stats.
348335
var colIdxMap catalog.TableColMap
349336
for i, c := range scan.catalogCols {
350337
colIdxMap.Set(c.GetID(), i)
351338
}
352339

353340
var stat *stats.TableStatistic
354341
// Find the statistic from the newest table statistic for our column that is
355-
// not partial and not forecasted. The first one we find will be the latest
356-
// due to the newest to oldest ordering property of the cache.
342+
// not partial, not merged, and not forecasted. The first one we find will
343+
// be the latest due to the newest to oldest ordering property of the cache.
357344
for _, t := range tableStats {
358345
if len(t.ColumnIDs) == 1 && column.GetID() == t.ColumnIDs[0] &&
359346
!t.IsPartial() && !t.IsMerged() && !t.IsForecast() {
@@ -380,7 +367,8 @@ func (dsp *DistSQLPlanner) createPartialStatsPlan(
380367
return nil, pgerror.Newf(
381368
pgcode.ObjectNotInPrerequisiteState,
382369
"column %s does not have a prior statistic",
383-
column.GetName())
370+
column.GetName(),
371+
)
384372
}
385373
if len(stat.Histogram) == 1 && stat.Histogram[0].UpperBound == tree.DNull {
386374
return nil, pgerror.Newf(
@@ -398,8 +386,9 @@ func (dsp *DistSQLPlanner) createPartialStatsPlan(
398386
planCtx.EvalContext(), planCtx.ExtendedEvalCtx.Codec, desc, scan.index,
399387
)
400388

401-
lowerBound, upperBound, err := bounds.GetUsingExtremesBounds(ctx,
402-
planCtx.EvalContext(), stat.Histogram)
389+
lowerBound, upperBound, err := bounds.GetUsingExtremesBounds(
390+
ctx, planCtx.EvalContext(), stat.Histogram,
391+
)
403392
if err != nil {
404393
return nil, err
405394
}
@@ -413,13 +402,13 @@ func (dsp *DistSQLPlanner) createPartialStatsPlan(
413402
}
414403
prevLowerBound = lowerBound
415404

416-
extremesSpans, err := bounds.ConstructUsingExtremesSpans(lowerBound,
417-
upperBound, scan.index)
405+
extremesSpans, err := bounds.ConstructUsingExtremesSpans(
406+
lowerBound, upperBound, scan.index,
407+
)
418408
if err != nil {
419409
return nil, err
420410
}
421411
predicate = bounds.ConstructUsingExtremesPredicate(lowerBound, upperBound, column.GetName())
422-
// Get roachpb.Spans from constraint.Spans
423412
scan.spans, err = sb.SpansFromConstraintSpan(&extremesSpans, span.NoopSplitter())
424413
if err != nil {
425414
return nil, err
@@ -456,9 +445,9 @@ func (dsp *DistSQLPlanner) createPartialStatsPlan(
456445
spec.FullStatisticID = stat.StatisticID
457446
}
458447

459-
// For now, this loop should iterate only once, as we only
460-
// handle single-column partial statistics.
461-
// TODO(faizaanmadhani): Add support for multi-column partial stats next
448+
// For now, this loop should iterate only once, as we only handle
449+
// single-column partial statistics.
450+
// TODO(#94076): add support for creating multi-column stats.
462451
for i, colID := range reqStat.columns {
463452
colIdx, ok := colIdxMap.Get(colID)
464453
if !ok {
@@ -497,16 +486,9 @@ func (dsp *DistSQLPlanner) createPartialStatsPlan(
497486
sketchSpec = append(sketchSpec, spec)
498487
}
499488
return dsp.createAndAttachSamplers(
500-
ctx,
501-
p,
502-
desc,
503-
tableStats,
504-
details,
505-
sampledColumnIDs,
506-
jobID,
507-
reqStats,
508-
sketchSpec, invSketchSpec,
509-
numIndexes, curIndex), nil
489+
ctx, p, desc, tableStats, details, sampledColumnIDs, jobID, reqStats,
490+
sketchSpec, invSketchSpec, numIndexes, curIndex,
491+
), nil
510492
}
511493

512494
func (dsp *DistSQLPlanner) createStatsPlan(
@@ -748,16 +730,9 @@ func (dsp *DistSQLPlanner) createStatsPlan(
748730
}
749731

750732
return dsp.createAndAttachSamplers(
751-
ctx,
752-
p,
753-
desc,
754-
tableStats,
755-
details,
756-
sampledColumnIDs,
757-
jobID,
758-
reqStats,
759-
sketchSpecs, invSketchSpecs,
760-
numIndexes, curIndex), nil
733+
ctx, p, desc, tableStats, details, sampledColumnIDs, jobID, reqStats,
734+
sketchSpecs, invSketchSpecs, numIndexes, curIndex,
735+
), nil
761736
}
762737

763738
// createPlanForCreateStats creates the DistSQL plan to perform the table stats

pkg/sql/logictest/testdata/logic_test/constrained_stats

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,6 @@
22

33
# Tests for creating partial table statistics with a WHERE clause.
44

5-
statement ok
6-
SET CLUSTER SETTING sql.stats.automatic_collection.enabled = false
7-
85
statement ok
96
CREATE TABLE products (
107
id INT PRIMARY KEY,

pkg/sql/logictest/testdata/logic_test/distsql_automatic_partial_stats

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,6 @@
22

33
# Test a simple update and insert case for partial statistics
44

5-
# Disable automatic stats
6-
statement ok
7-
SET CLUSTER SETTING sql.stats.automatic_collection.enabled = false
8-
95
statement ok
106
SET CLUSTER SETTING sql.stats.automatic_partial_collection.min_stale_rows = 5
117

pkg/sql/opt/optbuilder/misc_statements.go

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -178,11 +178,9 @@ func (b *Builder) buildCreateStatistics(n *tree.CreateStats, inScope *scope) (ou
178178
tabMeta := b.addTable(t, &tn)
179179
tabID = tabMeta.MetaID
180180
} else if _, ok = ds.(cat.View); ok {
181-
panic(pgerror.New(
182-
pgcode.WrongObjectType, "cannot create statistics on views"))
181+
panic(pgerror.New(pgcode.WrongObjectType, "cannot create statistics on views"))
183182
} else {
184-
panic(pgerror.Newf(pgcode.WrongObjectType,
185-
"cannot create statistics on %T", ds))
183+
panic(pgerror.Newf(pgcode.WrongObjectType, "cannot create statistics on %T", ds))
186184
}
187185

188186
indexOrd := cat.PrimaryIndex

pkg/sql/stats/forecast.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -292,7 +292,7 @@ func forecastColumnStatistics(
292292
forecast = &TableStatistic{
293293
TableStatisticProto: TableStatisticProto{
294294
TableID: tableID,
295-
StatisticID: 0, // TODO(michae2): Add support for SHOW HISTOGRAM.
295+
StatisticID: 0, // TODO(#86358): add support for SHOW HISTOGRAM.
296296
Name: jobspb.ForecastStatsName,
297297
ColumnIDs: columnIDs,
298298
CreatedAt: at,

pkg/sql/stats/merge.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -259,7 +259,7 @@ func mergePartialStatistic(
259259
mergedTableStatistic := &TableStatistic{
260260
TableStatisticProto: TableStatisticProto{
261261
TableID: fullStat.TableID,
262-
StatisticID: 0, // TODO (faizaanmadhani): Add support for SHOW HISTOGRAM.
262+
StatisticID: 0, // TODO(#86358): add support for SHOW HISTOGRAM.
263263
Name: jobspb.MergedStatsName,
264264
ColumnIDs: fullStat.ColumnIDs,
265265
CreatedAt: partialStat.CreatedAt,

0 commit comments

Comments
 (0)