Skip to content

Commit 29d8485

Browse files
craig[bot] and KeithCh committed
Merge #156901
156901: changefeedccl: add static labels to changefeed stage metrics r=asg0451,log-head a=KeithCh

This commit adds static labels to changefeed stage metrics.

Resolves: #156290

Release note: None

Co-authored-by: Keith Chow <keith.chow@cockroachlabs.com>
2 parents 222df72 + 1d9c7a2 commit 29d8485

File tree

2 files changed: 48 additions and 17 deletions.

docs/generated/metrics/metrics.yaml

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1820,6 +1820,7 @@ layers:
18201820
derivative: NON_NEGATIVE_DERIVATIVE
18211821
- name: changefeed.stage.checkpoint_job_progress.latency
18221822
exported_name: changefeed_stage_checkpoint_job_progress_latency
1823+
labeled_name: 'changefeed.stage.latency{name: checkpoint_job_progress}'
18231824
description: 'Latency of the changefeed stage: checkpointing job progress'
18241825
y_axis_label: Latency
18251826
type: HISTOGRAM
@@ -1828,6 +1829,7 @@ layers:
18281829
derivative: NONE
18291830
- name: changefeed.stage.downstream_client_send.latency
18301831
exported_name: changefeed_stage_downstream_client_send_latency
1832+
labeled_name: 'changefeed.stage.latency{name: downstream_client_send}'
18311833
description: 'Latency of the changefeed stage: flushing messages from the sink''s client to its downstream. This includes sends that failed for most but not all sinks.'
18321834
y_axis_label: Latency
18331835
type: HISTOGRAM
@@ -1836,6 +1838,7 @@ layers:
18361838
derivative: NONE
18371839
- name: changefeed.stage.emit_row.latency
18381840
exported_name: changefeed_stage_emit_row_latency
1841+
labeled_name: 'changefeed.stage.latency{name: emit_row}'
18391842
description: 'Latency of the changefeed stage: emitting row to sink'
18401843
y_axis_label: Latency
18411844
type: HISTOGRAM
@@ -1844,6 +1847,7 @@ layers:
18441847
derivative: NONE
18451848
- name: changefeed.stage.encode.latency
18461849
exported_name: changefeed_stage_encode_latency
1850+
labeled_name: 'changefeed.stage.latency{name: encode}'
18471851
description: 'Latency of the changefeed stage: encoding data'
18481852
y_axis_label: Latency
18491853
type: HISTOGRAM
@@ -1852,6 +1856,7 @@ layers:
18521856
derivative: NONE
18531857
- name: changefeed.stage.frontier_persistence.latency
18541858
exported_name: changefeed_stage_frontier_persistence_latency
1859+
labeled_name: 'changefeed.stage.latency{name: frontier_persistence}'
18551860
description: 'Latency of the changefeed stage: persisting frontier to job info'
18561861
y_axis_label: Latency
18571862
type: HISTOGRAM
@@ -1860,6 +1865,7 @@ layers:
18601865
derivative: NONE
18611866
- name: changefeed.stage.kv_feed_buffer.latency
18621867
exported_name: changefeed_stage_kv_feed_buffer_latency
1868+
labeled_name: 'changefeed.stage.latency{name: kv_feed_buffer}'
18631869
description: 'Latency of the changefeed stage: waiting to buffer kv events'
18641870
y_axis_label: Latency
18651871
type: HISTOGRAM
@@ -1868,6 +1874,7 @@ layers:
18681874
derivative: NONE
18691875
- name: changefeed.stage.kv_feed_wait_for_table_event.latency
18701876
exported_name: changefeed_stage_kv_feed_wait_for_table_event_latency
1877+
labeled_name: 'changefeed.stage.latency{name: kv_feed_wait_for_table_event}'
18711878
description: 'Latency of the changefeed stage: waiting for a table schema event to join to the kv event'
18721879
y_axis_label: Latency
18731880
type: HISTOGRAM
@@ -1876,6 +1883,7 @@ layers:
18761883
derivative: NONE
18771884
- name: changefeed.stage.pts.create.latency
18781885
exported_name: changefeed_stage_pts_create_latency
1886+
labeled_name: 'changefeed.stage.pts.latency{name: create}'
18791887
description: 'Latency of the changefeed stage: Time spent creating protected timestamp records on changefeed creation'
18801888
y_axis_label: Latency
18811889
type: HISTOGRAM
@@ -1884,6 +1892,7 @@ layers:
18841892
derivative: NONE
18851893
- name: changefeed.stage.pts.manage.latency
18861894
exported_name: changefeed_stage_pts_manage_latency
1895+
labeled_name: 'changefeed.stage.pts.latency{name: manage}'
18871896
description: 'Latency of the changefeed stage: Time spent successfully managing protected timestamp records on highwater advance, including time spent creating new protected timestamps when needed'
18881897
y_axis_label: Latency
18891898
type: HISTOGRAM
@@ -1892,6 +1901,7 @@ layers:
18921901
derivative: NONE
18931902
- name: changefeed.stage.pts.manage_error.latency
18941903
exported_name: changefeed_stage_pts_manage_error_latency
1904+
labeled_name: 'changefeed.stage.pts.latency{name: manage_error}'
18951905
description: 'Latency of the changefeed stage: Time spent managing protected timestamp when we eventually error'
18961906
y_axis_label: Latency
18971907
type: HISTOGRAM
@@ -1900,6 +1910,7 @@ layers:
19001910
derivative: NONE
19011911
- name: changefeed.stage.rangefeed_buffer_checkpoint.latency
19021912
exported_name: changefeed_stage_rangefeed_buffer_checkpoint_latency
1913+
labeled_name: 'changefeed.stage.latency{name: rangefeed_buffer_checkpoint}'
19031914
description: 'Latency of the changefeed stage: buffering rangefeed checkpoint events'
19041915
y_axis_label: Latency
19051916
type: HISTOGRAM
@@ -1908,6 +1919,7 @@ layers:
19081919
derivative: NONE
19091920
- name: changefeed.stage.rangefeed_buffer_value.latency
19101921
exported_name: changefeed_stage_rangefeed_buffer_value_latency
1922+
labeled_name: 'changefeed.stage.latency{name: rangefeed_buffer_value}'
19111923
description: 'Latency of the changefeed stage: buffering rangefeed value events'
19121924
y_axis_label: Latency
19131925
type: HISTOGRAM

pkg/ccl/changefeedccl/timers/timers.go

Lines changed: 36 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,7 @@
66
package timers
77

88
import (
9+
"fmt"
910
"time"
1011

1112
"github.com/cockroachdb/cockroach/pkg/util/metric"
@@ -34,34 +35,52 @@ func (*Timers) MetricStruct() {}
3435
var _ metric.Struct = &Timers{}
3536

3637
func New(histogramWindow time.Duration) *Timers {
37-
histogramOptsFor := func(name, desc string) metric.HistogramOptions {
38+
const (
39+
stagePrefix = "changefeed.stage"
40+
latencySuffix = "latency"
41+
ptsSubCategory = "pts"
42+
)
43+
44+
histogramOptsFor := func(name, labeledName, labelName, desc string) metric.HistogramOptions {
3845
return metric.HistogramOptions{
3946
Metadata: metric.Metadata{
40-
Name: name,
41-
Help: desc,
42-
Unit: metric.Unit_NANOSECONDS,
43-
Measurement: "Latency",
47+
Name: name,
48+
Help: desc,
49+
Unit: metric.Unit_NANOSECONDS,
50+
Measurement: "Latency",
51+
LabeledName: labeledName,
52+
StaticLabels: metric.MakeLabelPairs(metric.LabelName, labelName),
4453
},
4554
Duration: histogramWindow,
4655
Buckets: prometheus.ExponentialBucketsRange(float64(1*time.Microsecond), float64(1*time.Hour), 60),
4756
Mode: metric.HistogramModePrometheus,
4857
}
4958
}
5059

60+
stageOpts := func(name, labelName, desc string) metric.HistogramOptions {
61+
labeledName := fmt.Sprintf("%s.%s", stagePrefix, latencySuffix)
62+
return histogramOptsFor(name, labeledName, labelName, desc)
63+
}
64+
65+
ptsStageOpts := func(name, labelName, desc string) metric.HistogramOptions {
66+
labeledName := fmt.Sprintf("%s.%s.%s", stagePrefix, ptsSubCategory, latencySuffix)
67+
return histogramOptsFor(name, labeledName, labelName, desc)
68+
}
69+
5170
b := aggmetric.MakeBuilder("scope")
5271
return &Timers{
53-
CheckpointJobProgress: b.Histogram(histogramOptsFor("changefeed.stage.checkpoint_job_progress.latency", "Latency of the changefeed stage: checkpointing job progress")),
54-
FrontierPersistence: b.Histogram(histogramOptsFor("changefeed.stage.frontier_persistence.latency", "Latency of the changefeed stage: persisting frontier to job info")),
55-
Encode: b.Histogram(histogramOptsFor("changefeed.stage.encode.latency", "Latency of the changefeed stage: encoding data")),
56-
EmitRow: b.Histogram(histogramOptsFor("changefeed.stage.emit_row.latency", "Latency of the changefeed stage: emitting row to sink")),
57-
DownstreamClientSend: b.Histogram(histogramOptsFor("changefeed.stage.downstream_client_send.latency", "Latency of the changefeed stage: flushing messages from the sink's client to its downstream. This includes sends that failed for most but not all sinks.")),
58-
KVFeedWaitForTableEvent: b.Histogram(histogramOptsFor("changefeed.stage.kv_feed_wait_for_table_event.latency", "Latency of the changefeed stage: waiting for a table schema event to join to the kv event")),
59-
KVFeedBuffer: b.Histogram(histogramOptsFor("changefeed.stage.kv_feed_buffer.latency", "Latency of the changefeed stage: waiting to buffer kv events")),
60-
RangefeedBufferValue: b.Histogram(histogramOptsFor("changefeed.stage.rangefeed_buffer_value.latency", "Latency of the changefeed stage: buffering rangefeed value events")),
61-
RangefeedBufferCheckpoint: b.Histogram(histogramOptsFor("changefeed.stage.rangefeed_buffer_checkpoint.latency", "Latency of the changefeed stage: buffering rangefeed checkpoint events")),
62-
PTSManage: b.Histogram(histogramOptsFor("changefeed.stage.pts.manage.latency", "Latency of the changefeed stage: Time spent successfully managing protected timestamp records on highwater advance, including time spent creating new protected timestamps when needed")),
63-
PTSManageError: b.Histogram(histogramOptsFor("changefeed.stage.pts.manage_error.latency", "Latency of the changefeed stage: Time spent managing protected timestamp when we eventually error")),
64-
PTSCreate: b.Histogram(histogramOptsFor("changefeed.stage.pts.create.latency", "Latency of the changefeed stage: Time spent creating protected timestamp records on changefeed creation")),
72+
CheckpointJobProgress: b.Histogram(stageOpts("changefeed.stage.checkpoint_job_progress.latency", "checkpoint_job_progress", "Latency of the changefeed stage: checkpointing job progress")),
73+
FrontierPersistence: b.Histogram(stageOpts("changefeed.stage.frontier_persistence.latency", "frontier_persistence", "Latency of the changefeed stage: persisting frontier to job info")),
74+
Encode: b.Histogram(stageOpts("changefeed.stage.encode.latency", "encode", "Latency of the changefeed stage: encoding data")),
75+
EmitRow: b.Histogram(stageOpts("changefeed.stage.emit_row.latency", "emit_row", "Latency of the changefeed stage: emitting row to sink")),
76+
DownstreamClientSend: b.Histogram(stageOpts("changefeed.stage.downstream_client_send.latency", "downstream_client_send", "Latency of the changefeed stage: flushing messages from the sink's client to its downstream. This includes sends that failed for most but not all sinks.")),
77+
KVFeedWaitForTableEvent: b.Histogram(stageOpts("changefeed.stage.kv_feed_wait_for_table_event.latency", "kv_feed_wait_for_table_event", "Latency of the changefeed stage: waiting for a table schema event to join to the kv event")),
78+
KVFeedBuffer: b.Histogram(stageOpts("changefeed.stage.kv_feed_buffer.latency", "kv_feed_buffer", "Latency of the changefeed stage: waiting to buffer kv events")),
79+
RangefeedBufferValue: b.Histogram(stageOpts("changefeed.stage.rangefeed_buffer_value.latency", "rangefeed_buffer_value", "Latency of the changefeed stage: buffering rangefeed value events")),
80+
RangefeedBufferCheckpoint: b.Histogram(stageOpts("changefeed.stage.rangefeed_buffer_checkpoint.latency", "rangefeed_buffer_checkpoint", "Latency of the changefeed stage: buffering rangefeed checkpoint events")),
81+
PTSManage: b.Histogram(ptsStageOpts("changefeed.stage.pts.manage.latency", "manage", "Latency of the changefeed stage: Time spent successfully managing protected timestamp records on highwater advance, including time spent creating new protected timestamps when needed")),
82+
PTSManageError: b.Histogram(ptsStageOpts("changefeed.stage.pts.manage_error.latency", "manage_error", "Latency of the changefeed stage: Time spent managing protected timestamp when we eventually error")),
83+
PTSCreate: b.Histogram(ptsStageOpts("changefeed.stage.pts.create.latency", "create", "Latency of the changefeed stage: Time spent creating protected timestamp records on changefeed creation")),
6584
}
6685
}
6786

0 commit comments

Comments
 (0)