Skip to content

Commit 1d9c7a2

Browse files
committed
changefeedccl: add static labels to changefeed stage metrics
This commit adds static labels to changefeed stage metrics.

Resolves: #156290
Release note: None
1 parent 6d96fe6 commit 1d9c7a2

File tree

2 files changed

+48
-17
lines changed

2 files changed

+48
-17
lines changed

docs/generated/metrics/metrics.yaml

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1817,6 +1817,7 @@ layers:
18171817
derivative: NON_NEGATIVE_DERIVATIVE
18181818
- name: changefeed.stage.checkpoint_job_progress.latency
18191819
exported_name: changefeed_stage_checkpoint_job_progress_latency
1820+
labeled_name: 'changefeed.stage.latency{name: checkpoint_job_progress}'
18201821
description: 'Latency of the changefeed stage: checkpointing job progress'
18211822
y_axis_label: Latency
18221823
type: HISTOGRAM
@@ -1825,6 +1826,7 @@ layers:
18251826
derivative: NONE
18261827
- name: changefeed.stage.downstream_client_send.latency
18271828
exported_name: changefeed_stage_downstream_client_send_latency
1829+
labeled_name: 'changefeed.stage.latency{name: downstream_client_send}'
18281830
description: 'Latency of the changefeed stage: flushing messages from the sink''s client to its downstream. This includes sends that failed for most but not all sinks.'
18291831
y_axis_label: Latency
18301832
type: HISTOGRAM
@@ -1833,6 +1835,7 @@ layers:
18331835
derivative: NONE
18341836
- name: changefeed.stage.emit_row.latency
18351837
exported_name: changefeed_stage_emit_row_latency
1838+
labeled_name: 'changefeed.stage.latency{name: emit_row}'
18361839
description: 'Latency of the changefeed stage: emitting row to sink'
18371840
y_axis_label: Latency
18381841
type: HISTOGRAM
@@ -1841,6 +1844,7 @@ layers:
18411844
derivative: NONE
18421845
- name: changefeed.stage.encode.latency
18431846
exported_name: changefeed_stage_encode_latency
1847+
labeled_name: 'changefeed.stage.latency{name: encode}'
18441848
description: 'Latency of the changefeed stage: encoding data'
18451849
y_axis_label: Latency
18461850
type: HISTOGRAM
@@ -1849,6 +1853,7 @@ layers:
18491853
derivative: NONE
18501854
- name: changefeed.stage.frontier_persistence.latency
18511855
exported_name: changefeed_stage_frontier_persistence_latency
1856+
labeled_name: 'changefeed.stage.latency{name: frontier_persistence}'
18521857
description: 'Latency of the changefeed stage: persisting frontier to job info'
18531858
y_axis_label: Latency
18541859
type: HISTOGRAM
@@ -1857,6 +1862,7 @@ layers:
18571862
derivative: NONE
18581863
- name: changefeed.stage.kv_feed_buffer.latency
18591864
exported_name: changefeed_stage_kv_feed_buffer_latency
1865+
labeled_name: 'changefeed.stage.latency{name: kv_feed_buffer}'
18601866
description: 'Latency of the changefeed stage: waiting to buffer kv events'
18611867
y_axis_label: Latency
18621868
type: HISTOGRAM
@@ -1865,6 +1871,7 @@ layers:
18651871
derivative: NONE
18661872
- name: changefeed.stage.kv_feed_wait_for_table_event.latency
18671873
exported_name: changefeed_stage_kv_feed_wait_for_table_event_latency
1874+
labeled_name: 'changefeed.stage.latency{name: kv_feed_wait_for_table_event}'
18681875
description: 'Latency of the changefeed stage: waiting for a table schema event to join to the kv event'
18691876
y_axis_label: Latency
18701877
type: HISTOGRAM
@@ -1873,6 +1880,7 @@ layers:
18731880
derivative: NONE
18741881
- name: changefeed.stage.pts.create.latency
18751882
exported_name: changefeed_stage_pts_create_latency
1883+
labeled_name: 'changefeed.stage.pts.latency{name: create}'
18761884
description: 'Latency of the changefeed stage: Time spent creating protected timestamp records on changefeed creation'
18771885
y_axis_label: Latency
18781886
type: HISTOGRAM
@@ -1881,6 +1889,7 @@ layers:
18811889
derivative: NONE
18821890
- name: changefeed.stage.pts.manage.latency
18831891
exported_name: changefeed_stage_pts_manage_latency
1892+
labeled_name: 'changefeed.stage.pts.latency{name: manage}'
18841893
description: 'Latency of the changefeed stage: Time spent successfully managing protected timestamp records on highwater advance, including time spent creating new protected timestamps when needed'
18851894
y_axis_label: Latency
18861895
type: HISTOGRAM
@@ -1889,6 +1898,7 @@ layers:
18891898
derivative: NONE
18901899
- name: changefeed.stage.pts.manage_error.latency
18911900
exported_name: changefeed_stage_pts_manage_error_latency
1901+
labeled_name: 'changefeed.stage.pts.latency{name: manage_error}'
18921902
description: 'Latency of the changefeed stage: Time spent managing protected timestamp when we eventually error'
18931903
y_axis_label: Latency
18941904
type: HISTOGRAM
@@ -1897,6 +1907,7 @@ layers:
18971907
derivative: NONE
18981908
- name: changefeed.stage.rangefeed_buffer_checkpoint.latency
18991909
exported_name: changefeed_stage_rangefeed_buffer_checkpoint_latency
1910+
labeled_name: 'changefeed.stage.latency{name: rangefeed_buffer_checkpoint}'
19001911
description: 'Latency of the changefeed stage: buffering rangefeed checkpoint events'
19011912
y_axis_label: Latency
19021913
type: HISTOGRAM
@@ -1905,6 +1916,7 @@ layers:
19051916
derivative: NONE
19061917
- name: changefeed.stage.rangefeed_buffer_value.latency
19071918
exported_name: changefeed_stage_rangefeed_buffer_value_latency
1919+
labeled_name: 'changefeed.stage.latency{name: rangefeed_buffer_value}'
19081920
description: 'Latency of the changefeed stage: buffering rangefeed value events'
19091921
y_axis_label: Latency
19101922
type: HISTOGRAM

pkg/ccl/changefeedccl/timers/timers.go

Lines changed: 36 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,7 @@
66
package timers
77

88
import (
9+
"fmt"
910
"time"
1011

1112
"github.com/cockroachdb/cockroach/pkg/util/metric"
@@ -34,34 +35,52 @@ func (*Timers) MetricStruct() {}
3435
var _ metric.Struct = &Timers{}
3536

3637
func New(histogramWindow time.Duration) *Timers {
37-
histogramOptsFor := func(name, desc string) metric.HistogramOptions {
38+
const (
39+
stagePrefix = "changefeed.stage"
40+
latencySuffix = "latency"
41+
ptsSubCategory = "pts"
42+
)
43+
44+
histogramOptsFor := func(name, labeledName, labelName, desc string) metric.HistogramOptions {
3845
return metric.HistogramOptions{
3946
Metadata: metric.Metadata{
40-
Name: name,
41-
Help: desc,
42-
Unit: metric.Unit_NANOSECONDS,
43-
Measurement: "Latency",
47+
Name: name,
48+
Help: desc,
49+
Unit: metric.Unit_NANOSECONDS,
50+
Measurement: "Latency",
51+
LabeledName: labeledName,
52+
StaticLabels: metric.MakeLabelPairs(metric.LabelName, labelName),
4453
},
4554
Duration: histogramWindow,
4655
Buckets: prometheus.ExponentialBucketsRange(float64(1*time.Microsecond), float64(1*time.Hour), 60),
4756
Mode: metric.HistogramModePrometheus,
4857
}
4958
}
5059

60+
stageOpts := func(name, labelName, desc string) metric.HistogramOptions {
61+
labeledName := fmt.Sprintf("%s.%s", stagePrefix, latencySuffix)
62+
return histogramOptsFor(name, labeledName, labelName, desc)
63+
}
64+
65+
ptsStageOpts := func(name, labelName, desc string) metric.HistogramOptions {
66+
labeledName := fmt.Sprintf("%s.%s.%s", stagePrefix, ptsSubCategory, latencySuffix)
67+
return histogramOptsFor(name, labeledName, labelName, desc)
68+
}
69+
5170
b := aggmetric.MakeBuilder("scope")
5271
return &Timers{
53-
CheckpointJobProgress: b.Histogram(histogramOptsFor("changefeed.stage.checkpoint_job_progress.latency", "Latency of the changefeed stage: checkpointing job progress")),
54-
FrontierPersistence: b.Histogram(histogramOptsFor("changefeed.stage.frontier_persistence.latency", "Latency of the changefeed stage: persisting frontier to job info")),
55-
Encode: b.Histogram(histogramOptsFor("changefeed.stage.encode.latency", "Latency of the changefeed stage: encoding data")),
56-
EmitRow: b.Histogram(histogramOptsFor("changefeed.stage.emit_row.latency", "Latency of the changefeed stage: emitting row to sink")),
57-
DownstreamClientSend: b.Histogram(histogramOptsFor("changefeed.stage.downstream_client_send.latency", "Latency of the changefeed stage: flushing messages from the sink's client to its downstream. This includes sends that failed for most but not all sinks.")),
58-
KVFeedWaitForTableEvent: b.Histogram(histogramOptsFor("changefeed.stage.kv_feed_wait_for_table_event.latency", "Latency of the changefeed stage: waiting for a table schema event to join to the kv event")),
59-
KVFeedBuffer: b.Histogram(histogramOptsFor("changefeed.stage.kv_feed_buffer.latency", "Latency of the changefeed stage: waiting to buffer kv events")),
60-
RangefeedBufferValue: b.Histogram(histogramOptsFor("changefeed.stage.rangefeed_buffer_value.latency", "Latency of the changefeed stage: buffering rangefeed value events")),
61-
RangefeedBufferCheckpoint: b.Histogram(histogramOptsFor("changefeed.stage.rangefeed_buffer_checkpoint.latency", "Latency of the changefeed stage: buffering rangefeed checkpoint events")),
62-
PTSManage: b.Histogram(histogramOptsFor("changefeed.stage.pts.manage.latency", "Latency of the changefeed stage: Time spent successfully managing protected timestamp records on highwater advance, including time spent creating new protected timestamps when needed")),
63-
PTSManageError: b.Histogram(histogramOptsFor("changefeed.stage.pts.manage_error.latency", "Latency of the changefeed stage: Time spent managing protected timestamp when we eventually error")),
64-
PTSCreate: b.Histogram(histogramOptsFor("changefeed.stage.pts.create.latency", "Latency of the changefeed stage: Time spent creating protected timestamp records on changefeed creation")),
72+
CheckpointJobProgress: b.Histogram(stageOpts("changefeed.stage.checkpoint_job_progress.latency", "checkpoint_job_progress", "Latency of the changefeed stage: checkpointing job progress")),
73+
FrontierPersistence: b.Histogram(stageOpts("changefeed.stage.frontier_persistence.latency", "frontier_persistence", "Latency of the changefeed stage: persisting frontier to job info")),
74+
Encode: b.Histogram(stageOpts("changefeed.stage.encode.latency", "encode", "Latency of the changefeed stage: encoding data")),
75+
EmitRow: b.Histogram(stageOpts("changefeed.stage.emit_row.latency", "emit_row", "Latency of the changefeed stage: emitting row to sink")),
76+
DownstreamClientSend: b.Histogram(stageOpts("changefeed.stage.downstream_client_send.latency", "downstream_client_send", "Latency of the changefeed stage: flushing messages from the sink's client to its downstream. This includes sends that failed for most but not all sinks.")),
77+
KVFeedWaitForTableEvent: b.Histogram(stageOpts("changefeed.stage.kv_feed_wait_for_table_event.latency", "kv_feed_wait_for_table_event", "Latency of the changefeed stage: waiting for a table schema event to join to the kv event")),
78+
KVFeedBuffer: b.Histogram(stageOpts("changefeed.stage.kv_feed_buffer.latency", "kv_feed_buffer", "Latency of the changefeed stage: waiting to buffer kv events")),
79+
RangefeedBufferValue: b.Histogram(stageOpts("changefeed.stage.rangefeed_buffer_value.latency", "rangefeed_buffer_value", "Latency of the changefeed stage: buffering rangefeed value events")),
80+
RangefeedBufferCheckpoint: b.Histogram(stageOpts("changefeed.stage.rangefeed_buffer_checkpoint.latency", "rangefeed_buffer_checkpoint", "Latency of the changefeed stage: buffering rangefeed checkpoint events")),
81+
PTSManage: b.Histogram(ptsStageOpts("changefeed.stage.pts.manage.latency", "manage", "Latency of the changefeed stage: Time spent successfully managing protected timestamp records on highwater advance, including time spent creating new protected timestamps when needed")),
82+
PTSManageError: b.Histogram(ptsStageOpts("changefeed.stage.pts.manage_error.latency", "manage_error", "Latency of the changefeed stage: Time spent managing protected timestamp when we eventually error")),
83+
PTSCreate: b.Histogram(ptsStageOpts("changefeed.stage.pts.create.latency", "create", "Latency of the changefeed stage: Time spent creating protected timestamp records on changefeed creation")),
6584
}
6685
}
6786

0 commit comments

Comments
 (0)