1- # Yaml for creating and configuring the drt-scale cluster. This also configures Datadog.
2- # Build the roachprod and roachtest binaries (using --cross) before running this script
1+ # YAML for creating and configuring the drt-scale cluster. This also configures Datadog.
2+ # Build the drtprod and roachtest binaries (using --cross=linux) before running this script
3+ #
4+ # Planned Execution Date: 2025-12
35environment :
46 ROACHPROD_GCE_DEFAULT_SERVICE_ACCOUNT : 622274581499-compute@developer.gserviceaccount.com
57 ROACHPROD_DNS : drt.crdb.io
@@ -9,20 +11,28 @@ environment:
911 CLUSTER : drt-scale-300
1012 WORKLOAD_CLUSTER : workload-scale-300
1113 CLUSTER_NODES : 300
12- TOTAL_PARTITIONS : 15
14+ RACKS : 300
15+ NODES_PER_ZONE : 100
16+ TOTAL_PARTITIONS : 300
17+ PARTITION_TYPE : partitions
1318 WORKLOAD_NODES : 15
19+ VERSION : v25.4.1 # TBD - update once a custom binary is available (the stage commands for both $CLUSTER and $WORKLOAD_CLUSTER must be updated too)
20+ WAREHOUSES : 4000000
1421
1522dependent_file_locations :
23+ - pkg/cmd/drtprod/scripts/setup_dmsetup_disk_staller
1624 - pkg/cmd/drtprod/scripts/setup_datadog_cluster
1725 - pkg/cmd/drtprod/scripts/setup_datadog_workload
1826 - pkg/cmd/drtprod/scripts/tpcc_init.sh
1927 - pkg/cmd/drtprod/scripts/generate_tpcc_run.sh
28+ - pkg/cmd/drtprod/scripts/populate_workload_keys.sh
2029 - artifacts/roachtest
2130 - artifacts/drtprod
2231
2332targets :
2433 # crdb cluster specs
25- - target_name : $CLUSTER
34+ - target_name : $CLUSTER cluster initialisation
35+ notify_progress : true
2636 steps :
2737 - command : create
2838 args :
@@ -31,7 +41,7 @@ targets:
3141 clouds : gce
3242 gce-managed : true
3343 gce-enable-multiple-stores : true
34- gce-zones : " us-central1-a:30 ,us-central1-b:30 ,us-central1-c:30 "
44+ gce-zones : " us-central1-a:$NODES_PER_ZONE ,us-central1-b:$NODES_PER_ZONE ,us-central1-c:$NODES_PER_ZONE "
3545 nodes : $CLUSTER_NODES
3646 gce-machine-type : n2-standard-16
3747 local-ssd : false
@@ -43,36 +53,38 @@ targets:
4353 lifetime : 8760h
4454 gce-image : " ubuntu-2204-jammy-v20250112"
4555 - command : sync
56+ skip_notification : true
4657 flags :
4758 clouds : gce
59+ - script : " pkg/cmd/drtprod/scripts/setup_dmsetup_disk_staller"
60+ skip_notification : true
4861 - command : stage
62+ skip_notification : true
4963 args :
5064 - $CLUSTER
5165 - release
52- - v25.2.0-rc.1 # for libgeos
53- - command : stage
54- args :
55- - $CLUSTER
56- - cockroach
57- - release-25.2.1-rc
66+ - $VERSION
5867 - script : " pkg/cmd/drtprod/scripts/setup_datadog_cluster"
68+ skip_notification : true
5969 - command : start
6070 args :
6171 - $CLUSTER
6272 - " --binary"
6373 - " ./cockroach"
74+ - " --env" # from defaults
75+ - " COCKROACH_TESTING_FORCE_RELEASE_BRANCH=true"
76+ - " --env" # from defaults
77+ - " COCKROACH_INTERNAL_DISABLE_METAMORPHIC_TESTING=true"
78+ - " --env" # for MMA test case scenario
79+ - " COCKROACH_ALLOW_MMA=true"
6480 flags :
6581 # TODO: add a flag to set the provisioned throughput on each store according to its cloud provider's limits
6682 enable-fluent-sink : true
6783 store-count : 2
6884 args : --wal-failover=among-stores
6985 restart : false
7086 sql-port : 26257
71- - command : run
72- args :
73- - $CLUSTER
74- - --
75- - " sudo systemctl unmask cron.service ; sudo systemctl enable cron.service ; echo \" crontab -l ; echo '@reboot sleep 100 && ~/cockroach.sh' | crontab -\" > t.sh ; sh t.sh ; rm t.sh"
87+ racks : $RACKS
7688 - command : sql
7789 args :
7890 - $CLUSTER:1
@@ -104,7 +116,8 @@ targets:
104116 - command : stage
105117 args :
106118 - $WORKLOAD_CLUSTER
107- - cockroach
119+ - release
120+ - $VERSION
108121 - command : put
109122 args :
110123 - $WORKLOAD_CLUSTER
@@ -116,42 +129,42 @@ targets:
116129 - artifacts/drtprod
117130 - script : " pkg/cmd/drtprod/scripts/setup_datadog_workload"
118131 - target_name : post_tasks
132+ notify_progress : true
119133 dependent_targets :
120- - $CLUSTER
134+ - $CLUSTER cluster initialisation
121135 - $WORKLOAD_CLUSTER
122136 steps :
123137 - script : rm
138+ skip_notification : true
124139 args :
125140 - -rf
126141 - certs-$CLUSTER
127- - command : get
142+ - command : fetch-certs
143+ skip_notification : true
128144 args :
129145 - $CLUSTER:1
130- - certs
131146 - certs-$CLUSTER
132147 - command : put
148+ skip_notification : true
133149 args :
134150 - $WORKLOAD_CLUSTER
135151 - certs-$CLUSTER
136152 - certs
137- - command : ssh
138- args :
139- - $WORKLOAD_CLUSTER
140- - --
141- - chmod
142- - 600
143- - ' ./certs/*'
144153 - script : " pkg/cmd/drtprod/scripts/tpcc_init.sh"
145154 args :
146155 - cct_tpcc
147156 - false
148157 flags :
149- warehouses : 4000000
158+ partitions : $TOTAL_PARTITIONS
159+ replicate-static-columns : true
160+ partition-strategy : leases
161+ warehouses : $WAREHOUSES
150162 db : cct_tpcc
151163 - script : pkg/cmd/drtprod/scripts/populate_workload_keys.sh
152164 - target_name : tpcc_run
165+ notify_progress : true
153166 dependent_targets :
154- - $CLUSTER
167+ - $CLUSTER cluster initialisation
155168 - $WORKLOAD_CLUSTER
156169 steps :
157170 - script : " pkg/cmd/drtprod/scripts/generate_tpcc_run.sh"
@@ -160,12 +173,11 @@ targets:
160173 - false
161174 flags :
162175 db : cct_tpcc
163- warehouses : 4000000
164- active-warehouses : 500000
165- active-workers : 2000
166- conns : 2000
167- max-rate : 2500
168- workers : 500000
176+ warehouses : $WAREHOUSES
177+ active-warehouses : 266666
178+ workers : 266666
179+ conns : 1000
180+ active-workers : 1000
169181 duration : 12h
170182 ramp : 1h
171183 wait : 0
0 commit comments