diff --git a/modules/python/clusterloader2/cri/config/config.yaml b/modules/python/clusterloader2/cri/config/config.yaml index 5079b7c10a..bf1573b404 100644 --- a/modules/python/clusterloader2/cri/config/config.yaml +++ b/modules/python/clusterloader2/cri/config/config.yaml @@ -21,6 +21,7 @@ name: resource-consumer {{$registryEndpoint := DefaultParam .CL2_REGISTRY_ENDPOINT "akscritelescope.azure.io" }} {{$osType := DefaultParam .CL2_OS_TYPE "linux"}} {{$scrapeKubelets := DefaultParam .CL2_SCRAPE_KUBELETS false}} +{{$scrapeContainerd := DefaultParam .CL2_SCRAPE_CONTAINERD false}} {{$hostNetwork := DefaultParam .CL2_HOST_NETWORK "true"}} namespace: @@ -68,6 +69,13 @@ steps: action: start {{end}} + {{if $scrapeContainerd}} + - module: + path: /containerd-measurements.yaml + params: + action: start + {{end}} + {{range $j := Loop $steps}} - name: Create deployment {{$j}} phases: @@ -143,6 +151,13 @@ steps: action: gather {{end}} + {{if $scrapeContainerd}} + - module: + path: /containerd-measurements.yaml + params: + action: gather + {{end}} + {{range $j := Loop $steps}} - name: Deleting deployments {{$j}} phases: diff --git a/modules/python/clusterloader2/cri/config/containerd-measurements.yaml b/modules/python/clusterloader2/cri/config/containerd-measurements.yaml new file mode 100644 index 0000000000..c64324c525 --- /dev/null +++ b/modules/python/clusterloader2/cri/config/containerd-measurements.yaml @@ -0,0 +1,29 @@ +{{$action := .action}} # start, gather + +steps: + - name: {{$action}} Containerd Measurements + measurements: + - Identifier: ContainerdCriImagePullingThroughput + Method: GenericPrometheusQuery + Params: + action: {{$action}} + metricName: ContainerdCriImagePullingThroughput + metricVersion: v1 + unit: MB/s + queries: + # Weighted average throughput per image pull (nodes with more pulls have more weight) + - name: Avg + query: sum(rate(containerd_cri_image_pulling_throughput_sum{nodepool=~"userpool.*"}[%v])) / 
sum(rate(containerd_cri_image_pulling_throughput_count{nodepool=~"userpool.*"}[%v])) + # Unweighted average - each node contributes equally regardless of pull count + - name: AvgPerNode + query: avg(sum by (instance) (rate(containerd_cri_image_pulling_throughput_sum{nodepool=~"userpool.*"}[%v])) / sum by (instance) (rate(containerd_cri_image_pulling_throughput_count{nodepool=~"userpool.*"}[%v]))) + # Number of successful image pull observations + - name: Count + query: sum(containerd_cri_image_pulling_throughput_count{nodepool=~"userpool.*"}) + # Cluster level percentiles - throughput distribution across nodes + - name: Perc50 + query: quantile(0.5, sum by (instance) (rate(containerd_cri_image_pulling_throughput_sum{nodepool=~"userpool.*"}[%v])) / sum by (instance) (rate(containerd_cri_image_pulling_throughput_count{nodepool=~"userpool.*"}[%v]))) + - name: Perc90 + query: quantile(0.9, sum by (instance) (rate(containerd_cri_image_pulling_throughput_sum{nodepool=~"userpool.*"}[%v])) / sum by (instance) (rate(containerd_cri_image_pulling_throughput_count{nodepool=~"userpool.*"}[%v]))) + - name: Perc99 + query: quantile(0.99, sum by (instance) (rate(containerd_cri_image_pulling_throughput_sum{nodepool=~"userpool.*"}[%v])) / sum by (instance) (rate(containerd_cri_image_pulling_throughput_count{nodepool=~"userpool.*"}[%v]))) diff --git a/modules/python/clusterloader2/cri/cri.py b/modules/python/clusterloader2/cri/cri.py index 20b9c68586..e4c79f657b 100644 --- a/modules/python/clusterloader2/cri/cri.py +++ b/modules/python/clusterloader2/cri/cri.py @@ -17,7 +17,7 @@ def override_config_clusterloader2( node_count, node_per_step, max_pods, repeats, operation_timeout, load_type, scale_enabled, pod_startup_latency_threshold, provider, - registry_endpoint, os_type, scrape_kubelets, host_network, override_file): + registry_endpoint, os_type, scrape_kubelets, scrape_containerd, host_network, override_file): client = KubernetesClient(os.path.expanduser("~/.kube/config")) nodes = 
client.get_nodes(label_selector="cri-resource-consume=true") if len(nodes) == 0: @@ -91,14 +91,19 @@ def override_config_clusterloader2( file.write(f"CL2_REGISTRY_ENDPOINT: {registry_endpoint}\n") file.write(f"CL2_OS_TYPE: {os_type}\n") file.write(f"CL2_SCRAPE_KUBELETS: {str(scrape_kubelets).lower()}\n") + file.write(f"CL2_SCRAPE_CONTAINERD: {str(scrape_containerd).lower()}\n") + if scrape_containerd: + file.write("CONTAINERD_SCRAPE_INTERVAL: 15s\n") file.write(f"CL2_HOST_NETWORK: {str(host_network).lower()}\n") file.close() -def execute_clusterloader2(cl2_image, cl2_config_dir, cl2_report_dir, kubeconfig, provider, scrape_kubelets): +def execute_clusterloader2(cl2_image, cl2_config_dir, cl2_report_dir, kubeconfig, provider, scrape_kubelets, scrape_containerd): run_cl2_command(kubeconfig, cl2_image, cl2_config_dir, cl2_report_dir, provider, overrides=True, enable_prometheus=True, - tear_down_prometheus=False, scrape_kubelets=scrape_kubelets) + tear_down_prometheus=False, scrape_kubelets=scrape_kubelets, scrape_containerd=scrape_containerd) +# Note: verify_measurement only checks kubelet metrics (accessible via node proxy endpoint). +# Containerd metrics are only available via Prometheus and cannot be verified here. 
def verify_measurement(): client = KubernetesClient(os.path.expanduser("~/.kube/config")) nodes = client.get_nodes(label_selector="cri-resource-consume=true") @@ -266,6 +271,13 @@ def main(): default=False, help="Whether to scrape kubelets", ) + parser_override.add_argument( + "--scrape_containerd", + type=str2bool, + choices=[True, False], + default=False, + help="Whether to scrape containerd", + ) parser_override.add_argument( "--host_network", type=str2bool, @@ -302,6 +314,13 @@ def main(): default=False, help="Whether to scrape kubelets", ) + parser_execute.add_argument( + "--scrape_containerd", + type=str2bool, + choices=[True, False], + default=False, + help="Whether to scrape containerd", + ) # Sub-command for collect_clusterloader2 parser_collect = subparsers.add_parser( @@ -366,6 +385,7 @@ def main(): args.registry_endpoint, args.os_type, args.scrape_kubelets, + args.scrape_containerd, args.host_network, args.cl2_override_file, ) @@ -377,6 +397,7 @@ def main(): args.kubeconfig, args.provider, args.scrape_kubelets, + args.scrape_containerd, ) elif args.command == "collect": collect_clusterloader2( diff --git a/modules/python/tests/test_cri.py b/modules/python/tests/test_cri.py index 1155ca0b4a..8ea6d2649d 100644 --- a/modules/python/tests/test_cri.py +++ b/modules/python/tests/test_cri.py @@ -64,6 +64,7 @@ def test_override_config_clusterloader2(self, mock_kubernetes_client, mock_open) os_type="linux", scrape_kubelets=True, host_network=True, + scrape_containerd=False, override_file="/mock/override.yaml" ) @@ -120,6 +121,7 @@ def test_override_config_clusterloader2_host_network_false(self, mock_kubernetes os_type="linux", scrape_kubelets=False, host_network=False, + scrape_containerd=False, override_file="/mock/override.yaml" ) @@ -138,13 +140,14 @@ def test_execute_clusterloader2(self, mock_run_cl2_command): cl2_report_dir="/mock/report", kubeconfig="/mock/kubeconfig", provider="aks", - scrape_kubelets=True + scrape_kubelets=True, + scrape_containerd=False 
) # Verify the command execution mock_run_cl2_command.assert_called_once_with( "/mock/kubeconfig", "mock-image", "/mock/config", "/mock/report", "aks", - overrides=True, enable_prometheus=True, tear_down_prometheus=False, scrape_kubelets=True + overrides=True, enable_prometheus=True, tear_down_prometheus=False, scrape_kubelets=True, scrape_containerd=False ) @patch('clusterloader2.cri.cri.KubernetesClient') @@ -235,12 +238,13 @@ def test_override_command(self, mock_override): "--os_type", "linux", "--scrape_kubelets", "False", "--host_network", "False", + "--scrape_containerd", "False", "--cl2_override_file", "/tmp/override.yaml" ] with patch.object(sys, 'argv', test_args): main() mock_override.assert_called_once_with( - 5, 1, 110, 3, "2m", "cpu", True, "10s", "aws", "test registry endpoint", "linux", False, False, "/tmp/override.yaml" + 5, 1, 110, 3, "2m", "cpu", True, "10s", "aws", "test registry endpoint", "linux", False, False, False, "/tmp/override.yaml" ) @patch("clusterloader2.cri.cri.override_config_clusterloader2") @@ -265,7 +269,7 @@ def test_override_command_default_host_network(self, mock_override): with patch.object(sys, 'argv', test_args): main() mock_override.assert_called_once_with( - 5, 1, 110, 3, "2m", "cpu", True, "10s", "aws", "test registry endpoint", "linux", False, True, "/tmp/override.yaml" + 5, 1, 110, 3, "2m", "cpu", True, "10s", "aws", "test registry endpoint", "linux", False, False, True, "/tmp/override.yaml" ) @patch("clusterloader2.cri.cri.execute_clusterloader2") @@ -277,13 +281,14 @@ def test_execute_command(self, mock_execute): "--cl2_report_dir", "/reports", "--kubeconfig", "/home/user/.kube/config", "--provider", "gcp", - "--scrape_kubelets", "True" + "--scrape_kubelets", "True", + "--scrape_containerd", "False" ] with patch.object(sys, 'argv', test_args): main() mock_execute.assert_called_once_with( "gcr.io/cl2:latest", "/configs", "/reports", - "/home/user/.kube/config", "gcp", True + "/home/user/.kube/config", "gcp", True, 
False ) @patch("clusterloader2.cri.cri.collect_clusterloader2") diff --git a/pipelines/perf-eval/ACR Benchmark/image-pull-n10.yml b/pipelines/perf-eval/ACR Benchmark/image-pull-n10.yml new file mode 100644 index 0000000000..b4c97f0813 --- /dev/null +++ b/pipelines/perf-eval/ACR Benchmark/image-pull-n10.yml @@ -0,0 +1,41 @@ +trigger: none +schedules: + - cron: "0 */4 * * *" + displayName: "Every 4 Hour" + branches: + include: + - main + always: true + +variables: + SCENARIO_TYPE: perf-eval + SCENARIO_NAME: image-pull-n10 + +stages: + - stage: azure_eastus2_image_pull + dependsOn: [] + jobs: + - template: /jobs/competitive-test.yml + parameters: + cloud: azure + regions: + - eastus2 + engine: clusterloader2 + engine_input: + image: "ghcr.io/azure/clusterloader2:v20250513" + topology: cri-resource-consume + matrix: + image-pull-10pods: + node_count: 10 + max_pods: 30 + repeats: 1 + operation_timeout: 3m + load_type: memory + scrape_containerd: True + scrape_kubelets: True + kubernetes_version: "1.34" + pod_startup_latency_threshold: 20s + max_parallel: 1 + credential_type: service_connection + ssh_key_enabled: false + timeout_in_minutes: 60 diff --git a/scenarios/perf-eval/image-pull-n10/README.md b/scenarios/perf-eval/image-pull-n10/README.md new file mode 100644 index 0000000000..5b9df159e6 --- /dev/null +++ b/scenarios/perf-eval/image-pull-n10/README.md @@ -0,0 +1,81 @@ +# image-pull-n10 + +## Overview + +Measures containerd image pulling throughput (MB/s) and network plugin operation metrics using the CRI module with `scrape_containerd: True`. Uses the `cri-resource-consume` topology. 
+ +## Infrastructure + +| Component | Configuration | +|-----------|---------------| +| Cloud Provider | Azure | +| Cluster SKU | Standard | +| Network Plugin | Azure CNI Overlay | +| Default Node Pool | 3 x Standard_D4ds_v5 | +| Prometheus Pool | 1 x Standard_D4ds_v5 | +| User Pool | 10 x Standard_D4ds_v5 | + +## Test Workload + +| Component | Value | +|-----------|-------| +| Registry | Azure Container Registry (`akscritelescope.azurecr.io`) | +| Image | `e2e-test-images/resource-consumer:1.13` | +| Image Size | ~50MB | + +## Metrics Collected + +### ContainerdCriImagePullingThroughput + +Image pull throughput (MB/s) with the following aggregations: + +| Metric | Description | +|--------|-------------| +| **Avg** | Weighted average throughput per image pull | +| **AvgPerNode** | Unweighted average - each node contributes equally | +| **Count** | Total number of image pulls | +| **Perc50** | 50th percentile (median) throughput across nodes | +| **Perc90** | 90th percentile throughput across nodes | +| **Perc99** | 99th percentile throughput across nodes | + +## Known Limitations + +### Cannot Use histogram_quantile() Per Node + +Using Prometheus `histogram_quantile()` on per-node throughput data always returns `10` (the maximum bucket boundary) regardless of actual throughput values. This happens because: + +- The histogram has fixed bucket boundaries: `0.5, 1, 2, 4, 6, 8, 10` MB/s +- When actual throughput exceeds 10 MB/s, all samples fall into the `+Inf` bucket +- `histogram_quantile()` can only interpolate within defined buckets, so it caps at `10` + +**Current Approach**: Instead of `histogram_quantile()` per node, we use weighted average (`_sum / _count`) per node, then compute percentiles across the node averages. + +### Per-Node Metrics May Return "no samples" + +The per-node metrics (`AvgPerNode`, `Perc50`, `Perc90`, `Perc99`) may return "no samples" while aggregate metrics (`Avg`, `Count`) work correctly. 
This is caused by Prometheus `rate()` function requiring **at least 2 data points** within the query window. + +**Root Cause**: If image pulls complete faster than the Prometheus scrape interval (default 15s), only one data point is collected per pull operation. The `rate()` function cannot compute a rate from a single sample, resulting in empty per-node results. + +**Why Aggregate Metrics Work**: `Avg` and `Count` use `sum()` which aggregates samples across all pods/nodes before applying `rate()`, accumulating enough data points within the window. + +**Workaround Options**: +- Increase scrape frequency (may impact cluster performance) +- Use larger images that take longer to pull +- Rely on aggregate metrics (`Avg`, `Count`) for throughput analysis + +### Metric Includes Unpack Time + +The `containerd_cri_image_pulling_throughput` metric measures **total image size divided by total pull time**, which includes both: +- Image layer download time +- Image layer decompression/unpack time + +This is not a pure network throughput metric. See [containerd source](https://github.com/containerd/containerd/blob/main/internal/cri/server/images/image_pull.go). + +### verify_measurement() Cannot Check Containerd Metrics + +The CRI module's `verify_measurement()` function only validates kubelet metrics (accessible via Kubernetes node proxy endpoint at `/api/v1/nodes/{node}/proxy/metrics`). Containerd metrics are only available through the Prometheus server and cannot be verified through this endpoint. 
+ +## References + +- [Best Practices](../../../docs/best-practices.md) +- [Test Scenario Implementation Guide](../../../docs/test-scenario-implementation-guide.md) diff --git a/scenarios/perf-eval/image-pull-n10/terraform-inputs/azure.tfvars b/scenarios/perf-eval/image-pull-n10/terraform-inputs/azure.tfvars new file mode 100644 index 0000000000..e4699c3d8f --- /dev/null +++ b/scenarios/perf-eval/image-pull-n10/terraform-inputs/azure.tfvars @@ -0,0 +1,64 @@ +scenario_type = "perf-eval" +scenario_name = "image-pull-n10" +deletion_delay = "1h" +owner = "acr" + +network_config_list = [ + { + role = "client" + vnet_name = "imgpull-vnet" + vnet_address_space = "10.0.0.0/9" + subnet = [ + { + name = "imgpull-subnet-1" + address_prefix = "10.0.0.0/16" + } + ] + network_security_group_name = "" + nic_public_ip_associations = [] + nsr_rules = [] + } +] + +aks_config_list = [ + { + role = "client" + aks_name = "img-pull-10" + dns_prefix = "imgpull" + subnet_name = "imgpull-subnet-1" + sku_tier = "Standard" + network_profile = { + network_plugin = "azure" + network_plugin_mode = "overlay" + pod_cidr = "10.128.0.0/9" + service_cidr = "192.168.0.0/16" + dns_service_ip = "192.168.0.10" + } + default_node_pool = { + name = "default" + node_count = 3 + vm_size = "Standard_D4ds_v5" + os_disk_type = "Managed" + only_critical_addons_enabled = true + temporary_name_for_rotation = "defaulttmp" + } + extra_node_pool = [ + { + name = "prompool" + node_count = 1 + auto_scaling_enabled = false + vm_size = "Standard_D4ds_v5" + os_disk_type = "Managed" + node_labels = { "prometheus" = "true" } + }, + { + name = "userpool" + node_count = 10 + auto_scaling_enabled = false + vm_size = "Standard_D4ds_v5" + os_disk_type = "Managed" + node_labels = { "cri-resource-consume" = "true" } + } + ] + } +] diff --git a/scenarios/perf-eval/image-pull-n10/terraform-test-inputs/azure.json b/scenarios/perf-eval/image-pull-n10/terraform-test-inputs/azure.json new file mode 100644 index 0000000000..e5f2ac736c --- 
/dev/null +++ b/scenarios/perf-eval/image-pull-n10/terraform-test-inputs/azure.json @@ -0,0 +1,4 @@ +{ + "run_id": "test-run", + "region": "eastus2" +} diff --git a/steps/engine/clusterloader2/cri/execute.yml b/steps/engine/clusterloader2/cri/execute.yml index 422cbe2356..f70bebd701 100644 --- a/steps/engine/clusterloader2/cri/execute.yml +++ b/steps/engine/clusterloader2/cri/execute.yml @@ -25,6 +25,7 @@ steps: --registry_endpoint ${REGISTRY_ENDPOINT:-"akscritelescope.azurecr.io"} \ --os_type ${OS_TYPE:-linux} \ --scrape_kubelets ${SCRAPE_KUBELETS:-False} \ + --scrape_containerd ${SCRAPE_CONTAINERD:-False} \ --host_network ${HOST_NETWORK:-True} \ --cl2_override_file ${CL2_CONFIG_DIR}/overrides.yaml PYTHONPATH=$PYTHONPATH:$(pwd) python3 $PYTHON_SCRIPT_FILE execute \ @@ -33,7 +34,8 @@ steps: --cl2_report_dir $CL2_REPORT_DIR \ --kubeconfig ${HOME}/.kube/config \ --provider $CLOUD \ - --scrape_kubelets ${SCRAPE_KUBELETS:-False} + --scrape_kubelets ${SCRAPE_KUBELETS:-False} \ + --scrape_containerd ${SCRAPE_CONTAINERD:-False} workingDirectory: modules/python env: ${{ if eq(parameters.cloud, 'azure') }}: