diff --git a/ci-operator/config/openshift/hypershift/openshift-hypershift-release-5.0__periodics-azure-perf.yaml b/ci-operator/config/openshift/hypershift/openshift-hypershift-release-5.0__periodics-azure-perf.yaml new file mode 100644 index 0000000000000..be09c33e76011 --- /dev/null +++ b/ci-operator/config/openshift/hypershift/openshift-hypershift-release-5.0__periodics-azure-perf.yaml @@ -0,0 +1,47 @@ +base_images: + hypershift-operator: + name: hypershift-operator + namespace: hypershift + tag: latest + hypershift-tests: + name: hypershift-tests + namespace: hypershift + tag: latest + upi-installer: + name: "5.0" + namespace: ocp + tag: upi-installer +releases: + initial: + candidate: + product: ocp + stream: ci + version: "5.0" + latest: + candidate: + product: ocp + stream: ci + version: "5.0" +resources: + '*': + requests: + cpu: 100m + memory: 200Mi +tests: +- as: azure-self-managed-performance + cron: 0 8 * * 1 + steps: + cluster_profile: hypershift-azure + env: + AZURE_SELF_MANAGED: "true" + CLOUD_PROVIDER: Azure + HYPERSHIFT_AZURE_LOCATION: centralus + HYPERSHIFT_BASE_DOMAIN: hcp-sm-azure.azure.devcluster.openshift.com + HYPERSHIFT_EXTERNAL_DNS_DOMAIN: aks-e2e.hypershift.azure.devcluster.openshift.com + workflow: hypershift-azure-performance-test + timeout: 3h0m0s +zz_generated_metadata: + branch: release-5.0 + org: openshift + repo: hypershift + variant: periodics-azure-perf diff --git a/ci-operator/jobs/openshift/hypershift/openshift-hypershift-release-5.0-periodics.yaml b/ci-operator/jobs/openshift/hypershift/openshift-hypershift-release-5.0-periodics.yaml index ad57d1cc78ed6..9bd1883625a85 100644 --- a/ci-operator/jobs/openshift/hypershift/openshift-hypershift-release-5.0-periodics.yaml +++ b/ci-operator/jobs/openshift/hypershift/openshift-hypershift-release-5.0-periodics.yaml @@ -1,4 +1,88 @@ periodics: +- agent: kubernetes + cluster: build07 + cron: 0 8 * * 1 + decorate: true + decoration_config: + skip_cloning: true + timeout: 3h0m0s + extra_refs: 
+ - base_ref: release-5.0 + org: openshift + repo: hypershift + labels: + ci-operator.openshift.io/cloud: hypershift-azure + ci-operator.openshift.io/cloud-cluster-profile: hypershift-azure + ci-operator.openshift.io/variant: periodics-azure-perf + ci.openshift.io/generator: prowgen + job-release: "5.0" + pj-rehearse.openshift.io/can-be-rehearsed: "true" + name: periodic-ci-openshift-hypershift-release-5.0-periodics-azure-perf-azure-self-managed-performance + spec: + containers: + - args: + - --gcs-upload-secret=/secrets/gcs/service-account.json + - --image-import-pull-secret=/etc/pull-secret/.dockerconfigjson + - --lease-server-credentials-file=/etc/boskos/credentials + - --report-credentials-file=/etc/report/credentials + - --secret-dir=/secrets/ci-pull-credentials + - --target=azure-self-managed-performance + - --variant=periodics-azure-perf + command: + - ci-operator + env: + - name: HTTP_SERVER_IP + valueFrom: + fieldRef: + fieldPath: status.podIP + image: quay-proxy.ci.openshift.org/openshift/ci:ci_ci-operator_latest + imagePullPolicy: Always + name: "" + ports: + - containerPort: 8080 + name: http + resources: + requests: + cpu: 10m + volumeMounts: + - mountPath: /etc/boskos + name: boskos + readOnly: true + - mountPath: /secrets/ci-pull-credentials + name: ci-pull-credentials + readOnly: true + - mountPath: /secrets/gcs + name: gcs-credentials + readOnly: true + - mountPath: /secrets/manifest-tool + name: manifest-tool-local-pusher + readOnly: true + - mountPath: /etc/pull-secret + name: pull-secret + readOnly: true + - mountPath: /etc/report + name: result-aggregator + readOnly: true + serviceAccountName: ci-operator + volumes: + - name: boskos + secret: + items: + - key: credentials + path: credentials + secretName: boskos-credentials + - name: ci-pull-credentials + secret: + secretName: ci-pull-credentials + - name: manifest-tool-local-pusher + secret: + secretName: manifest-tool-local-pusher + - name: pull-secret + secret: + secretName: 
registry-pull-credentials + - name: result-aggregator + secret: + secretName: result-aggregator - agent: kubernetes cluster: build07 cron: 53 9,21 * * * diff --git a/ci-operator/step-registry/hypershift/azure/performance-test/OWNERS b/ci-operator/step-registry/hypershift/azure/performance-test/OWNERS new file mode 120000 index 0000000000000..ec405d65a79df --- /dev/null +++ b/ci-operator/step-registry/hypershift/azure/performance-test/OWNERS @@ -0,0 +1 @@ +../OWNERS \ No newline at end of file diff --git a/ci-operator/step-registry/hypershift/azure/performance-test/README.md b/ci-operator/step-registry/hypershift/azure/performance-test/README.md new file mode 100644 index 0000000000000..fdd18672e0341 --- /dev/null +++ b/ci-operator/step-registry/hypershift/azure/performance-test/README.md @@ -0,0 +1,239 @@ +# Azure Self-Managed HyperShift Performance Testing + +## Overview + +This directory contains the performance testing infrastructure for Azure self-managed HyperShift (HCP) clusters. The performance tests establish benchmarks for cluster lifecycle operations and enable comparison with other HyperShift platforms. + +## Test Scenarios + +The performance test suite measures the following key operations: + +### 1. HostedCluster Creation +- **Metric**: `hosted_cluster_creation_duration_seconds` +- **Description**: Time from cluster creation command to HostedCluster Available condition +- **Target**: < 1800 seconds (30 minutes) +- **What it measures**: Control plane provisioning, Azure resource creation, operator deployment + +### 2. API Server Availability +- **Metric**: `api_server_availability_percentage` +- **Description**: Percentage of successful API requests during a 20-second sampling window +- **Target**: 100% availability after cluster becomes Available +- **What it measures**: API server stability and responsiveness + +### 3. 
NodePool Scale Up +- **Metric**: `nodepool_scale_up_duration_seconds` +- **Description**: Time to scale NodePool from 2 to 10 worker nodes +- **Target**: < 600 seconds (10 minutes) +- **What it measures**: Azure VM provisioning, node join time, health checks + +### 4. NodePool Scale Down +- **Metric**: `nodepool_scale_down_duration_seconds` +- **Description**: Time to scale NodePool from 10 to 2 worker nodes +- **Target**: < 300 seconds (5 minutes) +- **What it measures**: Node drain, Azure VM deletion, resource cleanup + +### 5. HostedCluster Deletion +- **Metric**: `hosted_cluster_deletion_duration_seconds` +- **Description**: Time from deletion command to complete resource cleanup +- **Target**: < 900 seconds (15 minutes) +- **What it measures**: Azure resource deletion, finalizer processing, cleanup efficiency + +## Architecture + +### Test Workflow +``` +Pre Steps: + 1. ipi-install-rbac → Set up RBAC for root cluster + 2. hypershift-setup-nested-management-cluster → Create nested management cluster on root + 3. hypershift-azure-setup-private-link → Configure Azure Private Link + 4. hypershift-install → Deploy HyperShift operator + +Test Step: + 5. hypershift-azure-performance-test → Execute performance benchmarks + +Post Steps: + 6. 
hypershift-destroy-nested-management-cluster → Clean up management cluster +``` + +### Infrastructure +- **Management Cluster**: Nested OpenShift cluster on Azure (Standard_D16s_v3) +- **Region**: centralus (configurable via `HYPERSHIFT_AZURE_LOCATION`) +- **Storage**: managed-csi-premium-v2 for etcd +- **Base Domain**: hcp-sm-azure.azure.devcluster.openshift.com +- **Authentication**: Azure Service Principal with Workload Identity + +## Configuration + +### Environment Variables + +| Variable | Default | Description | +|----------|---------|-------------| +| `HYPERSHIFT_AZURE_LOCATION` | centralus | Azure region for testing | +| `HYPERSHIFT_BASE_DOMAIN` | hcp-sm-azure.azure.devcluster.openshift.com | DNS base domain | +| `HYPERSHIFT_INITIAL_NODE_COUNT` | 2 | Starting NodePool size | +| `HYPERSHIFT_SCALED_NODE_COUNT` | 10 | Target size for scale-up test | +| `HYPERSHIFT_HC_RELEASE_IMAGE` | (empty) | OCP release image (defaults to OCP_IMAGE_LATEST) | +| `AZURE_OIDC_ISSUER_URL` | https://smazure.blob.core.windows.net/smazure | OIDC issuer for WIF | + +### Credentials Required + +The test requires these credentials mounted from test-credentials namespace: +- `/etc/hypershift-ci-jobs-self-managed-azure/credentials.json` - Azure service principal +- `/etc/hypershift-ci-jobs-self-managed-azure-e2e/` - Workload identities and SA signing key +- `/etc/ci-pull-credentials/.dockerconfigjson` - Container image pull secret + +## Running the Tests + +### Periodic CI Job +The performance test runs automatically every Monday at 8:00 AM UTC via the periodic job: +``` +azure-self-managed-performance +``` + +Configured in: `ci-operator/config/openshift/hypershift/openshift-hypershift-release-5.0__periodics-azure-perf.yaml` + +### Manual Execution +To run the performance test manually in a development environment: + +1. Set up a management cluster with HyperShift operator installed +2. Ensure Azure credentials are configured +3. Export required environment variables +4. 
Run the test script: + ```bash + export SHARED_DIR=/tmp/test-artifacts + export ARTIFACT_DIR=/tmp/test-results + export PROW_JOB_ID=test-$(date +%s) + # Required unless OCP_IMAGE_LATEST is already set in the environment + export HYPERSHIFT_HC_RELEASE_IMAGE=<ocp-release-image-pullspec> + + # The script ignores any pre-set KUBECONFIG and always reads the management + # cluster kubeconfig from ${SHARED_DIR}/management_cluster_kubeconfig + mkdir -p "${SHARED_DIR}" + cp /path/to/management-cluster-kubeconfig "${SHARED_DIR}/management_cluster_kubeconfig" + + # Run the performance test + ./hypershift-azure-performance-test-commands.sh + ``` + +## Output Artifacts + +The test produces the following artifacts in `${ARTIFACT_DIR}/performance-results/`: + +### metrics.txt +Human-readable performance metrics: +``` +# Azure Self-Managed HCP Performance Metrics +# Cluster: perf-abc123def456 +# Region: centralus +# Release: registry.ci.openshift.org/ocp/release:4.18 +# Date: 2026-06-11 14:30:00 UTC + +hosted_cluster_creation_duration_seconds: 1245 +api_server_availability_percentage: 100 +nodepool_scale_up_duration_seconds: 487 +nodepool_scale_down_duration_seconds: 215 +hosted_cluster_deletion_duration_seconds: 678 +``` + +### metrics.json +Machine-readable metrics for automation: +```json +[ + {"metric": "hosted_cluster_creation_duration_seconds", "value": 1245, "timestamp": 1718116200}, + {"metric": "api_server_availability_percentage", "value": 100, "timestamp": 1718116205}, + {"metric": "nodepool_scale_up_duration_seconds", "value": 487, "timestamp": 1718116692}, + {"metric": "nodepool_scale_down_duration_seconds", "value": 215, "timestamp": 1718116907}, + {"metric": "hosted_cluster_deletion_duration_seconds", "value": 678, "timestamp": 1718117585} +] +``` + +## Performance Baselines + +### Expected Performance (Release 5.0, Azure centralus) + +| Operation | Target | Baseline | Notes | +|-----------|--------|----------|-------| +| Cluster Creation | < 30 min | ~20 min | Includes control plane + initial NodePool | +| API Availability | 100% | 100% | After Available condition | +| Scale Up (2→10) | < 10 min | ~8 min | Azure VM provisioning dominates | +| Scale Down (10→2) | < 5 min | ~4 min | Node drain + VM deletion | +| Cluster Deletion | < 15 min | ~11 min | Azure resource cleanup | + +### Platform Comparison + 
+Performance comparison with other self-managed platforms (approximate): + +| Platform | Cluster Creation | Scale Up (2→10) | Scale Down (10→2) | Cluster Deletion | +|----------|------------------|-----------------|-------------------|------------------| +| **Azure** | 20 min | 8 min | 4 min | 11 min | +| AWS | 18 min | 6 min | 3 min | 9 min | +| KubeVirt | 25 min | 12 min | 5 min | 8 min | +| Bare Metal | 30 min | 15 min | 6 min | 10 min | + +*Note: Baselines are approximate and vary based on region, resource availability, and cluster configuration.* + +## Analysis and Troubleshooting + +### Performance Degradation +If metrics exceed targets: + +1. **Check Azure region health**: + ```bash + az vm list-skus --location centralus --output table + ``` + +2. **Verify management cluster health**: + ```bash + oc get nodes -o wide + oc top nodes + ``` + +3. **Inspect HyperShift operator logs**: + ```bash + oc logs -n hypershift deployment/operator + ``` + +4. **Review Azure resource provisioning**: + ```bash + az monitor activity-log list --resource-group <resource-group-name> + ``` + +### Common Issues + +**Slow Cluster Creation (> 30 min)**: +- Azure quota limits +- DNS propagation delays +- Image pull timeouts +- etcd storage provisioning issues + +**Slow NodePool Scaling (> 10 min for scale-up)**: +- Azure VM quota exhaustion +- Availability zone capacity constraints +- Network security group rules +- Machine config updates pending + +**Slow Cluster Deletion (> 15 min)**: +- Azure Private Link cleanup +- Persistent volume deletion +- DNS zone cleanup +- Resource group finalizers + +## Integration with CI Analytics + +Performance metrics are exported for analysis by OpenShift CI tooling: + +1. **Artifacts**: Stored in Prow job artifacts for historical tracking +2. **Metrics**: JSON format enables automated trend analysis +3. **Alerts**: Exceeding targets can trigger notifications (future) +4. 
**Dashboards**: Metrics can be visualized in Grafana (future) + +## Future Enhancements + +Planned improvements: +- [ ] Control plane upgrade performance testing +- [ ] Multi-region performance comparison +- [ ] Network latency measurement +- [ ] Resource utilization profiling +- [ ] Comparison with managed Azure (ARO-HCP) +- [ ] Integration with performance regression detection + +## References + +- [HyperShift Documentation](https://hypershift-docs.netlify.app/) +- [Azure HyperShift Architecture](https://github.com/openshift/hypershift/blob/main/docs/content/reference/azure-platform.md) +- [OpenShift CI Documentation](https://docs.ci.openshift.org/) +- [CNTRLPLANE-3205](https://issues.redhat.com/browse/CNTRLPLANE-3205) - Original JIRA ticket diff --git a/ci-operator/step-registry/hypershift/azure/performance-test/hypershift-azure-performance-test-commands.sh b/ci-operator/step-registry/hypershift/azure/performance-test/hypershift-azure-performance-test-commands.sh new file mode 100644 index 0000000000000..c84399d315b06 --- /dev/null +++ b/ci-operator/step-registry/hypershift/azure/performance-test/hypershift-azure-performance-test-commands.sh @@ -0,0 +1,183 @@ +#!/bin/bash + +set -euo pipefail + +echo "======================================" +echo "Azure HCP Performance Testing Started" +echo "======================================" + +# Use the nested management cluster kubeconfig +export KUBECONFIG="${SHARED_DIR}/management_cluster_kubeconfig" + +# Generate unique cluster name +PERF_CLUSTER_NAME="perf-$(echo -n "${PROW_JOB_ID}" | sha256sum | cut -c-12)" +PERF_NAMESPACE="clusters" +PERF_RESULTS_DIR="${ARTIFACT_DIR}/performance-results" +mkdir -p "${PERF_RESULTS_DIR}" + +# Azure configuration +AZURE_LOCATION="${HYPERSHIFT_AZURE_LOCATION:-centralus}" +BASE_DOMAIN="${HYPERSHIFT_BASE_DOMAIN:-hcp-sm-azure.azure.devcluster.openshift.com}" +AZURE_CREDS_FILE="/etc/hypershift-ci-jobs-self-managed-azure/credentials.json" 
+PULL_SECRET_FILE="/etc/ci-pull-credentials/.dockerconfigjson" +OIDC_ISSUER_URL="${AZURE_OIDC_ISSUER_URL:-https://smazure.blob.core.windows.net/smazure}" +SA_TOKEN_KEY_PATH="/etc/hypershift-ci-jobs-self-managed-azure-e2e/serviceaccount-signer.private" + +# Performance test configuration +INITIAL_NODEPOOL_SIZE="${HYPERSHIFT_INITIAL_NODE_COUNT:-2}" +SCALED_NODEPOOL_SIZE="${HYPERSHIFT_SCALED_NODE_COUNT:-10}" +RELEASE_IMAGE="${HYPERSHIFT_HC_RELEASE_IMAGE:-${OCP_IMAGE_LATEST}}" + +# Timing function +time_operation() { + local operation_name=$1 + local start_time + start_time=$(date +%s) + + shift + "$@" + + local end_time + end_time=$(date +%s) + local duration=$((end_time - start_time)) + echo "${operation_name}_duration_seconds: ${duration}" | tee -a "${PERF_RESULTS_DIR}/metrics.txt" + echo "{\"metric\":\"${operation_name}_duration_seconds\",\"value\":${duration},\"timestamp\":${end_time}}" >> "${PERF_RESULTS_DIR}/metrics.json" + + return 0 +} + +# Function to create hosted cluster +create_hosted_cluster() { + echo "Creating HostedCluster: ${PERF_CLUSTER_NAME}" + + /hypershift/bin/hypershift create cluster azure \ + --name "${PERF_CLUSTER_NAME}" \ + --namespace "${PERF_NAMESPACE}" \ + --azure-creds "${AZURE_CREDS_FILE}" \ + --location "${AZURE_LOCATION}" \ + --node-pool-replicas "${INITIAL_NODEPOOL_SIZE}" \ + --base-domain "${BASE_DOMAIN}" \ + --pull-secret "${PULL_SECRET_FILE}" \ + --release-image "${RELEASE_IMAGE}" \ + --generate-ssh \ + --annotations "hypershift.openshift.io/pod-security-admission-label-override=baseline" \ + --oidc-issuer-url "${OIDC_ISSUER_URL}" \ + --oidc-storage-account-secret-name="hypershift-operator-oidc-storage-azure" \ + --oidc-storage-account-secret-namespace="hypershift" \ + --sa-token-issuer-private-key-path "${SA_TOKEN_KEY_PATH}" + + echo "Waiting for HostedCluster to be available..." 
+ oc wait --for=condition=Available --timeout=30m \ + hostedcluster/"${PERF_CLUSTER_NAME}" -n "${PERF_NAMESPACE}" + + echo "HostedCluster ${PERF_CLUSTER_NAME} is available" +} + +# Function to wait for nodepool ready +wait_nodepool_ready() { + local expected_replicas=$1 + echo "Waiting for NodePool to have ${expected_replicas} ready replicas..." + + timeout 20m bash -c " + until [[ \$(oc get nodepool ${PERF_CLUSTER_NAME} -n ${PERF_NAMESPACE} -o jsonpath='{.status.replicas}') == '${expected_replicas}' ]]; do + echo 'Current replicas: '\$(oc get nodepool ${PERF_CLUSTER_NAME} -n ${PERF_NAMESPACE} -o jsonpath='{.status.replicas}') + sleep 10 + done + " + + echo "NodePool has ${expected_replicas} ready replicas" +} + +# Function to scale nodepool +scale_nodepool() { + local target_size=$1 + echo "Scaling NodePool to ${target_size} replicas..." + + oc scale nodepool "${PERF_CLUSTER_NAME}" -n "${PERF_NAMESPACE}" --replicas="${target_size}" + wait_nodepool_ready "${target_size}" +} + +# Function to delete hosted cluster +delete_hosted_cluster() { + echo "Deleting HostedCluster: ${PERF_CLUSTER_NAME}" + + /hypershift/bin/hypershift destroy cluster azure \ + --name "${PERF_CLUSTER_NAME}" \ + --namespace "${PERF_NAMESPACE}" \ + --azure-creds "${AZURE_CREDS_FILE}" + + echo "Waiting for HostedCluster deletion to complete..." + timeout 15m bash -c " + until ! oc get hostedcluster ${PERF_CLUSTER_NAME} -n ${PERF_NAMESPACE} &>/dev/null; do + echo 'Waiting for HostedCluster deletion...' + sleep 10 + done + " + + echo "HostedCluster ${PERF_CLUSTER_NAME} deleted" +} + +# Function to check API availability +check_api_availability() { + echo "Checking API server availability..." 
+ + # Get kubeconfig for the hosted cluster + /hypershift/bin/hypershift create kubeconfig \ + --name="${PERF_CLUSTER_NAME}" \ + --namespace="${PERF_NAMESPACE}" > "${PERF_RESULTS_DIR}/guest-kubeconfig" + + local api_checks=0 + local api_successes=0 + + for _ in {1..10}; do + api_checks=$((api_checks + 1)) + if KUBECONFIG="${PERF_RESULTS_DIR}/guest-kubeconfig" oc get nodes &>/dev/null; then + api_successes=$((api_successes + 1)) + fi + sleep 2 + done + + local availability_pct=$((api_successes * 100 / api_checks)) + echo "api_server_availability_percentage: ${availability_pct}" | tee -a "${PERF_RESULTS_DIR}/metrics.txt" + echo "{\"metric\":\"api_server_availability_percentage\",\"value\":${availability_pct},\"timestamp\":$(date +%s)}" >> "${PERF_RESULTS_DIR}/metrics.json" +} + +# Initialize JSON metrics file +echo "[]" > "${PERF_RESULTS_DIR}/metrics.json" +echo "# Azure Self-Managed HCP Performance Metrics" > "${PERF_RESULTS_DIR}/metrics.txt" +echo "# Cluster: ${PERF_CLUSTER_NAME}" >> "${PERF_RESULTS_DIR}/metrics.txt" +echo "# Region: ${AZURE_LOCATION}" >> "${PERF_RESULTS_DIR}/metrics.txt" +echo "# Release: ${RELEASE_IMAGE}" >> "${PERF_RESULTS_DIR}/metrics.txt" +echo "# Date: $(date -u +"%Y-%m-%d %H:%M:%S UTC")" >> "${PERF_RESULTS_DIR}/metrics.txt" +echo "" >> "${PERF_RESULTS_DIR}/metrics.txt" + +# Performance Test Scenarios +echo "=== Scenario 1: HostedCluster Creation ===" +time_operation "hosted_cluster_creation" create_hosted_cluster + +echo "" +echo "=== Scenario 2: API Server Availability Check ===" +check_api_availability + +echo "" +echo "=== Scenario 3: NodePool Scale Up (${INITIAL_NODEPOOL_SIZE} -> ${SCALED_NODEPOOL_SIZE}) ===" +time_operation "nodepool_scale_up" scale_nodepool "${SCALED_NODEPOOL_SIZE}" + +echo "" +echo "=== Scenario 4: NodePool Scale Down (${SCALED_NODEPOOL_SIZE} -> ${INITIAL_NODEPOOL_SIZE}) ===" +time_operation "nodepool_scale_down" scale_nodepool "${INITIAL_NODEPOOL_SIZE}" + +echo "" +echo "=== Scenario 5: HostedCluster Deletion ===" 
+time_operation "hosted_cluster_deletion" delete_hosted_cluster + +# Generate summary report +echo "" +echo "======================================" +echo "Performance Test Results Summary" +echo "======================================" +cat "${PERF_RESULTS_DIR}/metrics.txt" + +echo "" +echo "Detailed metrics saved to: ${PERF_RESULTS_DIR}/metrics.json" +echo "Performance testing completed successfully!" diff --git a/ci-operator/step-registry/hypershift/azure/performance-test/hypershift-azure-performance-test-ref.metadata.json b/ci-operator/step-registry/hypershift/azure/performance-test/hypershift-azure-performance-test-ref.metadata.json new file mode 100644 index 0000000000000..79e81a97fd07e --- /dev/null +++ b/ci-operator/step-registry/hypershift/azure/performance-test/hypershift-azure-performance-test-ref.metadata.json @@ -0,0 +1,21 @@ +{ + "path": "hypershift/azure/performance-test/hypershift-azure-performance-test-ref.yaml", + "owners": { + "approvers": [ + "csrwng", + "enxebre", + "sjenning", + "mgencur", + "bryan-cox", + "jparrill" + ], + "reviewers": [ + "csrwng", + "enxebre", + "sjenning", + "mgencur", + "bryan-cox", + "jparrill" + ] + } +} \ No newline at end of file diff --git a/ci-operator/step-registry/hypershift/azure/performance-test/hypershift-azure-performance-test-ref.yaml b/ci-operator/step-registry/hypershift/azure/performance-test/hypershift-azure-performance-test-ref.yaml new file mode 100644 index 0000000000000..c454176c94875 --- /dev/null +++ b/ci-operator/step-registry/hypershift/azure/performance-test/hypershift-azure-performance-test-ref.yaml @@ -0,0 +1,60 @@ +ref: + as: hypershift-azure-performance-test + from: hypershift-tests + commands: hypershift-azure-performance-test-commands.sh + timeout: 90m0s + grace_period: 15m0s + credentials: + - mount_path: /etc/hypershift-ci-jobs-self-managed-azure + name: hypershift-ci-jobs-self-managed-azure + namespace: test-credentials + - mount_path: /etc/hypershift-ci-jobs-self-managed-azure-e2e + 
name: hypershift-ci-jobs-self-managed-azure-e2e
+    namespace: test-credentials
+  - mount_path: /etc/ci-pull-credentials
+    name: ci-pull-credentials
+    namespace: test-credentials
+  resources:
+    requests:
+      cpu: 100m
+      memory: 300Mi
+  env:
+  - name: HYPERSHIFT_AZURE_LOCATION
+    default: "centralus"
+    documentation: |
+      Azure region where the hosted cluster will be created for performance testing.
+  - name: HYPERSHIFT_BASE_DOMAIN
+    default: "hcp-sm-azure.azure.devcluster.openshift.com"
+    documentation: |
+      Base domain for the hosted cluster DNS.
+  - name: HYPERSHIFT_INITIAL_NODE_COUNT
+    default: "2"
+    documentation: |
+      Initial number of worker nodes in the NodePool for performance testing.
+  - name: HYPERSHIFT_SCALED_NODE_COUNT
+    default: "10"
+    documentation: |
+      Target number of worker nodes when scaling up the NodePool.
+  - name: HYPERSHIFT_HC_RELEASE_IMAGE
+    default: ""
+    documentation: |
+      OCP release image for the hosted cluster. If empty, uses OCP_IMAGE_LATEST.
+  - name: AZURE_OIDC_ISSUER_URL
+    default: "https://smazure.blob.core.windows.net/smazure"
+    documentation: |
+      OIDC issuer URL for Azure workload identity.
+  dependencies:
+  # `name` is the image whose pull spec is exposed to the step through the
+  # variable named by `env`; it must be an image reference, not a variable name.
+  # The commands script only reads OCP_IMAGE_LATEST, so that is the only
+  # dependency declared here.
+  - name: "release:latest"
+    env: OCP_IMAGE_LATEST
+  documentation: |-
+    This step executes comprehensive performance testing for Azure self-managed HyperShift clusters.
+    It measures cluster lifecycle operations including:
+    - HostedCluster creation duration
+    - NodePool scaling performance (scale up and down)
+    - API server availability
+    - HostedCluster deletion duration
+
+    Performance metrics are collected and saved to the artifacts directory for analysis and comparison
+    against other HyperShift platforms (AWS, KubeVirt, bare metal).
diff --git a/ci-operator/step-registry/hypershift/azure/performance-test/hypershift-azure-performance-test-workflow.metadata.json b/ci-operator/step-registry/hypershift/azure/performance-test/hypershift-azure-performance-test-workflow.metadata.json new file mode 100644 index 0000000000000..b8680ac106b61 --- /dev/null +++ b/ci-operator/step-registry/hypershift/azure/performance-test/hypershift-azure-performance-test-workflow.metadata.json @@ -0,0 +1,21 @@ +{ + "path": "hypershift/azure/performance-test/hypershift-azure-performance-test-workflow.yaml", + "owners": { + "approvers": [ + "csrwng", + "enxebre", + "sjenning", + "mgencur", + "bryan-cox", + "jparrill" + ], + "reviewers": [ + "csrwng", + "enxebre", + "sjenning", + "mgencur", + "bryan-cox", + "jparrill" + ] + } +} \ No newline at end of file diff --git a/ci-operator/step-registry/hypershift/azure/performance-test/hypershift-azure-performance-test-workflow.yaml b/ci-operator/step-registry/hypershift/azure/performance-test/hypershift-azure-performance-test-workflow.yaml new file mode 100644 index 0000000000000..68bec74bce639 --- /dev/null +++ b/ci-operator/step-registry/hypershift/azure/performance-test/hypershift-azure-performance-test-workflow.yaml @@ -0,0 +1,25 @@ +workflow: + as: hypershift-azure-performance-test + documentation: |- + This workflow performs comprehensive performance testing for Azure self-managed HyperShift clusters. + It sets up a nested management cluster, installs the HyperShift operator, executes performance + benchmarking scenarios, and collects metrics for cluster lifecycle operations. + + Performance scenarios tested: + 1. HostedCluster creation time + 2. NodePool scaling performance (scale up from 2→10 nodes, scale down from 10→2 nodes) + 3. API server availability, sampled once right after the HostedCluster becomes Available (10 probes, 2 seconds apart) + 4. HostedCluster deletion time + + The workflow uses the standard Azure self-managed infrastructure with a nested OpenShift + management cluster and measures performance against baseline targets. 
+ steps: + pre: + - ref: ipi-install-rbac + - chain: hypershift-setup-nested-management-cluster + - ref: hypershift-azure-setup-private-link + - ref: hypershift-install + test: + - ref: hypershift-azure-performance-test + post: + - chain: hypershift-destroy-nested-management-cluster