openshift · mehabhalodiya · Jun 11, 2026 · coderabbitai · Jun 12, 2026
diff --git a/...r/config/openshift/hypershift/openshift-hypershift-release-5.0__periodics-azure-perf.yaml b/...r/config/openshift/hypershift/openshift-hypershift-release-5.0__periodics-azure-perf.yaml
@@ -0,0 +1,47 @@
+base_images:
+  hypershift-operator:
+    name: hypershift-operator
+    namespace: hypershift
+    tag: latest
+  hypershift-tests:
+    name: hypershift-tests
+    namespace: hypershift
+    tag: latest
+  upi-installer:
+    name: "5.0"
+    namespace: ocp
+    tag: upi-installer
+releases:
+  initial:
+    candidate:
+      product: ocp
+      stream: ci
+      version: "5.0"
+  latest:
+    candidate:
+      product: ocp
+      stream: ci
+      version: "5.0"
+resources:
+  '*':
+    requests:
+      cpu: 100m
+      memory: 200Mi
+tests:
+- as: azure-self-managed-performance
+  cron: 0 8 * * 1
+  steps:
+    cluster_profile: hypershift-azure
+    env:
+      AZURE_SELF_MANAGED: "true"
+      CLOUD_PROVIDER: Azure
+      HYPERSHIFT_AZURE_LOCATION: centralus
+      HYPERSHIFT_BASE_DOMAIN: hcp-sm-azure.azure.devcluster.openshift.com
+      HYPERSHIFT_EXTERNAL_DNS_DOMAIN: aks-e2e.hypershift.azure.devcluster.openshift.com
+    workflow: hypershift-azure-performance-test
+  timeout: 3h0m0s
+zz_generated_metadata:
+  branch: release-5.0
+  org: openshift
+  repo: hypershift
+  variant: periodics-azure-perf
diff --git a/ci-operator/jobs/openshift/hypershift/openshift-hypershift-release-5.0-periodics.yaml b/ci-operator/jobs/openshift/hypershift/openshift-hypershift-release-5.0-periodics.yaml
@@ -1,4 +1,88 @@
 periodics:
+- agent: kubernetes
+  cluster: build07
+  cron: 0 8 * * 1
+  decorate: true
+  decoration_config:
+    skip_cloning: true
+    timeout: 3h0m0s
+  extra_refs:
+  - base_ref: release-5.0
+    org: openshift
+    repo: hypershift
+  labels:
+    ci-operator.openshift.io/cloud: hypershift-azure
+    ci-operator.openshift.io/cloud-cluster-profile: hypershift-azure
+    ci-operator.openshift.io/variant: periodics-azure-perf
+    ci.openshift.io/generator: prowgen
+    job-release: "5.0"
+    pj-rehearse.openshift.io/can-be-rehearsed: "true"
+  name: periodic-ci-openshift-hypershift-release-5.0-periodics-azure-perf-azure-self-managed-performance
+  spec:
+    containers:
+    - args:
+      - --gcs-upload-secret=/secrets/gcs/service-account.json
+      - --image-import-pull-secret=/etc/pull-secret/.dockerconfigjson
+      - --lease-server-credentials-file=/etc/boskos/credentials
+      - --report-credentials-file=/etc/report/credentials
+      - --secret-dir=/secrets/ci-pull-credentials
+      - --target=azure-self-managed-performance
+      - --variant=periodics-azure-perf
+      command:
+      - ci-operator
+      env:
+      - name: HTTP_SERVER_IP
+        valueFrom:
+          fieldRef:
+            fieldPath: status.podIP
+      image: quay-proxy.ci.openshift.org/openshift/ci:ci_ci-operator_latest
+      imagePullPolicy: Always
+      name: ""
+      ports:
+      - containerPort: 8080
+        name: http
+      resources:
+        requests:
+          cpu: 10m
+      volumeMounts:
+      - mountPath: /etc/boskos
+        name: boskos
+        readOnly: true
+      - mountPath: /secrets/ci-pull-credentials
+        name: ci-pull-credentials
+        readOnly: true
+      - mountPath: /secrets/gcs
+        name: gcs-credentials
+        readOnly: true
+      - mountPath: /secrets/manifest-tool
+        name: manifest-tool-local-pusher
+        readOnly: true
+      - mountPath: /etc/pull-secret
+        name: pull-secret
+        readOnly: true
+      - mountPath: /etc/report
+        name: result-aggregator
+        readOnly: true
+    serviceAccountName: ci-operator
+    volumes:
+    - name: boskos
+      secret:
+        items:
+        - key: credentials
+          path: credentials
+        secretName: boskos-credentials
+    - name: ci-pull-credentials
+      secret:
+        secretName: ci-pull-credentials
+    - name: manifest-tool-local-pusher
+      secret:
+        secretName: manifest-tool-local-pusher
+    - name: pull-secret
+      secret:
+        secretName: registry-pull-credentials
+    - name: result-aggregator
+      secret:
+        secretName: result-aggregator
 - agent: kubernetes
   cluster: build07
   cron: 53 9,21 * * *

diff --git a/ci-operator/step-registry/hypershift/azure/performance-test/OWNERS b/ci-operator/step-registry/hypershift/azure/performance-test/OWNERS
@@ -0,0 +1 @@
+../OWNERS
diff --git a/ci-operator/step-registry/hypershift/azure/performance-test/README.md b/ci-operator/step-registry/hypershift/azure/performance-test/README.md
@@ -0,0 +1,239 @@
+# Azure Self-Managed HyperShift Performance Testing
+
+## Overview
+
+This directory contains the performance testing infrastructure for Azure self-managed HyperShift (HCP) clusters. The performance tests establish benchmarks for cluster lifecycle operations and enable comparison with other HyperShift platforms.
+
+## Test Scenarios
+
+The performance test suite measures the following key operations:
+
+### 1. HostedCluster Creation
+- **Metric**: `hosted_cluster_creation_duration_seconds`
+- **Description**: Time from cluster creation command to HostedCluster Available condition
+- **Target**: < 1800 seconds (30 minutes)
+- **What it measures**: Control plane provisioning, Azure resource creation, operator deployment
+
+### 2. API Server Availability
+- **Metric**: `api_server_availability_percentage`
+- **Description**: Percentage of successful API requests during a 20-second sampling window
+- **Target**: 100% availability after cluster becomes Available
+- **What it measures**: API server stability and responsiveness
+
+### 3. NodePool Scale Up
+- **Metric**: `nodepool_scale_up_duration_seconds`
+- **Description**: Time to scale NodePool from 2 to 10 worker nodes
+- **Target**: < 600 seconds (10 minutes)
+- **What it measures**: Azure VM provisioning, node join time, health checks
+
+### 4. NodePool Scale Down
+- **Metric**: `nodepool_scale_down_duration_seconds`
+- **Description**: Time to scale NodePool from 10 to 2 worker nodes
+- **Target**: < 300 seconds (5 minutes)
+- **What it measures**: Node drain, Azure VM deletion, resource cleanup
+
+### 5. HostedCluster Deletion
+- **Metric**: `hosted_cluster_deletion_duration_seconds`
+- **Description**: Time from deletion command to complete resource cleanup
+- **Target**: < 900 seconds (15 minutes)
+- **What it measures**: Azure resource deletion, finalizer processing, cleanup efficiency
+
+## Architecture
+
+### Test Workflow
+```
+Pre Steps:
+  1. ipi-install-rbac → Set up RBAC for root cluster
+  2. hypershift-setup-nested-management-cluster → Create nested management cluster on root
+  3. hypershift-azure-setup-private-link → Configure Azure Private Link
+  4. hypershift-install → Deploy HyperShift operator
+
+Test Step:
+  5. hypershift-azure-performance-test → Execute performance benchmarks
+
+Post Steps:
+  6. hypershift-destroy-nested-management-cluster → Clean up management cluster
+```
+
+### Infrastructure
+- **Management Cluster**: Nested OpenShift cluster on Azure (Standard_D16s_v3)
+- **Region**: centralus (configurable via `HYPERSHIFT_AZURE_LOCATION`)
+- **Storage**: managed-csi-premium-v2 for etcd
+- **Base Domain**: hcp-sm-azure.azure.devcluster.openshift.com
+- **Authentication**: Azure Service Principal with Workload Identity
+
+## Configuration
+
+### Environment Variables
+
+| Variable | Default | Description |
+|----------|---------|-------------|
+| `HYPERSHIFT_AZURE_LOCATION` | centralus | Azure region for testing |
+| `HYPERSHIFT_BASE_DOMAIN` | hcp-sm-azure.azure.devcluster.openshift.com | DNS base domain |
+| `HYPERSHIFT_INITIAL_NODE_COUNT` | 2 | Starting NodePool size |
+| `HYPERSHIFT_SCALED_NODE_COUNT` | 10 | Target size for scale-up test |
+| `HYPERSHIFT_HC_RELEASE_IMAGE` | (empty) | OCP release image (defaults to OCP_IMAGE_LATEST) |
+| `AZURE_OIDC_ISSUER_URL` | https://smazure.blob.core.windows.net/smazure | OIDC issuer URL for workload identity federation (WIF) |
+
+### Credentials Required
+
+The test requires these credentials, mounted from the `test-credentials` namespace:
+- `/etc/hypershift-ci-jobs-self-managed-azure/credentials.json` - Azure service principal
+- `/etc/hypershift-ci-jobs-self-managed-azure-e2e/` - Workload identities and SA signing key
+- `/etc/ci-pull-credentials/.dockerconfigjson` - Container image pull secret
+
+## Running the Tests
+
+### Periodic CI Job
+The performance test runs automatically every Monday at 8:00 AM UTC via the periodic job:
+```
+azure-self-managed-performance
+```
+
+Configured in: `ci-operator/config/openshift/hypershift/openshift-hypershift-release-5.0__periodics-azure-perf.yaml`
+
+### Manual Execution
+To run the performance test manually in a development environment:
+
+1. Set up a management cluster with the HyperShift operator installed
+2. Ensure Azure credentials are configured
+3. Copy the management cluster kubeconfig into `${SHARED_DIR}/management_cluster_kubeconfig`
+4. Export required environment variables
+5. Run the test script:
+   ```bash
+   export KUBECONFIG=/path/to/management-cluster-kubeconfig
+   export SHARED_DIR=/tmp/test-artifacts
+   export ARTIFACT_DIR=/tmp/test-results
+   export PROW_JOB_ID=test-$(date +%s)
+
+   # Run the performance test
+   ./hypershift-azure-performance-test-commands.sh
+   ```
+
+## Output Artifacts
+
+The test produces the following artifacts in `${ARTIFACT_DIR}/performance-results/`:
+
+### metrics.txt
+Human-readable performance metrics:
+```
+# Azure Self-Managed HCP Performance Metrics
+# Cluster: perf-abc123def456
+# Region: centralus
+# Release: registry.ci.openshift.org/ocp/release:5.0
+# Date: 2026-06-11 14:30:00 UTC
+
+hosted_cluster_creation_duration_seconds: 1245
+api_server_availability_percentage: 100
+nodepool_scale_up_duration_seconds: 487
+nodepool_scale_down_duration_seconds: 215
+hosted_cluster_deletion_duration_seconds: 678
+```
+
+### metrics.json
+Machine-readable metrics for automation:
+```json
+[
+  {"metric": "hosted_cluster_creation_duration_seconds", "value": 1245, "timestamp": 1781188200},
+  {"metric": "api_server_availability_percentage", "value": 100, "timestamp": 1781188205},
+  {"metric": "nodepool_scale_up_duration_seconds", "value": 487, "timestamp": 1781188692},
+  {"metric": "nodepool_scale_down_duration_seconds", "value": 215, "timestamp": 1781188907},
+  {"metric": "hosted_cluster_deletion_duration_seconds", "value": 678, "timestamp": 1781189585}
+]
+```
+
+## Performance Baselines
+
+### Expected Performance (Release 5.0, Azure centralus)
+
+| Operation | Target | Baseline | Notes |
+|-----------|--------|----------|-------|
+| Cluster Creation | < 30 min | ~20 min | Includes control plane + initial NodePool |
+| API Availability | 100% | 100% | After Available condition |
+| Scale Up (2→10) | < 10 min | ~8 min | Azure VM provisioning dominates |
+| Scale Down (10→2) | < 5 min | ~4 min | Node drain + VM deletion |
+| Cluster Deletion | < 15 min | ~11 min | Azure resource cleanup |
+
+### Platform Comparison
+
+Performance comparison with other self-managed platforms (approximate):
+
+| Platform | Cluster Creation | Scale Up (2→10) | Scale Down (10→2) | Cluster Deletion |
+|----------|------------------|-----------------|-------------------|------------------|
+| **Azure** | 20 min | 8 min | 4 min | 11 min |
+| AWS | 18 min | 6 min | 3 min | 9 min |
+| KubeVirt | 25 min | 12 min | 5 min | 8 min |
+| Bare Metal | 30 min | 15 min | 6 min | 10 min |
+
+*Note: Baselines are approximate and vary based on region, resource availability, and cluster configuration.*
+
+## Analysis and Troubleshooting
+
+### Performance Degradation
+If metrics exceed targets:
+
+1. **Check Azure VM SKU availability and restrictions in the region**:
+   ```bash
+   az vm list-skus --location centralus --output table
+   ```
+
+2. **Verify management cluster health**:
+   ```bash
+   oc get nodes -o wide
+   oc top nodes
+   ```
+
+3. **Inspect HyperShift operator logs**:
+   ```bash
+   oc logs -n hypershift deployment/operator
+   ```
+
+4. **Review Azure resource provisioning**:
+   ```bash
+   az monitor activity-log list --resource-group <rg-name>
+   ```
+
+### Common Issues
+
+**Slow Cluster Creation (> 30 min)**:
+- Azure quota limits
+- DNS propagation delays
+- Image pull timeouts
+- etcd storage provisioning issues
+
+**Slow NodePool Scaling (> 10 min for scale-up)**:
+- Azure VM quota exhaustion
+- Availability zone capacity constraints
+- Network security group rules
+- Machine config updates pending
+
+**Slow Cluster Deletion (> 15 min)**:
+- Azure Private Link cleanup
+- Persistent volume deletion
+- DNS zone cleanup
+- Resource group finalizers
+
+## Integration with CI Analytics
+
+Performance metrics are exported for analysis by OpenShift CI tooling:
+
+1. **Artifacts**: Stored in Prow job artifacts for historical tracking
+2. **Metrics**: JSON format enables automated trend analysis
+3. **Alerts**: Exceeding targets can trigger notifications (future)
+4. **Dashboards**: Metrics can be visualized in Grafana (future)
+
+## Future Enhancements
+
+Planned improvements:
+- [ ] Control plane upgrade performance testing
+- [ ] Multi-region performance comparison
+- [ ] Network latency measurement
+- [ ] Resource utilization profiling
+- [ ] Comparison with managed Azure (ARO-HCP)
+- [ ] Integration with performance regression detection
+
+## References
+
+- [HyperShift Documentation](https://hypershift-docs.netlify.app/)
+- [Azure HyperShift Architecture](https://github.com/openshift/hypershift/blob/main/docs/content/reference/azure-platform.md)
+- [OpenShift CI Documentation](https://docs.ci.openshift.org/)
+- [CNTRLPLANE-3205](https://issues.redhat.com/browse/CNTRLPLANE-3205) - Original JIRA ticket