Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
70 commits
Select commit Hold shift + click to select a range
0e25cf3
feat: bump windows image version for 2026-03B (#8074)
rchincha Mar 13, 2026
97de410
feat(rcv1p): unify cert bootstrap flow and add Windows CA refresh task
rchincha Mar 16, 2026
b2e72ac
feat: enhance CA certificates refresh task with endpoint mode based o…
rchincha Mar 18, 2026
b9fcd6f
feat: add tests for certificate endpoint mode handling in AKS custom …
rchincha Mar 19, 2026
9a9c5ee
feat: simplify certificate endpoint mode handling and refresh task re…
rchincha Mar 19, 2026
6cced51
feat: implement conditional CA certificates refresh task registration…
rchincha Mar 19, 2026
5b553ca
feat: enhance CA certificates refresh task registration for legacy CS…
rchincha Mar 19, 2026
e2bc72b
feat: update tests for certificate endpoint mode handling and refresh…
rchincha Mar 20, 2026
ad01392
feat: refactor test setup functions for improved readability and cons…
rchincha Mar 20, 2026
e649f3e
feat: update Get-CustomCloudCertEndpointModeFromLocation to clarify e…
rchincha Mar 20, 2026
b12fb1e
feat: enhance tests for Should-InstallCACertificatesRefreshTask and G…
rchincha Mar 20, 2026
49b2e6e
feat: update cse_cmd.sh and cse_cmd.sh.gtpl to ensure consistent logg…
rchincha Mar 25, 2026
4929b0c
feat: update CA certificates functions for backward compatibility wit…
rchincha Mar 26, 2026
e485641
feat: remove deprecated Ubuntu repository initialization logic from i…
rchincha Mar 27, 2026
6b468ea
Split init-aks-custom-cloud.sh to fix Flatcar/ACL customData size limit
rchincha Apr 2, 2026
c026af5
feat(e2e): add RCV1P cert mode end-to-end tests
rchincha Apr 13, 2026
ee24a23
Address PR review feedback: fix multi-subscription, validation, and e…
rchincha Apr 14, 2026
1f0fc8e
Add Windows not-opted-in negative test for RCV1P cert mode
rchincha Apr 14, 2026
8d17c7c
e2e: add VM instance-level tag update for RCV1P wireserver opt-in
rchincha Apr 16, 2026
8b65cc8
e2e: use JSON injection for VM profile tags at VMSS creation time
rchincha Apr 16, 2026
dbc0a28
e2e: use lightweight PATCH for VM instance tags instead of JSON injec…
rchincha Apr 16, 2026
266f9d5
Revert "e2e: use lightweight PATCH for VM instance tags instead of JS…
rchincha Apr 16, 2026
3907efb
e2e: use Microsoft.Resources/tags API for VM instance tag patching
rchincha Apr 16, 2026
2e8b811
e2e: use BeginUpdate + deferred CSE for VM instance tagging
rchincha Apr 16, 2026
77245c4
e2e: add feature flag check for RCV1P subscription
rchincha Apr 17, 2026
d88d517
REVERT ME: poll wireserver IsOptedInForRootCerts with retry loop
rchincha Apr 17, 2026
2ad18c7
e2e: always log PlatformSettingsOverride feature flag status
rchincha Apr 17, 2026
d6a151e
fix(windows): parse wireserver IsOptedInForRootCerts JSON with Conver…
rchincha Apr 17, 2026
eaaac9b
e2e: make RCV1P_SUBSCRIPTION_ID optional with feature flag auto-detec…
rchincha Apr 18, 2026
e85af60
e2e: always collect Windows CSE logs (not just on failure)
rchincha Apr 18, 2026
395766a
fix: add wireserver HTTP error diagnostic logging for cert endpoints
rchincha Apr 19, 2026
6116cc5
e2e: use testDir() for Windows CSE output log path consistency
rchincha Apr 20, 2026
d71b6cd
fix(e2e): filter CSE extension to fix empty Windows CSE log files
rchincha Apr 21, 2026
0c1587d
fix(e2e): re-fetch VM instance view for fresh CSE extension status
rchincha Apr 21, 2026
2c02745
e2e: trim whitespace from RCV1P_SUBSCRIPTION_ID to fix gating
rchincha Apr 21, 2026
0c5e3e1
e2e: add gen2 Windows RCV1P tests, fix Windows2025 TrustedLaunch
rchincha Apr 22, 2026
69a5d18
e2e: switch RCV1P tests to Azure CNI Overlay to fix IP exhaustion
rchincha Apr 22, 2026
8df18ae
e2e: revert RCV1P from overlay back to kubenet
rchincha Apr 22, 2026
1ae597b
REVERT ME: use dedicated kubenet cluster for RCV1P tests
rchincha Apr 23, 2026
e15d8ca
REVERT ME: use Azure CNI cluster for Windows RCV1P tests
rchincha Apr 23, 2026
4d5ca4e
REVERT ME: add wireserver endpoint diagnostics to Windows RCV1P valid…
rchincha Apr 23, 2026
b936a2f
fix: use correct wireserver JSON field name for rcv1p cert download
rchincha Apr 23, 2026
15c0077
REVERT ME: add azcopy error logging to Windows log collection
rchincha Apr 23, 2026
2efa0f2
REVERT ME: enable verbose test output for azcopy/wireserver diagnostics
rchincha Apr 23, 2026
ce1a29f
REVERT ME: canary check to prove whether SSH validators are broken
rchincha Apr 24, 2026
fcc42a9
Remove canary check - validators confirmed working
rchincha Apr 24, 2026
d37eeae
fix: make wireserver cert retrieval failures fatal on Linux
rchincha Apr 24, 2026
5ff8a25
revert: remove diagnostic commits used during RCV1P development
rchincha Apr 25, 2026
dfb2c10
fix: make wireserver unreachable fatal for RCV1P opt-in check
rchincha Apr 26, 2026
8c89063
fix: use RCV1P Azure CNI cluster for Windows tests when explicit subs…
rchincha Apr 27, 2026
4ada2fe
fix: replace legacy ca-refresh cron entry with location-aware version
rchincha Apr 27, 2026
cf07a71
fix: align Windows wireserver retries to 10 to match Linux parity
rchincha Apr 27, 2026
09b8d20
fix: enhance RCV1P opt-in tag handling in VMSS creation process
rchincha Apr 29, 2026
b99bede
fix: use Azure CNI cluster for Windows RCV1P tests
rchincha May 6, 2026
cafa6ec
revert: drop 'REVERT ME' cluster switching commits (now superseded)
rchincha May 6, 2026
00804a3
revert: drop canary validator and wireserver polling debug commits
rchincha May 6, 2026
5904637
feat(e2e): auto-detect RCV1P feature flag on E2E subscription
rchincha May 7, 2026
c709eae
fix(e2e): skip NotOptedIn tests on auto-detected enrolled subscriptions
rchincha May 7, 2026
e1959ba
fix(e2e): use caller context in getCustomScriptExtensionStatus
rchincha May 7, 2026
dee66b3
fix(e2e): remove TrustedLaunch from non-Gen2 Windows 2025 RCV1P test
rchincha May 7, 2026
117adaa
fix: return code 2 when wireserver is unreachable in is_opted_in_for_…
rchincha May 7, 2026
2cb6a99
fix: throw when opted-in but no certs downloaded with -FailOnError
rchincha May 7, 2026
ccc57f8
e2e: use branch-built CSE zip for Windows RCV1P tests
rchincha May 7, 2026
1efb2c0
fix: parse wireserver IsOptedInForRootCerts JSON response with jq
rchincha May 8, 2026
d1414df
fix(e2e): update BootstrapConfigMutator signatures after rebase
rchincha May 8, 2026
224576e
fix: fail process_cert_operations when no cert bodies are saved
rchincha May 8, 2026
f757ce8
fix: pass repodepot_endpoint explicitly to add_key_ubuntu and add_ms_…
rchincha May 8, 2026
796d1ff
chore(e2e): remove REVERT ME wireserver diagnostic block from Windows…
rchincha May 8, 2026
482ec2d
fix(e2e): use infra.Azure for private DNS operations in RCV1P subscri…
rchincha May 8, 2026
3c0baac
fix: guard against unresolved ADO pipeline variable expressions in RC…
rchincha May 18, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions .pipelines/e2e-rcv1p.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Scheduled pipeline that runs the AgentBaker E2E scenarios tagged
# rcv1pcertmode=true through the shared e2e template.
name: $(Date:yyyyMMdd)$(Rev:.r)
variables:
# Restrict the run to scenarios carrying the RCV1P cert mode tag.
TAGS_TO_RUN: "rcv1pcertmode=true"
SKIP_E2E_TESTS: false
E2E_GO_TEST_TIMEOUT: "75m"
schedules:
# NOTE(review): Azure Pipelines cron schedules are evaluated in UTC, so 11:00 UTC
# is 03:00 PST but 04:00 during daylight time (PDT) - confirm the display name is acceptable.
- cron: "0 11 * * *"
displayName: Daily 3am PST
branches:
include:
- main
# Run on schedule even when main has no new commits since the last scheduled run.
always: true
# CI and PR triggers are disabled; only the schedule above (or a manual run) starts this pipeline.
trigger: none
pr: none
jobs:
- template: ./templates/e2e-template.yaml
parameters:
name: RCV1P Cert Mode Tests
IgnoreScenariosWithMissingVhd: false
Copy link
Copy Markdown
Contributor

@r2k1 r2k1 May 15, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Who is going to monitor this pipeline and address any issues?

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we should probably include an explicit run of this pipeline within our daily build system we use for official releases, that way we're guaranteed to have visibility during official release flows

though at the end of the day it's going to be on us to deal with failures

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should be enabled in the TME tenant, probably as an async nightly run, so that it doesn't interfere with "immediate" tests (PRs, etc.).

2 changes: 2 additions & 0 deletions .pipelines/scripts/e2e_run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ VHD_BUILD_ID="${VHD_BUILD_ID:-}"
IGNORE_SCENARIOS_WITH_MISSING_VHD="${IGNORE_SCENARIOS_WITH_MISSING_VHD:-}"
LOGGING_DIR="${LOGGING_DIR:-}"
E2E_SUBSCRIPTION_ID="${E2E_SUBSCRIPTION_ID:-}"
RCV1P_SUBSCRIPTION_ID="${RCV1P_SUBSCRIPTION_ID:-}"
ENABLE_SECURE_TLS_BOOTSTRAPPING="${ENABLE_SECURE_TLS_BOOTSTRAPPING:-true}"
TAGS_TO_SKIP="${TAGS_TO_SKIP:-}"
TAGS_TO_RUN="${TAGS_TO_RUN:-}"
Expand All @@ -47,6 +48,7 @@ echo "VHD_BUILD_ID: ${VHD_BUILD_ID}"
echo "IGNORE_SCENARIOS_WITH_MISSING_VHD: ${IGNORE_SCENARIOS_WITH_MISSING_VHD}"
echo "LOGGING_DIR: ${LOGGING_DIR}"
echo "E2E_SUBSCRIPTION_ID: ${E2E_SUBSCRIPTION_ID}"
echo "RCV1P_SUBSCRIPTION_ID: ${RCV1P_SUBSCRIPTION_ID}"
echo "ENABLE_SECURE_TLS_BOOTSTRAPPING: ${ENABLE_SECURE_TLS_BOOTSTRAPPING}"
echo "TAGS_TO_SKIP: ${TAGS_TO_SKIP}"
echo "TAGS_TO_RUN: ${TAGS_TO_RUN}"
Expand Down
1 change: 1 addition & 0 deletions .pipelines/templates/e2e-template.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ jobs:
displayName: Run AgentBaker E2E
env:
E2E_SUBSCRIPTION_ID: $(E2E_SUBSCRIPTION_ID)
RCV1P_SUBSCRIPTION_ID: $(RCV1P_SUBSCRIPTION_ID)
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

did you inject this into the RCV1P E2E pipeline variables directly?

Copy link
Copy Markdown
Author

@rchincha rchincha May 18, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Correct, and relevant only for TME tenant since MSFT tenant's E2E_SUBSCRIPTION_ID already exercises rcv1p path. In my test, RCV1P_SUBSCRIPTION_ID was set to a different value and models a customer who can opt-in their subscription for rcv1p.

SYS_SSH_PUBLIC_KEY: $(SYS_SSH_PUBLIC_KEY)
SYS_SSH_PRIVATE_KEY_B64: $(SYS_SSH_PRIVATE_KEY_B64)
BUILD_SRC_DIR: $(System.DefaultWorkingDirectory)
Expand Down
7 changes: 6 additions & 1 deletion aks-node-controller/parser/helper.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ func getFuncMap() template.FuncMap {
return template.FuncMap{
"getInitAKSCustomCloudFilepath": getInitAKSCustomCloudFilepath,
"getIsAksCustomCloud": getIsAksCustomCloud,
"getCloudLocation": getCloudLocation,
}
}

Expand Down Expand Up @@ -538,11 +539,15 @@ func getIsAksCustomCloud(customCloudConfig *aksnodeconfigv1.CustomCloudConfig) b
return strings.EqualFold(customCloudConfig.GetCustomCloudEnvName(), helpers.AksCustomCloudName)
}

// getCloudLocation returns the cluster's configured location in normalized form:
// every whitespace run is removed and the result is lower-cased
// (for example "West US 2" becomes "westus2").
func getCloudLocation(v *aksnodeconfigv1.Configuration) string {
	rawLocation := v.GetClusterConfig().GetLocation()
	// strings.Fields splits on any whitespace, so joining with "" strips it all.
	compact := strings.Join(strings.Fields(rawLocation), "")
	return strings.ToLower(compact)
}

/* GetCloudTargetEnv determines and returns whether the region is a sovereign cloud which
have their own data compliance regulations (China/Germany/USGov) or standard. */
// Azure public cloud.
func getCloudTargetEnv(v *aksnodeconfigv1.Configuration) string {
loc := strings.ToLower(strings.Join(strings.Fields(v.GetClusterConfig().GetLocation()), ""))
loc := getCloudLocation(v)
switch {
case strings.HasPrefix(loc, "china"):
return "AzureChinaCloud"
Expand Down
3 changes: 2 additions & 1 deletion aks-node-controller/parser/templates/cse_cmd.sh.gtpl
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
echo $(date),$(hostname) > ${PROVISION_OUTPUT};
{{if getIsAksCustomCloud .CustomCloudConfig}}
REPO_DEPOT_ENDPOINT="{{.CustomCloudConfig.RepoDepotEndpoint}}"
{{getInitAKSCustomCloudFilepath}} >> /var/log/azure/cluster-provision.log 2>&1;
{{end}}
LOCATION="{{getCloudLocation .}}"
Comment thread
rchincha marked this conversation as resolved.
Comment thread
rchincha marked this conversation as resolved.
{{getInitAKSCustomCloudFilepath}} >> /var/log/azure/cluster-provision.log 2>&1;
Comment thread
rchincha marked this conversation as resolved.
Comment thread
rchincha marked this conversation as resolved.
Copy link
Copy Markdown
Contributor

@cameronmeissner cameronmeissner May 15, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we should change the name of this template func: maybe getInitCertificateTrustStoreFilepath or something - keeping the notion of "custom cloud" tied to this script at this point doesn't really make sense to me since we're running it everywhere

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was planning a follow-up PR that cleans up references to "custom" after this PR lands. Also see my comment below. But OK either way.

/usr/bin/nohup /bin/bash -c "/bin/bash /opt/azure/containers/provision_start.sh"
55 changes: 28 additions & 27 deletions e2e/aks_model.go
Original file line number Diff line number Diff line change
Expand Up @@ -301,22 +301,23 @@ func getFirewall(ctx context.Context, location, firewallSubnetID, publicIPID str
}

func addFirewallRules(
ctx context.Context, clusterModel *armcontainerservice.ManagedCluster,
ctx context.Context, infra *ClusterInfra, clusterModel *armcontainerservice.ManagedCluster,
) error {
location := *clusterModel.Location
defer toolkit.LogStepCtx(ctx, "adding firewall rules")()

rg := *clusterModel.Properties.NodeResourceGroup
vnet, err := getClusterVNet(ctx, rg)
vnet, err := getClusterVNet(ctx, infra, rg)
if err != nil {
return err
}

// For kubenet, the AKS-managed route table must stay attached so that pod
// routes (managed by cloud-provider-azure) and firewall routes coexist.
// For Azure CNI variants, the subnet may not have any route table, so we
// create and associate a dedicated one before adding the firewall routes.
aksSubnetResp, err := config.Azure.Subnet.Get(ctx, rg, vnet.name, "aks-subnet", nil)
// Find the AKS-managed route table currently associated with the subnet.
// We add firewall routes directly to this table so that both pod routes
// (managed by cloud-provider-azure) and firewall routes coexist. Creating
// a separate route table and swapping the subnet association disconnects
// the pod routes and breaks kubenet networking.
aksSubnetResp, err := infra.Azure.Subnet.Get(ctx, rg, vnet.name, "aks-subnet", nil)
if err != nil {
return fmt.Errorf("failed to get AKS subnet: %w", err)
}
Expand All @@ -334,7 +335,7 @@ func addFirewallRules(
}

toolkit.Logf(ctx, "Creating subnet %s in VNet %s", firewallSubnetName, vnet.name)
subnetPoller, err := config.Azure.Subnet.BeginCreateOrUpdate(
subnetPoller, err := infra.Azure.Subnet.BeginCreateOrUpdate(
ctx,
rg,
vnet.name,
Expand Down Expand Up @@ -367,7 +368,7 @@ func addFirewallRules(
}

toolkit.Logf(ctx, "Creating public IP %s", publicIPName)
pipPoller, err := config.Azure.PublicIPAddresses.BeginCreateOrUpdate(
pipPoller, err := infra.Azure.PublicIPAddresses.BeginCreateOrUpdate(
ctx,
rg,
publicIPName,
Expand All @@ -388,7 +389,7 @@ func addFirewallRules(

firewallName := "abe2e-fw"
firewall := getFirewall(ctx, location, firewallSubnetID, publicIPID)
fwPoller, err := config.Azure.AzureFirewall.BeginCreateOrUpdate(ctx, rg, firewallName, *firewall, nil)
fwPoller, err := infra.Azure.AzureFirewall.BeginCreateOrUpdate(ctx, rg, firewallName, *firewall, nil)
if err != nil {
return fmt.Errorf("failed to start Firewall creation: %w", err)
}
Expand Down Expand Up @@ -434,7 +435,7 @@ func addFirewallRules(

for _, route := range firewallRoutes {
toolkit.Logf(ctx, "Adding route %q to AKS route table %q", *route.Name, aksRTName)
poller, err := config.Azure.Routes.BeginCreateOrUpdate(ctx, rg, aksRTName, *route.Name, route, nil)
poller, err := infra.Azure.Routes.BeginCreateOrUpdate(ctx, rg, aksRTName, *route.Name, route, nil)
if err != nil {
return fmt.Errorf("failed to start adding route %q: %w", *route.Name, err)
}
Expand Down Expand Up @@ -512,7 +513,7 @@ func addPrivateAzureContainerRegistry(ctx context.Context, cluster *armcontainer
if err := createPrivateAzureContainerRegistryPullSecret(ctx, cluster, kube, resourceGroupName, isNonAnonymousPull); err != nil {
return fmt.Errorf("create private acr pull secret: %w", err)
}
vnet, err := getClusterVNet(ctx, *cluster.Properties.NodeResourceGroup)
vnet, err := getClusterVNet(ctx, DefaultClusterInfra, *cluster.Properties.NodeResourceGroup)
Comment thread
rchincha marked this conversation as resolved.
if err != nil {
return err
}
Expand All @@ -533,7 +534,7 @@ func addNetworkIsolatedSettings(ctx context.Context, clusterModel *armcontainers
location := *clusterModel.Location
defer toolkit.LogStepCtx(ctx, fmt.Sprintf("Adding network settings for network isolated cluster %s in rg %s", *clusterModel.Name, *clusterModel.Properties.NodeResourceGroup))

vnet, err := getClusterVNet(ctx, *clusterModel.Properties.NodeResourceGroup)
vnet, err := getClusterVNet(ctx, DefaultClusterInfra, *clusterModel.Properties.NodeResourceGroup)
Comment thread
rchincha marked this conversation as resolved.
if err != nil {
return err
}
Expand Down Expand Up @@ -620,11 +621,11 @@ func addPrivateEndpointForACR(ctx context.Context, nodeResourceGroup, privateACR

privateZoneName := "privatelink.azurecr.io"
var privateZone *armprivatedns.PrivateZone
if privateZone, err = createPrivateZone(ctx, nodeResourceGroup, privateZoneName); err != nil {
if privateZone, err = createPrivateZone(ctx, config.Azure, nodeResourceGroup, privateZoneName); err != nil {
return err
}

if err = createPrivateDNSLink(ctx, vnet, nodeResourceGroup, privateZoneName); err != nil {
if err = createPrivateDNSLink(ctx, config.Azure, vnet, nodeResourceGroup, privateZoneName); err != nil {
return err
}

Expand Down Expand Up @@ -680,7 +681,7 @@ func createPrivateAzureContainerRegistry(ctx context.Context, cluster *armcontai
}
// if ACR gets recreated so should the cluster
toolkit.Logf(ctx, "Private ACR deleted, deleting cluster %s", *cluster.Name)
if err := deleteCluster(ctx, *cluster.Name, resourceGroup); err != nil {
if err := deleteCluster(ctx, DefaultClusterInfra, *cluster.Name, resourceGroup); err != nil {
return fmt.Errorf("failed to delete cluster: %w", err)
}
} else {
Expand Down Expand Up @@ -871,8 +872,8 @@ func createPrivateEndpoint(ctx context.Context, nodeResourceGroup, privateEndpoi
return &resp.PrivateEndpoint, nil
}

func createPrivateZone(ctx context.Context, nodeResourceGroup, privateZoneName string) (*armprivatedns.PrivateZone, error) {
pzResp, err := config.Azure.PrivateZonesClient.Get(
func createPrivateZone(ctx context.Context, azure *config.AzureClient, nodeResourceGroup, privateZoneName string) (*armprivatedns.PrivateZone, error) {
pzResp, err := azure.PrivateZonesClient.Get(
ctx,
nodeResourceGroup,
privateZoneName,
Expand All @@ -884,7 +885,7 @@ func createPrivateZone(ctx context.Context, nodeResourceGroup, privateZoneName s
dnsZoneParams := armprivatedns.PrivateZone{
Location: to.Ptr("global"),
}
poller, err := config.Azure.PrivateZonesClient.BeginCreateOrUpdate(
poller, err := azure.PrivateZonesClient.BeginCreateOrUpdate(
ctx,
nodeResourceGroup,
privateZoneName,
Expand All @@ -895,7 +896,7 @@ func createPrivateZone(ctx context.Context, nodeResourceGroup, privateZoneName s
// 409 means another operation is in progress — wait and re-fetch
var respErr *azcore.ResponseError
if errors.As(err, &respErr) && respErr.StatusCode == 409 {
return waitForPrivateZone(ctx, nodeResourceGroup, privateZoneName)
return waitForPrivateZone(ctx, azure, nodeResourceGroup, privateZoneName)
}
return nil, fmt.Errorf("failed to create private dns zone in BeginCreateOrUpdate: %w", err)
}
Expand All @@ -908,11 +909,11 @@ func createPrivateZone(ctx context.Context, nodeResourceGroup, privateZoneName s
return &resp.PrivateZone, nil
}

func waitForPrivateZone(ctx context.Context, nodeResourceGroup, privateZoneName string) (*armprivatedns.PrivateZone, error) {
func waitForPrivateZone(ctx context.Context, azure *config.AzureClient, nodeResourceGroup, privateZoneName string) (*armprivatedns.PrivateZone, error) {
defer toolkit.LogStepCtxf(ctx, "waiting for private DNS zone %s (409 conflict)", privateZoneName)()
var zone *armprivatedns.PrivateZone
err := wait.PollUntilContextTimeout(ctx, 5*time.Second, 2*time.Minute, true, func(ctx context.Context) (bool, error) {
resp, err := config.Azure.PrivateZonesClient.Get(ctx, nodeResourceGroup, privateZoneName, nil)
resp, err := azure.PrivateZonesClient.Get(ctx, nodeResourceGroup, privateZoneName, nil)
if err != nil {
var respErr *azcore.ResponseError
if errors.As(err, &respErr) && respErr.StatusCode == 404 {
Expand All @@ -929,9 +930,9 @@ func waitForPrivateZone(ctx context.Context, nodeResourceGroup, privateZoneName
return zone, nil
}

func createPrivateDNSLink(ctx context.Context, vnet VNet, nodeResourceGroup, privateZoneName string) error {
func createPrivateDNSLink(ctx context.Context, azure *config.AzureClient, vnet VNet, nodeResourceGroup, privateZoneName string) error {
networkLinkName := "link-ABE2ETests"
_, err := config.Azure.VirutalNetworkLinksClient.Get(
_, err := azure.VirutalNetworkLinksClient.Get(
ctx,
nodeResourceGroup,
privateZoneName,
Expand All @@ -944,7 +945,7 @@ func createPrivateDNSLink(ctx context.Context, vnet VNet, nodeResourceGroup, pri
return nil
}

vnetForId, err := config.Azure.VNet.Get(ctx, nodeResourceGroup, vnet.name, nil)
vnetForId, err := azure.VNet.Get(ctx, nodeResourceGroup, vnet.name, nil)
if err != nil {
return fmt.Errorf("failed to get vnet: %w", err)
}
Expand All @@ -957,7 +958,7 @@ func createPrivateDNSLink(ctx context.Context, vnet VNet, nodeResourceGroup, pri
RegistrationEnabled: to.Ptr(false),
},
}
poller, err := config.Azure.VirutalNetworkLinksClient.BeginCreateOrUpdate(
poller, err := azure.VirutalNetworkLinksClient.BeginCreateOrUpdate(
ctx,
nodeResourceGroup,
privateZoneName,
Expand All @@ -971,7 +972,7 @@ func createPrivateDNSLink(ctx context.Context, vnet VNet, nodeResourceGroup, pri
if errors.As(err, &respErr) && respErr.StatusCode == 409 {
toolkit.Logf(ctx, "Virtual network link creation conflict (409), waiting for completion")
return wait.PollUntilContextTimeout(ctx, 5*time.Second, 2*time.Minute, true, func(ctx context.Context) (bool, error) {
_, err := config.Azure.VirutalNetworkLinksClient.Get(ctx, nodeResourceGroup, privateZoneName, networkLinkName, nil)
_, err := azure.VirutalNetworkLinksClient.Get(ctx, nodeResourceGroup, privateZoneName, networkLinkName, nil)
if err != nil {
var respErr *azcore.ResponseError
if errors.As(err, &respErr) && respErr.StatusCode == 404 {
Expand Down
60 changes: 52 additions & 8 deletions e2e/cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (
"github.com/Azure/azure-sdk-for-go/sdk/azcore"
"github.com/Azure/azure-sdk-for-go/sdk/azcore/to"
"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/compute/armcompute/v7"
"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/resources/armresources/v3"
)

// cachedFunc creates a thread-safe memoized version of a function.
Expand Down Expand Up @@ -150,56 +151,80 @@ func clusterLatestKubernetesVersion(ctx context.Context, request ClusterRequest)
if err != nil {
return nil, fmt.Errorf("getting latest kubernetes version cluster model: %w", err)
}
return prepareCluster(ctx, model, false, false)
return prepareCluster(ctx, DefaultClusterInfra, model, false, false)
}

var ClusterKubenet = cachedFunc(clusterKubenet)

// clusterKubenet creates a basic cluster using kubenet networking
func clusterKubenet(ctx context.Context, request ClusterRequest) (*Cluster, error) {
return prepareCluster(ctx, getKubenetClusterModel("abe2e-kubenet-v4", request.Location, request.K8sSystemPoolSKU), false, false)
return prepareCluster(ctx, DefaultClusterInfra, getKubenetClusterModel("abe2e-kubenet-v4", request.Location, request.K8sSystemPoolSKU), false, false)
}

var ClusterAzureNetwork = cachedFunc(clusterAzureNetwork)

// clusterAzureNetwork creates a cluster with Azure CNI networking
func clusterAzureNetwork(ctx context.Context, request ClusterRequest) (*Cluster, error) {
return prepareCluster(ctx, getAzureNetworkClusterModel("abe2e-azure-network-v3", request.Location, request.K8sSystemPoolSKU), false, false)
return prepareCluster(ctx, DefaultClusterInfra, getAzureNetworkClusterModel("abe2e-azure-network-v3", request.Location, request.K8sSystemPoolSKU), false, false)
}

var ClusterAzureBootstrapProfileCache = cachedFunc(clusterAzureBootstrapProfileCache)

// clusterAzureBootstrapProfileCache creates a cluster with bootstrap profile cache but without network isolation
func clusterAzureBootstrapProfileCache(ctx context.Context, request ClusterRequest) (*Cluster, error) {
return prepareCluster(ctx, getAzureNetworkClusterModel("abe2e-azure-bootstrapprofile-cache-v1", request.Location, request.K8sSystemPoolSKU), false, true)
return prepareCluster(ctx, DefaultClusterInfra, getAzureNetworkClusterModel("abe2e-azure-bootstrapprofile-cache-v1", request.Location, request.K8sSystemPoolSKU), false, true)
}

var ClusterAzureNetworkIsolated = cachedFunc(clusterAzureNetworkIsolated)

// clusterAzureNetworkIsolated creates a networkisolated Azure network cluster (no internet access)
func clusterAzureNetworkIsolated(ctx context.Context, request ClusterRequest) (*Cluster, error) {
return prepareCluster(ctx, getAzureNetworkClusterModel("abe2e-azure-networkisolated-v1", request.Location, request.K8sSystemPoolSKU), true, false)
return prepareCluster(ctx, DefaultClusterInfra, getAzureNetworkClusterModel("abe2e-azure-networkisolated-v1", request.Location, request.K8sSystemPoolSKU), true, false)
}

var ClusterAzureOverlayNetwork = cachedFunc(clusterAzureOverlayNetwork)

// clusterAzureOverlayNetwork creates a cluster with Azure CNI Overlay networking
func clusterAzureOverlayNetwork(ctx context.Context, request ClusterRequest) (*Cluster, error) {
return prepareCluster(ctx, getAzureOverlayNetworkClusterModel("abe2e-azure-overlay-network-v3", request.Location, request.K8sSystemPoolSKU), false, false)
return prepareCluster(ctx, DefaultClusterInfra, getAzureOverlayNetworkClusterModel("abe2e-azure-overlay-network-v3", request.Location, request.K8sSystemPoolSKU), false, false)
}

var ClusterAzureOverlayNetworkDualStack = cachedFunc(clusterAzureOverlayNetworkDualStack)

// clusterAzureOverlayNetworkDualStack creates a dual-stack (IPv4+IPv6) Azure CNI Overlay cluster
func clusterAzureOverlayNetworkDualStack(ctx context.Context, request ClusterRequest) (*Cluster, error) {
return prepareCluster(ctx, getAzureOverlayNetworkDualStackClusterModel("abe2e-azure-overlay-dualstack-v3", request.Location, request.K8sSystemPoolSKU), false, false)
return prepareCluster(ctx, DefaultClusterInfra, getAzureOverlayNetworkDualStackClusterModel("abe2e-azure-overlay-dualstack-v3", request.Location, request.K8sSystemPoolSKU), false, false)
}

var ClusterCiliumNetwork = cachedFunc(clusterCiliumNetwork)

// clusterCiliumNetwork creates a cluster with Cilium CNI networking
func clusterCiliumNetwork(ctx context.Context, request ClusterRequest) (*Cluster, error) {
return prepareCluster(ctx, getCiliumNetworkClusterModel("abe2e-cilium-network-v3", request.Location, request.K8sSystemPoolSKU), false, false)
return prepareCluster(ctx, DefaultClusterInfra, getCiliumNetworkClusterModel("abe2e-cilium-network-v3", request.Location, request.K8sSystemPoolSKU), false, false)
}

var ClusterRCV1PKubenet = cachedFunc(clusterRCV1PKubenet)

// clusterRCV1PKubenet prepares the kubenet cluster used for RCV1P cert mode
// testing, using the infra returned by RCV1PClusterInfra (the RCV1P subscription).
// It returns an error when that infra is nil.
func clusterRCV1PKubenet(ctx context.Context, request ClusterRequest) (*Cluster, error) {
	rcv1p := RCV1PClusterInfra()
	if rcv1p == nil {
		return nil, fmt.Errorf("RCV1P_SUBSCRIPTION_ID not set, cannot create RCV1P cluster")
	}

	model := getKubenetClusterModel("abe2e-rcv1p-kubenet-v1", request.Location, request.K8sSystemPoolSKU)
	return prepareCluster(ctx, rcv1p, model, false, false)
}

var ClusterRCV1PAzureNetwork = cachedFunc(clusterRCV1PAzureNetwork)

// clusterRCV1PAzureNetwork prepares the Azure CNI cluster used for Windows
// cert mode testing, using the infra returned by RCV1PClusterInfra (the RCV1P
// subscription). It returns an error when that infra is nil.
//
// Windows tests need Azure CNI rather than kubenet because baseTemplateWindows()
// configures the NBC for Azure CNI overlay mode.
func clusterRCV1PAzureNetwork(ctx context.Context, request ClusterRequest) (*Cluster, error) {
	rcv1p := RCV1PClusterInfra()
	if rcv1p == nil {
		return nil, fmt.Errorf("RCV1P_SUBSCRIPTION_ID not set, cannot create RCV1P Azure CNI cluster")
	}

	model := getAzureNetworkClusterModel("abe2e-rcv1p-azure-v1", request.Location, request.K8sSystemPoolSKU)
	return prepareCluster(ctx, rcv1p, model, false, false)
}

// isNotFoundErr checks if an error represents a "not found" response from Azure API
Expand Down Expand Up @@ -228,6 +253,25 @@ var CachedEnsureResourceGroup = cachedFunc(ensureResourceGroup)
var CachedCreateVMManagedIdentity = cachedFunc(config.Azure.CreateVMManagedIdentity)
var CachedCompileAndUploadAKSNodeController = cachedFunc(compileAndUploadAKSNodeController)

// CachedRCV1PEnsureResourceGroup creates the resource group in the RCV1P subscription.
var CachedRCV1PEnsureResourceGroup = cachedFunc(ensureRCV1PResourceGroup)

// CachedRCV1PCreateVMManagedIdentity is the memoized call that creates a VM managed
// identity through the RCV1P Azure client, in the RCV1P resource group for the
// given location. It returns an error when config.RCV1PAzure is nil.
var CachedRCV1PCreateVMManagedIdentity = cachedFunc(func(ctx context.Context, location string) (string, error) {
	rcv1pAzure := config.RCV1PAzure
	if rcv1pAzure == nil {
		return "", fmt.Errorf("RCV1P_SUBSCRIPTION_ID not set")
	}

	resourceGroup := config.RCV1PResourceGroupName(location)
	return rcv1pAzure.CreateVMManagedIdentityInRG(ctx, resourceGroup, location)
})

// ensureRCV1PResourceGroup delegates to ensureResourceGroupWithInfra using the
// infra returned by RCV1PClusterInfra. When that infra is nil it returns a zero
// ResourceGroup and an error.
func ensureRCV1PResourceGroup(ctx context.Context, location string) (armresources.ResourceGroup, error) {
	if rcv1p := RCV1PClusterInfra(); rcv1p != nil {
		return ensureResourceGroupWithInfra(ctx, rcv1p, location)
	}
	return armresources.ResourceGroup{}, fmt.Errorf("RCV1P_SUBSCRIPTION_ID not set")
}

// VMSizeSKURequest is the cache key for Resource SKU lookups by VM size and location.
type VMSizeSKURequest struct {
Location string
Expand Down
Loading
Loading