Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
042c0f4
enable aksnodeconfig in all e2es
lilypan26 Apr 28, 2026
ffa89ce
Merge branch 'main' of https://github.com/Azure/AgentBaker into lily/…
lilypan26 Apr 29, 2026
2c493c8
refactor customdatahack and get e2e passing
lilypan26 May 4, 2026
1bb406d
fix scriptless vs nonscriptless e2e logic
lilypan26 May 5, 2026
02b46dc
restore
lilypan26 May 6, 2026
85702a0
Merge branch 'main' of https://github.com/Azure/AgentBaker into lily/…
lilypan26 May 10, 2026
dba6573
add phase 3 custom data and fix e2e logic
lilypan26 May 11, 2026
b8e9aa5
fix diffs
lilypan26 May 18, 2026
9e45dde
add validator
lilypan26 May 19, 2026
eda17a8
do not run scriptless phase 3 e2es for dedicated scriptless scenarios
lilypan26 May 20, 2026
eed133e
fix vnetcniplugins conversion and phase 3 validation check
lilypan26 May 21, 2026
af43215
Merge branch 'main' of https://github.com/Azure/AgentBaker into lily/…
lilypan26 May 21, 2026
0b51e0c
fix scriptless phase 3 validator condition
lilypan26 May 22, 2026
af47c9e
Merge branch 'main' of https://github.com/Azure/AgentBaker into lily/…
lilypan26 May 23, 2026
84fb1ba
Merge branch 'main' of https://github.com/Azure/AgentBaker into lily/…
lilypan26 May 26, 2026
b050969
fix more diffs
lilypan26 May 27, 2026
c181e25
fix diffs
lilypan26 May 27, 2026
1eb9ade
do not enable phase 3 in ubuntu2404 e2e for now
lilypan26 May 27, 2026
093c4f7
fix: update unit tests to remove expected trailing space from GetOrde…
Copilot May 27, 2026
1e7befc
update UTs
lilypan26 May 27, 2026
603e7e3
Merge branch 'lily/scriptless/phase-3-e2e' of https://github.com/Azur…
lilypan26 May 27, 2026
60d6e92
fix: quote proxy values in getProxyVariables to prevent command injec…
Copilot May 27, 2026
e7c279c
fix: propagate gzip helper errors in e2e custom data
Copilot May 27, 2026
f4adeb9
fix: gzip phase3 custom data payload
Copilot May 27, 2026
50768f5
fix http proxy vars quotations
lilypan26 May 27, 2026
428dbcf
Merge branch 'lily/scriptless/phase-3-e2e' of https://github.com/Azur…
lilypan26 May 27, 2026
d4ed38a
address comments
lilypan26 May 27, 2026
2ef3c72
Potential fix for pull request finding
lilypan26 May 27, 2026
425c7ff
improve env compare validation
lilypan26 May 27, 2026
442c525
fix proxy var compare
lilypan26 May 27, 2026
2126d61
do not log values
lilypan26 May 27, 2026
c6675f0
Merge branch 'lily/scriptless/phase-3-e2e' of https://github.com/Azur…
lilypan26 May 27, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
110 changes: 85 additions & 25 deletions aks-node-controller/app.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,17 @@
"github.com/urfave/cli/v3"
)

// isDeprecatedCSEVar reports whether key is one of the CSE environment
// variable names this file classifies as deprecated. The match is exact
// and case-sensitive.
func isDeprecatedCSEVar(key string) bool {
	return key == "CLOUD_INIT_STATUS_SCRIPT" ||
		key == "HYPERKUBE_URL" ||
		key == "MCR_REPOSITORY_BASE" ||
		key == "BLOCK_OUTBOUND_NETWORK"
}

type App struct {
// cmdRun is a function that runs the given command.
// the goal of this field is to make it easier to test the app by mocking the command runner.
Expand Down Expand Up @@ -257,14 +268,7 @@
}

// Extract CSE-specific env vars from provision config by filtering out unmodified OS env vars.
osEnv := envSliceToMap(os.Environ())
pcAllEnv := envSliceToMap(provisionConfigCmd.Env)
pcEnv := make(map[string]string, len(pcAllEnv))
for k, v := range pcAllEnv {
if osVal, inOS := osEnv[k]; !inOS || osVal != v {
pcEnv[k] = v
}
}
pcEnv := extractCSEEnvVars(provisionConfigCmd.Env)

// Parse env vars directly from the NBC command file content.
nbcCmdContent, err := os.ReadFile(flags.NBCCmd)
Expand All @@ -274,8 +278,36 @@
}
nbcEnv := parseEnvVarsFromNBCCmdContent(string(nbcCmdContent))

// Collect all keys from both environments.
allKeys := make(map[string]struct{})
diffs := diffEnvMaps(pcEnv, nbcEnv)

now := time.Now()
if len(diffs) == 0 {
slog.Info("env compare: no differences found between provision-config and nbc-cmd env vars")
eventLogger.LogEvent("CompareEnvs", "env vars match between provision-config and nbc-cmd", helpers.EventLevelInformational, now, now)
} else {
message := fmt.Sprintf("env var differences (%d): %s", len(diffs), strings.Join(diffs, "; "))
slog.Info(message)
eventLogger.LogEvent("CompareEnvs", message, helpers.EventLevelInformational, now, now)
}
}

// extractCSEEnvVars narrows a command's env slice down to the variables that
// are specific to that command. Both cmdEnv and the current process
// environment are converted with envSliceToMap; an entry from cmdEnv is kept
// when its key is absent from the process environment or its value differs
// from the process's value. Entries identical to the process environment are
// dropped.
func extractCSEEnvVars(cmdEnv []string) map[string]string {
	inherited := envSliceToMap(os.Environ())
	candidates := envSliceToMap(cmdEnv)

	result := make(map[string]string, len(candidates))
	for name, val := range candidates {
		// Skip anything the command merely inherited unchanged from the OS.
		if current, ok := inherited[name]; ok && current == val {
			continue
		}
		result[name] = val
	}
	return result
}

// diffEnvMaps compares two environment variable maps and returns a sorted list of human-readable differences.
func diffEnvMaps(pcEnv, nbcEnv map[string]string) []string {
allKeys := make(map[string]struct{}, len(pcEnv)+len(nbcEnv))
for k := range pcEnv {
allKeys[k] = struct{}{}
}
Expand All @@ -297,21 +329,29 @@
case inPC && !inNBC:
diffs = append(diffs, fmt.Sprintf("only-in-pc: %s", key))
case !inPC && inNBC:
diffs = append(diffs, fmt.Sprintf("only-in-nbc: %s", key))
case pcVal != nbcVal:
if !isDeprecatedCSEVar(key) {
diffs = append(diffs, fmt.Sprintf("only-in-nbc: %s", key))
}
case !envValsEqual(pcVal, nbcVal):
diffs = append(diffs, fmt.Sprintf("differs: %s", key))
}
}
return diffs
}

now := time.Now()
if len(diffs) == 0 {
slog.Info("env compare: no differences found between provision-config and nbc-cmd env vars")
eventLogger.LogEvent("CompareEnvs", "env vars match between provision-config and nbc-cmd", helpers.EventLevelInformational, now, now)
} else {
message := fmt.Sprintf("env var differences (%d): %s", len(diffs), strings.Join(diffs, "; "))
slog.Info(message)
eventLogger.LogEvent("CompareEnvs", message, helpers.EventLevelInformational, now, now)
// envValsEqual reports whether two environment variable values should be
// considered the same. Values match when they are byte-identical, or when they
// become identical once every double-quote character has been removed from
// both. The relaxed comparison covers cases like PROXY_VARS, where the legacy
// path strips inner quotes due to a shell quoting collision while the
// scriptless path preserves them.
func envValsEqual(a, b string) bool {
	return a == b || stripDoubleQuotes(a) == stripDoubleQuotes(b)
}

// stripDoubleQuotes returns s with every double-quote character removed.
// All other bytes, including single quotes and backslashes, are left untouched.
func stripDoubleQuotes(s string) string {
	const doubleQuote = `"`
	return strings.ReplaceAll(s, doubleQuote, "")
}

// parseEnvVarsFromNBCCmdContent extracts environment variable assignments from an NBC command string.
Expand Down Expand Up @@ -359,14 +399,14 @@
}

// parseEnvValue parses the value portion of a KEY=VALUE assignment starting at position i.
// It handles concatenated quoted and unquoted segments. Returns the parsed value and the new position.
// It handles concatenated quoted (single or double) and unquoted segments. Returns the parsed value and the new position.
func parseEnvValue(content string, i int) (string, int) {
n := len(content)
var value strings.Builder
for i < n {
switch {
case content[i] == '"':
// Quoted section: read until closing quote.
// Double-quoted section: read until closing double quote.
i++ // skip opening quote
for i < n && content[i] != '"' {
value.WriteByte(content[i])
Expand All @@ -375,6 +415,16 @@
if i < n {
i++ // skip closing quote
}
case content[i] == '\'':
// Single-quoted section: read until closing single quote.
i++ // skip opening quote
for i < n && content[i] != '\'' {
value.WriteByte(content[i])
i++
}
if i < n {
i++ // skip closing quote
}
case isDelimiter(content[i]):
return value.String(), i
default:
Expand All @@ -401,19 +451,28 @@
return (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '_'
}

// skipToken advances past the current non-whitespace token, respecting double-quoted sections.
// skipToken advances past the current non-whitespace token, respecting quoted sections.
func skipToken(content string, i int) int {
n := len(content)
for i < n && content[i] != ' ' && content[i] != '\t' && content[i] != '\n' && content[i] != ';' {
if content[i] == '"' {
switch {

Check failure on line 458 in aks-node-controller/app.go

View workflow job for this annotation

GitHub Actions / lint (aks-node-controller)

QF1002: could use tagged switch on content[i] (staticcheck)
case content[i] == '"':
i++
for i < n && content[i] != '"' {
i++
}
if i < n {
i++
}
} else {
case content[i] == '\'':
i++
for i < n && content[i] != '\'' {
i++
}
if i < n {
i++
}
default:
i++
}
}
Expand Down Expand Up @@ -450,6 +509,7 @@
// If both flags are provided, compare environments before proceeding.
// This is best-effort and should not block provisioning.
if flags.ProvisionConfig != "" && flags.NBCCmd != "" {
slog.Info("ProvisionConfig and NBCCmd both provided, comparing envs")
compareEnvs(ctx, flags, a.eventLogger)
}

Expand Down
6 changes: 6 additions & 0 deletions aks-node-controller/app_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -526,6 +526,12 @@ func TestParseEnvVarsFromNBCCmdContent(t *testing.T) {
assert.Equal(t, "false", got["GPU_NEEDS_FABRIC_MANAGER"])
assert.Equal(t, "900", got["CSE_TIMEOUT"])
})

t.Run("single-quoted values", func(t *testing.T) {
content := `PROXY_VARS='export HTTPS_PROXY="https://proxy:8443"; export http_proxy="http://proxy:8080";'`
got := parseEnvVarsFromNBCCmdContent(content)
assert.Equal(t, `export HTTPS_PROXY="https://proxy:8443"; export http_proxy="http://proxy:8080";`, got["PROXY_VARS"])
})
}

// compareEnvsConfigEnv builds a CSE env map from the test provision config,
Expand Down
4 changes: 2 additions & 2 deletions aks-node-controller/helpers/const.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@ const (
NetworkPluginKubenet = "kubenet"
NetworkPolicyAzure = "azure"
NetworkPolicyCalico = "calico"
LoadBalancerBasic = "basic"
LoadBalancerStandard = "standard"
LoadBalancerBasic = "Basic"
LoadBalancerStandard = "Standard"
VMSizeStandardDc2s = "Standard_DC2s"
VMSizeStandardDc4s = "Standard_DC4s"
DefaultLinuxUser = "azureuser"
Expand Down
19 changes: 7 additions & 12 deletions aks-node-controller/parser/helper.go
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,7 @@ func containerdConfigFromAKSNodeConfig(aksnodeconfig *aksnodeconfigv1.Configurat
return "", fmt.Errorf("AKSNodeConfig is nil")
}

// TODO: add containerdv2 support
// the containerd config template is different based on whether the node is with GPU or not.
_template := containerdConfigTemplate
if noGPU {
Expand Down Expand Up @@ -402,7 +403,7 @@ func getSysctlContent(s *aksnodeconfigv1.SysctlConfig) string {
m["vm.vfs_cache_pressure"] = s.GetVmVfsCachePressure()
}

return base64.StdEncoding.EncodeToString([]byte(createSortedKeyValuePairs(m, "\n")))
return base64.StdEncoding.EncodeToString([]byte(createSortedKeyValuePairs(m, "\n") + "\n"))
Comment thread
lilypan26 marked this conversation as resolved.
}

func getShouldConfigContainerdUlimits(u *aksnodeconfigv1.UlimitConfig) bool {
Expand Down Expand Up @@ -471,24 +472,18 @@ func getPortRangeEndValue(portRange string) int {

// createSortedKeyValuePairs creates a string with key=value pairs, sorted by key, with custom delimiter.
func createSortedKeyValuePairs[T any](m map[string]T, delimiter string) string {
keys := []string{}
keys := make([]string, 0, len(m))
for key := range m {
keys = append(keys, key)
}

// we are sorting the keys for deterministic output for readability and testing.
sort.Strings(keys)
var buf bytes.Buffer
i := 0
pairs := make([]string, 0, len(keys))
for _, key := range keys {
i++
// set the last delimiter to empty string
if i == len(keys) {
delimiter = ""
}
buf.WriteString(fmt.Sprintf("%s=%v%s", key, m[key], delimiter))
pairs = append(pairs, fmt.Sprintf("%s=%v", key, m[key]))
}
return buf.String()
return strings.Join(pairs, delimiter)
}

func getExcludeMasterFromStandardLB(lb *aksnodeconfigv1.LoadBalancerConfig) bool {
Expand Down Expand Up @@ -652,7 +647,7 @@ func marshalToJSON(v any) ([]byte, error) {
}

var rawMessage json.RawMessage = data
jsonByte, err := json.MarshalIndent(rawMessage, "", " ")
jsonByte, err := json.MarshalIndent(rawMessage, "", " ")
if err != nil {
log.Printf("error marshalling kubelet config file content: %v", err)
return nil, err
Expand Down
6 changes: 4 additions & 2 deletions aks-node-controller/parser/helper_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,8 @@ net.ipv4.neigh.default.gc_thresh1=4096
net.ipv4.neigh.default.gc_thresh2=8192
net.ipv4.neigh.default.gc_thresh3=16384
net.ipv4.tcp_max_syn_backlog=16384
net.ipv4.tcp_retries2=8`)),
net.ipv4.tcp_retries2=8
`)),
},
{
name: "SysctlConfig with custom values",
Expand All @@ -187,7 +188,8 @@ net.ipv4.neigh.default.gc_thresh1=4096
net.ipv4.neigh.default.gc_thresh2=8192
net.ipv4.neigh.default.gc_thresh3=16384
net.ipv4.tcp_max_syn_backlog=9999
net.ipv4.tcp_retries2=8`)),
net.ipv4.tcp_retries2=8
`)),
},
}
for _, tt := range tests {
Expand Down
7 changes: 4 additions & 3 deletions aks-node-controller/parser/parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ func executeBootstrapTemplate(inputContract *aksnodeconfigv1.Configuration) (str
func getCSEEnv(config *aksnodeconfigv1.Configuration) map[string]string {
cloudProviderSettings := getCloudProviderSettings(config)
env := map[string]string{
"PROVISION_OUTPUT": "/var/log/azure/cluster-provision.log",
"PROVISION_OUTPUT": "/var/log/azure/cluster-provision-cse-output.log",
"MOBY_VERSION": "",
"CLOUDPROVIDER_BACKOFF": fmt.Sprintf("%v", cloudProviderSettings.backoff),
"CLOUDPROVIDER_BACKOFF_MODE": cloudProviderSettings.backoffMode,
Expand All @@ -47,7 +47,7 @@ func getCSEEnv(config *aksnodeconfigv1.Configuration) map[string]string {
"CLOUDPROVIDER_RATELIMIT_BUCKET": fmt.Sprintf("%v", cloudProviderSettings.rateLimitBucket),
"CLOUDPROVIDER_RATELIMIT_BUCKET_WRITE": fmt.Sprintf("%v", cloudProviderSettings.rateLimitBucketWrite),
"CLI_TOOL": "ctr",
"NETWORK_MODE": "transparent",
"NETWORK_MODE": "",
"ADMINUSER": getLinuxAdminUsername(config.GetLinuxAdminUsername()),
"TENANT_ID": config.GetAuthConfig().GetTenantId(),
"KUBERNETES_VERSION": config.GetKubernetesVersion(),
Expand Down Expand Up @@ -194,7 +194,8 @@ func getCSEEnv(config *aksnodeconfigv1.Configuration) map[string]string {
"SERVICE_ACCOUNT_IMAGE_PULL_DEFAULT_TENANT_ID": config.GetServiceAccountImagePullProfile().GetDefaultTenantId(),
"IDENTITY_BINDINGS_LOCAL_AUTHORITY_SNI": config.GetServiceAccountImagePullProfile().GetLocalAuthoritySni(),
"CSE_TIMEOUT": getCSETimeout(config),
Comment thread
lilypan26 marked this conversation as resolved.
"SKIP_WAAGENT_HOLD": "true",
"SKIP_WAAGENT_HOLD": "false",
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This needs to be true, because in scriptless mode we never hold waagent.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep, this would be a difference compared to the non-scriptless path; maybe we can have a list of keys which are OK to ignore.

"NETWORK_ISOLATED_CLUSTER_TEST_MODE": "false", // temp: needs to be added to config
}

for i, cert := range config.CustomCaCerts {
Expand Down
4 changes: 2 additions & 2 deletions aks-node-controller/parser/templates/containerd.toml.gtpl
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,8 @@ root = "{{.KubeletConfig.GetContainerDataDir}}"{{- end}}
{{- if .GetEnableArtifactStreaming }}
[proxy_plugins]
[proxy_plugins.overlaybd]
type = "snapshot"
address = "/run/overlaybd-snapshotter/overlaybd.sock"
type = "snapshot"
address = "/run/overlaybd-snapshotter/overlaybd.sock"
{{- end}}
{{- if .GetIsKata }}
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.kata]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,8 @@ root = "{{.KubeletConfig.GetContainerDataDir}}"{{- end}}
{{- if .GetEnableArtifactStreaming }}
[proxy_plugins]
[proxy_plugins.overlaybd]
type = "snapshot"
address = "/run/overlaybd-snapshotter/overlaybd.sock"
type = "snapshot"
address = "/run/overlaybd-snapshotter/overlaybd.sock"
{{- end}}
{{- if .GetIsKata }}
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.kata]
Expand Down
4 changes: 1 addition & 3 deletions aks-node-controller/parser/templates/localdns.toml.gtpl
Original file line number Diff line number Diff line change
Expand Up @@ -131,13 +131,11 @@ health-check.localdns.local:53 {
template ANY ANY internal.cloudapp.net {
match "^(?:[^.]+\.){4,}internal\.cloudapp\.net\.$"
rcode NXDOMAIN

fallthrough

}
template ANY ANY reddog.microsoft.com {
rcode NXDOMAIN
}
{{- end}}
}
{{- end}}
{{- end}}
Loading
Loading