# Patch summary (free-text preamble placed before the first "diff --git" header).
#
# e2e/validation.go
#   ValidateCommonLinux: inside the existing localdns branch (guarded by
#   !UnsupportedLocalDns && !TestPreProvision && !VHDCaching), adds a call to
#   ValidateLocalDNSIptablesRules between ValidateLocalDNSResolution and
#   ValidateLocalDNSExporterMetrics.
#
# e2e/validators.go
#   Adds ValidateLocalDNSIptablesRules(ctx, s). It runs a shell script on the VM that,
#   for each chain in {OUTPUT, PREROUTING} and each protocol in {tcp, udp}, greps the
#   output of `iptables -w -t raw -S <chain>` for a line matching
#   "-p <proto>.*localdns: skip conntrack.*--dport 53.*NOTRACK". Each hit prints "OK",
#   each miss prints "FAIL"; on any miss the script dumps the raw table and exits 1.
#   The helper call expects exit code 0.
#   NOTE(review): the pattern does not anchor the port (e.g. "--dport 530" would also
#   match) and does not check the destination address -- confirm that is acceptable.
#
# parts/linux/cloud-init/artifacts/localdns.sh
#   start_localdns: the PID-file wait loop sleeps 0.1 instead of 1 per iteration; the
#   counter still increments by 1, and the limit becomes START_LOCALDNS_TIMEOUT * 10.
#   wait_for_localdns_ready: sleeps 0.1 instead of 1 between attempts; its call site
#   changes from `wait_for_localdns_ready 60 60` to `wait_for_localdns_ready 600 60`.
#   NOTE(review): presumably the first argument is the maximum attempt count, so
#   600 x 0.1s keeps roughly the previous 60s budget -- the function signature is not
#   visible in these hunks; confirm.
#   add_iptable_rules_to_skip_conntrack_from_pods: replaces the per-rule
#   `eval "${IPTABLES}" -A "${RULE}"` loop with a single input built as "*raw", one
#   `-A <chain> -m comment --comment "localdns: skip conntrack" <rest>` line per entry
#   of IPTABLES_RULES (chain = first word of the entry), then "COMMIT", piped to
#   `iptables-restore -w --noflush`. The now-unused IPTABLES variable is removed.
#
# spec/parts/linux/cloud-init/artifacts/localdns_spec.sh
#   setup: replaces IPTABLES="echo iptables" with a mock `iptables-restore` script,
#   written into a `mktemp -d` directory that is prepended to PATH; the mock echoes its
#   arguments and copies stdin to stdout. Assertions now look for "*raw",
#   "-A raw -m comment --comment", "-p udp", "-p tcp", "-j NOTRACK" and "COMMIT".
#   `Path prepend "$(pwd)"` is dropped from two examples.
#   NOTE(review): no removal of the mktemp directory is visible in these hunks, and
#   setup runs via BeforeEach -- confirm cleanup happens elsewhere.
#
# NOTE(review): this copy of the patch appears to have its line breaks and indentation
# collapsed; verify it against the original before attempting to apply it.
diff --git a/e2e/validation.go b/e2e/validation.go index 3cf65423dd0..fadf8eab5e6 100644 --- a/e2e/validation.go +++ b/e2e/validation.go @@ -79,6 +79,7 @@ func ValidateCommonLinux(ctx context.Context, s *Scenario) { if !s.VHD.UnsupportedLocalDns && !config.Config.TestPreProvision && !s.VHDCaching { ValidateLocalDNSService(ctx, s, "enabled") ValidateLocalDNSResolution(ctx, s, "169.254.10.10") + ValidateLocalDNSIptablesRules(ctx, s) ValidateLocalDNSExporterMetrics(ctx, s) // Validate hosts plugin validators only if hosts plugin is explicitly enabled diff --git a/e2e/validators.go b/e2e/validators.go index 96597ada092..1cd5b3966cf 100644 --- a/e2e/validators.go +++ b/e2e/validators.go @@ -1494,6 +1494,36 @@ func ValidateLocalDNSResolution(ctx context.Context, s *Scenario, server string) assert.Contains(s.T, execResult.stdout, fmt.Sprintf("SERVER: %s", server)) } +// ValidateLocalDNSIptablesRules checks that the NOTRACK iptables rules for localdns are correctly +// applied in the raw table. These rules skip connection tracking for DNS traffic to localdns IPs +// to prevent conntrack table exhaustion on busy nodes. 
+func ValidateLocalDNSIptablesRules(ctx context.Context, s *Scenario) { + s.T.Helper() + script := `set -euo pipefail +failed=0 +# Check each rule individually to avoid multiline grep issues +for chain in OUTPUT PREROUTING; do + for proto in tcp udp; do + rule=$(sudo iptables -w -t raw -S "$chain" | tr -d '\r' | grep -m1 -- "-p ${proto}.*localdns: skip conntrack.*--dport 53.*NOTRACK" || true) + if [ -n "$rule" ]; then + echo "OK: $chain/$proto: $rule" + else + echo "FAIL: missing NOTRACK rule for $proto in $chain chain" + failed=1 + fi + done +done + +if [ "$failed" -ne 0 ]; then + echo "Dumping all raw table rules for debugging:" + sudo iptables -w -t raw -S | tr -d '\r' + exit 1 +fi + echo "PASS: all localdns NOTRACK iptables rules present" +` + execScriptOnVMForScenarioValidateExitCode(ctx, s, script, 0, "localdns iptables NOTRACK rules validation failed") +} + // ValidateLocalDNSHostsFile checks that /etc/localdns/hosts contains at least one IPv4 entry for each critical FQDN. // This validation approach avoids flakiness with CDN/frontdoor-backed FQDNs (like mcr.microsoft.com) whose A records // can rotate between queries. We verify presence, not exact IP matching. diff --git a/parts/linux/cloud-init/artifacts/localdns.sh b/parts/linux/cloud-init/artifacts/localdns.sh index 8d54c4f1430..a5ef935fe8b 100644 --- a/parts/linux/cloud-init/artifacts/localdns.sh +++ b/parts/linux/cloud-init/artifacts/localdns.sh @@ -419,11 +419,12 @@ start_localdns() { ${COREDNS_COMMAND} & # Wait until the PID file is created. + # Use 0.1s polling interval since CoreDNS typically creates the PID file in <100ms. local elapsed=0 while [ ! -f "${LOCALDNS_PID_FILE}" ]; do - sleep 1 + sleep 0.1 elapsed=$((elapsed + 1)) - if [ "$elapsed" -ge "$START_LOCALDNS_TIMEOUT" ]; then + if [ "$elapsed" -ge "$((START_LOCALDNS_TIMEOUT * 10))" ]; then echo "Timed out waiting for CoreDNS to create PID file at ${LOCALDNS_PID_FILE}." 
return 1 fi @@ -454,7 +455,7 @@ wait_for_localdns_ready() { echo "Localdns failed to come online after $timeout_duration seconds (timeout)." return 1 fi - sleep 1 + sleep 0.1 ((attempts++)) done echo "Localdns is online and ready to serve traffic." @@ -588,10 +589,19 @@ add_iptable_rules_to_skip_conntrack_from_pods(){ ip addr add ${LOCALDNS_CLUSTER_LISTENER_IP}/32 dev localdns # Add IPtables rules that skip conntrack for DNS connections coming from pods. + # Use iptables-restore to batch all rules in a single lock acquisition for performance. echo "Adding iptables rules to skip conntrack for queries to localdns." + local restore_input="*raw" for RULE in "${IPTABLES_RULES[@]}"; do - eval "${IPTABLES}" -A "${RULE}" + # Extract chain name and remainder, insert comment after chain to match legacy display order. + local chain="${RULE%% *}" + local rule_rest="${RULE#"$chain" }" + restore_input="${restore_input} +-A ${chain} -m comment --comment \"localdns: skip conntrack\" ${rule_rest}" done + restore_input="${restore_input} +COMMIT" + echo "${restore_input}" | iptables-restore -w --noflush } # Wait for localdns IP to be removed from resolv.conf after networkctl reload. @@ -1029,7 +1039,6 @@ replace_azurednsip_in_corefile || exit $ERR_LOCALDNS_FAIL # Build IPtable rules. # --------------------------------------------------------------------------------------------------------------------- -IPTABLES='iptables -w -t raw -m comment --comment "localdns: skip conntrack"' IPTABLES_RULES=() build_localdns_iptable_rules @@ -1059,7 +1068,7 @@ fi start_localdns || exit $ERR_LOCALDNS_FAIL # Wait to direct traffic to localdns until it's ready. -wait_for_localdns_ready 60 60 || exit $ERR_LOCALDNS_FAIL +wait_for_localdns_ready 600 60 || exit $ERR_LOCALDNS_FAIL # Disable DNS from DHCP and point the system at localdns. 
# -------------------------------------------------------------------------------------------------------------------- diff --git a/spec/parts/linux/cloud-init/artifacts/localdns_spec.sh b/spec/parts/linux/cloud-init/artifacts/localdns_spec.sh index d61cf348413..e3ee1908758 100644 --- a/spec/parts/linux/cloud-init/artifacts/localdns_spec.sh +++ b/spec/parts/linux/cloud-init/artifacts/localdns_spec.sh @@ -758,7 +758,14 @@ EOF LOCALDNS_NODE_LISTENER_IP="10.0.0.1" LOCALDNS_CLUSTER_LISTENER_IP="10.0.0.2" IPTABLES_RULES=("raw -t raw -p udp --dport 53 -j NOTRACK" "raw -t raw -p tcp --dport 53 -j NOTRACK") - IPTABLES="echo iptables" + MOCK_BIN_DIR=$(mktemp -d) + cat > "${MOCK_BIN_DIR}/iptables-restore" << 'MOCK' +#!/bin/sh +echo "iptables-restore called with args: $*" +cat +MOCK + chmod +x "${MOCK_BIN_DIR}/iptables-restore" + export PATH="${MOCK_BIN_DIR}:${PATH}" } BeforeEach 'setup' #------------------------- add_iptable_rules_to_skip_conntrack_from_pods ------------------------------------- @@ -782,11 +789,14 @@ EOF ;; esac } - Path prepend "$(pwd)" When call add_iptable_rules_to_skip_conntrack_from_pods The output should include "Adding iptables rules to skip conntrack for queries to localdns." - The output should include "iptables -A raw -t raw -p udp --dport 53 -j NOTRACK" - The output should include "iptables -A raw -t raw -p tcp --dport 53 -j NOTRACK" + The output should include "*raw" + The output should include "-A raw -m comment --comment" + The output should include "-p udp" + The output should include "-p tcp" + The output should include "-j NOTRACK" + The output should include "COMMIT" End It 'should delete existing localdns interface' @@ -804,7 +814,6 @@ EOF esac } - Path prepend "$(pwd)" When call add_iptable_rules_to_skip_conntrack_from_pods The output should include "Interface localdns already exists, deleting it." The output should include "Deleting interface: link delete localdns"