Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion modules/azurerm/agentless-gw/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,7 @@ variable "base_directory" {

variable "cloud_init_timeout" {
type = number
default = 1200
default = 1800
description = "Max time to wait for the machine to start"
}

Expand Down
2 changes: 1 addition & 1 deletion modules/azurerm/hub/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -318,7 +318,7 @@ variable "dra_details" {

variable "cloud_init_timeout" {
type = number
default = 1200
default = 1800
description = "Max time to wait for the machine to start"
}

Expand Down
17 changes: 11 additions & 6 deletions modules/azurerm/sonar-base-instance/setup.tftpl
Original file line number Diff line number Diff line change
Expand Up @@ -36,18 +36,19 @@ function internet_access() {
# "Failed to download metadata for repo"), which otherwise poison the local
# cache and make every retry fail the same way.
function yum_retry() {
local max_attempts="$${YUM_RETRY_ATTEMPTS:-10}"
local attempt
for attempt in {1..10}; do
for attempt in $(seq 1 "$max_attempts"); do
if "$@"; then
return 0
fi
echo "Command failed (attempt $attempt/10): $*"
echo "Command failed (attempt $attempt/$max_attempts): $*"
echo "Cleaning yum/dnf cache and retrying..."
yum clean all || true
rm -rf /var/cache/yum /var/cache/dnf || true
sleep $((attempt < 6 ? attempt * 10 : 60))
done
echo "All $attempt attempts failed for: $*"
echo "All $max_attempts attempts failed for: $*"
echo "RHUI diagnostics:"
curl -sI https://rhui-1.microsoft.com 2>&1 | head -5 || true
return 1
Expand Down Expand Up @@ -88,9 +89,13 @@ function install_azcli_from_internet() {
# Azure RHUI returns 400 on rhel-*-baseos-rhui-rpms.
yum_retry rpm -Uvh --replacepkgs "$msrepo_url"

# Fall back to disabling RHUI if it's degraded; azure-cli's RHEL-side deps
# are already in the Azure RHEL PAYG base image.
yum_retry dnf install azure-cli -y \
# Primary path uses fewer attempts (3) so we fail over fast when RHUI is
# degraded -- a real RHUI incident lasts hours, retrying 10x just burns
# cloud-init time. Fallback keeps the default 10 attempts as last resort.
# --nobest lets dnf walk back to an older azure-cli whose python pin
# matches what's already in the Azure RHEL PAYG base image (python3.9)
# when the appstream repo is unreachable.
YUM_RETRY_ATTEMPTS=3 yum_retry dnf install azure-cli -y \
|| yum_retry dnf install azure-cli -y --disablerepo='*rhui*' --nobest

az login --identity --allow-no-subscriptions
Expand Down
2 changes: 1 addition & 1 deletion modules/azurerm/sonar-base-instance/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,7 @@ variable "base_directory" {

variable "cloud_init_timeout" {
type = number
default = 900
default = 1800
description = "Max time to wait for the machine to start"
}

Expand Down
Loading