Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions .github/workflows/sonar_upgrade.yml
Original file line number Diff line number Diff line change
Expand Up @@ -268,6 +268,14 @@ jobs:
run_postflight_validations = true
stop_on_failure = true

# Skip healthchecks that are expected to trip on a freshly-deployed POC
# environment and are not actionable for the upgrade:
# - sonargd-audit-directory: emits a WARNING when /imperva/data/sonargd/audit
# contains any files (threshold is 0). On a fresh DSF POC the directory
# always has at least one file, which would otherwise abort the upgrade
# during preflight validations.
ignore_healthcheck_checks = ["sonargd-audit-directory"]

}
EOF
cat ${UPGRADE_EXAMPLE_DIR}/main.tf
Expand Down
10 changes: 10 additions & 0 deletions examples/aws/sonar_upgrade/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -83,4 +83,14 @@ module "sonar_upgrader" {
run_postflight_validations = true
stop_on_failure = true

# Optionally skip individual healthchecks that are not actionable for the upgrade.
# For example, on a freshly-deployed POC environment the "sonargd-audit-directory"
# healthcheck emits a WARNING ("Files count for /imperva/data/sonargd/audit (1)
# exceed threshold (0). This might indicate an ingestion issue.") which would
# otherwise abort the upgrade during preflight validations.
# ignore_healthcheck_checks = ["sonargd-audit-directory"]

# Alternatively, ignore all WARNING-level healthchecks (FAILUREs will still abort):
# ignore_healthcheck_warnings = true

}
23 changes: 20 additions & 3 deletions modules/azurerm/db-with-agent/os_params.tf
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,24 @@ locals {
echo "Waiting for the lock to be released..."
sleep 1
done
sudo apt update -y
# apt_update_resilient is defined in setup.tftpl; falls back to a plain
# update if for any reason it is not available (defense in depth).
if declare -F apt_update_resilient >/dev/null; then
apt_update_resilient
else
sudo apt update -y
fi
EOF
database_installation_commands = {
PostgreSql = <<-EOF
command -v psql || sudo apt install postgresql -y
# apt_install_resilient is defined in setup.tftpl and retries on
# transient apt-mirror failures; fall back to a plain install if
# the helper is unavailable for any reason.
if declare -F apt_install_resilient >/dev/null; then
command -v psql || apt_install_resilient postgresql
else
command -v psql || sudo apt install postgresql -y
fi
if ! sudo systemctl is-active --quiet postgresql; then
sudo systemctl start postgresql.service
echo "PostgreSQL service started successfully."
Expand All @@ -27,7 +40,11 @@ locals {
fi
EOF
MySql = <<-EOF
command -v mysql || sudo apt install mysql-server -y
if declare -F apt_install_resilient >/dev/null; then
command -v mysql || apt_install_resilient mysql-server
else
command -v mysql || sudo apt install mysql-server -y
fi
if ! sudo systemctl is-active --quiet mysql; then
sudo systemctl start mysql.service
echo "MySQL service started successfully."
Expand Down
59 changes: 56 additions & 3 deletions modules/azurerm/db-with-agent/setup.tftpl
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
set -e
set -u
set -x
set -o pipefail
cd /root || exit 1

# Turn off interactive prompts and skip translations
Expand All @@ -11,16 +12,69 @@ if [ -x "$(command -v apt-get)" ]; then
sudo mkdir -p /etc/apt/apt.conf.d
cat <<EOF | sudo tee /etc/apt/apt.conf.d/99translations
Acquire::Languages "none";
EOF

# Configure APT to retry transient mirror failures (e.g. truncated InRelease,
# split-up-into-data-and-signature errors, brief TLS hangups).
cat <<EOF | sudo tee /etc/apt/apt.conf.d/80-retries
Acquire::Retries "5";
Acquire::http::Timeout "30";
Acquire::https::Timeout "30";
EOF
fi

# apt-get update with self-repair on corrupted /var/lib/apt/lists (we have
# observed transient failures from archive.ubuntu.com where InRelease files
# are written truncated, e.g. "Splitting up ... into data and signature failed").
#
# Arguments: none
# Returns:   0 as soon as one `apt-get update` succeeds; 1 after five failed
#            attempts. Between attempts the downloaded package lists and the
#            apt cache are wiped, then we back off for attempt*5 seconds.
apt_update_resilient() {
    # The counter must be local: callers run their own `for attempt ...`
    # retry loops, and a global counter here would overwrite theirs (bash
    # variables are dynamically scoped).
    local attempt
    for attempt in 1 2 3 4 5; do
        if sudo apt-get -o DPkg::Lock::Timeout=120 update; then
            return 0
        fi
        echo "apt-get update failed (attempt $attempt). Repairing apt lists..."
        sudo rm -rf /var/lib/apt/lists/*
        sudo apt-get clean
        sleep $((attempt * 5))
    done
    return 1
}

# apt-get install with retry-on-failure plus a list repair between attempts.
#
# Arguments: $@ - package names (and any extra apt-get install arguments)
# Returns:   0 as soon as one install succeeds; 1 after three failed attempts.
#            After each failure the apt lists are refreshed through
#            apt_update_resilient (best effort) and we back off for
#            10, 20 and 30 seconds respectively.
apt_install_resilient() {
    # install_try is this function's own counter. "attempt" is declared local
    # purely as a shield: a helper that loops over a variable of that name
    # can then neither disturb our back-off nor leak into our caller's loop.
    local install_try attempt
    for install_try in 1 2 3; do
        if sudo apt-get install -y "$@"; then
            return 0
        fi
        echo "apt-get install of '$*' failed (attempt $install_try). Recovering..."
        # Best effort: a failed list refresh must not abort the retry loop.
        apt_update_resilient || true
        sleep $((install_try * 10))
    done
    return 1
}

# Clean and update the apt cache to avoid missing package list errors
sudo rm -rf /var/lib/apt/lists/*
sudo apt-get clean
sudo apt-get update
apt_update_resilient

# Install the Azure CLI with Microsoft's Debian installer script, then log in
# with the machine's managed identity.
#
# Arguments: none
# Returns:   1 if the installer failed three times in a row (az login is then
#            NOT attempted); otherwise the exit status of `az login`.
function install_azcli_from_internet() {
    # Note: --retry-all-errors is curl >= 7.71.0, but Ubuntu 20.04 ships
    # curl 7.68.0 (which exits 2 on unknown options). We therefore rely on
    # plain --retry plus the outer attempt loop below for resilience.
    #
    # The installer is only ever run inside the loop: an unguarded
    # `curl | sudo bash` ahead of it would abort the script under
    # `set -e` / `set -o pipefail` before any retry could happen.
    #
    # azcli_try is our own counter; "attempt" is declared local only as a
    # shield so that a helper looping over a variable of that name cannot
    # defeat the give-up check below or leak into our caller.
    local azcli_try attempt
    for azcli_try in 1 2 3; do
        if curl -fsSL --retry 5 https://aka.ms/InstallAzureCLIDeb | sudo bash; then
            break
        fi
        if [ "$azcli_try" = 3 ]; then
            echo "Azure CLI install failed (attempt $azcli_try)."
            echo "Azure CLI install: giving up after 3 attempts"
            return 1
        fi
        echo "Azure CLI install failed (attempt $azcli_try). Repairing apt lists..."
        # Best effort: a failed list refresh must not abort the retry loop.
        apt_update_resilient || true
        sleep $((azcli_try * 10))
    done
    az login --identity --allow-no-subscriptions >/dev/null
}

Expand Down Expand Up @@ -56,4 +110,3 @@ install_azcli
install_agent_and_periodic_db_query_cronjob



39 changes: 35 additions & 4 deletions modules/azurerm/dra-admin/setup.tftpl
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,42 @@ set -x
exec > >(tee /var/log/user-data.log|logger -t user-data ) 2>&1
echo BEGIN

# Major OS version taken from the VERSION_ID line of /etc/os-release
# (e.g. VERSION_ID="9.4" yields 9). Lines without VERSION_ID are dropped.
RHEL_MAJOR_VERSION=$(sed -E -e '/VERSION_ID/!d' -e 's/^.*"([0-9]+)\.[0-9]+".*$/\1/' /etc/os-release)

# Run the given command up to five times, wiping the dnf cache after every
# failure. Meant for transient dnf/yum-mirror or RHUI failures (e.g. HTTP 400
# from rhui-{1,2,3}.microsoft.com, "Failed to download metadata for repo",
# brief TLS hangups).
#
# Arguments: $@ - the command to run, with its arguments
# Returns:   0 on the first successful run; 1 if all five runs failed.
#            The pause after failure number N is N*10 seconds.
function dnf_retry(){
    local try
    for (( try = 1; try <= 5; try++ )); do
        if ! "$@"; then
            echo "Command failed (attempt $try): $*. Cleaning dnf cache and retrying..."
            # Cache cleanup is best effort; never let it mask the real error.
            dnf clean all || true
            rm -rf /var/cache/dnf || true
            sleep $((try * 10))
            continue
        fi
        return 0
    done
    return 1
}

# Install the Azure CLI from the Microsoft package repository and log in
# with the machine's managed identity.
#
# Globals:   RHEL_MAJOR_VERSION (read) - must be "8" or "9"
# Arguments: none
# Returns:   the exit status of `az login`; exits the whole script with
#            status 1 when the RHEL major version is not supported.
function install-azure-cli(){
    dnf_retry rpm --import https://packages.microsoft.com/keys/microsoft.asc
    # Install only the packages-microsoft-prod RPM that matches the running
    # RHEL major version. Attempting both the RHEL 8 and RHEL 9 variants
    # always leaves one install that cannot succeed, and retrying that one
    # wastes minutes -- enough to push past the downstream null_resource
    # readiness timeout. Every step goes through dnf_retry exactly once;
    # there is no separate un-retried install pass.
    local repo_rpm
    case "$RHEL_MAJOR_VERSION" in
        9) repo_rpm=https://packages.microsoft.com/config/rhel/9.0/packages-microsoft-prod.rpm ;;
        8) repo_rpm=https://packages.microsoft.com/config/rhel/8/packages-microsoft-prod.rpm ;;
        *)
            echo "Unsupported RHEL version: $RHEL_MAJOR_VERSION"
            exit 1
            ;;
    esac
    dnf_retry dnf install -y "$repo_rpm"
    dnf_retry dnf install azure-cli -y
    az login --identity
}

Expand Down
39 changes: 35 additions & 4 deletions modules/azurerm/dra-analytics/setup.tftpl
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,48 @@ set -x
exec > >(tee /var/log/user-data.log|logger -t user-data ) 2>&1
echo BEGIN

# Major OS version taken from the VERSION_ID line of /etc/os-release
# (e.g. VERSION_ID="9.4" yields 9). Lines without VERSION_ID are dropped.
RHEL_MAJOR_VERSION=$(sed -E -e '/VERSION_ID/!d' -e 's/^.*"([0-9]+)\.[0-9]+".*$/\1/' /etc/os-release)

# Block until a TCP probe (nc -z) of port 8443 on the admin server succeeds,
# re-probing every 0.1 seconds. There is no upper bound on the wait.
# NOTE(review): admin_server_private_ip is presumably filled in when this
# template is rendered -- confirm against the module that renders it.
function wait-for-admin(){
    until nc -z ${admin_server_private_ip} 8443; do
        sleep 0.1
    done
}

# Run the given command up to five times, wiping the dnf cache after every
# failure. Meant for transient dnf/yum-mirror or RHUI failures (e.g. HTTP 400
# from rhui-{1,2,3}.microsoft.com, "Failed to download metadata for repo",
# brief TLS hangups).
#
# Arguments: $@ - the command to run, with its arguments
# Returns:   0 on the first successful run; 1 if all five runs failed.
#            The pause after failure number N is N*10 seconds.
function dnf_retry(){
    local try
    for (( try = 1; try <= 5; try++ )); do
        if ! "$@"; then
            echo "Command failed (attempt $try): $*. Cleaning dnf cache and retrying..."
            # Cache cleanup is best effort; never let it mask the real error.
            dnf clean all || true
            rm -rf /var/cache/dnf || true
            sleep $((try * 10))
            continue
        fi
        return 0
    done
    return 1
}

# Install the Azure CLI from the Microsoft package repository and log in
# with the machine's managed identity.
#
# Globals:   RHEL_MAJOR_VERSION (read) - must be "8" or "9"
# Arguments: none
# Returns:   the exit status of `az login`; exits the whole script with
#            status 1 when the RHEL major version is not supported.
function install-azure-cli(){
    dnf_retry rpm --import https://packages.microsoft.com/keys/microsoft.asc
    # Install only the packages-microsoft-prod RPM that matches the running
    # RHEL major version. Attempting both the RHEL 8 and RHEL 9 variants
    # always leaves one install that cannot succeed, and retrying that one
    # wastes minutes -- enough to push past the downstream null_resource
    # readiness timeout. Every step goes through dnf_retry exactly once;
    # there is no separate un-retried install pass.
    local repo_rpm
    case "$RHEL_MAJOR_VERSION" in
        9) repo_rpm=https://packages.microsoft.com/config/rhel/9.0/packages-microsoft-prod.rpm ;;
        8) repo_rpm=https://packages.microsoft.com/config/rhel/8/packages-microsoft-prod.rpm ;;
        *)
            echo "Unsupported RHEL version: $RHEL_MAJOR_VERSION"
            exit 1
            ;;
    esac
    dnf_retry dnf install -y "$repo_rpm"
    dnf_retry dnf install azure-cli -y
    az login --identity
}

Expand Down
35 changes: 26 additions & 9 deletions modules/azurerm/sonar-base-instance/setup.tftpl
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,25 @@ function internet_access() {
fi
}

# Run a yum/dnf-based command up to five times, repairing the package cache
# after every failure. Targets transient Microsoft RHUI failures (HTTP 400 on
# https://rhui-{1,2,3}.microsoft.com/.../repomd.xml, "All mirrors were tried",
# "Failed to download metadata for repo"), which otherwise poison the local
# cache and make every retry fail the same way.
#
# Arguments: $@ - the command to run, with its arguments
# Returns:   0 on the first successful run; 1 if all five runs failed.
#            The pause after failure number N is N*10 seconds.
function yum_retry() {
    local try=1
    while (( try <= 5 )); do
        if "$@"; then
            return 0
        fi
        echo "Command failed (attempt $try): $*. Cleaning yum/dnf cache and retrying..."
        # Cache cleanup is best effort; never let it mask the real error.
        yum clean all || true
        rm -rf /var/cache/yum /var/cache/dnf || true
        sleep $((try * 10))
        try=$((try + 1))
    done
    return 1
}

function install_yum_dep_from_internet() {
if ! internet_access; then
echo "Error: No outbound internet access. Either enable outbound internet access, or make sure $@ is installed in the base ami"
Expand All @@ -38,12 +57,10 @@ function install_yum_dep_from_internet() {
local package="$1"
local package_name="$${2:-$1}"

# yum_retry takes care of cache repair between attempts, so a transient
# Microsoft RHUI 400 / repomd.xml download glitch heals automatically.
if ! yum list installed "$${package_name}"; then
yum install "$${package}" -y \
|| yum install "$${package}" -y \
|| yum install "$${package}" -y \
|| yum install "$${package}" -y \
|| yum install "$${package}" -y # trying x times since sometimes there is a glitch with the entitlement server
yum_retry yum install "$${package}" -y
fi
}

Expand All @@ -52,16 +69,16 @@ function install_azcli_from_internet() {
echo "Error: No outbound internet access. Either enable outbound internet access, or make sure az cli is installed in the base image"
exit 1
fi
rpm --import https://packages.microsoft.com/keys/microsoft.asc
yum_retry rpm --import https://packages.microsoft.com/keys/microsoft.asc
if [ "$RHEL_MAJOR_VERSION" -eq 8 ]; then
dnf install -y https://packages.microsoft.com/config/rhel/8/packages-microsoft-prod.rpm
yum_retry dnf install -y https://packages.microsoft.com/config/rhel/8/packages-microsoft-prod.rpm
elif [ "$RHEL_MAJOR_VERSION" -eq 9 ]; then
dnf install -y https://packages.microsoft.com/config/rhel/9.0/packages-microsoft-prod.rpm
yum_retry dnf install -y https://packages.microsoft.com/config/rhel/9.0/packages-microsoft-prod.rpm
else
echo "Unsupported RHEL version: $RHEL_MAJOR_VERSION"
exit 1
fi
dnf install azure-cli -y
yum_retry dnf install azure-cli -y
az login --identity --allow-no-subscriptions
}

Expand Down
Loading