Skip to content

Commit a93f832

Browse files
committed
fix locals
1 parent 18a3b0d commit a93f832

1 file changed

Lines changed: 110 additions & 87 deletions

File tree

deployments/aws/locals.tf

Lines changed: 110 additions & 87 deletions
Original file line numberDiff line numberDiff line change
@@ -5,129 +5,152 @@ locals {
55
}
66

77
locals {
8+
89
user_startup_script_realm = <<EOT
910
#!/bin/bash
# Startup script rendered into user_startup_script_realm.
# It runs in two phases separated by a reboot; the LOG_COMPLETE marker file
# defined below decides which phase executes on a given boot.
11+
# Strict mode: stop on a failing command, on an unset variable, and on a
# failure in any stage of a pipeline.
set -euo pipefail
12+
1013
# Package coordinates exported to the environment.
# NOTE(review): BASE_PKG_URL, RELEASE_TYPE and REL_DIST_PATH are fixed strings
# here, while REL_PKG_URL and the download further down are built from the
# Terraform variables - confirm the two sources are meant to agree.
export INFINIA_VERSION="${var.infinia_version}"
1114
export BASE_PKG_URL="https://storage.googleapis.com/ddn-redsetup-public"
1215
export RELEASE_TYPE=""
1316
export REL_DIST_PATH="ubuntu/24.04"
1417
# Architecture name as reported by dpkg on the booting host.
export TARGET_ARCH=$(dpkg --print-architecture)
1518
export REL_PKG_URL="${var.base_pkg_url}/releases${var.release_type}/${var.rel_dist_path}"
16-
LOG_COMPLETE="/etc/red/phase_one_compelete"
19+
# Marker file: absent means phase one still has to run, present means phase two.
LOG_COMPLETE="/etc/red/phase_one_compelete" # keep original name
1720
# File that log_info appends every message to.
LOG_FILE="/tmp/log"
1821
# Node count that _check_inventory waits for before the realm is configured.
NODE_COUNT="${local.instance_count}"
19-
rm /etc/machine-id && sudo systemd-machine-id-setup
2022
21-
log_info() {
22-
echo "$(date '+%Y-%m-%d %H:%M:%S') - $1" | tee -a "$LOG_FILE"
23-
}
23+
# Write one timestamped line to stdout and append the same line to $LOG_FILE.
#   $1 - message text
# Globals: LOG_FILE (read; the file is appended to)
log_info() {
  printf '%s - %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$1" \
    | tee -a "$LOG_FILE"
}
24+
25+
# Evaluate a command string repeatedly until it succeeds.
#   $1 - command line, passed to eval on every attempt
#   $2 - maximum number of attempts (default 10)
#   $3 - seconds to sleep between attempts (default 5)
# Returns 0 as soon as the command succeeds and 1 once the attempt limit is
# reached; every failed attempt before the limit is reported through log_info.
# The doubled dollar sign in front of the braces below is the Terraform
# template escape; the rendered script sees a plain shell default expansion.
retry() {
  local n=1
  local max="$${2:-10}"
  local sleep_s="$${3:-5}"

  until eval "$1"; do
    if [ "$n" -ge "$max" ]; then
      return 1
    fi
    log_info "retry $n/$max: $1"
    n=$((n+1))
    sleep "$sleep_s"
  done
}
2426
2527
# Block until the node count printed by "redcli inventory show" equals
# NODE_COUNT, polling every 5 seconds and logging every reading.
# Globals: NODE_COUNT (read)
# Returns: 0 once the counts match; there is no upper bound on the wait.
_check_inventory(){
  local reported
  while true; do
    # The script runs with errexit and pipefail. A failing redcli call, or
    # output without a "Nodes:" line, must count as "no reading yet" instead
    # of aborting the whole script, hence the explicit "|| true".
    reported=$(redcli inventory show 2>/dev/null \
      | awk '/^[[:space:]]*Nodes:/ {print $2}') || true
    log_info "Inventory Nodes=$reported want=$NODE_COUNT"
    if [ "$reported" = "$NODE_COUNT" ]; then
      break
    fi
    sleep 5
  done
}
3739
38-
if [ ! -f $LOG_COMPLETE ] ; then
39-
wget "${var.base_pkg_url}/releases${var.release_type}/${var.rel_dist_path}/redsetup_${var.infinia_version}_$(dpkg --print-architecture)${var.release_type}.deb?cache-time=$(date +%s)" -O /tmp/redsetup.deb
40-
apt install -y /tmp/redsetup.deb | tee -a $LOG_FILE
41-
rm -rf "/etc/red/deploy/config.lock" && redsetup -reset || log_info "Error running redsetup reset"
42-
log_info "Wait for self inventory " && sleep 60
43-
redsetup -realm-entry -realm-entry-secret ${local.admin_password} --admin-password ${local.admin_password} -ctrl-plane-ip $(hostname --ip-address) -skip-reboot | tee -a $LOG_FILE
40+
# Phase one (marker file absent): install redsetup, run the realm-entry setup,
# create the marker and reboot. Phase two (marker present) is the else branch.
if [ ! -f "$LOG_COMPLETE" ] ; then
41+
# Realm node only: regenerate the machine-id on the very first boot.
42+
rm -f /etc/machine-id || true
43+
systemd-machine-id-setup
44+
45+
# Download the redsetup package with up to 10 attempts, 5 seconds apart.
# The URL, including the cache-time value, is expanded once when the argument
# string is built, so every attempt requests the same URL.
retry "wget '${var.base_pkg_url}/releases${var.release_type}/${var.rel_dist_path}/redsetup_${var.infinia_version}_$(dpkg --print-architecture)${var.release_type}.deb?cache-time=$(date +%s)' -O /tmp/redsetup.deb" 10 5
46+
# Refresh the package index, then install the downloaded file; the install
# output is copied to the log file.
apt-get update -y
47+
apt-get install -y /tmp/redsetup.deb | tee -a "$LOG_FILE"
48+
49+
# Remove the config lock if present, then reset redsetup; a failed reset is
# logged as a warning and does not stop the script.
rm -rf "/etc/red/deploy/config.lock" || true
50+
redsetup -reset || log_info "Warning: redsetup -reset returned non-zero"
51+
52+
# Fixed 60-second pause before the realm-entry step.
log_info "Wait for self inventory"
53+
sleep 60
54+
55+
# Run the realm-entry setup with this host's own address as control-plane IP.
# NOTE(review): the admin password is passed unquoted on the command line and
# is also used as the realm-entry secret - confirm both are intended.
redsetup -realm-entry -realm-entry-secret ${local.admin_password} --admin-password ${local.admin_password} -ctrl-plane-ip $(hostname --ip-address) -skip-reboot | tee -a "$LOG_FILE"
56+
4457
log_info "reboot"
45-
touch $LOG_COMPLETE
46-
reboot -f
47-
else
48-
cd /tmp
49-
redcli user login realm_admin -p ${local.admin_password} || log_info "Error: redcli login failed"
50-
redcli inventory show > inventory.log
51-
grep -qi 'cpu' inventory.log || log_info "Still waiting for self inventory" && sleep 60
52-
53-
# First inventory init and compare
54-
redcli realm config generate && _check_inventory || log_info "Error: Failed to generate config"
55-
56-
# Regenerate when none realm joined the cluster
57-
redcli realm config generate || log_info "Error Generating config file"
58-
redcli realm config update -f realm_config.yaml || log_info "Error updating realm"
59-
redcli license install -a ${local.realm_license} -y | tee -a $LOG_FILE
60-
redcli cluster create c1 -S=false -z -f | tee -a "$LOG_FILE" || log_info "Error: failed to create cluster"
61-
systemctl disable cloudinit-rerun.service --now
62-
rm -rf /var/lib/apt/lists/*
63-
journalctl --rotate && journalctl --vacuum-time=1s
64-
systemctl disable cloudinit-rerun.service --now
65-
fi
58+
# Create the phase-one marker before rebooting so the next boot takes the
# else branch.
touch "$LOG_COMPLETE"
59+
reboot -f
60+
# Ends the script here in case execution continues past the reboot call.
exit 0
61+
# Phase two (marker present): log in, wait for the inventory, configure the
# realm, install the license, create the cluster, then clean up.
else
62+
cd /tmp
63+
# A failed login is fatal: it is logged and the script exits with status 1.
redcli user login realm_admin -p ${local.admin_password} || { log_info "Error: redcli login failed"; exit 1; }
64+
65+
# Save an inventory snapshot; when it contains no "cpu" text, log that and
# wait 60 seconds once (the snapshot is not taken again afterwards).
redcli inventory show > inventory.log || true
66+
grep -qi 'cpu' inventory.log || { log_info "Still waiting for self inventory"; sleep 60; }
67+
68+
# First inventory init and compare
# _check_inventory runs only if the generate step succeeds; the error message
# is logged when either of the two returns non-zero.
69+
redcli realm config generate && _check_inventory || log_info "Error: Failed to generate config"
70+
71+
# Regenerate the config after the wait for the non-realm nodes to join.
72+
redcli realm config generate || log_info "Error Generating config file"
73+
redcli realm config update -f realm_config.yaml || log_info "Error updating realm"
74+
75+
# License then cluster
76+
redcli license install -a ${local.realm_license} -y | tee -a "$LOG_FILE" || log_info "License install returned non-zero"
77+
78+
# Create cluster c1; the flags are carried over unchanged from the earlier
# version of this script.
79+
redcli cluster create c1 -S=false -z -f | tee -a "$LOG_FILE" || log_info "Error: failed to create cluster"
80+
81+
# Best-effort cleanup: a failure in any of these steps is ignored.
systemctl disable cloudinit-rerun.service --now || true
82+
rm -rf /var/lib/apt/lists/* || true
83+
journalctl --rotate || true
84+
journalctl --vacuum-time=1s || true
85+
fi
6686
EOT
6787
}
6888

89+
6990
locals {
7091
user_startup_script_none_realm = <<EOT
7192
#!/bin/bash
# Startup script rendered into user_startup_script_none_realm.
# It runs in two phases separated by a reboot; the LOG_COMPLETE marker file
# defined below decides which phase executes on a given boot.
93+
# Strict mode: stop on a failing command, on an unset variable, and on a
# failure in any stage of a pipeline.
set -euo pipefail
94+
7295
# Package coordinates exported to the environment.
# NOTE(review): BASE_PKG_URL, RELEASE_TYPE and REL_DIST_PATH are fixed strings
# here, while REL_PKG_URL and the download further down are built from the
# Terraform variables - confirm the two sources are meant to agree.
export INFINIA_VERSION="${var.infinia_version}"
7396
export BASE_PKG_URL="https://storage.googleapis.com/ddn-redsetup-public"
7497
export RELEASE_TYPE=""
7598
export REL_DIST_PATH="ubuntu/24.04"
7699
# Architecture name as reported by dpkg on the booting host.
export TARGET_ARCH=$(dpkg --print-architecture)
77100
export REL_PKG_URL="${var.base_pkg_url}/releases${var.release_type}/${var.rel_dist_path}"
78-
LOG_COMPLETE="/etc/red/phase_one_compelete"
101+
# Marker file: absent means phase one still has to run, present means phase two.
LOG_COMPLETE="/etc/red/phase_one_compelete" # keep original name
79102
# File that log_info appends every message to.
LOG_FILE="/tmp/log"
80103
# Private IP of the first infinia_realm instance; retry_curl probes it and
# redsetup receives it as the realm entry address.
REALM_IP="${aws_instance.infinia_realm[0].private_ip}"
81-
rm /etc/machine-id && sudo systemd-machine-id-setup
82-
log_info() {
83-
echo "$(date '+%Y-%m-%d %H:%M:%S') - $1" | tee -a "$LOG_FILE"
84-
}
85104
86-
retry_curl() {
87-
local retry_count=0
88-
local max_attempts=$((15 * 60 / 10)) # 15 minutes with 10-second intervals
89-
local realm_entry=$1
90-
91-
while [ $retry_count -lt $max_attempts ]; do
92-
log_info "Checking realm entry host: $realm_entry (Attempt $retry_count)"
93-
curl -k -s -o /tmp/curl_request.out https://$realm_entry:443/redsetup/v1/system/status
94-
if [ $? -eq 0 ]; then
95-
log_info "Success! Realm entry host is up."
96-
log_info "Waiting 120 seconds for stability..."
97-
sleep 130
98-
log_info "Rechecking realm entry host after 120 seconds."
99-
curl -k -s -o /tmp/curl_request.out https://$realm_entry:443/redsetup/v1/system/status
100-
if [ $? -eq 0 ]; then
101-
log_info "Realm entry host confirmed stable."
102-
return 0
103-
else
104-
log_info "Recheck failed. Retrying..."
105-
fi
106-
fi
107-
retry_count=$((retry_count + 1))
108-
sleep 10
109-
done
105+
# Write one timestamped line to stdout and append the same line to $LOG_FILE.
#   $1 - message text
# Globals: LOG_FILE (read; the file is appended to)
log_info() {
  printf '%s - %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$1" \
    | tee -a "$LOG_FILE"
}
110106
111-
log_info "Max retries reached. Exiting."
112-
exit 1
113-
}
107+
# Probe the redsetup status endpoint of a host once.
#   $1 - host address
# Returns curl's exit status. -k skips TLS verification; the two timeouts
# keep a single stuck connection from stalling the caller's retry budget.
# NOTE(review): curl runs without --fail, so any HTTP response counts as
# success - confirm that is intended.
_probe_realm_status() {
  curl -k -s --connect-timeout 10 --max-time 30 \
    -o /tmp/curl_request.out \
    "https://$1:443/redsetup/v1/system/status"
}

# Wait until the realm entry host answers on its status endpoint and still
# answers after a settle delay.
#   $1 - address of the realm entry host
# Returns 0 once two probes separated by the settle delay both succeed.
# Exits the whole script with status 1 after max_attempts unsuccessful rounds.
retry_curl() {
  local realm_entry=$1
  local retry_count=0
  local max_attempts=$((15 * 60 / 10)) # 15 minutes with 10-second intervals
  # One value drives both the sleep and the log text, so they cannot drift
  # apart (the messages used to say 120 while the sleep was 130).
  local settle_seconds=130

  while [ "$retry_count" -lt "$max_attempts" ]; do
    log_info "Checking realm entry host: $realm_entry (Attempt $retry_count)"
    if _probe_realm_status "$realm_entry"; then
      log_info "Success! Realm entry host is up."
      log_info "Waiting $settle_seconds seconds for stability..."
      sleep "$settle_seconds"
      log_info "Rechecking realm entry host after $settle_seconds seconds."
      if _probe_realm_status "$realm_entry"; then
        log_info "Realm entry host confirmed stable."
        return 0
      fi
      log_info "Recheck failed. Retrying..."
    fi
    retry_count=$((retry_count + 1))
    sleep 10
  done

  log_info "Max retries reached. Exiting."
  exit 1
}
114129
130+
# Phase one (marker file absent): install redsetup, wait for the realm entry
# host, join it, create the marker and reboot. Phase two (marker present) is
# the else branch and only cleans up.
if [ ! -f "$LOG_COMPLETE" ] ; then
131+
# IMPORTANT: do NOT touch machine-id on non-realm nodes
115132
116-
if [ ! -f $LOG_COMPLETE ] ; then
117133
# Download the redsetup package in a single wget attempt.
# NOTE(review): the realm script wraps the same download in its retry helper;
# this script defines no such helper - confirm one attempt is enough here.
wget "${var.base_pkg_url}/releases${var.release_type}/${var.rel_dist_path}/redsetup_${var.infinia_version}_$(dpkg --print-architecture)${var.release_type}.deb?cache-time=$(date +%s)" -O /tmp/redsetup.deb
118-
apt install -y /tmp/redsetup.deb
119-
rm -rf "/etc/red/deploy/config.lock" && redsetup -reset || log_info "Error running redsetup reset"
120-
retry_curl $REALM_IP
121-
redsetup --realm-entry-address $REALM_IP --realm-entry-secret ${local.admin_password} -skip-reboot -skip-hardware-check
134+
# Refresh the package index, then install the downloaded file.
apt-get update -y
135+
apt-get install -y /tmp/redsetup.deb
136+
137+
# Remove the config lock if present, then reset redsetup; a failed reset is
# logged as a warning and does not stop the script.
rm -rf "/etc/red/deploy/config.lock" || true
138+
redsetup -reset || log_info "Warning: redsetup -reset returned non-zero"
139+
140+
# Wait for the realm entry host; retry_curl exits the script with status 1
# when the host never becomes reachable.
retry_curl "$REALM_IP"
141+
142+
# Join the realm entry host.
# NOTE(review): the admin password is passed unquoted on the command line as
# the realm-entry secret - confirm this is intended.
redsetup --realm-entry-address "$REALM_IP" --realm-entry-secret ${local.admin_password} -skip-reboot -skip-hardware-check
143+
122144
log_info "reboot"
123-
touch $LOG_COMPLETE
124-
reboot -f
125-
else
126-
rm -rf /var/lib/apt/lists/*
127-
journalctl --rotate && journalctl --vacuum-time=1s
128-
systemctl disable cloudinit-rerun.service --now
129-
130-
fi
145+
# Create the phase-one marker before rebooting so the next boot takes the
# else branch.
touch "$LOG_COMPLETE"
146+
reboot -f
147+
# Ends the script here in case execution continues past the reboot call.
exit 0
148+
# Phase two (marker present): best-effort cleanup; a failure in any of these
# steps is ignored.
else
149+
rm -rf /var/lib/apt/lists/* || true
150+
journalctl --rotate || true
151+
journalctl --vacuum-time=1s || true
152+
systemctl disable cloudinit-rerun.service --now || true
153+
fi
131154
EOT
132155
}
133156

0 commit comments

Comments
 (0)