@@ -5,129 +5,152 @@ locals {
55}
66
77locals {
8+
89 user_startup_script_realm = << EOT
#!/bin/bash
# Startup script for the realm node. Terraform renders the dollar-brace
# expressions below before the script runs.
# Strict mode: abort on unhandled errors, unset variables and failed pipelines.
set -euo pipefail

# Exported so child processes can read them.
# NOTE(review): the script body itself does not reference these exports as far
# as is visible here - confirm whether redsetup/redcli consume them.
export INFINIA_VERSION="${var.infinia_version}"
export BASE_PKG_URL="https://storage.googleapis.com/ddn-redsetup-public"
export RELEASE_TYPE=""
export REL_DIST_PATH="ubuntu/24.04"
# Assign first, export second: `export VAR=$(cmd)` would hide a dpkg failure
# from `set -e`.
TARGET_ARCH=$(dpkg --print-architecture)
export TARGET_ARCH
export REL_PKG_URL="${var.base_pkg_url}/releases${var.release_type}/${var.rel_dist_path}"

# Marker file: its presence means the first phase (install + reboot) is done.
LOG_COMPLETE="/etc/red/phase_one_compelete" # keep original name
LOG_FILE="/tmp/log"
# Number of nodes the inventory must report before the realm is configured.
NODE_COUNT="${local.instance_count}"
2022
# log_info MESSAGE...
# Prints "<YYYY-MM-DD HH:MM:SS> - MESSAGE" to stdout and appends the same line
# to the file named by $LOG_FILE (tee -a).
# All arguments are joined with single spaces, so a call without arguments
# logs an empty message instead of aborting the script under `set -u`.
# printf is used so a message such as "-n" is printed literally.
log_info() {
  printf '%s - %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$*" | tee -a "$LOG_FILE"
}
24+
# retry COMMAND [MAX_ATTEMPTS] [SLEEP_SECONDS]
# Runs COMMAND (a shell string, executed with eval) until it succeeds.
#   MAX_ATTEMPTS   total attempts before giving up (default 10)
#   SLEEP_SECONDS  pause between two attempts (default 5)
# Returns 0 as soon as COMMAND succeeds, 1 once MAX_ATTEMPTS attempts have
# failed, 2 when MAX_ATTEMPTS is not a non-negative integer.
# Every failed attempt that will be retried is reported through log_info.
# NOTE(review): COMMAND goes through eval, so it must only be built from
# trusted values.
# Defaults are applied with plain tests rather than brace expansions so the
# function needs no dollar-brace escaping.
retry() {
  local cmd=$1 max=10 sleep_s=5 n=1
  if [ "$#" -ge 2 ] && [ -n "$2" ]; then max=$2; fi
  if [ "$#" -ge 3 ] && [ -n "$3" ]; then sleep_s=$3; fi

  # A non-numeric limit would make the attempt check below error out on every
  # pass, and the loop would then never terminate.
  case $max in
    '' | *[!0-9]*)
      log_info "retry: invalid attempt limit: $max"
      return 2
      ;;
  esac

  while ! eval "$cmd"; do
    if [ "$n" -ge "$max" ]; then
      return 1
    fi
    log_info "retry $n/$max: $cmd"
    n=$((n + 1))
    sleep "$sleep_s"
  done
  return 0
}
2426
# _check_inventory
# Blocks until the node count printed on the "Nodes:" line of
# `redcli inventory show` equals $NODE_COUNT. Polls every 5 seconds and logs
# each observation through log_info.
# Globals: NODE_COUNT (read).
# Returns: 0 once the counts match. There is no timeout.
_check_inventory() {
  local have want
  # read strips surrounding whitespace from the expected count.
  read -r want <<<"$NODE_COUNT"

  while true; do
    # `|| true`: redcli failing or printing no "Nodes:" line yet is the normal
    # "not ready" case. Without the guard the failed pipeline would abort the
    # whole script under `set -e -o pipefail`.
    have=$(redcli inventory show 2>/dev/null \
      | awk '/^[[:space:]]*Nodes:/ {print $2}') || true
    log_info "Inventory Nodes= $have want= $want"
    if [ "$have" = "$want" ]; then
      break
    fi
    sleep 5
  done
}
3739
# Two-phase bootstrap keyed on the marker file $LOG_COMPLETE.
#   Marker absent : install redsetup, create the realm entry, write the marker
#                   and reboot.
#   Marker present: log in, wait for the inventory, configure the realm,
#                   install the license, create the cluster, then clean up.
# Values rendered by Terraform (password, license) are single-quoted so that
# spaces or shell metacharacters in them cannot split or alter the command.
# NOTE(review): a value containing a single quote would still break the
# quoting, and the secrets remain visible in the process list.
if [ ! -f "$LOG_COMPLETE" ]; then
  # only realm: prepare machine-id on very first boot
  rm -f /etc/machine-id || true
  systemd-machine-id-setup

  # Download the redsetup package, up to 10 attempts 5 seconds apart.
  retry "wget '${var.base_pkg_url}/releases${var.release_type}/${var.rel_dist_path}/redsetup_${var.infinia_version}_$(dpkg --print-architecture)${var.release_type}.deb?cache-time=$(date +%s)' -O /tmp/redsetup.deb" 10 5
  apt-get update -y
  apt-get install -y /tmp/redsetup.deb | tee -a "$LOG_FILE"

  rm -rf "/etc/red/deploy/config.lock" || true
  redsetup -reset || log_info "Warning: redsetup -reset returned non-zero"

  log_info "Wait for self inventory"
  sleep 60

  # NOTE(review): `hostname --ip-address` can print several addresses; it is
  # left unquoted exactly as before - confirm single-homed hosts.
  redsetup -realm-entry -realm-entry-secret '${local.admin_password}' --admin-password '${local.admin_password}' -ctrl-plane-ip $(hostname --ip-address) -skip-reboot | tee -a "$LOG_FILE"

  log_info "reboot"
  touch "$LOG_COMPLETE"
  reboot -f
  exit 0
else
  cd /tmp
  redcli user login realm_admin -p '${local.admin_password}' || { log_info "Error: redcli login failed"; exit 1; }

  # One extra 60 second wait when the inventory does not mention a CPU yet.
  redcli inventory show > inventory.log || true
  grep -qi 'cpu' inventory.log || { log_info "Still waiting for self inventory"; sleep 60; }

  # First inventory init and compare
  # Running _check_inventory inside this &&/|| list keeps errexit disabled
  # within the function, so a failing poll does not abort the script.
  redcli realm config generate && _check_inventory || log_info "Error: Failed to generate config"

  # Regenerate when none realm joined the cluster
  redcli realm config generate || log_info "Error Generating config file"
  redcli realm config update -f realm_config.yaml || log_info "Error updating realm"

  # License then cluster
  redcli license install -a '${local.realm_license}' -y | tee -a "$LOG_FILE" || log_info "License install returned non-zero"

  # create cluster (idempotent with -f in your original)
  redcli cluster create c1 -S=false -z -f | tee -a "$LOG_FILE" || log_info "Error: failed to create cluster"

  # Best-effort cleanup: failures here must not fail the bootstrap.
  systemctl disable cloudinit-rerun.service --now || true
  rm -rf /var/lib/apt/lists/* || true
  journalctl --rotate || true
  journalctl --vacuum-time=1s || true
fi
6686EOT
6787}
6888
89+
6990locals {
7091 user_startup_script_none_realm = << EOT
#!/bin/bash
# Startup script for the nodes that join an existing realm. Terraform renders
# the dollar-brace expressions below before the script runs.
# Strict mode: abort on unhandled errors, unset variables and failed pipelines.
set -euo pipefail

# Exported so child processes can read them.
# NOTE(review): the script body itself does not reference these exports as far
# as is visible here - confirm whether redsetup consumes them.
export INFINIA_VERSION="${var.infinia_version}"
export BASE_PKG_URL="https://storage.googleapis.com/ddn-redsetup-public"
export RELEASE_TYPE=""
export REL_DIST_PATH="ubuntu/24.04"
# Assign first, export second: `export VAR=$(cmd)` would hide a dpkg failure
# from `set -e`.
TARGET_ARCH=$(dpkg --print-architecture)
export TARGET_ARCH
export REL_PKG_URL="${var.base_pkg_url}/releases${var.release_type}/${var.rel_dist_path}"

# Marker file: its presence means the first phase (install + reboot) is done.
LOG_COMPLETE="/etc/red/phase_one_compelete" # keep original name
LOG_FILE="/tmp/log"
# Private address of the first realm instance, used as the realm entry host.
REALM_IP="${aws_instance.infinia_realm[0].private_ip}"
# log_info MESSAGE...
# Prints "<YYYY-MM-DD HH:MM:SS> - MESSAGE" to stdout and appends the same line
# to the file named by $LOG_FILE (tee -a). Arguments are joined with spaces,
# so a call without arguments does not abort under `set -u`.
log_info() {
  printf '%s - %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$*" | tee -a "$LOG_FILE"
}

# _realm_status_ok HOST
# Performs one request to the redsetup status endpoint on HOST and returns
# curl's exit status. The response body is written to /tmp/curl_request.out.
# -k skips TLS certificate verification, as the script did before.
# The timeouts keep a hung connection from stalling the caller's retry loop.
# NOTE(review): without --fail an HTTP error response still counts as "up";
# kept as before - confirm that is intended.
_realm_status_ok() {
  curl -k -s --connect-timeout 10 --max-time 30 \
    -o /tmp/curl_request.out "https://$1:443/redsetup/v1/system/status"
}

# retry_curl REALM_ENTRY_HOST
# Waits until the realm entry host answers its status endpoint twice, with a
# settle pause of 130 seconds between the first answer and the re-check.
# Makes up to 90 attempts (15 * 60 / 10) separated by 10-second sleeps.
# Returns 0 when the host is confirmed stable.
# Exits the whole script with status 1 when all attempts are used up.
retry_curl() {
  local realm_entry=$1
  local retry_count=0
  local max_attempts=$((15 * 60 / 10))
  # Single source for the settle pause so the log text matches the real sleep.
  local settle_s=130

  while [ "$retry_count" -lt "$max_attempts" ]; do
    log_info "Checking realm entry host: $realm_entry (Attempt $retry_count)"
    if _realm_status_ok "$realm_entry"; then
      log_info "Success! Realm entry host is up."
      log_info "Waiting $settle_s seconds for stability..."
      sleep "$settle_s"
      log_info "Rechecking realm entry host after $settle_s seconds."
      if _realm_status_ok "$realm_entry"; then
        log_info "Realm entry host confirmed stable."
        return 0
      fi
      log_info "Recheck failed. Retrying..."
    fi
    retry_count=$((retry_count + 1))
    sleep 10
  done

  log_info "Max retries reached. Exiting."
  exit 1
}
114129
# Two-phase bootstrap keyed on the marker file $LOG_COMPLETE.
#   Marker absent : install redsetup, wait for the realm entry host, join the
#                   realm, write the marker and reboot.
#   Marker present: best-effort cleanup only.
if [ ! -f "$LOG_COMPLETE" ]; then
  # IMPORTANT: do NOT touch machine-id on non-realm nodes

  # Let wget retry transient failures itself (10 tries, up to 5 seconds
  # between them, connection-refused treated as transient). A single failed
  # attempt would otherwise abort the whole bootstrap under `set -e`.
  wget --tries=10 --waitretry=5 --retry-connrefused "${var.base_pkg_url}/releases${var.release_type}/${var.rel_dist_path}/redsetup_${var.infinia_version}_$(dpkg --print-architecture)${var.release_type}.deb?cache-time=$(date +%s)" -O /tmp/redsetup.deb
  apt-get update -y
  apt-get install -y /tmp/redsetup.deb

  rm -rf "/etc/red/deploy/config.lock" || true
  redsetup -reset || log_info "Warning: redsetup -reset returned non-zero"

  # Blocks until the realm entry host is confirmed up (exits the script on
  # timeout).
  retry_curl "$REALM_IP"

  # The secret is single-quoted so spaces or shell metacharacters in the
  # rendered value cannot split or alter the command.
  # NOTE(review): a value containing a single quote would still break the
  # quoting, and the secret remains visible in the process list.
  redsetup --realm-entry-address "$REALM_IP" --realm-entry-secret '${local.admin_password}' -skip-reboot -skip-hardware-check

  log_info "reboot"
  touch "$LOG_COMPLETE"
  reboot -f
  exit 0
else
  # Best-effort cleanup: failures here must not fail the bootstrap.
  rm -rf /var/lib/apt/lists/* || true
  journalctl --rotate || true
  journalctl --vacuum-time=1s || true
  systemctl disable cloudinit-rerun.service --now || true
fi
131154EOT
132155}
133156
0 commit comments