-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathdeploy.sh
More file actions
executable file
·306 lines (256 loc) · 10.5 KB
/
deploy.sh
File metadata and controls
executable file
·306 lines (256 loc) · 10.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
#!/bin/bash
# Strict mode: abort on errors, on unset variables and on failures anywhere
# in a pipeline.
set -euo pipefail

# mount points of the persistent volumes
FILES_MNT=/labkey_files
BACKUP_MNT=/backups

SPIN_MODULE="spin/2.0"
RANCHER_MAJOR_VERSION_REQUIRED=2
NAMESPACE="lims-24"

# default options to pass to kubectl
FLAGS="--namespace=${NAMESPACE}"

# location of backup directories on global file system (cori)
ROOT_BACKUP_DIR="/global/cfs/cdirs/metatlas/projects/lims_backups/pg_dump/lims-24"

# initialize variables so that `set -u` does not trip on options that were
# not passed on the command line
BACKUP_RESTORE=""
LABKEY=""
DEV=0
NEW=0

if [[ ! -d "/global/cfs/cdirs" ]]; then
  >&2 echo "ERROR: You must be on a NERSC system to deploy."
  exit 17
fi

# Default to the most recently modified backup directory whose name is a
# 12-digit timestamp starting with "2".
# The trailing `|| true` matters: with `set -e` and `pipefail` active, grep
# exiting 1 (no matching directory) or ls failing would otherwise terminate
# the script right here without any message, even when -t/--timestamp is
# given. TIMESTAMP is instead left empty so the required-flag check that
# follows argument parsing can report the problem.
TIMESTAMP=$(
  ls -1pt "${ROOT_BACKUP_DIR}" \
    | grep -E "^2[0-9]{11}/$" \
    | sed '1!d' \
    | tr -d '/' \
    || true
)
# Abort with a readable error unless the option being parsed is followed by
# a value. Without this check a trailing "-b" (etc.) dies with bash's
# "unbound variable" message because of `set -u`.
#   $1 - the option as typed by the user
#   $2 - number of command line words not yet consumed (option included)
require_option_value() {
  if [[ "$2" -lt 2 ]]; then
    >&2 echo "ERROR: option ${1} requires a value."
    exit 1
  fi
}

# Parse the command line. Options taking a value consume two words: one via
# the `shift` inside their case arm, one via the `shift` at the loop's end.
while [[ "$#" -gt 0 ]]; do
  case "$1" in
    -b|--backup)
      require_option_value "$1" "$#"
      BACKUP_RESTORE="$2"
      shift
      ;;
    -d|--dev)
      DEV="1"
      ;;
    -l|--labkey)
      require_option_value "$1" "$#"
      LABKEY="$2"
      shift
      ;;
    -n|--new)
      NEW="1"
      ;;
    -t|--timestamp)
      require_option_value "$1" "$#"
      TIMESTAMP="$2"
      shift
      ;;
    -h|--help)
      echo -e "$0 [options]"
      echo ""
      echo " -h, --help show this command refernce"
      echo " -b, --backup source of backup_restore image (required)"
      echo " -d, --dev operate on the development cluster (defaults to production)"
      echo " -l, --labkey source of labkey image (required)"
      echo " -n, --new delete all resources in namespace and start new instances"
      echo " Restores all data from backup."
      echo " -t, --timestamp timestamp of the backup to use (defaults to most recent)"
      exit 0
      ;;
    *)
      # errors go to stderr like every other error in this script
      >&2 echo "Unknown parameter passed: $1"
      exit 1
      ;;
  esac
  shift
done
# Print the Kubernetes server version as reported by
# `rancher kubectl version --short=true`.
# Takes the text after the first colon of the "Server Version:" line and
# deletes every space and every "v" character from it, so a line such as
# "Server Version: v1.25.4" yields "1.25.4".
k8s_version() {
  rancher kubectl version --short=true \
    | grep '^Server Version:' \
    | cut -d ':' -f 2 \
    | tr -d ' v'
}
# Terminate the script when a mandatory option value is missing.
#   $1 - the value to check
#   $2 - message printed to stderr (prefixed with "ERROR: ") when $1 is empty
# Exits with status 1 when $1 is empty; otherwise returns 0.
required_flag_or_error() {
  if [[ -n "$1" ]]; then
    return 0
  fi
  echo "ERROR: ${2}" >&2
  exit 1
}
# Verify that the path in $1 exists, is readable and has a size greater than
# zero. The first failing check is reported on stderr and the script exits
# with status 2; returns 0 when all three checks pass.
file_exists_readable_not_empty_or_error() {
  local problem=""

  if [[ ! -e "$1" ]]; then
    problem="does not exist"
  elif [[ ! -r "$1" ]]; then
    problem="is not readable"
  elif [[ ! -s "$1" ]]; then
    problem="is empty"
  fi

  if [[ -n "${problem}" ]]; then
    echo "ERROR: file ${1} ${problem}." >&2
    exit 2
  fi
  return 0
}
# Verify that the file in $1 is not accessible by "other" users: its octal
# permission bits (as printed by `stat -c %a`) must be exactly 600 or 660.
# Exits with status 3 when the permissions differ or cannot be determined;
# returns 0 otherwise.
file_safe_secret_or_error() {
  local mode

  # Query the mode once and check that stat itself succeeded. Previously a
  # failing stat left the unquoted test with no left operand; that test
  # errored out, the condition was false, and the file was treated as safe.
  if ! mode="$(stat -c %a "$1")"; then
    >&2 echo "ERROR: unable to determine file permissions of ${1}."
    exit 3
  fi

  if [[ "${mode}" != "600" && "${mode}" != "660" ]]; then
    >&2 echo "ERROR: ${1} must have file permissions 600 or 660."
    exit 3
  fi
  return 0
}
# Fail early when a mandatory option is still empty after argument parsing.
required_flag_or_error "$TIMESTAMP" "You are required to supply a backup timestamp via -t or --timestamp."
required_flag_or_error "$BACKUP_RESTORE" "You are required to supply a source for the backup_restore image via -b or --backup."
required_flag_or_error "$LABKEY" "You are required to supply a source for the labkey image via -l or --labkey."

# directory containing this script
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
# root directory of this git repo
REPO_DIR="${SCRIPT_DIR}"

# Get dependency mo (downloaded once into lib/ next to this script).
# --fail makes curl exit non-zero on an HTTP error. Without it the server's
# error page would be written to lib/mo and marked executable, and the -x
# test would then skip the download on every later run.
MO_EXE="${SCRIPT_DIR}/lib/mo"
if [[ ! -x "${MO_EXE}" ]]; then
  mkdir -p "$(dirname "$MO_EXE")"
  curl -fsSL https://git.io/get-mo -o "${MO_EXE}"
  chmod +x "${MO_EXE}"
fi

# Get dependency kubeconform: the release tarball is streamed into tar, which
# extracts only the kubeconform binary into lib/.
KUBEVAL_EXE="${SCRIPT_DIR}/lib/kubeconform"
if [[ ! -x "${KUBEVAL_EXE}" ]]; then
  mkdir -p "$(dirname "$KUBEVAL_EXE")"
  pushd "$(dirname "$KUBEVAL_EXE")"
  # --fail: stop on an HTTP error instead of piping an error page into tar
  curl -fsSL https://github.com/yannh/kubeconform/releases/latest/download/kubeconform-linux-amd64.tar.gz \
    | tar xvz "$(basename "$KUBEVAL_EXE")"
  popd
fi
# Pick the Rancher project and the public host name of the target cluster.
# CLUSTER and SHORT_FQDN are exported; PROJECT is not.
if [[ "$DEV" -ne 1 ]]; then
  # production cluster for m2650. Run 'rancher context switch' to get other values.
  PROJECT="c-tmq7p:p-gqfz8"
  export CLUSTER="production"
  export SHORT_FQDN="metatlas.lbl.gov"
else
  # development:m2650
  PROJECT="c-fwj56:p-lswtz"
  export CLUSTER="development"
  export SHORT_FQDN="metatlas-dev.lbl.gov"
fi
export LONG_FQDN="lb.lims-24.${CLUSTER}.svc.spin.nersc.org"

# TLS material and secrets live next to this script, in hidden files
CERT_FILE="${SCRIPT_DIR}/.tls.${SHORT_FQDN}.pem"
KEY_FILE="${SCRIPT_DIR}/.tls.${SHORT_FQDN}.key"
SECRETS_FILE="${REPO_DIR}/.secrets"

# backup locations as seen from the global filesystem
ROOT_BACKUP_DIR="/global/cfs/cdirs/metatlas/projects/lims_backups/pg_dump/lims-24"
DB_BACKUP="${ROOT_BACKUP_DIR}/${TIMESTAMP}/labkey_db_${TIMESTAMP}"
FILES_BACKUP="${ROOT_BACKUP_DIR}/${TIMESTAMP}/labkey_files_${TIMESTAMP}.tar.gz"

# the same backups as seen from inside the backup_restore container
DB_BACKUP_INTERNAL="${BACKUP_MNT}/${TIMESTAMP}/labkey_db_${TIMESTAMP}"
FILES_BACKUP_INTERNAL="${BACKUP_MNT}/${TIMESTAMP}/labkey_files_${TIMESTAMP}.tar.gz"

# the backups are only checked when a fresh instance is requested (--new)
if [[ "$NEW" -eq 1 ]]; then
  for required_file in "$DB_BACKUP" "$FILES_BACKUP"; do
    file_exists_readable_not_empty_or_error "$required_file"
  done
fi

# secrets and TLS files are always required
for required_file in "$SECRETS_FILE" "$CERT_FILE" "$KEY_FILE"; do
  file_exists_readable_not_empty_or_error "$required_file"
done

# files holding private material must have permissions 600 or 660
for secret_file in "${SECRETS_FILE}" "${KEY_FILE}"; do
  file_safe_secret_or_error "$secret_file"
done
# variables for template substitutions by mo
export LABKEY_IMAGE_TAG="$LABKEY"
export BACKUP_RESTORE_IMAGE_TAG="$BACKUP_RESTORE"

DEPLOY_TMP="${SCRIPT_DIR}/deploy_tmp"
mkdir -p "$DEPLOY_TMP"
# Remove files rendered by a previous run. The glob has to sit outside the
# quotes: "${DEPLOY_TMP}/*" in quotes names a single file literally called
# '*', so nothing was ever removed. ':?' aborts should DEPLOY_TMP be empty.
rm -rf "${DEPLOY_TMP:?}"/*

# load the module named by SPIN_MODULE when a `module` shell function exists
if declare -F module; then
  module load "${SPIN_MODULE}"
fi

# does replacement of **exported** environment variables enclosed in double braces
# such as {{API_ROOT}}
echo "Validating deployment yaml files..."
# find's NUL-delimited output is read on fd 3, so paths containing whitespace
# or glob characters stay intact and commands in the loop body keep their
# normal stdin.
while IFS= read -r -d '' TEMPLATE <&3; do
  # foo.yaml.template is rendered to ${DEPLOY_TMP}/foo.yaml
  REPLACED_FILE="${DEPLOY_TMP}/$(basename "${TEMPLATE%.*}")"
  "${MO_EXE}" -u "${TEMPLATE}" > "${REPLACED_FILE}"
done 3< <(find "${SCRIPT_DIR}/" -name '*.yaml.template' -print0)

# The server version is the same for every file, so ask the cluster once.
# As before, a failing lookup is not fatal here: kubeconform just receives
# whatever (possibly empty) text the lookup printed.
K8S_SERVER_VERSION="$(k8s_version || true)"
while IFS= read -r -d '' YAML <&3; do
  # lint the k8 yaml file
  "${KUBEVAL_EXE}" -kubernetes-version "${K8S_SERVER_VERSION}" "${YAML}"
done 3< <(find "${SCRIPT_DIR}/" -name '*.yaml' ! -name 'python*.yaml' ! -name 'R_*.yaml' -print0)
# Load POSTGRES_PASSWORD and MASTER_ENCRYPTION_KEY from the secrets file.
# shellcheck source=.secrets
source "${SECRETS_FILE}"

# ${VAR:-} is required because of `set -u`: a variable the secrets file never
# defines would otherwise stop the script with bash's "unbound variable"
# error before the messages and exit codes below could be produced.
if [[ -z "${POSTGRES_PASSWORD:-}" ]]; then
  >&2 echo "ERROR: Envionmental variable POSTGRES_PASSWORD not defined in .secrets file."
  exit 4
fi
if [[ -z "${MASTER_ENCRYPTION_KEY:-}" ]]; then
  >&2 echo "ERROR: Envionmental variable MASTER_ENCRYPTION_KEY not defined in .secrets file."
  exit 5
fi

# `command -v` is the shell builtin counterpart of `which`; like `which` it
# prints the location of the program when it is found.
if ! command -v rancher; then
  >&2 echo "ERROR: Required program 'rancher' not found."
  exit 6
fi

# Extract the dotted version number from the `rancher --version` output and
# keep only the part before the first dot as the major version.
RANCHER_VERSION=$(rancher --version | sed -e 's/rancher version v\([0-9.]\+\)/\1/')
RANCHER_MAJOR_VERSION="${RANCHER_VERSION%%.*}"
if [[ "${RANCHER_MAJOR_VERSION}" -ne "${RANCHER_MAJOR_VERSION_REQUIRED}" ]]; then
  >&2 echo "ERROR: rancher v${RANCHER_MAJOR_VERSION_REQUIRED}.x required, version v${RANCHER_VERSION} found."
  exit 7
fi

# `rancher project` failing is treated as "not logged in"
if ! rancher project; then
  >&2 echo "ERROR: No rancher authentication token is present."
  exit 8
fi

rancher context switch "${PROJECT}"

# create the namespace on first use
if ! rancher inspect --type namespace "${NAMESPACE}"; then
  rancher namespace create "${NAMESPACE}"
fi
# NOTE: $FLAGS is left unquoted throughout, as in the rest of this script --
# presumably so that it may hold several kubectl options.
echo "Cleaning up previous deployments and secrets..."
if [[ "$NEW" -eq 1 ]]; then
  # clean up any existing resources to start a new deployment
  rancher kubectl delete deployments,statefulsets,cronjobs,services,pods --all $FLAGS
  rancher kubectl delete pvc --all $FLAGS
fi

# The secrets are always recreated. A failed delete is ignored on purpose
# (for instance when the secret does not exist yet).
for stale_secret in db labkey metatlas-cert; do
  rancher kubectl delete secret "${stale_secret}" $FLAGS || true
done

# start building up the new instance
echo "Create secrets for db, labkey, and tls..."
rancher kubectl create secret generic db $FLAGS \
  --from-literal="postgres_password=${POSTGRES_PASSWORD}"
rancher kubectl create secret generic labkey $FLAGS \
  --from-literal="master_encryption_key=${MASTER_ENCRYPTION_KEY}"
rancher kubectl create secret tls metatlas-cert $FLAGS \
  --cert="${CERT_FILE}" \
  --key="${KEY_FILE}"

if [[ "$NEW" -eq 1 ]]; then
  echo "Create persistant volumes..."
  ## Create persistent volumes (database data first, then labkey files)
  for volume_manifest in \
    "${REPO_DIR}/db/db-data.yaml" \
    "${REPO_DIR}/labkey/labkey-files.yaml"; do
    rancher kubectl create --save-config $FLAGS -f "${volume_manifest}"
  done
  #rancher kubectl apply $FLAGS -f "${REPO_DIR}/db/db-data.yaml"
  #rancher kubectl apply $FLAGS -f "${REPO_DIR}/labkey/labkey-files.yaml"
fi
## Create database pod
echo "Create database pod..."
rancher kubectl apply $FLAGS -f "${REPO_DIR}/db/db.yaml"

## Create restore pods
echo "Create restore pods..."
rancher kubectl apply $FLAGS -f "${DEPLOY_TMP}/restore.yaml"
rancher kubectl apply $FLAGS -f "${DEPLOY_TMP}/restore-root.yaml"

# block until the database statefulset has finished rolling out
rancher kubectl rollout status $FLAGS statefulset/db

if [[ "$NEW" -eq 1 ]]; then
  ## Restore labkey database
  echo "Restore labkey database..."
  echo "Waiting for restore pod to be available..."
  rancher kubectl wait $FLAGS deployment.apps/restore --for=condition=available --timeout=60s
  echo "Running restore.sh..."
  rancher kubectl exec deployment.apps/restore $FLAGS -- /restore.sh "${DB_BACKUP_INTERNAL}"

  # Restore labkey files
  # The container that copies the archive from global filesystem to the
  # persistent volume cannot be running as root and therefore cannot
  # correctly set the ownership of the unarchived files. Therefore
  # a second pod (restore-root) does not mount the global filesystem
  # and can therefore untar the archive with the correct ownership.
  echo "Labkey files mount point: $FILES_MNT"
  echo "Subdirectory for files: $FILES_BACKUP_INTERNAL"
  # temporary location of the archive on the labkey files volume
  FILES_TEMP="${FILES_MNT}/$(basename "${FILES_BACKUP_INTERNAL}")"

  echo "Applying permissions to restore-root pod..."
  rancher kubectl wait $FLAGS deployment.apps/restore-root --for=condition=available --timeout=60s
  # Empty the files volume. The glob must be expanded INSIDE the container:
  # kubectl exec runs the command without a shell, and an unquoted glob on
  # this command line is expanded (or left as a literal '*') by the local
  # shell, which made the previous `rm -rf "${FILES_MNT}"/*` a no-op.
  # NOTE(review): assumes the restore-root image provides `sh` -- confirm.
  rancher kubectl exec deployment.apps/restore-root $FLAGS -- \
    sh -c 'rm -rf -- "$1"/*' sh "${FILES_MNT}"
  # world-writable, so the `restore` pod can copy the archive onto the volume
  rancher kubectl exec deployment.apps/restore-root $FLAGS -- chmod 777 "${FILES_MNT}"

  echo "Executing restore and restore-root..."
  rancher kubectl wait $FLAGS deployment.apps/restore --for=condition=available --timeout=600s
  # copy with `restore`, then unpack and delete the archive with `restore-root`
  rancher kubectl exec deployment.apps/restore $FLAGS -- cp "${FILES_BACKUP_INTERNAL}" "${FILES_TEMP}"
  rancher kubectl exec deployment.apps/restore-root $FLAGS -- tar xzpf "${FILES_TEMP}" -C "${FILES_MNT}"
  rancher kubectl exec deployment.apps/restore-root $FLAGS -- rm "${FILES_TEMP}"
fi
## Create the labkey pod, the load balancer and the backup pod, in that order.
# Each entry is "<text for the progress message>:<manifest name>"; the
# manifests are the rendered files in DEPLOY_TMP.
for deploy_step in "labkey pod:labkey" "load balancer:lb" "backup pod:backup"; do
  echo "Creating ${deploy_step%%:*}..."
  rancher kubectl apply $FLAGS -f "${DEPLOY_TMP}/${deploy_step##*:}.yaml"
done

# scale down the pods used for restoring
echo "Scaling down restore pods..."
for restore_deployment in restore restore-root; do
  rancher kubectl scale --replicas=0 "deployment.apps/${restore_deployment}" $FLAGS
done

# the rendered manifests are no longer needed
echo "Cleaning up temporary files..."
rm -rf "${DEPLOY_TMP}"
echo "Deployment complete."