From d6cd48dc10c00712e648b2f68aef6fb36b8fc5d7 Mon Sep 17 00:00:00 2001
From: Exaviz <info@exaviz.com>
Date: Tue, 30 Jun 2026 19:00:11 -0400
Subject: [PATCH 1/3] feat: add backup and restore for Docker Compose

Add backup.sh, restore.sh, and a guide for backing up and restoring a
Docker Compose deployment.

- backup.sh takes a cold snapshot: stops the stack, archives the
  CockroachDB and MinIO volumes plus config and secrets, then restarts
- restore.sh replaces volumes and config from a backup, with a
  confirmation prompt and a stop-before-overwrite safeguard
- skip the rebuildable search index and transient event log by default
  (--full includes them), matching the Helm chart's backup scope
- document both in guides/backup-restore.md and link from the README

Addresses the recurring need for a documented Compose backup path
(upstream huly-selfhost #273, #223, #62).

Signed-off-by: Exaviz <info@exaviz.com>
---
 README.md                |  14 ++++
 backup.sh                | 138 +++++++++++++++++++++++++++++++++++++++
 guides/backup-restore.md |  86 ++++++++++++++++++++++++
 restore.sh               | 110 +++++++++++++++++++++++++++++++
 4 files changed, 348 insertions(+)
 create mode 100755 backup.sh
 create mode 100644 guides/backup-restore.md
 create mode 100755 restore.sh
diff --git a/README.md b/README.md
index 5aae414..2cd9fe0 100644
--- a/README.md
+++ b/README.md
@@ -56,6 +56,20 @@ To update an existing self-hosted deployment to a new Huly version:
    docker compose up -d
    ```
 
+## Backups
+
+Use the `backup.sh` and `restore.sh` helpers to back up and restore a Docker Compose
+deployment (database, file store, and config). See
+[`guides/backup-restore.md`](guides/backup-restore.md) for details.
+
+> [!IMPORTANT]
+> Take a backup **before** any version upgrade.
+
+```bash
+./backup.sh                 # snapshot to ./backups/huly-backup-<timestamp>/
+./restore.sh backups/huly-backup-<timestamp>
+```
+
 ## Disable default content in new workspaces
 
 By default, Huly can initialize new workspaces with predefined content. To disable that behavior, set `INIT_REPO_DIR` in the `workspace` service to a non-existing path:
diff --git a/backup.sh b/backup.sh
new file mode 100755
index 0000000..e1c33e3
--- /dev/null
+++ b/backup.sh
@@ -0,0 +1,138 @@
+#!/usr/bin/env bash
+#
+# backup.sh - Offline backup of a Docker Compose deployment.
+#
+# Creates a timestamped, self-contained backup of everything needed to restore the
+# deployment: the CockroachDB data, the MinIO file store, and the local config and
+# secret files. The stack is stopped during the volume snapshot so the copy is
+# crash-consistent, then restarted. This is the recommended backup to take before a
+# version upgrade (see MIGRATION.md).
+#
+# The search index (elastic) and event log (redpanda) are skipped by default: they
+# are rebuilt automatically and are not required to restore. Use --full to include
+# them.
+#
+# Usage: ./backup.sh [--output=DIR] [--keep=N] [--full] [--help]
+
+set -euo pipefail
+
+cd "$(dirname "$0")"
+
+OUTPUT_DIR="./backups"
+KEEP=0
+FULL=false
+
+for arg in "$@"; do
+    case $arg in
+        --output=*) OUTPUT_DIR="${arg#*=}" ;;
+        --keep=*)   KEEP="${arg#*=}" ;;
+        --full)     FULL=true ;;
+        --help)
+            echo "Usage: $0 [OPTIONS]"
+            echo "Options:"
+            echo "  --output=DIR  Directory to write backups into (default: ./backups)"
+            echo "  --keep=N      Keep only the N most recent backups (default: keep all)"
+            echo "  --full        Also back up the search index (elastic) and event log (redpanda)"
+            echo "  --help        Show this help message"
+            exit 0
+            ;;
+        *)
+            echo "Unknown option: $arg"
+            echo "Use --help for usage information"
+            exit 1
+            ;;
+    esac
+done
+
+if docker compose version >/dev/null 2>&1; then
+    COMPOSE="docker compose"
+elif command -v docker-compose >/dev/null 2>&1; then
+    COMPOSE="docker-compose"
+else
+    echo "Error: docker compose is not available." >&2
+    exit 1
+fi
+
+# Print the host source (named volume or bind path) backing a service's mount.
+mount_source() {
+    # $1 = service name, $2 = mount destination inside the container
+    local cid
+    cid=$($COMPOSE ps -aq "$1" 2>/dev/null | head -1)
+    [ -n "$cid" ] || return 0
+    docker inspect "$cid" --format \
+        "{{range .Mounts}}{{if eq .Destination \"$2\"}}{{if .Name}}{{.Name}}{{else}}{{.Source}}{{end}}{{end}}{{end}}"
+}
+
+# Archive a volume or bind path into the backup directory.
+archive() {
+    # $1 = source (volume name or host path), $2 = output tar name, $3 = label
+    if [ -z "$1" ]; then
+        echo -e "  \033[33mskipping $3 (not found)\033[0m"
+        return 0
+    fi
+    echo "  - $3 -> $2"
+    docker run --rm -v "$1":/data:ro -v "$DEST_ABS":/backup alpine \
+        tar czf "/backup/$2" -C /data .
+}
+
+STAMP=$(date +%Y%m%d-%H%M%S)
+DEST="$OUTPUT_DIR/huly-backup-$STAMP"
+mkdir -p "$DEST/config"
+DEST_ABS=$(cd "$DEST" && pwd)
+
+echo -e "\033[1;34mResolving data volumes...\033[0m"
+CR_SRC=$(mount_source cockroach /cockroach/cockroach-data)
+FILES_SRC=$(mount_source minio /data)
+MONGO_SRC=$(mount_source mongodb /data/db)
+ELASTIC_SRC=$(mount_source elastic /usr/share/elasticsearch/data)
+REDPANDA_SRC=$(mount_source redpanda /var/lib/redpanda/data)
+
+echo "Stopping stack for a consistent snapshot..."
+$COMPOSE stop
+
+echo "Archiving data volumes..."
+archive "$CR_SRC" cockroach.tar.gz "CockroachDB"
+archive "$FILES_SRC" files.tar.gz "MinIO files"
+[ -n "$MONGO_SRC" ] && archive "$MONGO_SRC" mongodb.tar.gz "MongoDB (legacy)"
+if [ "$FULL" = true ]; then
+    archive "$ELASTIC_SRC" elastic.tar.gz "Elasticsearch index"
+    archive "$REDPANDA_SRC" redpanda.tar.gz "Redpanda log"
+fi
+
+echo "Copying config and secret files..."
+for f in .env huly.conf huly_v7.conf nginx.conf .huly.secret .cr.secret .rp.secret; do
+    [ -f "$f" ] && cp -p "$f" "$DEST/config/"
+done
+[ -d traefik ] && cp -rp traefik "$DEST/config/"
+
+echo "Restarting stack..."
+$COMPOSE start
+
+{
+    echo "created: $(date -u +%Y-%m-%dT%H:%M:%SZ)"
+    echo "full: $FULL"
+    grep -hE '^HULY_VERSION=' .env huly.conf huly_v7.conf 2>/dev/null | tail -1 || true
+    echo "archives:"
+    for a in "$DEST"/*.tar.gz; do
+        [ -f "$a" ] && echo "  - $(basename "$a")"
+    done
+} > "$DEST/manifest.txt"
+
+if [ "$KEEP" -gt 0 ]; then
+    echo "Pruning old backups, keeping $KEEP..."
+    # Timestamped names sort chronologically, and bash expands globs lexically,
+    # so this array is oldest-first.
+    shopt -s nullglob
+    existing=("$OUTPUT_DIR"/huly-backup-*/)
+    shopt -u nullglob
+    remove=$((${#existing[@]} - KEEP))
+    if [ "$remove" -gt 0 ]; then
+        for ((i = 0; i < remove; i++)); do
+            echo "  removing ${existing[i]}"
+            rm -rf "${existing[i]}"
+        done
+    fi
+fi
+
+echo -e "\033[1;32mBackup complete: $DEST\033[0m"
+du -sh "$DEST" | awk '{print "Total size: " $1}'
diff --git a/guides/backup-restore.md b/guides/backup-restore.md
new file mode 100644
index 0000000..7d3bd6e
--- /dev/null
+++ b/guides/backup-restore.md
@@ -0,0 +1,86 @@
+# Backup and Restore (Docker Compose)
+
+This guide covers backing up and restoring a Docker Compose deployment. For
+Kubernetes, the Helm chart ships scheduled backups under
+`helm/huly/templates/backup/` instead.
+
+> [!IMPORTANT]
+> Always take a backup **before upgrading** to a new version. See `MIGRATION.md`.
+
+## What gets backed up
+
+| Data | Source | Included by default |
+|---|---|---|
+| Database | CockroachDB volume (`cr_data`) | Yes |
+| Files / attachments | MinIO volume (`files`) | Yes |
+| Config and secrets | `.env`, `huly*.conf`, `nginx.conf`, `.huly.secret`, `.cr.secret`, `.rp.secret`, `traefik/` | Yes |
+| Legacy database | MongoDB volume (`mongodb`), if present | Yes, when detected |
+| Search index | Elasticsearch volume (`elastic`) | No - rebuilt automatically |
+| Event log | Redpanda volume (`redpanda`) | No - transient |
+
+The search index and event log are intentionally skipped: they are regenerated from
+the database and file store, so excluding them keeps backups small and restores fast.
+Use `--full` if you want them included anyway.
+
+## Backup
+
+`backup.sh` takes a **cold** snapshot: it stops the stack so the copy is
+crash-consistent, archives the data volumes, copies your config, and restarts the
+stack. Expect a short period of downtime for the duration of the snapshot.
+
+```bash
+./backup.sh                 # back up to ./backups/huly-backup-<timestamp>/
+./backup.sh --output=/mnt/backups
+./backup.sh --keep=7        # keep only the 7 most recent backups
+./backup.sh --full          # also include the search index and event log
+```
+
+Each backup is a self-contained directory:
+
+```
+backups/huly-backup-20260630-141500/
+  cockroach.tar.gz
+  files.tar.gz
+  config/
+    .env
+    huly_v7.conf
+    nginx.conf
+    ...
+  manifest.txt
+```
+
+Copy that directory off the server (to object storage or another host) for real
+disaster recovery - a backup that lives only on the same disk as the deployment is
+not a backup.
+
+## Restore
+
+> [!WARNING]
+> Restoring **replaces** the current data volumes and overwrites local config files.
+> Test a restore on a clean or non-production environment before you need it for
+> real.
+
+```bash
+./restore.sh backups/huly-backup-20260630-141500
+./restore.sh backups/huly-backup-20260630-141500 --yes   # skip the confirmation prompt
+```
+
+`restore.sh` creates the stack's volumes if they do not exist, writes the archived
+data back into them, restores the config files, and starts the stack. The search
+index rebuilds automatically over the first few minutes after start.
+
+## Verifying a backup
+
+A backup you have never restored is a guess, not a backup. Periodically:
+
+1. Spin up a throwaway host (or a separate project directory).
+2. Run `restore.sh` against a recent backup there.
+3. Confirm you can log in and see your workspaces.
+
+## Notes
+
+- Keep these scripts next to `compose.yml`; they `cd` to their own directory, so relative paths resolve there.
+- They use only `docker`, `docker compose`, and a temporary `alpine` container, so
+  there is nothing extra to install.
+- For zero-downtime logical backups, a future enhancement could mirror the hot
+  CockroachDB dump and `rclone` file sync already used by the Helm CronJobs.
diff --git a/restore.sh b/restore.sh
new file mode 100755
index 0000000..e5842fa
--- /dev/null
+++ b/restore.sh
@@ -0,0 +1,110 @@
+#!/usr/bin/env bash
+#
+# restore.sh - Restore a backup created by backup.sh into a Docker Compose deployment.
+#
+# WARNING: this REPLACES the current data volumes and overwrites local config files.
+# Stop and think before running it against a live deployment. Restore into a clean or
+# test environment first when you can.
+#
+# Usage: ./restore.sh BACKUP_DIR [--yes] [--help]
+
+set -euo pipefail
+
+cd "$(dirname "$0")"
+
+RESTORE_DIR=""
+ASSUME_YES=false
+
+for arg in "$@"; do
+    case $arg in
+        --yes)  ASSUME_YES=true ;;
+        --help)
+            echo "Usage: $0 BACKUP_DIR [OPTIONS]"
+            echo "Arguments:"
+            echo "  BACKUP_DIR    A backup directory created by backup.sh"
+            echo "Options:"
+            echo "  --yes         Do not prompt for confirmation"
+            echo "  --help        Show this help message"
+            exit 0
+            ;;
+        -*)
+            echo "Unknown option: $arg"
+            echo "Use --help for usage information"
+            exit 1
+            ;;
+        *) RESTORE_DIR="$arg" ;;
+    esac
+done
+
+if [ -z "$RESTORE_DIR" ] || [ ! -d "$RESTORE_DIR" ]; then
+    echo "Error: provide a valid backup directory. See --help." >&2
+    exit 1
+fi
+if [ ! -f "$RESTORE_DIR/manifest.txt" ]; then
+    echo "Error: $RESTORE_DIR does not look like a backup (no manifest.txt)." >&2
+    exit 1
+fi
+
+if docker compose version >/dev/null 2>&1; then
+    COMPOSE="docker compose"
+elif command -v docker-compose >/dev/null 2>&1; then
+    COMPOSE="docker-compose"
+else
+    echo "Error: docker compose is not available." >&2
+    exit 1
+fi
+
+SRC_ABS=$(cd "$RESTORE_DIR" && pwd)
+
+echo -e "\033[1;33mThis will overwrite current data and config from:\033[0m $SRC_ABS"
+cat "$RESTORE_DIR/manifest.txt"
+if [ "$ASSUME_YES" != true ]; then
+    read -r -p "Continue and overwrite the current deployment? (y/N): " ANSWER
+    case "${ANSWER:-N}" in
+        [Yy]*) ;;
+        *) echo "Aborted."; exit 0 ;;
+    esac
+fi
+
+# Ensure containers and volumes exist, then stop everything so the volumes are safe
+# to overwrite (never write to a volume while its container is running).
+echo "Preparing stack (creating containers, stopping the stack)..."
+$COMPOSE up --no-start
+$COMPOSE stop
+
+mount_source() {
+    local cid
+    cid=$($COMPOSE ps -aq "$1" 2>/dev/null | head -1)
+    [ -n "$cid" ] || return 0
+    docker inspect "$cid" --format \
+        "{{range .Mounts}}{{if eq .Destination \"$2\"}}{{if .Name}}{{.Name}}{{else}}{{.Source}}{{end}}{{end}}{{end}}"
+}
+
+restore_vol() {
+    # $1 = target source (volume name or host path), $2 = tar file in the backup
+    [ -f "$SRC_ABS/$2" ] || return 0
+    if [ -z "$1" ]; then
+        echo -e "  \033[33mcannot resolve target for $2, skipping\033[0m"
+        return 0
+    fi
+    echo "  - $2 -> $1"
+    docker run --rm -v "$1":/data -v "$SRC_ABS":/backup:ro alpine \
+        sh -c 'rm -rf /data/* /data/.[!.]* /data/..?* 2>/dev/null; tar xzf "/backup/'"$2"'" -C /data'
+}
+
+echo "Restoring data volumes..."
+restore_vol "$(mount_source cockroach /cockroach/cockroach-data)" cockroach.tar.gz
+restore_vol "$(mount_source minio /data)" files.tar.gz
+restore_vol "$(mount_source mongodb /data/db)" mongodb.tar.gz
+restore_vol "$(mount_source elastic /usr/share/elasticsearch/data)" elastic.tar.gz
+restore_vol "$(mount_source redpanda /var/lib/redpanda/data)" redpanda.tar.gz
+
+echo "Restoring config and secret files..."
+if [ -d "$RESTORE_DIR/config" ]; then
+    cp -rp "$RESTORE_DIR"/config/. .
+fi
+
+echo "Starting stack..."
+$COMPOSE up -d
+
+echo -e "\033[1;32mRestore complete.\033[0m The search index may take a few minutes to rebuild."

From 4a1abcd1bd9d4941948d2aebdca398e9abcd7ee9 Mon Sep 17 00:00:00 2001
From: Exaviz <info@exaviz.com>
Date: Tue, 30 Jun 2026 19:25:30 -0400
Subject: [PATCH 2/3] harden backup/restore for unattended and varied
 deployments

Two fixes found by running this against a real deployment:

- Resolve data volumes by mount destination as a fallback, not only by a
  hardcoded service name. Deployments that name the database service
  'cockroachdb' (instead of 'cockroach') were silently skipped, producing
  a backup with no database. Now scans all project containers for the
  destination when the expected service name is absent.
- Always restart the stack via an EXIT trap. With 'set -e', a failure
  while archiving (after the stack was stopped) exited without restarting,
  leaving the deployment down - unacceptable for a scheduled backup. The
  stack is now brought back up even on failure, and as soon as the volume
  archive finishes on success (minimizing downtime).

Also make volume resolution safe under 'set -e' (a not-found lookup no
longer aborts the script).

Signed-off-by: Exaviz <info@exaviz.com>
---
 backup.sh  | 57 +++++++++++++++++++++++++++++++++++++++++-------------
 restore.sh | 27 +++++++++++++++++++++-----
 2 files changed, 66 insertions(+), 18 deletions(-)

diff --git a/backup.sh b/backup.sh
index e1c33e3..fdaef9f 100755
--- a/backup.sh
+++ b/backup.sh
@@ -4,9 +4,9 @@
 #
 # Creates a timestamped, self-contained backup of everything needed to restore the
 # deployment: the CockroachDB data, the MinIO file store, and the local config and
-# secret files. The stack is stopped during the volume snapshot so the copy is
-# crash-consistent, then restarted. This is the recommended backup to take before a
-# version upgrade (see MIGRATION.md).
+# secret files. The stack is stopped only while the volumes are archived so the copy
+# is crash-consistent, then restarted immediately. This is the recommended backup to
+# take before a version upgrade (see MIGRATION.md).
 #
 # The search index (elastic) and event log (redpanda) are skipped by default: they
 # are rebuilt automatically and are not required to restore. Use --full to include
@@ -53,14 +53,43 @@ else
     exit 1
 fi
 
-# Print the host source (named volume or bind path) backing a service's mount.
+# Always bring the stack back up, even if archiving fails partway through, so a
+# failed backup can never leave the deployment stopped.
+STOPPED=false
+restart_stack() {
+    if [ "$STOPPED" = true ]; then
+        echo "Ensuring the stack is running again..."
+        $COMPOSE start || echo -e "\033[31mWARNING: could not restart the stack - check 'docker compose ps'.\033[0m"
+        STOPPED=false
+    fi
+}
+trap restart_stack EXIT
+
+# Print the host source (named volume or bind path) backing the given mount
+# destination on a specific container, or nothing.
+source_on() {
+    # $1 = container id, $2 = mount destination
+    docker inspect "$1" --format \
+        "{{range .Mounts}}{{if eq .Destination \"$2\"}}{{if .Name}}{{.Name}}{{else}}{{.Source}}{{end}}{{end}}{{end}}" 2>/dev/null || true
+}
+
+# Resolve the volume backing a mount destination. Try the expected service name
+# first; if that service does not exist (deployments differ, e.g. "cockroach" vs
+# "cockroachdb"), scan every container in the project for the destination.
+# Always exits 0 (prints nothing when not found) so it is safe under `set -e`.
 mount_source() {
-    # $1 = service name, $2 = mount destination inside the container
-    local cid
-    cid=$($COMPOSE ps -aq "$1" 2>/dev/null | head -1)
-    [ -n "$cid" ] || return 0
-    docker inspect "$cid" --format \
-        "{{range .Mounts}}{{if eq .Destination \"$2\"}}{{if .Name}}{{.Name}}{{else}}{{.Source}}{{end}}{{end}}{{end}}"
+    # $1 = expected service name, $2 = mount destination
+    local cid src
+    cid=$($COMPOSE ps -aq "$1" 2>/dev/null | head -1 || true)
+    if [ -n "$cid" ]; then
+        src=$(source_on "$cid" "$2")
+        [ -n "$src" ] && { echo "$src"; return 0; }
+    fi
+    for cid in $($COMPOSE ps -aq 2>/dev/null || true); do
+        src=$(source_on "$cid" "$2")
+        [ -n "$src" ] && { echo "$src"; return 0; }
+    done
+    return 0
 }
 
 # Archive a volume or bind path into the backup directory.
@@ -89,6 +118,7 @@ REDPANDA_SRC=$(mount_source redpanda /var/lib/redpanda/data)
 
 echo "Stopping stack for a consistent snapshot..."
 $COMPOSE stop
+STOPPED=true
 
 echo "Archiving data volumes..."
 archive "$CR_SRC" cockroach.tar.gz "CockroachDB"
@@ -99,15 +129,16 @@ if [ "$FULL" = true ]; then
     archive "$REDPANDA_SRC" redpanda.tar.gz "Redpanda log"
 fi
 
+# Bring the stack back up as soon as the volumes are archived; the rest of the work
+# (copying config, manifest, pruning) does not need the stack stopped.
+restart_stack
+
 echo "Copying config and secret files..."
 for f in .env huly.conf huly_v7.conf nginx.conf .huly.secret .cr.secret .rp.secret; do
     [ -f "$f" ] && cp -p "$f" "$DEST/config/"
 done
 [ -d traefik ] && cp -rp traefik "$DEST/config/"
 
-echo "Restarting stack..."
-$COMPOSE start
-
 {
     echo "created: $(date -u +%Y-%m-%dT%H:%M:%SZ)"
     echo "full: $FULL"
diff --git a/restore.sh b/restore.sh
index e5842fa..3a3849a 100755
--- a/restore.sh
+++ b/restore.sh
@@ -72,12 +72,29 @@ echo "Preparing stack (creating containers, stopping the stack)..."
 $COMPOSE up --no-start
 $COMPOSE stop
 
+source_on() {
+    # $1 = container id, $2 = mount destination
+    docker inspect "$1" --format \
+        "{{range .Mounts}}{{if eq .Destination \"$2\"}}{{if .Name}}{{.Name}}{{else}}{{.Source}}{{end}}{{end}}{{end}}" 2>/dev/null || true
+}
+
+# Resolve the volume backing a mount destination: try the expected service name
+# first, then scan every container in the project (so deployments that name the
+# service differently, e.g. "cockroach" vs "cockroachdb", still resolve).
+# Always exits 0 (prints nothing when not found) so it is safe under `set -e`.
 mount_source() {
-    local cid
-    cid=$($COMPOSE ps -aq "$1" 2>/dev/null | head -1)
-    [ -n "$cid" ] || return 0
-    docker inspect "$cid" --format \
-        "{{range .Mounts}}{{if eq .Destination \"$2\"}}{{if .Name}}{{.Name}}{{else}}{{.Source}}{{end}}{{end}}{{end}}"
+    # $1 = expected service name, $2 = mount destination
+    local cid src
+    cid=$($COMPOSE ps -aq "$1" 2>/dev/null | head -1 || true)
+    if [ -n "$cid" ]; then
+        src=$(source_on "$cid" "$2")
+        [ -n "$src" ] && { echo "$src"; return 0; }
+    fi
+    for cid in $($COMPOSE ps -aq 2>/dev/null || true); do
+        src=$(source_on "$cid" "$2")
+        [ -n "$src" ] && { echo "$src"; return 0; }
+    done
+    return 0
 }
 
 restore_vol() {

From 1ddda647035500dbe4176294d35c0bd61ff5197e Mon Sep 17 00:00:00 2001
From: Exaviz <info@exaviz.com>
Date: Wed, 1 Jul 2026 22:01:43 -0400
Subject: [PATCH 3/3] feat: optional offsite upload for backups (--offsite)

Add an --offsite flag to backup.sh that uploads each completed backup to
S3-compatible storage via rclone (Backblaze B2, DO Spaces, AWS S3, MinIO,
etc.), so backups are not stranded on the same host as the deployment.

- creds via a gitignored backup-offsite.env (example provided); env-var
  names match the Helm chart's backup CronJobs for consistency
- upload runs in a throwaway rclone container, nothing extra to install
- remote retention via bucket lifecycle policy (documented)
- backups/ and backup-offsite.env added to .gitignore
- documented in guides/backup-restore.md

Validated: rclone upload of a full backup (db + files + config + manifest)
to a MinIO target, bucket auto-created.

Signed-off-by: Exaviz <info@exaviz.com>
---
 .gitignore                 |  2 ++
 backup-offsite.env.example | 21 +++++++++++++++++++++
 backup.sh                  | 32 +++++++++++++++++++++++++++++++-
 guides/backup-restore.md   | 15 +++++++++++++++
 4 files changed, 69 insertions(+), 1 deletion(-)
 create mode 100644 backup-offsite.env.example

diff --git a/.gitignore b/.gitignore
index baf53f3..29f0eab 100644
--- a/.gitignore
+++ b/.gitignore
@@ -8,3 +8,5 @@ huly_v7.conf
 .cr.secret
 .rp.secret
 .idea
+backup-offsite.env
+backups/
diff --git a/backup-offsite.env.example b/backup-offsite.env.example
new file mode 100644
index 0000000..d99bd30
--- /dev/null
+++ b/backup-offsite.env.example
@@ -0,0 +1,21 @@
+# backup-offsite.env.example
+#
+# Offsite target for `./backup.sh --offsite`. Copy this to `backup-offsite.env`
+# (which is gitignored) and fill in your S3-compatible storage details. Works with
+# Backblaze B2, DigitalOcean Spaces, AWS S3, Wasabi, MinIO, etc.
+#
+# A backup that lives only on the same host as the deployment is not disaster
+# recovery. `--offsite` pushes each completed backup to this bucket via rclone.
+
+BACKUP_S3_ENDPOINT=https://s3.us-west-000.backblazeb2.com
+BACKUP_S3_REGION=us-west-000
+BACKUP_S3_BUCKET=my-huly-backups
+BACKUP_S3_PATH_PREFIX=huly
+BACKUP_S3_ACCESS_KEY=your-access-key-id
+BACKUP_S3_SECRET_KEY=your-secret-key
+
+# Optional: rclone S3 provider (Other works for most; AWS, Minio, Wasabi, etc. also valid).
+# BACKUP_S3_PROVIDER=Other
+
+# Retention on the remote is best handled by a bucket lifecycle policy (expire objects
+# after N days), rather than by this script.
diff --git a/backup.sh b/backup.sh
index fdaef9f..e48d8ed 100755
--- a/backup.sh
+++ b/backup.sh
@@ -12,7 +12,7 @@
 # are rebuilt automatically and are not required to restore. Use --full to include
 # them.
 #
-# Usage: ./backup.sh [--output=DIR] [--keep=N] [--full] [--help]
+# Usage: ./backup.sh [--output=DIR] [--keep=N] [--full] [--offsite] [--help]
 
 set -euo pipefail
 
@@ -21,18 +21,21 @@ cd "$(dirname "$0")"
 OUTPUT_DIR="./backups"
 KEEP=0
 FULL=false
+OFFSITE=false
 
 for arg in "$@"; do
     case $arg in
         --output=*) OUTPUT_DIR="${arg#*=}" ;;
         --keep=*)   KEEP="${arg#*=}" ;;
         --full)     FULL=true ;;
+        --offsite)  OFFSITE=true ;;
         --help)
             echo "Usage: $0 [OPTIONS]"
             echo "Options:"
             echo "  --output=DIR  Directory to write backups into (default: ./backups)"
             echo "  --keep=N      Keep only the N most recent backups (default: keep all)"
             echo "  --full        Also back up the search index (elastic) and event log (redpanda)"
+            echo "  --offsite     After the local backup, upload it to S3-compatible storage (see backup-offsite.env.example)"
             echo "  --help        Show this help message"
             exit 0
             ;;
@@ -165,5 +168,32 @@ if [ "$KEEP" -gt 0 ]; then
     fi
 fi
 
+# Optional: push the completed backup to S3-compatible offsite storage via rclone.
+# A backup that lives only on the same host is not disaster recovery. Configure via
+# environment or a gitignored backup-offsite.env (see backup-offsite.env.example).
+if [ "$OFFSITE" = true ]; then
+    # shellcheck source=/dev/null
+    if [ -f ./backup-offsite.env ]; then . ./backup-offsite.env; fi
+    : "${BACKUP_S3_BUCKET:?--offsite needs BACKUP_S3_BUCKET (set env or create backup-offsite.env)}"
+    : "${BACKUP_S3_ACCESS_KEY:?--offsite needs BACKUP_S3_ACCESS_KEY}"
+    : "${BACKUP_S3_SECRET_KEY:?--offsite needs BACKUP_S3_SECRET_KEY}"
+    : "${BACKUP_S3_ENDPOINT:?--offsite needs BACKUP_S3_ENDPOINT}"
+    PREFIX="${BACKUP_S3_PATH_PREFIX:-huly}"
+    DEST_NAME=$(basename "$DEST")
+    echo "Uploading backup offsite to s3://${BACKUP_S3_BUCKET}/${PREFIX}/${DEST_NAME}/ ..."
+    # Hand the keys to docker through its environment (-e NAME), not argv, so `ps` cannot show them.
+    if RCLONE_S3_ACCESS_KEY_ID="$BACKUP_S3_ACCESS_KEY" RCLONE_S3_SECRET_ACCESS_KEY="$BACKUP_S3_SECRET_KEY" \
+        docker run --rm -v "$DEST_ABS":/data:ro \
+        -e RCLONE_S3_PROVIDER="${BACKUP_S3_PROVIDER:-Other}" -e RCLONE_S3_ENV_AUTH=false \
+        -e RCLONE_S3_ACCESS_KEY_ID -e RCLONE_S3_SECRET_ACCESS_KEY \
+        -e RCLONE_S3_ENDPOINT="$BACKUP_S3_ENDPOINT" \
+        -e RCLONE_S3_REGION="${BACKUP_S3_REGION:-us-east-1}" \
+        rclone/rclone copy /data ":s3:${BACKUP_S3_BUCKET}/${PREFIX}/${DEST_NAME}/" -v; then
+        echo -e "\033[1;32mOffsite upload complete.\033[0m Set a bucket lifecycle policy for offsite retention."
+    else
+        echo -e "\033[31mWARNING: offsite upload failed - the local backup at $DEST is intact.\033[0m"
+    fi
+fi
+
 echo -e "\033[1;32mBackup complete: $DEST\033[0m"
 du -sh "$DEST" | awk '{print "Total size: " $1}'
diff --git a/guides/backup-restore.md b/guides/backup-restore.md
index 7d3bd6e..9b68a31 100644
--- a/guides/backup-restore.md
+++ b/guides/backup-restore.md
@@ -53,6 +53,21 @@ Copy that directory off the server (to object storage or another host) for real
 disaster recovery - a backup that lives only on the same disk as the deployment is
 not a backup.
 
+## Offsite copies (disaster recovery)
+
+A backup that lives only on the same host as the deployment is not disaster recovery: if
+the server is lost, so are the backups. Pass `--offsite` to also upload each backup to
+S3-compatible storage (Backblaze B2, DigitalOcean Spaces, AWS S3, Wasabi, MinIO, etc.):
+
+```bash
+cp backup-offsite.env.example backup-offsite.env   # then fill in your bucket + keys
+./backup.sh --offsite
+```
+
+`backup-offsite.env` is gitignored (it holds your keys). The upload runs `rclone` in a
+throwaway container, so there is nothing extra to install. For retention on the remote, set
+a bucket lifecycle policy (expire objects after N days) rather than pruning by hand.
+
 ## Restore
 
 > [!WARNING]