diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 00000000..cbceba8f --- /dev/null +++ b/.dockerignore @@ -0,0 +1,37 @@ +# Build context is the repo root (combined frontend+backend image). Keep it +# small: only frontend/ and backend/ sources are needed, and their deps are +# installed inside the image. +**/node_modules +**/dist +**/.venv +**/.git +**/.DS_Store +**/*.log + +# Everything unrelated to the single-container build. +.git +.github +.venv +.husky +.vscode +.devcontainer +docs +backlog +experiment +google-docs-addon +prototype-uist +own-words +sandbox +scripts +__pycache__ +*.pyc + +# Backend runtime data must never be baked into the image (mounted at runtime). +backend/logs +backend/data +backend/.env + +# Frontend dev/test artifacts not needed for the production build. +frontend/playwright-report +frontend/test-results +frontend/tests diff --git a/.github/workflows/build-addin-image.yml b/.github/workflows/build-addin-image.yml new file mode 100644 index 00000000..87658c27 --- /dev/null +++ b/.github/workflows/build-addin-image.yml @@ -0,0 +1,72 @@ +name: Build add-in image + +# Builds the single-container production add-in image (Hono backend serving the +# built frontend + /api/*, from the repo-root Dockerfile) and pushes it to GHCR +# on every push to main, tagged with the commit SHA. CD (in the +# Infrastructure_k8s_* repo) pins a deploy by reading main's current SHA and +# selecting that image tag — same pattern as build-experiment-image.yml. +# +# This runs in parallel with the existing Jenkins docker-compose build during +# the deployment migration; either path produces a working image. +on: + push: + branches: + - main + paths: + - 'frontend/**' + - 'backend/**' + - 'Dockerfile' + - '.dockerignore' + - '.github/workflows/build-addin-image.yml' + workflow_dispatch: + +# Least privilege: read the repo to check it out, write packages to push to GHCR. 
+permissions: + contents: read + packages: write + +# One image per commit on main (CD may pin an intermediate SHA), so don't cancel +# in-progress builds for the same ref. +concurrency: + group: build-addin-${{ github.ref }} + cancel-in-progress: false + +jobs: + build: + name: Build and push to GHCR + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v7 + + - name: Log in to GHCR + uses: docker/login-action@650006c6eb7dba73a995cc03b0b2d7f5ca915bee # v4.2.0 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Set up Buildx + uses: docker/setup-buildx-action@d7f5e7f509e45cec5c76c4d5afdd7de93d0b3df5 # v4.1.0 + + - name: Compute tags and labels + id: meta + uses: docker/metadata-action@80c7e94dd9b9319bd5eb7a0e0fe9291e23a2a2e9 # v6.1.0 + with: + images: ghcr.io/aitoolslab/writing-tools-addin + # Bare SHA tag is what CD pins to; latest is a convenience pointer. + tags: | + type=raw,value=${{ github.sha }} + type=raw,value=latest,enable={{is_default_branch}} + + - name: Build and push + uses: docker/build-push-action@f9f3042f7e2789586610d6e8b85c8f03e5195baf # v7.2.0 + with: + # Repo root: the combined image needs both frontend/ and backend/. + context: . + file: Dockerfile + platforms: linux/amd64 + push: true + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} + cache-from: type=gha + cache-to: type=gha,mode=max diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 00000000..2f46dd3d --- /dev/null +++ b/Dockerfile @@ -0,0 +1,42 @@ +# Single-container production image: the Hono backend serves both the built +# frontend (frontend/dist) and the /api/* routes. Build context is the repo +# ROOT (it needs both frontend/ and backend/). +# +# Local dev is unaffected — developers still run the Vite dev-server and +# `npm run dev` separately. This image is a deploy concern only. + +# 1) Build the frontend -> /frontend/dist (Vite). 
Includes the Google Docs +# sidebar bundle (dist/google-docs.bundle.js), same as the old frontend image. +FROM node:24-slim AS frontend +WORKDIR /frontend +COPY frontend/package.json frontend/package-lock.json ./ +RUN npm ci +COPY frontend/ ./ +RUN npm run build && npm run build:google-docs + +# 2) Build the backend -> /app/backend/dist (tsc). +FROM node:24-slim AS backend +WORKDIR /app/backend +COPY backend/package.json backend/package-lock.json* ./ +RUN npm install +COPY backend/tsconfig.json ./ +COPY backend/src ./src +RUN npm run build + +# 3) Runtime: Node serving the compiled backend + the frontend as static files. +FROM node:24-slim AS run +WORKDIR /app/backend +ENV NODE_ENV=production +# STATIC_ROOT (backend/src/static.ts) and LOG_DIR default to ./public and +# ./logs; compose sets LOG_DIR to the mounted data volume. +COPY backend/package.json backend/package-lock.json* ./ +RUN npm install --omit=dev +COPY --from=backend /app/backend/dist ./dist +# Frontend build output becomes the static root (STATIC_ROOT=./public). +COPY --from=frontend /frontend/dist ./public +# The Apps Script sidebar loads the Google Docs bundle by absolute URL at +# /gdocs/google-docs.bundle.js (its PROD_BASE points there), so expose it at +# that path in addition to the dist root. 
+COPY --from=frontend /frontend/dist/google-docs.bundle.js ./public/gdocs/google-docs.bundle.js +EXPOSE 5000 +CMD ["node", "dist/index.js"] diff --git a/backend/Dockerfile b/backend/Dockerfile deleted file mode 100644 index 7f16da96..00000000 --- a/backend/Dockerfile +++ /dev/null @@ -1,22 +0,0 @@ -# Build stage -FROM node:24-slim AS build -WORKDIR /app/backend -COPY package.json package-lock.json* ./ -RUN npm install -COPY tsconfig.json ./ -COPY src ./src -RUN npm run build - -# Runtime stage -FROM node:24-slim AS run -WORKDIR /app/backend -ENV NODE_ENV=production -COPY package.json package-lock.json* ./ -RUN npm install --omit=dev -COPY --from=build /app/backend/dist ./dist - -# Study logs are written here; in deployment this path is a mounted volume. -RUN mkdir -p logs - -EXPOSE 5000 -CMD ["node", "dist/index.js"] diff --git a/backend/src/index.ts b/backend/src/index.ts index bffeedfd..02b19cb6 100644 --- a/backend/src/index.ts +++ b/backend/src/index.ts @@ -11,6 +11,7 @@ import { PORT, } from './config.js'; import { shutdownPosthog } from './posthog.js'; +import { serveFrontend } from './static.js'; if (!openaiApiKey()) { console.warn('OPENAI_API_KEY is not set; /api/openai/* requests will fail.'); @@ -66,6 +67,11 @@ if (auth && DEBUG) { } +// Serve the built frontend (production single-container deploy). Registered +// last so every /api/* route above — including the dynamic device/debug ones — +// takes precedence. No-op in local dev, where the static root doesn't exist. 
+serveFrontend(app); + const server = serve( { fetch: app.fetch, port: PORT, hostname: '0.0.0.0' }, (info) => console.log(`Backend listening on ${info.address}:${info.port}`), diff --git a/backend/src/static.ts b/backend/src/static.ts new file mode 100644 index 00000000..08b7b464 --- /dev/null +++ b/backend/src/static.ts @@ -0,0 +1,44 @@ +import { existsSync } from 'node:fs'; +import { serveStatic } from '@hono/node-server/serve-static'; +import type { Context, Hono } from 'hono'; + +// The built frontend (`frontend/dist`) is copied here in the production image +// (see the repo-root Dockerfile). `serveStatic` resolves this relative to the +// process cwd, which is /app/backend in the container. In local dev this +// directory doesn't exist and frontend serving is skipped entirely. +const STATIC_ROOT = process.env.STATIC_ROOT ?? './public'; + +// Vite emits content-hashed bundles and assets as `name-<hash>.<ext>` (the hash +// is 8 base64url chars: A-Za-z0-9_-), all under assets/. Those filenames change +// every build, so they're safe to cache forever. Note this is NOT webpack's +// `name.<hash>.<ext>` convention the old nginx.conf matched — that regex never matched a +// single Vite asset, silently downgrading them to the short cache below. +const HASHED = + /-[A-Za-z0-9_-]{8,}\.(js|css|png|jpg|jpeg|gif|svg|ico|woff|woff2|ttf|webp)$/; +const ASSET = /\.(js|css|png|jpg|jpeg|gif|svg|ico|woff|woff2|ttf|webp)$/; + +function setCacheHeaders(path: string, c: Context): void { + if (path.endsWith('.html') || path.endsWith('manifest.xml')) { + // CRITICAL correctness rule: HTML entry points reference hashed bundles + // by name, so a cached HTML pins clients to old bundles after a deploy. + // manifest.xml is fetched by Office and must stay fresh too. With hashed + // filenames this no-store is the *only* cache rule that affects + // correctness; the immutable/short rules below are pure optimization. 
+ c.header('Cache-Control', 'no-store, must-revalidate'); + // Office silently refuses a manifest served as the wrong type. + if (path.endsWith('manifest.xml')) c.header('Content-Type', 'application/xml'); + } else if (HASHED.test(path)) { + c.header('Cache-Control', 'public, max-age=31536000, immutable'); + } else if (ASSET.test(path)) { + c.header('Cache-Control', 'public, max-age=3600, must-revalidate'); + } +} + +// Register static serving for the built frontend. Call this AFTER every /api/* +// route is registered so the API always wins; unmatched non-API GETs fall +// through to a 404 (the frontend is a multi-page app — there is no SPA +// index.html fallback). `serveStatic` serves index.html for `/`. +export function serveFrontend(app: Hono): void { + if (!existsSync(STATIC_ROOT)) return; + app.get('*', serveStatic({ root: STATIC_ROOT, onFound: setCacheHeaders })); +} diff --git a/docker-compose-dev.yml b/docker-compose-dev.yml index da48896c..23c92ca5 100644 --- a/docker-compose-dev.yml +++ b/docker-compose-dev.yml @@ -1,20 +1,15 @@ services: - frontend: - ports: - - "5001:80" - environment: - - NODE_ENV=production - + # Combined container takes over the old frontend public port (5001). backend: ports: - - "5000:5000" + - "5001:5000" environment: - PORT=5000 - DEBUG=True - OPENAI_API_KEY=${OPENAI_API_KEY} - LOG_SECRET=${LOG_SECRET:-} volumes: - - ./backend/logs:/app/backend/logs + # Single persistent volume: holds auth.db and logs/ (LOG_DIR points here). - ./backend/data:/app/backend/data experiment: diff --git a/docker-compose-prod.yml b/docker-compose-prod.yml index 0fa3642e..fe770ddd 100644 --- a/docker-compose-prod.yml +++ b/docker-compose-prod.yml @@ -1,12 +1,8 @@ services: - frontend: - ports: - - "19571:80" - environment: - - NODE_ENV=production - + # Combined container takes over the old frontend public port (19571). 
backend: - ports: [] + ports: + - "19571:5000" environment: - PORT=5000 - DEBUG=False @@ -14,8 +10,11 @@ services: - POSTHOG_PROJECT_TOKEN=${POSTHOG_PROJECT_TOKEN:-} - POSTHOG_HOST=${POSTHOG_HOST:-https://e.thoughtful-ai.com/} volumes: - - /opt/thoughtful/logs:/app/backend/logs - - /opt/thoughtful/auth:/app/backend/data + # Single persistent volume (auth.db + logs/). One-time host migration: + # mkdir -p /opt/thoughtful/data/logs + # mv /opt/thoughtful/auth/auth.db /opt/thoughtful/data/ + # mv /opt/thoughtful/logs/* /opt/thoughtful/data/logs/ + - /opt/thoughtful/data:/app/backend/data experiment: ports: diff --git a/docker-compose-staging.yml b/docker-compose-staging.yml index 93c491fb..e661a830 100644 --- a/docker-compose-staging.yml +++ b/docker-compose-staging.yml @@ -1,12 +1,8 @@ services: - frontend: - ports: - - "19573:80" - environment: - - NODE_ENV=production - + # Combined container takes over the old frontend public port (19573). backend: - ports: [] + ports: + - "19573:5000" environment: - PORT=5000 - DEBUG=False @@ -14,8 +10,11 @@ services: - POSTHOG_PROJECT_TOKEN=${POSTHOG_PROJECT_TOKEN:-} - POSTHOG_HOST=${POSTHOG_HOST:-https://e.thoughtful-ai.com/} volumes: - - /opt/thoughtful/staging-logs:/app/backend/logs - - /opt/thoughtful/staging-auth:/app/backend/data + # Single persistent volume (auth.db + logs/). 
One-time host migration: + # mkdir -p /opt/thoughtful/staging-data/logs + # mv /opt/thoughtful/staging-auth/auth.db /opt/thoughtful/staging-data/ + # mv /opt/thoughtful/staging-logs/* /opt/thoughtful/staging-data/logs/ + - /opt/thoughtful/staging-data:/app/backend/data experiment: ports: diff --git a/docker-compose.yml b/docker-compose.yml index af50db14..3b305256 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,18 +1,17 @@ services: - frontend: - build: - context: ./frontend - dockerfile: Dockerfile - depends_on: - - backend - restart: unless-stopped - + # Single public web container: the Hono backend serves both the built frontend + # (frontend/dist) and the /api/* routes. The old separate nginx `frontend` + # service is gone; the combined image is built from the repo-root Dockerfile. backend: build: - context: ./backend + context: . dockerfile: Dockerfile environment: - PORT=5000 + # Persist study logs and the auth SQLite DB under one mounted volume + # (/app/backend/data). auth.db already lives there; LOG_DIR moves logs + # under it too, so a single volume covers all persistent state. + - LOG_DIR=/app/backend/data/logs - OPENAI_API_KEY=${OPENAI_API_KEY} - LOG_SECRET=${LOG_SECRET} - POSTHOG_PROJECT_TOKEN=${POSTHOG_PROJECT_TOKEN:-} diff --git a/docs/single-container-consolidation.md b/docs/single-container-consolidation.md index 65e787c4..3a508dd1 100644 --- a/docs/single-container-consolidation.md +++ b/docs/single-container-consolidation.md @@ -1,248 +1,235 @@ -# Single-Container Consolidation Spec +# Single-Container Consolidation + +**Status:** implemented (branch `single-container-consolidation`). This document +describes the design as built and the one-time deploy migration it requires. **Goal:** collapse the two production containers (nginx serving `frontend/dist` + reverse-proxying `/api/`, and the Hono backend) into **one container** where the Hono server serves both the built frontend static files **and** the `/api/*` routes. 
-This is a deployment/topology change only. It does **not** change app behavior, the -API, the frontend source, or the local dev workflow. Write it on a branch layered on -`claude/backend-hono-migration-QE061` (the FastAPI→Hono migration), or after it merges. - -This document is the complete brief. Follow it precisely — the cache-header rules in -particular are easy to get subtly wrong and will pin users to stale builds if botched. +This is a deployment/topology change. App behaviour, the API, and the local dev +workflow are unchanged. The cache-header rules are easy to get subtly wrong, so §4 +explains exactly which rule actually matters. --- -## 1. Current state (before) +## 1. Before -- `frontend` container: nginx (`frontend/Dockerfile`, `frontend/nginx.conf`) serves - `frontend/dist` with tiered caching + SPA fallback, and proxies `location /api/` to - `http://backend:5000`. Public web entry. Exposed ports: prod `19571:80`, staging - `19573:80`, dev `5001:80`. -- `backend` container: Hono on Node (`backend/`), listens on `5000`. Not publicly - exposed in prod/staging (only reachable via nginx). -- `experiment` container: separate Next.js app. **Leave it completely untouched.** -- Orchestration: `docker-compose.yml` (base) + `docker-compose-{dev,staging,prod}.yml` - overrides, built/deployed by `Jenkinsfile`. +- `frontend` container: nginx (`frontend/Dockerfile`, `frontend/nginx.conf`) served + `frontend/dist` and proxied `location /api/` to `http://backend:5000`. Public web + entry. Ports: prod `19571:80`, staging `19573:80`, dev `5001:80`. +- `backend` container: Hono on Node, listens on `5000`. Not publicly exposed in + prod/staging (only reachable via nginx). +- `experiment` container: separate Next.js app. **Left completely untouched.** +- Two persistent volumes on the backend: study **logs** and the auth **SQLite DB**. 
+- Orchestration: `docker-compose.yml` + `docker-compose-{dev,staging,prod}.yml`, + built/deployed by `Jenkinsfile`; new GitHub Actions build runs in parallel (§9). -## 2. Target state (after) +## 2. After - **One** web container = the Hono server, serving `frontend/dist` + `/api/*`. It is the public entry point and takes over the frontend's external ports. - `frontend` service removed from compose. `frontend/Dockerfile` and `frontend/nginx.conf` deleted (frontend **source** stays — it's still built). +- **One** persistent volume per environment (§7). - `experiment` unchanged. -## 3. Frontend build facts you must rely on +## 3. Frontend build facts (Vite — `npm run build` in `frontend/`) + +The frontend migrated from webpack to **Vite** (`frontend/vite.config.ts`). The build +that the image relies on: + +- Output directory: **`frontend/dist`** (`build.outDir`). +- **Content-hashed bundles land under `dist/assets/`** as `name-<hash>.<ext>`, where + `<hash>` is **8 base64url chars** (`A-Za-z0-9_-`), e.g. `index-CFpNvIDr.js`, + `editor-C-cTUUZm.css`, `c1-BBI24REH.png`. This is **not** webpack's `name.<hash>.ext` + convention — see the cache-rule warning in §4. +- HTML entry files emitted to `dist/` (fixed names, **not** hashed): `index.html`, + `taskpane.html`, `editor.html`, `logs.html`, `popup.html`, `commands.html`. The app + is a **multi-page app** (`appType: 'mpa'`) — there is **no** SPA index.html fallback. +- `publicDir: 'public'` is copied verbatim into `dist/`: `manifest.xml`, the public + site HTML (`privacypolicy.html`, `support.html`, `longDescription.html`, + `seniorProject2024.html`), `styles.css`, and all of `public/assets/*` (non-hashed + images like `logo.png`, `calvin-logo.webp`, `slides.svg`). +- `manifest.xml` is fetched by Office and must be served as `application/xml` (or + `text/xml`) and **not** cached. A `closeBundle` plugin in `vite.config.ts` rewrites + it for production (strips `-dev`, swaps the dev id/URL for the prod ones). 
+- **Google Docs bundle:** `npm run build:google-docs` + (`vite.google-docs.config.ts`) emits a single self-contained IIFE, + `dist/google-docs.bundle.js`, that the Apps Script sidebar loads by absolute URL. + The image serves it at **`/gdocs/google-docs.bundle.js`** (the sidebar's `PROD_BASE`) + as well as the dist root. + +> **Bug fixed during this work:** the google-docs lib build (run after `vite build` +> into the same `dist/` with `emptyOutDir: false`) defaulted to `publicDir: 'public'` +> and therefore re-copied the **raw** `public/manifest.xml` over the prod-transformed +> one — shipping a `localhost:3000` / `-dev` manifest. The old nginx image had this +> bug too. `vite.google-docs.config.ts` now sets `publicDir: false`. Verified: the +> built image serves the prod manifest (`app.thoughtful-ai.com`, prod id `…2508`). + +## 4. Cache-header rules (the one that matters) + +With content-hashed filenames, the **only correctness-critical rule is: never cache +HTML or `manifest.xml`.** If an HTML entry point is cached, clients keep referencing +old hashed bundles after a deploy and the app breaks. The immutable/short rules below +are pure performance — getting them slightly wrong cannot pin anyone to a stale build +(the filename changes every build). + +Implemented in `backend/src/static.ts` via `serveStatic`'s `onFound` hook: -From `frontend/webpack.config.js` (production: `npm run build` in `frontend/`): +| File class | Match | `Cache-Control` | +|---|---|---| +| **`*.html` and `manifest.xml`** | ends with `.html` / `manifest.xml` | `no-store, must-revalidate` (**critical**) | +| Content-hashed assets | `-<8+ base64url chars>.` (js/css/png/jpg/jpeg/gif/svg/ico/woff/woff2/ttf/webp) | `public, max-age=31536000, immutable` | +| Other static assets | same exts, no hash segment | `public, max-age=3600, must-revalidate` | -- Output directory: **`frontend/dist`** (webpack default; the old FastAPI server - served `../frontend/dist`). 
-- Hashed bundles: `[name].[contenthash].js` and `[name].[contenthash].css` - (contenthash is ~20 lowercase hex chars). Images/fonts: `assets/[name].[contenthash][ext]`. -- HTML entry files emitted to `dist/` (fixed names, **not** content-hashed): - `taskpane.html`, `editor.html`, `logs.html`, `popup.html`, `commands.html`. -- Copied verbatim into `dist/` via CopyWebpackPlugin: everything in `src/static/*` - (**including `index.html`**, plus `privacypolicy.html`, `support.html`, - `longDescription.html`, `seniorProject2024.html`), all of `assets/*`, and - `manifest.xml` (with prod string transforms applied). -- `manifest.xml` (Office add-in manifest) is fetched by Office and must be served with - `Content-Type: application/xml` (or `text/xml`) and **not** cached. +```ts +const HASHED = /-[A-Za-z0-9_-]{8,}\.(js|css|png|jpg|jpeg|gif|svg|ico|woff|woff2|ttf|webp)$/; +const ASSET = /\.(js|css|png|jpg|jpeg|gif|svg|ico|woff|woff2|ttf|webp)$/; +``` -## 4. Cache-header rules to replicate (THE critical part) +> The deleted `nginx.conf` matched hashes with `\.[a-f0-9]{8,}\.` — webpack's hex +> convention. That regex matched **zero** Vite assets, so every hashed bundle silently +> got the 1-hour rule instead of `immutable`. The new rule uses Vite's `-<hash>.<ext>` +> shape. (No `public/assets/*` filename matches it, so there are no false positives.) +> +> Compression: nginx had no `gzip on;`, so nothing to match — none added. Never +> compress the SSE response if compression is added later. -## 5. 
Serving logic (Hono) -| File class | Match | `Cache-Control` | -|---|---|---| -| Content-hashed assets | filename contains `.<8+ hex>.` then a static ext (js/css/png/jpg/jpeg/gif/ico/woff/woff2/ttf) | `public, max-age=31536000, immutable` | -| Other static assets (non-hashed js/css/img/fonts) | same exts but no hash segment | `public, max-age=3600, must-revalidate` | -| **All `*.html`** | `.html` | `no-store, must-revalidate` | -| `manifest.xml` | `manifest.xml` | `no-store, must-revalidate` | +`backend/src/static.ts` exports `serveFrontend(app)`. `backend/src/index.ts` calls it +**after every `/api/*` route is registered — including the dynamically-added +`/api/device` and `/api/debug/*` routes**, so the API always wins. Registration order: + +1. All `/api/*` routes (unchanged), incl. the dynamic device/debug ones in `index.ts`. +2. `serveFrontend(app)` → `app.get('*', serveStatic({ root: STATIC_ROOT, onFound }))`. + +Notes / gotchas (all verified against the running image): +- `STATIC_ROOT` defaults to `./public`, resolved against the container WORKDIR + `/app/backend`. `serveFrontend` is a **no-op when that directory is absent** (local + dev), so the backend stays quiet there. +- `serveStatic` serves `index.html` for `/`, sets correct `Content-Type` (incl. + `application/xml` for the manifest, also forced in `onFound`), and on a miss falls + through to a **404** — correct for an MPA (no SPA fallback). `/api/does-not-exist` + → 404, not index.html. +- The OpenAI SSE route returns the upstream `Response` body directly; static serving + (GET `*`, registered after) never wraps the POST proxy. Tokens stream live (no nginx + buffering in the path anymore). + +## 6. Combined Dockerfile (repo root) + +`Dockerfile` at the **repo root** (build context must be the root — the image needs +both `frontend/` and `backend/`). Three stages: + +1. `frontend` — `npm ci` + `npm run build && npm run build:google-docs` → `/frontend/dist`. +2. 
`backend` — `npm install` + `npm run build` (tsc) → `/app/backend/dist`. +3. `run` — `node:24-slim`, `npm install --omit=dev`, then copies: + - `--from=backend dist → ./dist` + - `--from=frontend /frontend/dist → ./public` (the static root) + - `--from=frontend …/google-docs.bundle.js → ./public/gdocs/google-docs.bundle.js` + +WORKDIR stays `/app/backend`. A root **`.dockerignore`** keeps the context small +(excludes `node_modules`, `.venv`, `experiment/`, `own-words/`, `backend/data`, +`backend/logs`, etc.). `EXPOSE 5000`; `CMD ["node", "dist/index.js"]`. The runtime +`PORT` defaults to **8000** in code, so compose **must** set `PORT=5000`. + +## 7. Persistent storage — single volume + +Both pieces of persistent state now live under **one** mounted directory, +`/app/backend/data`: +- `auth.db` — already at `/app/backend/data/auth.db` (fixed path in `auth.ts`). +- study logs — moved here via `LOG_DIR=/app/backend/data/logs` (set in compose base). + `logging.ts` already `mkdir -p`s the dir on first write and tolerates its absence on + read, so a fresh empty volume just works (verified). + +Compose volume mounts (each environment, single line): +- dev: `./backend/data:/app/backend/data` +- staging: `/opt/thoughtful/staging-data:/app/backend/data` +- prod: `/opt/thoughtful/data:/app/backend/data` + +**One-time host migration (staging/prod), before first deploy of this change:** + +```sh +# prod (staging: swap the paths for staging-data / staging-logs / staging-auth) +mkdir -p /opt/thoughtful/data/logs +mv /opt/thoughtful/auth/auth.db /opt/thoughtful/data/ +mv /opt/thoughtful/logs/* /opt/thoughtful/data/logs/ +``` -> nginx today only `no-store`s `index.html` and leaves the other entry HTMLs on default -> heuristic caching. That is a latent bug — `taskpane.html` etc. reference hashed -> bundles and must never be stale. **In the consolidated server, `no-store` ALL -> `*.html`.** This is intentionally stricter than nginx and is correct. +## 8. 
docker-compose changes -## 5. Serving logic (Hono) +- **`docker-compose.yml`**: deleted the `frontend` service; pointed `backend.build` at + the root `Dockerfile` (`context: .`); added `LOG_DIR=/app/backend/data/logs`. Kept + all existing env (OpenAI/PostHog/Better-Auth). `experiment` untouched. +- Overrides — move the old frontend public port onto `backend`, single data volume: + - dev: `backend.ports: ["5001:5000"]`, volume `./backend/data:/app/backend/data`. + - staging: `backend.ports: ["19573:5000"]`, volume `…/staging-data:/app/backend/data`. + - prod: `backend.ports: ["19571:5000"]`, volume `…/data:/app/backend/data`. +- `depends_on: backend` removed with the frontend service. -Order matters. Register in this exact order in `backend/src/app.ts` (or a new -`static.ts` mounted after the API routes): +## 9. CI/CD -1. **All existing `/api/*` routes first** (unchanged). -2. **Static file serving** for the built frontend, with the cache headers above. -3. **SPA fallback last**: for any GET that didn't match a file or an `/api` route, - return `index.html` (this is nginx's `try_files $uri $uri/ /index.html`). +- **GitHub Actions (new):** `.github/workflows/build-addin-image.yml` builds the + combined image from the root `Dockerfile` and pushes to + `ghcr.io/aitoolslab/writing-tools-addin`, SHA-tagged + `latest`, on push to `main` + (paths: `frontend/**`, `backend/**`, `Dockerfile`, `.dockerignore`, the workflow). + Mirrors `build-experiment-image.yml`; CD (Infrastructure_k8s_* repo) pins the SHA. +- **Jenkins (kept for now):** `Jenkinsfile` still runs `docker compose … build`/`up` + and keeps working with the consolidated compose — it builds the combined image and + brings up one fewer service. Runs **in parallel** with GHA during the deployment + migration; remove it once CD fully moves to the GHCR image. +- Other workflows unchanged: `add-in.yml` (lint), `frontend-tests.yml` (Vitest + + Playwright), `build-experiment-image.yml`. 
-Use `serveStatic` from `@hono/node-server/serve-static` (already the runtime; no new -dep). Set its `root` to wherever `dist` is copied in the image (see §6 — recommend -`./public` relative to the container WORKDIR `/app/backend`). Set cache headers via the -`onFound` hook, and add a final fallback handler: +## 10. Deleted files -```ts -import { serveStatic } from '@hono/node-server/serve-static'; -import { readFile } from 'node:fs/promises'; - -const STATIC_ROOT = process.env.STATIC_ROOT ?? './public'; // dist copied here - -const HASHED = /\.[a-f0-9]{8,}\.(js|css|png|jpg|jpeg|gif|ico|woff|woff2|ttf)$/; -const ASSET = /\.(js|css|png|jpg|jpeg|gif|ico|woff|woff2|ttf)$/; - -function setCacheHeaders(path: string, c: Context) { - if (path.endsWith('.html') || path.endsWith('manifest.xml')) { - c.header('Cache-Control', 'no-store, must-revalidate'); - } else if (HASHED.test(path)) { - c.header('Cache-Control', 'public, max-age=31536000, immutable'); - } else if (ASSET.test(path)) { - c.header('Cache-Control', 'public, max-age=3600, must-revalidate'); - } -} - -// AFTER all app.post('/api/...') / app.get('/api/...') routes: -app.get( - '*', - serveStatic({ root: STATIC_ROOT, onFound: (path, c) => setCacheHeaders(path, c) }), -); - -// SPA fallback: serve index.html for unmatched non-API GETs. -app.get('*', async (c) => { - if (c.req.path.startsWith('/api/')) return c.notFound(); - const html = await readFile(`${STATIC_ROOT}/index.html`, 'utf8'); - c.header('Cache-Control', 'no-store, must-revalidate'); - return c.html(html); -}); -``` +- `frontend/Dockerfile`, `frontend/nginx.conf` (no nginx in the path; Hono/Node does + not buffer the SSE response). +- `backend/Dockerfile` — orphaned by the consolidation. The root `Dockerfile`'s + `backend` stage replicates it, and compose's `backend.build` now points at the root + Dockerfile, so nothing referenced it anymore. + +## 11. 
Local dev is unchanged + +Developers still run `frontend/` `npm run dev-server` (Vite dev-server on :3000 with +its `/api` → backend proxy) and the backend's `npm run dev` (:8000). The single +container is a production/deploy concern only; do not wire the dev-server through it. + +## 12. Verification (run against the built image) + +Build & run locally (note `PORT=5000`): -Notes / gotchas: -- `serveStatic`'s `root` is resolved relative to `process.cwd()` (= `/app/backend` in - the container). Confirm the path; a wrong root silently 404s every asset. -- Verify `serveStatic` sets correct `Content-Type` for `.js`/`.css`/`.html`/`.xml`. If - `manifest.xml` comes back as `application/octet-stream`, set it explicitly in `onFound`. -- Keep the OpenAI SSE route untouched; serving static must not wrap or buffer it. -- Compression: nginx currently has **no `gzip on;`**, so there's nothing to match — - do **not** add compression to reach parity. (Optional later: `hono/compress` or a - webpack pre-compression plugin. Never compress the SSE response.) - -## 6. Combined Dockerfile - -The image must build the frontend (webpack) and the backend (tsc), then run Node with -`dist` available as static files. Because it needs both `frontend/` and `backend/`, the -build context must be the **repo root**. 
Create a root-level `Dockerfile`: - -```dockerfile -# 1) Build frontend -> /frontend/dist -FROM node:24-slim AS frontend -WORKDIR /frontend -COPY frontend/package.json frontend/package-lock.json* ./ -RUN npm install -COPY frontend/ ./ -RUN npm run build # outputs /frontend/dist - -# 2) Build backend -> /app/backend/dist -FROM node:24-slim AS backend -WORKDIR /app/backend -COPY backend/package.json backend/package-lock.json* ./ -RUN npm install -COPY backend/tsconfig.json ./ -COPY backend/src ./src -RUN npm run build - -# 3) Runtime -FROM node:24-slim AS run -WORKDIR /app/backend -ENV NODE_ENV=production -COPY backend/package.json backend/package-lock.json* ./ -RUN npm install --omit=dev -COPY --from=backend /app/backend/dist ./dist -COPY --from=frontend /frontend/dist ./public # static root (STATIC_ROOT=./public) -RUN mkdir -p logs # study-log volume mount target -EXPOSE 5000 -CMD ["node", "dist/index.js"] +```sh +docker build -t writing-tools-addin:test -f Dockerfile . +docker run --rm -p 5099:5000 -e PORT=5000 -e LOG_DIR=/app/backend/data/logs writing-tools-addin:test ``` -- Keep WORKDIR `/app/backend` so the existing log volume mount `/app/backend/logs` - stays valid (do not change the compose volume targets). -- If `frontend/webpack.config.js` needs HTTPS cert generation only for dev-server, the - production `npm run build` path must not require it (it uses `env.WEBPACK_BUILD`). - Confirm `npm run build` works headless in the image; if it tries to read dev certs, - pass `--env WEBPACK_BUILD` or adjust. - -## 7. docker-compose changes - -In `docker-compose.yml` (base): -- **Delete the `frontend` service.** -- Point the `backend` service build at the new root Dockerfile: - ```yaml - backend: - build: - context: . - dockerfile: Dockerfile - ``` -- Keep its env (`PORT=5000`, `OPENAI_API_KEY`, `LOG_SECRET`, `POSTHOG_*`). 
- -In the override files, move the public port from the old frontend service onto -`backend`, and drop the frontend block: -- `docker-compose-prod.yml`: `backend.ports: ["19571:5000"]` (was frontend `19571:80`). - Keep `volumes: /opt/thoughtful/logs:/app/backend/logs`. -- `docker-compose-staging.yml`: `backend.ports: ["19573:5000"]`. Keep - `/opt/thoughtful/staging-logs:/app/backend/logs`. -- `docker-compose-dev.yml`: `backend.ports: ["5001:5000"]` (replaces frontend `5001:80` - and the old `5000:5000`). Keep `./backend/logs:/app/backend/logs`. -- Remove `depends_on: backend` (the frontend service that had it is gone). -- Leave the `experiment` service and its ports/volumes exactly as-is. - -## 8. Files to delete - -- `frontend/Dockerfile` -- `frontend/nginx.conf` -(The `proxy_buffering off` SSE tweak in nginx.conf becomes irrelevant — there is no -nginx in the path anymore. Hono/Node does not buffer the streamed response.) - -## 9. Jenkins - -`Jenkinsfile` only runs `docker compose ... build` / `up`, so it keeps working with no -edits. Sanity-check: the combined image is bigger and the build now compiles the -frontend inside the backend image — confirm build time/host disk are acceptable. The -commented-out `Test`/`Lint` stages reference the old Python backend; ignore them. - -## 10. Local dev is unchanged - -Developers still run `frontend/`'s `npm run dev-server` (webpack dev-server on :3000 -with its `/api` → backend proxy) and the backend's `npm run dev` (:8000). The -single-container static serving is a production/deploy concern only. Do not wire the -dev-server through the container. - -## 11. Verification - -Build and run the combined image locally (compose dev), then: - -1. `curl -i http://localhost:5001/api/ping` → 200 `{ "timestamp": ... }`. -2. `curl -i http://localhost:5001/` → `index.html`, header - `Cache-Control: no-store, must-revalidate`. -3. `curl -i http://localhost:5001/taskpane.html` → 200, `no-store`. -4. 
`curl -i` a hashed bundle (grab a real filename from `dist`, e.g. - `/taskpane.[hash].js`) → `Cache-Control: public, max-age=31536000, immutable`, - correct `Content-Type: application/javascript`. -5. `curl -i http://localhost:5001/manifest.xml` → XML content-type, `no-store`. -6. `curl -i http://localhost:5001/some/spa/route` (no file) → returns `index.html` - (200), **not** a 404; and `/api/does-not-exist` returns 404, **not** index.html. -7. Exercise Draft/Revise/Chat against this single origin and confirm SSE tokens stream - live (no nginx now; the OpenAI proxy must still flush incrementally). -8. Load the add-in in Word against the deployed origin and confirm `manifest.xml`, - `taskpane.html`, and assets all load. - -## 12. Failure modes to watch - -- **Stale builds:** any `*.html` served with caching → users keep old hashed bundles - and the app breaks after a deploy. `no-store` all HTML. -- **Fallback shadowing the API:** if the `*` static/fallback is registered before the - `/api` routes, API calls return `index.html`. API routes must come first; the - fallback must exclude `/api/`. -- **Wrong static root:** `serveStatic({ root })` resolved against the wrong cwd → every - asset 404s. Verify against `/app/backend/public`. +Checked (all passing): +1. `GET /api/ping` → 200 `{ "timestamp": … }`. +2. `GET /` → `index.html`, `Cache-Control: no-store, must-revalidate`. +3. `GET /taskpane.html` → 200, `no-store`. +4. `GET /assets/[name]-[hash].js` → `public, max-age=31536000, immutable`, JS type. +5. `GET /manifest.xml` → `application/xml`, `no-store`, **prod-transformed** + (`app.thoughtful-ai.com`, id `…2508`, no `-dev`). +6. `GET /some/unknown/route` → **404** (MPA); `GET /api/does-not-exist` → 404. +7. `GET /gdocs/google-docs.bundle.js` → 200. +8. `POST /api/log` writes to `/app/backend/data/logs/[username].jsonl` (single volume). +9. Backend `npm test` (13 passing) and frontend `npm run test:build` green. 
+ +Still to validate in a real deploy: SSE streaming end-to-end with a live +`OPENAI_API_KEY`, and loading the add-in in Word against the deployed origin. + +## 13. Failure modes to watch + +- **Stale builds:** any `*.html` served with caching → users keep old hashed bundles. + `no-store` all HTML + manifest. (The only correctness-critical rule.) +- **`PORT` unset:** the server defaults to 8000; compose must set `PORT=5000` or the + published port maps to nothing. +- **Fallback shadowing the API:** static `*` must register **after** all `/api/*` + routes, including the dynamic device/debug ones in `index.ts`. +- **Manifest clobbered by the google-docs build:** keep `publicDir: false` in + `vite.google-docs.config.ts` (see §3). +- **Forgotten host migration:** without §7's `mv`, the new single volume starts empty + — auth sessions and prior logs appear to vanish. - **manifest MIME:** Office silently refuses a manifest served as the wrong type. -- **Build context too narrow:** the backend image can't see `frontend/` unless the - build context is the repo root. diff --git a/frontend/Dockerfile b/frontend/Dockerfile deleted file mode 100644 index 8f2a9d0e..00000000 --- a/frontend/Dockerfile +++ /dev/null @@ -1,31 +0,0 @@ -# Build stage -FROM node:24-slim AS build - -WORKDIR /app - -COPY package.json package-lock.json ./ - -RUN npm ci - -COPY . ./ - -RUN npm run build && npm run build:google-docs && ls -la - -# Production stage -FROM nginx:alpine - -COPY --from=build /app/dist /usr/share/nginx/html -# Google Docs sidebar bundle, served at /gdocs/google-docs.bundle.js so the -# Apps Script sidebar can load it via an absolute URL (it can't run a 1.5MB -# inline script). The sidebar's PROD_BASE points here. The Vite build emits -# it into dist/, so copy that single file into the /gdocs/ path. 
-COPY --from=build /app/dist/google-docs.bundle.js /usr/share/nginx/html/gdocs/google-docs.bundle.js - -COPY nginx.conf /etc/nginx/conf.d/default.conf - -EXPOSE 80 - -# Fails because backend doesn't exist at image-build time -#RUN ["nginx", "-t"] - -CMD ["nginx", "-g", "daemon off;"] diff --git a/frontend/nginx.conf b/frontend/nginx.conf deleted file mode 100644 index 2aecfb55..00000000 --- a/frontend/nginx.conf +++ /dev/null @@ -1,45 +0,0 @@ -server { - listen 80; - server_name localhost; - - - # Serve static assets with long cache lifetime ONLY if filename contains an 8+ hex hash (e.g., .[a-f0-9]{8,}.) - location ~* "\.[a-f0-9]{8,}\.(js|css|png|jpg|jpeg|gif|ico|woff|woff2|ttf)$" { - root /usr/share/nginx/html; - add_header Cache-Control "public, max-age=31536000, immutable"; - try_files $uri =404; - } - - # Serve other static assets with a short cache lifetime (1 hour) - location ~* "\.(js|css|png|jpg|jpeg|gif|ico|woff|woff2|ttf)$" { - root /usr/share/nginx/html; - add_header Cache-Control "public, max-age=3600, must-revalidate"; - try_files $uri =404; - } - - # Never cache index.html (or main HTML entrypoint) - location = /index.html { - root /usr/share/nginx/html; - add_header Cache-Control "no-store, must-revalidate"; - try_files $uri =404; - } - - # Fallback for all other routes (SPA) - location / { - root /usr/share/nginx/html; - try_files $uri $uri/ /index.html; - } - -location /api/ { - proxy_pass http://backend:5000; - proxy_http_version 1.1; - proxy_set_header Upgrade $http_upgrade; - proxy_set_header Connection 'upgrade'; - proxy_set_header Host $host; - proxy_cache_bypass $http_upgrade; - - # Stream SSE (OpenAI proxy) through without buffering so tokens arrive live. 
- proxy_buffering off; - proxy_cache off; -} -} \ No newline at end of file diff --git a/frontend/vite.google-docs.config.ts b/frontend/vite.google-docs.config.ts index a8d910c3..9b8dadf4 100644 --- a/frontend/vite.google-docs.config.ts +++ b/frontend/vite.google-docs.config.ts @@ -16,6 +16,12 @@ export default defineConfig(({ mode }) => { return { plugins: [react()], + // This lib build runs after `vite build` into the same dist/ (emptyOutDir + // is false). Disable publicDir copying so it does NOT re-copy public/ over + // the main build's output — in particular the prod-transformed + // dist/manifest.xml, which would otherwise be clobbered with the raw dev + // manifest (localhost:3000 / -dev id). + publicDir: false, resolve: { alias: { '@': path.resolve(__dirname, './src')