Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
135 changes: 83 additions & 52 deletions pnpm-lock.yaml

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions services/headroom-compress/.dev.vars.example
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
HEADROOM_BEARER_TOKEN=dev-headroom-token
51 changes: 51 additions & 0 deletions services/headroom-compress/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
# Headroom Compress Worker

Worker-gated Cloudflare Container deployment for Headroom compression only.

Public surface:

- `GET /readyz`
- `POST /v1/compress`

All other Headroom routes return `404` at the Worker layer, before the request is forwarded to the container.

Required secret:

```bash
pnpm exec wrangler secrets-store secret create <STORE_ID> --name HEADROOM_BEARER_TOKEN --scopes workers --remote
```

Build and push pinned source image from a native amd64 builder:

```bash
pnpm run container:build
```

Current deployed fallback tag is `0.27.0-ghcr9f5f0de`, mirrored from the
published `v0.27.0` amd64 image digest because local arm64 Docker cannot build
Headroom's amd64 Rust extension under QEMU.

Deploy:

```bash
pnpm run deploy
```

Smoke test:

```bash
curl --fail https://headroom.kiloapps.io/readyz

curl --fail https://headroom.kiloapps.io/v1/compress \
-H "authorization: Bearer $HEADROOM_BEARER_TOKEN" \
-H "content-type: application/json" \
--data '{"model":"kilo/anthropic/claude-sonnet-4.6","messages":[{"role":"user","content":"hello"}],"config":{"compress_user_messages":true}}'
```

Benchmark compression:

```bash
pnpm run benchmark:compression -- --case logs --repeat 3
pnpm run benchmark:compression -- --list-cases
pnpm run benchmark:compression -- --fixture ./messages.json --json --output report.json
```
98 changes: 98 additions & 0 deletions services/headroom-compress/container-build-context/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
# syntax=docker/dockerfile:1.7

# Global build arguments. Each stage that needs one re-declares it with a bare
# `ARG NAME` to bring it into scope.

# Upstream Headroom commit whose source archive the "source" stage downloads.
ARG HEADROOM_REF=da1a3973ed79d89617087ec315e77fb82356c03b
# Platform used by every `FROM --platform=` in this file; defaults to amd64.
ARG TARGETPLATFORM=linux/amd64
# Python version; selects the python:<version>-slim base image tag.
ARG PYTHON_VERSION=3.13
# site-packages directory copied from builder to runtime; derived from
# PYTHON_VERSION, so it must be declared after it.
ARG PYTHON_SITE_PACKAGES=/usr/local/lib/python${PYTHON_VERSION}/site-packages

# Stage "source": download and unpack the Headroom source tree at the commit
# given by HEADROOM_REF into /src.
FROM --platform=${TARGETPLATFORM} alpine:3.22 AS source
# Re-declare the global ARG so it is visible inside this stage.
ARG HEADROOM_REF
RUN apk add --no-cache ca-certificates curl tar
WORKDIR /src
# pipefail makes the RUN fail when curl fails; without it the step's exit
# status is tar's alone. --strip-components=1 drops the archive's top-level
# directory so files land directly in /src.
RUN set -o pipefail && \
    curl -fsSL "https://github.com/headroomlabs-ai/headroom/archive/${HEADROOM_REF}.tar.gz" \
    | tar -xz --strip-components=1

# Stage "builder": install the Headroom Python package (including its native
# headroom._core module) and compile the headroom-proxy binary.
FROM --platform=${TARGETPLATFORM} python:${PYTHON_VERSION}-slim AS builder

ARG PYTHON_SITE_PACKAGES

# C/C++ toolchain plus curl (used to fetch rustup) and patchelf.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        curl \
        g++ \
        patchelf \
    && rm -rf /var/lib/apt/lists/*

# Install Rust system-wide under /usr/local so cargo is on PATH for every
# later RUN in this stage.
ENV CARGO_HOME=/usr/local/cargo \
    RUSTUP_HOME=/usr/local/rustup \
    PATH=/usr/local/cargo/bin:${PATH}
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs \
    | sh -s -- -y --no-modify-path --profile minimal -c rustfmt -c clippy --default-toolchain 1.95.0

# Copy only the files needed for the build out of the fetched source tree.
WORKDIR /build
COPY --from=source /src/pyproject.toml /src/uv.lock /src/README.md ./
COPY --from=source /src/Cargo.toml /src/Cargo.lock /src/rust-toolchain.toml ./
COPY --from=source /src/crates/ crates/
COPY --from=source /src/headroom/ headroom/

# Cargo keeps its registry under CARGO_HOME (/usr/local/cargo), so the cache
# mount must target that path; it is the same mount the cargo build step below
# uses, which lets both steps share downloaded crates.
ARG HEADROOM_EXTRAS=proxy,code
RUN --mount=type=cache,target=/root/.cache/pip \
    --mount=type=cache,target=/usr/local/cargo/registry \
    --mount=type=cache,target=/build/target \
    python -m pip install ".[${HEADROOM_EXTRAS}]"

# Sanity check from /tmp (outside /build) that the installed package exposes
# the native headroom._core module.
RUN cd /tmp && python -c "from headroom._core import DiffCompressor, SmartCrusher; print(f'headroom core OK: {DiffCompressor.__name__}, {SmartCrusher.__name__}')"

# /build/target is a cache mount and is not part of the image layer, so the
# finished binary is copied out to /usr/local/bin within the same RUN.
RUN --mount=type=cache,target=/usr/local/cargo/registry \
    --mount=type=cache,target=/build/target \
    cargo build --release --locked --bin headroom-proxy && \
    cp target/release/headroom-proxy /usr/local/bin/headroom-proxy

# Stage "runtime": final image. It receives the Python site-packages directory
# and the two Headroom executables from the builder stage.
FROM --platform=${TARGETPLATFORM} python:${PYTHON_VERSION}-slim AS runtime

# Account the container runs as and its home directory; PYTHON_SITE_PACKAGES is
# re-declared to bring the global ARG into this stage.
ARG RUNTIME_USER=nonroot
ARG RUNTIME_HOME=/home/nonroot
ARG PYTHON_SITE_PACKAGES

# curl is installed here because the HEALTHCHECK below invokes it.
RUN apt-get update && \
apt-get install -y --no-install-recommends ca-certificates curl && \
rm -rf /var/lib/apt/lists/*

COPY --from=builder ${PYTHON_SITE_PACKAGES} ${PYTHON_SITE_PACKAGES}
COPY --from=builder /usr/local/bin/headroom /usr/local/bin/headroom
COPY --from=builder /usr/local/bin/headroom-proxy /usr/local/bin/headroom-proxy

# Create the cache/state directories. Only when RUNTIME_USER is the default
# "nonroot" is the account (uid/gid 1000) created and given ownership of /data
# and RUNTIME_HOME; any other value skips user creation entirely.
RUN mkdir -p ${RUNTIME_HOME}/.cache/huggingface ${RUNTIME_HOME}/.headroom /data && \
if [ "$RUNTIME_USER" = "nonroot" ]; then \
groupadd --gid 1000 nonroot && \
useradd --uid 1000 --gid nonroot --create-home nonroot && \
chown -R nonroot:nonroot /data ${RUNTIME_HOME}; \
fi

# Everything from here on (including the warm-up RUN) executes as RUNTIME_USER.
USER ${RUNTIME_USER}
WORKDIR ${RUNTIME_HOME}

# HF_HOME and TRANSFORMERS_CACHE both point at the huggingface directory
# created above.
ENV HEADROOM_HOST=0.0.0.0 \
HEADROOM_PORT=8787 \
HEADROOM_IN_DOCKER=1 \
HF_HOME=${RUNTIME_HOME}/.cache/huggingface \
TRANSFORMERS_CACHE=${RUNTIME_HOME}/.cache/huggingface \
PYTHONUNBUFFERED=1 \
PYTHONDONTWRITEBYTECODE=1 \
OMP_NUM_THREADS=1

# Build-time warm-up: run one compress() call on synthetic text and print the
# before/after token counts. The HEADROOM_* variables are prefixed to the
# command, so they apply to this RUN only and are not persisted in the image.
# NOTE(review): presumably this also pre-populates the cache under HF_HOME so
# the first request does not pay that cost — confirm.
RUN HEADROOM_STATELESS=true \
HEADROOM_TELEMETRY=off \
HEADROOM_SKIP_UPSTREAM_CHECK=1 \
HEADROOM_MODEL_LIMITS='{"context_limits":{"kilo/anthropic/claude-sonnet-4.6":1000000}}' \
python -c "from headroom import compress; text=('alpha beta gamma delta epsilon zeta eta theta\\n' * 256); result=compress([{'role':'user','content':text}], model='kilo/anthropic/claude-sonnet-4.6', model_limit=1000000, compress_user_messages=True, protect_recent=0, min_tokens_to_compress=10); print(f'headroom warmup OK: {result.tokens_before}->{result.tokens_after}')"

EXPOSE 8787

# Probe /readyz on the same port passed to the proxy in CMD.
HEALTHCHECK --interval=30s --timeout=5s --start-period=20s --retries=3 \
CMD ["curl", "--fail", "--silent", "http://127.0.0.1:8787/readyz"]

# ENTRYPOINT fixes the command; CMD supplies default flags that can be
# overridden when the container is started.
ENTRYPOINT ["headroom", "proxy"]
CMD ["--host", "0.0.0.0", "--port", "8787"]
15 changes: 15 additions & 0 deletions services/headroom-compress/container/HEADROOM_PIN.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Headroom Pin

- Upstream repo: `https://github.com/headroomlabs-ai/headroom`
- Commit: `da1a3973ed79d89617087ec315e77fb82356c03b`
- Version: `0.27.0`
- Preferred source-build image tag: `headroom-compress:0.27.0-da1a397`
- Deployed fallback image tag: `headroom-compress:0.27.0-ghcr9f5f0de`
- Deployed fallback source: `ghcr.io/chopratejas/headroom@sha256:9f5f0de34dbb4c2ba2b60ebba9bb2c28c9a07664629f3c1c0e9ea86cead62631`
- Cloudflare Registry image: `registry.cloudflare.com/e115e769bcdd4c3d66af59d3332cb394/headroom-compress:0.27.0-ghcr9f5f0de`
- Platform: `linux/amd64`

Build from `../container-build-context/Dockerfile`. Do not deploy `latest`.

On arm64 Docker Desktop, the pinned source build currently fails because amd64
Rust tooling segfaults under QEMU. Use a native amd64 builder for source builds.
23 changes: 23 additions & 0 deletions services/headroom-compress/container/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Headroom Container

Build pinned Cloudflare Container image from source. This requires a native amd64
builder because Headroom builds Rust extensions:

```bash
cd services/headroom-compress
pnpm run container:build
```

Confirm pushed image:

```bash
pnpm exec wrangler containers images list --filter headroom-compress --json
```

Deploy only the pinned tag referenced in `wrangler.jsonc`. Record the returned digest in the release notes before deploying.

Current fallback image was mirrored from:

```text
ghcr.io/chopratejas/headroom@sha256:9f5f0de34dbb4c2ba2b60ebba9bb2c28c9a07664629f3c1c0e9ea86cead62631
```
30 changes: 30 additions & 0 deletions services/headroom-compress/package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
{
"name": "headroom-compress",
"version": "1.0.0",
"private": true,
"type": "module",
"scripts": {
"dev": "wrangler dev",
"deploy": "wrangler deploy",
"tail": "wrangler tail",
"types": "wrangler types --include-runtime=false --env-file /dev/null",
"typecheck": "tsgo --noEmit",
"lint": "pnpm -w exec oxlint --config .oxlintrc.json services/headroom-compress/src",
"test": "vitest run",
"container:build": "scripts/build-headroom-image.sh",
"benchmark:compression": "node scripts/benchmark-compression.mjs"
},
"dependencies": {
"@cloudflare/containers": "0.3.7",
"@kilocode/worker-utils": "workspace:*",
"hono": "catalog:"
},
"devDependencies": {
"@cloudflare/workers-types": "catalog:",
"@types/node": "catalog:",
"@typescript/native-preview": "catalog:",
"typescript": "catalog:",
"vitest": "catalog:",
"wrangler": "catalog:"
}
}
Loading
Loading