diff --git a/.env.example b/.env.example index fcd8091..365bcaa 100644 --- a/.env.example +++ b/.env.example @@ -1,5 +1,8 @@ # Database configuration DB_DRIVER=postgres +# ⚠️ SECURITY WARNING: sslmode=disable is for DEVELOPMENT ONLY +# Production MUST use sslmode=require or sslmode=verify-full +# See docs/operations/security-hardening.md for guidance DB_CONNECTION_STRING=postgres://user:password@localhost:5432/mydb?sslmode=disable DB_MAX_OPEN_CONNECTIONS=25 DB_MAX_IDLE_CONNECTIONS=5 @@ -20,9 +23,29 @@ METRICS_NAMESPACE=secrets # Generate a new master key using: ./bin/app create-master-key # Each key must be exactly 32 bytes (256 bits), base64-encoded # Format: id1:base64key1,id2:base64key2 (comma-separated for multiple keys) +# ⚠️ SECURITY WARNING: Store master keys in secrets manager in production +# Never commit master keys to source control MASTER_KEYS=default:bEu+O/9NOFAsWf1dhVB9aprmumKhhBcE6o7UPVmI43Y= ACTIVE_MASTER_KEY_ID=default +# Authentication configuration +# Token expiration in seconds (default: 14400 = 4 hours) +# Migration note: Prior to v0.5.0, default was 86400 (24 hours) +AUTH_TOKEN_EXPIRATION_SECONDS=14400 + +# Rate limiting configuration +# Protects against abuse and denial-of-service attacks +RATE_LIMIT_ENABLED=true +RATE_LIMIT_REQUESTS_PER_SEC=10.0 +RATE_LIMIT_BURST=20 + +# CORS configuration +# ⚠️ SECURITY WARNING: CORS is disabled by default for server-to-server API +# Enable only if browser-based access is required +# Never use "*" for CORS_ALLOW_ORIGINS in production +CORS_ENABLED=false +CORS_ALLOW_ORIGINS= + # Worker configuration WORKER_INTERVAL=5 WORKER_BATCH_SIZE=10 diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b73f937..7c3becd 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -90,6 +90,14 @@ jobs: exit 1 fi + - name: Release docs link guard (PRs) + if: github.event_name == 'pull_request' + env: + GITHUB_EVENT_NAME: ${{ github.event_name }} + PR_BASE_SHA: ${{ 
github.event.pull_request.base.sha }} + PR_HEAD_SHA: ${{ github.sha }} + run: python3 docs/tools/check_release_docs_links.py + - name: Markdown lint uses: DavidAnson/markdownlint-cli2-action@v20 with: @@ -103,7 +111,22 @@ jobs: run: python3 docs/tools/check_example_shapes.py - name: Docs metadata checks - run: python3 docs/tools/check_docs_metadata.py + run: | + set -euo pipefail + + if [ "${{ github.event_name }}" = "pull_request" ]; then + export DOCS_CHANGED_FILES="$(git diff --name-only "${{ github.event.pull_request.base.sha }}" "${{ github.sha }}")" + else + export DOCS_CHANGED_FILES="" + fi + + python3 docs/tools/check_docs_metadata.py + + - name: OpenAPI validation + run: | + set -euo pipefail + python3 -m pip install --disable-pip-version-check --no-cache-dir openapi-spec-validator==0.7.1 + python3 -m openapi_spec_validator docs/openapi.yaml - name: Markdown link check (offline) uses: lycheeverse/lychee-action@v2 diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..5b3d515 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,124 @@ +# Changelog + +All notable changes to this project will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
+ +## [0.5.0] - 2026-02-19 + +### Added +- Per-client rate limiting for authenticated endpoints (default: 10 req/sec, burst 20) +- Configurable CORS support (disabled by default) +- Comprehensive security hardening documentation (`docs/operations/security-hardening.md`) +- Rate limiting configuration via `RATE_LIMIT_ENABLED`, `RATE_LIMIT_REQUESTS_PER_SEC`, `RATE_LIMIT_BURST` +- CORS configuration via `CORS_ENABLED`, `CORS_ALLOW_ORIGINS` + +### Changed +- **BREAKING**: Default token expiration reduced from 24 hours to 4 hours (86400 → 14400 seconds) +- Updated environment variables documentation with security warnings +- Updated production deployment guide with security hardening reference + +### Migration Notes + +**Token Expiration Change:** +If you rely on the previous default token expiration of 24 hours, explicitly set `AUTH_TOKEN_EXPIRATION_SECONDS=86400` in your environment configuration. Otherwise, tokens will now expire after 4 hours by default. + +**Review Client Token Refresh Logic:** +Ensure your client applications handle token refresh before expiration. The shorter default expiration improves security but may require updating client-side token refresh logic if you were relying on the previous 24-hour default. + +**Database SSL/TLS:** +If you are using `sslmode=disable` (PostgreSQL) or `tls=false` (MySQL) in production, this is insecure. Update your `DB_CONNECTION_STRING` to use `sslmode=require` or `sslmode=verify-full` (PostgreSQL) or `tls=true` or `tls=custom` (MySQL). See `docs/operations/security-hardening.md` for guidance. 
+ +### Security +- Added database SSL/TLS configuration warnings in documentation +- Added reverse proxy TLS requirements in documentation +- Added master key storage security guidance +- Added metrics endpoint protection recommendations + +### Documentation +- Added `docs/operations/security-hardening.md` with comprehensive security guidance +- Updated `docs/configuration/environment-variables.md` with new variables and security warnings +- Updated `.env.example` with security warnings for development-only configurations +- Updated `docs/getting-started/docker.md` and `docs/getting-started/local-development.md` with security warnings +- Updated `docs/concepts/security-model.md` with production recommendations +- Updated `README.md` with security hardening link + +## [0.4.1] - 2026-02-19 + +### Fixed +- Policy matcher now supports mid-path wildcard patterns (e.g., `/v1/transit/keys/*/rotate`) +- Mid-path `*` wildcard now matches exactly one path segment +- Trailing wildcard `/*` behavior remains greedy for nested subpaths + +### Documentation +- Added policy path-matching behavior documentation +- Added policy migration examples for wildcard patterns +- Added policy review checklist for operators + +## [0.4.0] - 2026-02-18 + +### Added +- Tokenization API for token generation, detokenization, validation, and revocation +- Tokenization key management (create, rotate, delete) +- Deterministic and non-deterministic tokenization support +- Token TTL and revocation capabilities +- Token metadata support (non-encrypted) +- CLI commands for tokenization key management +- Expired token cleanup command (`clean-expired-tokens`) + +### Documentation +- Added `docs/api/tokenization.md` with API reference +- Added tokenization examples in curl, Python, JavaScript, and Go +- Added tokenization monitoring and operations guidance +- Added tokenization migration verification guide + +## [0.3.0] - 2026-02-16 + +### Added +- OpenTelemetry metrics collection with Prometheus-compatible 
`/metrics` endpoint +- Configurable metrics namespace via `METRICS_NAMESPACE` +- Metrics enable/disable toggle via `METRICS_ENABLED` +- HTTP request metrics (total requests, duration, status codes) +- Cryptographic operation metrics (secret operations, transit operations, audit log operations) + +### Documentation +- Added `docs/operations/monitoring.md` with Prometheus and Grafana quickstart +- Added metrics naming contract and endpoint documentation +- Added production hardening guidance for securing `/metrics` endpoint + +## [0.2.0] - 2026-02-14 + +### Added +- Audit log retention cleanup command (`clean-audit-logs`) +- Dry-run mode for audit log cleanup +- JSON and text output formats for cleanup commands + +### Documentation +- Added audit log retention cleanup runbook +- Added CLI reference documentation +- Updated production operations guide with retention workflows + +## [0.1.0] - 2026-02-14 + +### Added +- Envelope encryption with Master Key → KEK → DEK → Data hierarchy +- Transit encryption API (encrypt/decrypt as a service) +- Token-based authentication and capability-based authorization +- Versioned secrets storage by path +- Audit logging with request correlation +- Support for PostgreSQL and MySQL databases +- Support for AES-GCM and ChaCha20-Poly1305 encryption algorithms +- Health and readiness endpoints +- Client management API (create, get, update, delete) +- Master key and KEK management CLI commands +- Docker image distribution + +### Documentation +- Initial documentation structure +- API reference documentation +- Getting started guides (Docker and local development) +- Operations guides (production deployment, key management) +- Example code (curl, Python, JavaScript, Go) +- Security model documentation +- Architecture documentation diff --git a/README.md b/README.md index 1ffba33..483a61b 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,7 @@ Secrets is inspired by **HashiCorp Vault** ❤️, but it is intentionally **muc The default way to 
run Secrets is the published Docker image: ```bash -docker pull allisson/secrets:v0.4.1 +docker pull allisson/secrets:v0.5.0 ``` Use pinned tags for reproducible setups. `latest` is also available for fast iteration. @@ -29,12 +29,15 @@ Then follow the Docker setup guide in [docs/getting-started/docker.md](docs/gett 1. 🐳 **Run with Docker image (recommended)**: [docs/getting-started/docker.md](docs/getting-started/docker.md) 2. 💻 **Run locally for development**: [docs/getting-started/local-development.md](docs/getting-started/local-development.md) -## 🆕 What's New in v0.4.1 +## 🆕 What's New in v0.5.0 -- 🐛 Fixed policy path matching for authorization with mid-path wildcards (for example `/v1/transit/keys/*/rotate`) -- ✅ Added stronger policy-matching coverage for wildcard edge cases and common role templates -- 📘 Added bugfix release notes: [docs/releases/v0.4.1.md](docs/releases/v0.4.1.md) -- 📦 Updated pinned Docker docs/examples to `allisson/secrets:v0.4.1` +- 🛡️ Added per-client rate limiting for authenticated API endpoints +- 🌐 Added configurable CORS support (disabled by default) +- ⏱️ Changed default token expiration from 24h to 4h for stronger security +- 🔐 Added comprehensive security hardening guide: [docs/operations/security-hardening.md](docs/operations/security-hardening.md) +- 📘 Added release notes: [docs/releases/v0.5.0.md](docs/releases/v0.5.0.md) +- ⬆️ Added upgrade guide: [docs/releases/v0.5.0-upgrade.md](docs/releases/v0.5.0-upgrade.md) +- 📦 Updated pinned Docker docs/examples to `allisson/secrets:v0.5.0` ## 📚 Docs Map @@ -45,7 +48,9 @@ Then follow the Docker setup guide in [docs/getting-started/docker.md](docs/gett - 🧰 **Troubleshooting**: [docs/getting-started/troubleshooting.md](docs/getting-started/troubleshooting.md) - ✅ **Smoke test script**: [docs/getting-started/smoke-test.md](docs/getting-started/smoke-test.md) - 🧪 **CLI commands reference**: [docs/cli/commands.md](docs/cli/commands.md) -- 🚀 **v0.4.1 release notes**: 
[docs/releases/v0.4.1.md](docs/releases/v0.4.1.md) +- 🚀 **v0.5.0 release notes**: [docs/releases/v0.5.0.md](docs/releases/v0.5.0.md) +- ⬆️ **v0.5.0 upgrade guide**: [docs/releases/v0.5.0-upgrade.md](docs/releases/v0.5.0-upgrade.md) +- 🔁 **Release compatibility matrix**: [docs/releases/compatibility-matrix.md](docs/releases/compatibility-matrix.md) - **By Topic** - ⚙️ **Environment variables**: [docs/configuration/environment-variables.md](docs/configuration/environment-variables.md) @@ -53,18 +58,28 @@ Then follow the Docker setup guide in [docs/getting-started/docker.md](docs/gett - 🔒 **Security model**: [docs/concepts/security-model.md](docs/concepts/security-model.md) - 📘 **Glossary**: [docs/concepts/glossary.md](docs/concepts/glossary.md) - 🔑 **Key management operations**: [docs/operations/key-management.md](docs/operations/key-management.md) +- 🔐 **Security hardening**: [docs/operations/security-hardening.md](docs/operations/security-hardening.md) - 📊 **Monitoring and metrics**: [docs/operations/monitoring.md](docs/operations/monitoring.md) +- 🧯 **Operator drills**: [docs/operations/operator-drills.md](docs/operations/operator-drills.md) +- 🚀 **Production rollout golden path**: [docs/operations/production-rollout.md](docs/operations/production-rollout.md) - 🚑 **Failure playbooks**: [docs/operations/failure-playbooks.md](docs/operations/failure-playbooks.md) - 🏭 **Production deployment**: [docs/operations/production.md](docs/operations/production.md) - 🛠️ **Development and testing**: [docs/development/testing.md](docs/development/testing.md) +- 🗺️ **Docs architecture map**: [docs/development/docs-architecture-map.md](docs/development/docs-architecture-map.md) - 🤝 **Docs contributing**: [docs/contributing.md](docs/contributing.md) - 🗒️ **Docs changelog**: [docs/CHANGELOG.md](docs/CHANGELOG.md) +Release note location: + +- Project release notes are in [CHANGELOG.md](CHANGELOG.md) +- Documentation process/history notes are in [docs/CHANGELOG.md](docs/CHANGELOG.md) 
+ - **API Reference** - 🔐 **Auth API**: [docs/api/authentication.md](docs/api/authentication.md) - 👤 **Clients API**: [docs/api/clients.md](docs/api/clients.md) - 📘 **Policy cookbook**: [docs/api/policies.md](docs/api/policies.md) - 🗂️ **Capability matrix**: [docs/api/capability-matrix.md](docs/api/capability-matrix.md) +- 🚨 **Error decision matrix**: [docs/api/error-decision-matrix.md](docs/api/error-decision-matrix.md) - 📦 **Secrets API**: [docs/api/secrets.md](docs/api/secrets.md) - 🚄 **Transit API**: [docs/api/transit.md](docs/api/transit.md) - 🎫 **Tokenization API**: [docs/api/tokenization.md](docs/api/tokenization.md) diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index 1371e18..4fbf842 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -2,6 +2,40 @@ > Last updated: 2026-02-19 +## 2026-02-19 (docs v10 - v0.5.0 security hardening release prep) + +- Added comprehensive security hardening guide: `docs/operations/security-hardening.md` +- Updated docs metadata source (`docs/metadata.json`) to `current_release: v0.5.0` +- Added release notes page: `docs/releases/v0.5.0.md` and promoted it as current in docs indexes +- Updated environment variables documentation with rate limiting and CORS configuration +- Added security warnings for database SSL/TLS requirements (production vs development) +- Added migration note for token expiration default change (24h → 4h) +- Updated `.env.example` with new configuration options and security warnings +- Added security warnings to Docker and local development getting-started guides +- Updated production deployment guide with security hardening reference +- Updated security model with comprehensive production recommendations +- Added security hardening link to root README and docs indexes +- Updated current-release references from v0.4.1 to v0.5.0 while preserving historical links +- Added upgrade guide: `docs/releases/v0.5.0-upgrade.md` +- Added API rate limiting reference: `docs/api/rate-limiting.md` +- Updated API 
endpoint docs with `429` behavior and rate-limiting cross-links +- Expanded troubleshooting with `429` and CORS/preflight diagnostics +- Added retry/backoff examples for `429` handling in curl, Python, JavaScript, and Go example docs +- Added rate-limiting production presets in environment variables documentation +- Added docs release checklist: `docs/development/docs-release-checklist.md` +- Added OpenAPI validation step in CI workflow +- Added production rollout golden path runbook: `docs/operations/production-rollout.md` +- Added API error decision matrix: `docs/api/error-decision-matrix.md` +- Added release compatibility matrix: `docs/releases/compatibility-matrix.md` +- Added persona-oriented policy templates and references in `docs/api/policies.md` +- Expanded monitoring guide with rate-limit Prometheus queries and alert examples +- Added CORS smoke checks (copy/paste) to troubleshooting guide +- Added quarterly operator drills runbook: `docs/operations/operator-drills.md` +- Added dashboard artifact templates under `docs/operations/dashboards/` +- Added docs architecture map: `docs/development/docs-architecture-map.md` +- Added release docs CI guard: `docs/tools/check_release_docs_links.py` + workflow integration +- Expanded policy smoke tests with pre-deploy automation wrapper pattern + ## 2026-02-19 (docs v9 - v0.4.1 bugfix release prep) - Added release notes page: `docs/releases/v0.4.1.md` and promoted it as current in docs indexes diff --git a/docs/README.md b/docs/README.md index e57fbc3..d8b68f9 100644 --- a/docs/README.md +++ b/docs/README.md @@ -18,8 +18,9 @@ Welcome to the full documentation for Secrets. Pick a path and dive in 🚀 1. Start with Docker guide: [getting-started/docker.md](getting-started/docker.md) 2. Validate end-to-end setup: [getting-started/smoke-test.md](getting-started/smoke-test.md) -3. Apply production hardening checklist: [operations/production.md](operations/production.md) -4. 
Use runbook hub for rollout and incidents: [operations/runbook-index.md](operations/runbook-index.md) +3. Follow rollout runbook: [operations/production-rollout.md](operations/production-rollout.md) +4. Apply production hardening checklist: [operations/production.md](operations/production.md) +5. Use runbook hub for rollout and incidents: [operations/runbook-index.md](operations/runbook-index.md) ## 📖 Documentation by Topic @@ -28,12 +29,16 @@ Welcome to the full documentation for Secrets. Pick a path and dive in 🚀 - 🔒 [concepts/security-model.md](concepts/security-model.md) - 📘 [concepts/glossary.md](concepts/glossary.md) - 🔑 [operations/key-management.md](operations/key-management.md) +- 🚀 [operations/production-rollout.md](operations/production-rollout.md) - 📊 [operations/monitoring.md](operations/monitoring.md) +- 🧯 [operations/operator-drills.md](operations/operator-drills.md) - 🏭 [operations/production.md](operations/production.md) - 🚑 [operations/failure-playbooks.md](operations/failure-playbooks.md) - 🧪 [operations/policy-smoke-tests.md](operations/policy-smoke-tests.md) - 🧭 [operations/runbook-index.md](operations/runbook-index.md) - 🛠️ [development/testing.md](development/testing.md) +- 🧾 [development/docs-release-checklist.md](development/docs-release-checklist.md) +- 🗺️ [development/docs-architecture-map.md](development/docs-architecture-map.md) - 🤝 [contributing.md](contributing.md) - 🗒️ [CHANGELOG.md](CHANGELOG.md) @@ -53,23 +58,28 @@ Welcome to the full documentation for Secrets. 
Pick a path and dive in 🚀 - 👤 [api/clients.md](api/clients.md) - 📘 [api/policies.md](api/policies.md) - 🗂️ [api/capability-matrix.md](api/capability-matrix.md) +- 🚨 [api/error-decision-matrix.md](api/error-decision-matrix.md) - 📦 [api/secrets.md](api/secrets.md) - 🚄 [api/transit.md](api/transit.md) - 🎫 [api/tokenization.md](api/tokenization.md) - 📜 [api/audit-logs.md](api/audit-logs.md) +- 🚦 [api/rate-limiting.md](api/rate-limiting.md) - 🧱 [api/response-shapes.md](api/response-shapes.md) - 🧩 [api/versioning-policy.md](api/versioning-policy.md) - 📄 [openapi.yaml](openapi.yaml) OpenAPI scope note: -- `openapi.yaml` is a baseline subset for common API flows in `v0.4.1` +- `openapi.yaml` is a baseline subset for common API flows in `v0.5.0` - Full endpoint behavior is documented in the endpoint pages under `docs/api/` -- Tokenization endpoints are included in `openapi.yaml` for `v0.4.1` +- Tokenization endpoints are included in `openapi.yaml` for `v0.5.0` ## 🚀 Releases -- 📦 [releases/v0.4.1.md](releases/v0.4.1.md) +- 📦 [releases/v0.5.0.md](releases/v0.5.0.md) +- ⬆️ [releases/v0.5.0-upgrade.md](releases/v0.5.0-upgrade.md) +- 🔁 [releases/compatibility-matrix.md](releases/compatibility-matrix.md) +- 📦 [releases/v0.4.1.md](releases/v0.4.1.md) (historical) - 📦 [releases/v0.4.0.md](releases/v0.4.0.md) (historical) - 📦 [releases/v0.3.0.md](releases/v0.3.0.md) (historical) - 📦 [releases/v0.2.0.md](releases/v0.2.0.md) (historical) diff --git a/docs/api/audit-logs.md b/docs/api/audit-logs.md index 43a834b..43d4f55 100644 --- a/docs/api/audit-logs.md +++ b/docs/api/audit-logs.md @@ -26,7 +26,7 @@ Capability reference: | Endpoint | Success | Common error statuses | | --- | --- | --- | -| `GET /v1/audit-logs` | `200` | `401`, `403`, `422` | +| `GET /v1/audit-logs` | `200` | `401`, `403`, `422`, `429` | Query parameters: @@ -99,6 +99,7 @@ Example: - `401 Unauthorized`: missing/invalid bearer token - `403 Forbidden`: caller lacks `read` capability for `/v1/audit-logs` - `422 
Unprocessable Entity`: invalid query values (offset/limit/timestamps) +- `429 Too Many Requests`: per-client rate limit exceeded ## Error Payload Examples @@ -172,6 +173,8 @@ curl -s "http://localhost:8080/v1/audit-logs?limit=100" \ ## See also - [Authentication API](authentication.md) +- [API error decision matrix](error-decision-matrix.md) +- [API rate limiting](rate-limiting.md) - [Clients API](clients.md) - [Policies cookbook](policies.md) - [Route shape vs policy shape](policies.md#route-shape-vs-policy-shape) diff --git a/docs/api/authentication.md b/docs/api/authentication.md index dcccfed..2334008 100644 --- a/docs/api/authentication.md +++ b/docs/api/authentication.md @@ -68,6 +68,11 @@ Expected result: token request returns `201 Created`, authenticated clients requ - `403 Forbidden`: inactive client - `422 Unprocessable Entity`: malformed request +Rate limiting note: + +- `POST /v1/token` is not rate-limited by application middleware +- Protected endpoints called with issued tokens can return `429 Too Many Requests` + ## Error Payload Examples Representative error payloads (exact messages may vary): @@ -117,6 +122,8 @@ Representative error payloads (exact messages may vary): ## See also - [Clients API](clients.md) +- [API error decision matrix](error-decision-matrix.md) +- [API rate limiting](rate-limiting.md) - [Policies cookbook](policies.md) - [Capability matrix](capability-matrix.md) - [Audit logs API](audit-logs.md) diff --git a/docs/api/capability-matrix.md b/docs/api/capability-matrix.md index b894e41..c6f50b9 100644 --- a/docs/api/capability-matrix.md +++ b/docs/api/capability-matrix.md @@ -61,6 +61,7 @@ For complete matcher semantics and unsupported forms, see [Policies cookbook](po ## See also - [Policies cookbook](policies.md) +- [API error decision matrix](error-decision-matrix.md) - [Authentication API](authentication.md) - [Clients API](clients.md) - [Secrets API](secrets.md) diff --git a/docs/api/clients.md b/docs/api/clients.md index 
0ea2578..af6b7dc 100644 --- a/docs/api/clients.md +++ b/docs/api/clients.md @@ -33,11 +33,11 @@ Capability mapping: | Endpoint | Success | Common error statuses | | --- | --- | --- | -| `POST /v1/clients` | `201` | `401`, `403`, `409`, `422` | -| `GET /v1/clients` | `200` | `401`, `403`, `422` | -| `GET /v1/clients/:id` | `200` | `401`, `403`, `404`, `422` | -| `PUT /v1/clients/:id` | `200` | `401`, `403`, `404`, `409`, `422` | -| `DELETE /v1/clients/:id` | `204` | `401`, `403`, `404`, `422` | +| `POST /v1/clients` | `201` | `401`, `403`, `409`, `422`, `429` | +| `GET /v1/clients` | `200` | `401`, `403`, `422`, `429` | +| `GET /v1/clients/:id` | `200` | `401`, `403`, `404`, `422`, `429` | +| `PUT /v1/clients/:id` | `200` | `401`, `403`, `404`, `409`, `422`, `429` | +| `DELETE /v1/clients/:id` | `204` | `401`, `403`, `404`, `422`, `429` | ## Create Client @@ -75,6 +75,7 @@ Example success status: `200 OK`. - `404 Not Found`: client ID not found - `409 Conflict`: unique constraint conflicts - `422 Unprocessable Entity`: invalid request/query payload +- `429 Too Many Requests`: per-client rate limit exceeded ## Error Payload Examples @@ -146,6 +147,8 @@ Expected result: create returns `201 Created` with one-time `secret`; list retur ## See also - [Authentication API](authentication.md) +- [API error decision matrix](error-decision-matrix.md) +- [API rate limiting](rate-limiting.md) - [Policies cookbook](policies.md) - [Capability matrix](capability-matrix.md) - [Audit logs API](audit-logs.md) diff --git a/docs/api/error-decision-matrix.md b/docs/api/error-decision-matrix.md new file mode 100644 index 0000000..3081bdc --- /dev/null +++ b/docs/api/error-decision-matrix.md @@ -0,0 +1,40 @@ +# 🚨 API Error Decision Matrix + +> Last updated: 2026-02-19 +> Applies to: API v1 + +Use this matrix to triage API failures quickly and choose the next action. 
+ +## Decision Matrix + +| Status | Meaning | Common causes | First action | +| --- | --- | --- | --- | +| `401 Unauthorized` | Authentication failed | Missing/invalid Bearer token, invalid client credentials, expired token | Re-issue token and verify `Authorization: Bearer <token>` | +| `403 Forbidden` | Authenticated but not allowed | Policy/capability mismatch for request path | Check policy path + required capability mapping | +| `404 Not Found` | Route/resource missing | Wrong endpoint shape, unknown resource ID/key/path | Verify endpoint path shape first, then resource existence | +| `409 Conflict` | Resource state conflict | Duplicate create (for example existing transit key name) | Switch to rotate/update flow or use unique resource name | +| `422 Unprocessable Entity` | Validation failed | Invalid JSON/body/query, bad base64, malformed ciphertext contract | Validate payload and endpoint-specific contract | +| `429 Too Many Requests` | Request throttled | Per-client rate limit exceeded | Respect `Retry-After` and retry with backoff + jitter | + +## Fast Triage Order + +1. Check status code class (`401/403/404/409/422/429`) +2. Validate route shape (to avoid misreading `404` as policy issue) +3. Validate token/authn (`401`) before policy/authz (`403`) +4. Validate payload contract (`422`) using endpoint docs +5. 
For `429`, apply retry policy and reassess client concurrency + +## Capability mismatch quick map (`403`) + +- `GET /v1/secrets/*path` requires `decrypt` +- `POST /v1/secrets/*path` requires `encrypt` +- `POST /v1/transit/keys/:name/rotate` requires `rotate` +- `POST /v1/tokenization/detokenize` requires `decrypt` +- `GET /v1/audit-logs` requires `read` + +## See also + +- [Capability matrix](capability-matrix.md) +- [Policies cookbook](policies.md) +- [API rate limiting](rate-limiting.md) +- [Troubleshooting](../getting-started/troubleshooting.md) diff --git a/docs/api/policies.md b/docs/api/policies.md index a803b63..8f98220 100644 --- a/docs/api/policies.md +++ b/docs/api/policies.md @@ -11,6 +11,7 @@ Ready-to-use policy templates for common service roles. - [Path matching behavior](#path-matching-behavior) - [Route shape vs policy shape](#route-shape-vs-policy-shape) - [Policy review checklist before deploy](#policy-review-checklist-before-deploy) +- [Persona policy templates](#persona-policy-templates) - [1) Read-only service](#1-read-only-service) - [2) CI writer](#2-ci-writer) - [3) Transit encrypt-only service](#3-transit-encrypt-only-service) @@ -20,6 +21,7 @@ Ready-to-use policy templates for common service roles. 
- [7) Key operator](#7-key-operator) - [8) Tokenization operator](#8-tokenization-operator) - [Copy-safe split-role snippets](#copy-safe-split-role-snippets) +- [Pre-deploy policy automation](#pre-deploy-policy-automation) - [Policy mismatch example (wrong vs fixed)](#policy-mismatch-example-wrong-vs-fixed) - [Common policy mistakes](#common-policy-mistakes) - [Best practices](#best-practices) @@ -95,6 +97,26 @@ Endpoint capability intent (quick map, condensed from [Capability matrix](capabi | `GET /v1/secrets/*path`, `POST /v1/transit/keys/:name/decrypt`, `POST /v1/tokenization/detokenize` | `decrypt` | | `POST /v1/transit/keys/:name/rotate`, `POST /v1/tokenization/keys/:name/rotate` | `rotate` | +## Persona policy templates + +Use these as starter profiles for common operational personas. + +| Persona | Primary scope | Starter policy section | +| --- | --- | --- | +| Secrets reader | Read existing secrets only | [1) Read-only service](#1-read-only-service) | +| Secrets writer | CI/CD publish path | [2) CI writer](#2-ci-writer) | +| Transit encrypt worker | Encrypt-only workloads | [3) Transit encrypt-only service](#3-transit-encrypt-only-service) | +| Transit decrypt worker | Controlled decrypt runtime | [4) Transit decrypt-only service](#4-transit-decrypt-only-service) | +| Audit/compliance reader | Audit log retrieval | [5) Audit log reader](#5-audit-log-reader) | +| Key operator | Transit/tokenization key lifecycle | [7) Key operator](#7-key-operator) + [8) Tokenization operator](#8-tokenization-operator) | +| Break-glass admin | Emergency broad access | [6) Break-glass admin (emergency)](#6-break-glass-admin-emergency) | + +Persona composition tips: + +- Prefer one persona per client credential +- Keep encrypt/decrypt split across separate clients where possible +- Reserve wildcard `*` for short-lived emergency workflows only + ## 1) Read-only service Use when a service only reads existing secrets. 
@@ -285,6 +307,37 @@ Secrets write-only workload (`encrypt` only): ] ``` +## Pre-deploy policy automation + +Use this pre-deploy gate in CI to reject obvious policy mistakes before rollout. + +```bash +#!/usr/bin/env bash +set -euo pipefail + +POLICY_JSON_PATH="${1:-policy.json}" + +# 1) Basic JSON validation +jq empty "$POLICY_JSON_PATH" + +# 2) Reject unsupported wildcard forms in path segments +if jq -e '.[] | select(.path | test("\\*\\*|\\w-\\*|\\*\\w"))' "$POLICY_JSON_PATH" >/dev/null; then + echo "unsupported wildcard pattern found in policy path" + exit 1 +fi + +# 3) Ensure capabilities are from allowed set +ALLOWED='["read","write","delete","encrypt","decrypt","rotate"]' +if jq -e --argjson allowed "$ALLOWED" '.[] | .capabilities[] | select(($allowed | index(.)) == null)' "$POLICY_JSON_PATH" >/dev/null; then + echo "unsupported capability found" + exit 1 +fi + +echo "policy static checks: PASS" +``` + +For runtime allow/deny assertions, run [Policy smoke tests](../operations/policy-smoke-tests.md). + ## Policy mismatch example (wrong vs fixed) Wrong policy (insufficient capability for secret reads): @@ -335,6 +388,7 @@ Also verify path matching, for example `/v1/secrets/app/prod/*` if you want tigh ## See also - [Authentication API](authentication.md) +- [API error decision matrix](error-decision-matrix.md) - [Clients API](clients.md) - [Capability matrix](capability-matrix.md) - [Secrets API](secrets.md) diff --git a/docs/api/rate-limiting.md b/docs/api/rate-limiting.md new file mode 100644 index 0000000..a2f11fa --- /dev/null +++ b/docs/api/rate-limiting.md @@ -0,0 +1,65 @@ +# 🚦 API Rate Limiting + +> Last updated: 2026-02-19 +> Applies to: API v1 + +Secrets enforces per-client rate limiting for authenticated API routes when +`RATE_LIMIT_ENABLED=true` (default). 
+ +## Scope + +Rate limiting scope matrix: + +| Route group/endpoint | Rate limited | Notes | +| --- | --- | --- | +| `/v1/clients/*` | Yes | Requires Bearer auth | +| `/v1/audit-logs` | Yes | Requires Bearer auth | +| `/v1/secrets/*` | Yes | Requires Bearer auth | +| `/v1/transit/*` | Yes | Requires Bearer auth | +| `/v1/tokenization/*` | Yes | Requires Bearer auth | +| `POST /v1/token` | No | Token issuance route | +| `GET /health` | No | Liveness checks | +| `GET /ready` | No | Readiness checks | +| `GET /metrics` | No | Prometheus scraping | + +## Defaults + +```dotenv +RATE_LIMIT_ENABLED=true +RATE_LIMIT_REQUESTS_PER_SEC=10.0 +RATE_LIMIT_BURST=20 +``` + +## Response behavior + +When a request exceeds the allowed rate, the API returns: + +- Status: `429 Too Many Requests` +- Header: `Retry-After: <seconds>` +- Body: + +```json +{ + "error": "rate_limit_exceeded", + "message": "Too many requests. Please retry after the specified delay." +} +``` + +## Client retry guidance + +- Respect `Retry-After` before retrying +- Use exponential backoff with jitter +- Avoid synchronized retries across many workers +- Reduce per-client burst and concurrency where possible + +## Distinguishing `403` vs `429` + +- `403 Forbidden`: policy/capability denies access +- `429 Too Many Requests`: request was authenticated/authorized but throttled + +## See also + +- [Environment variables](../configuration/environment-variables.md) +- [API error decision matrix](error-decision-matrix.md) +- [Response shapes](response-shapes.md) +- [Troubleshooting](../getting-started/troubleshooting.md) diff --git a/docs/api/response-shapes.md b/docs/api/response-shapes.md index b733102..3acad43 100644 --- a/docs/api/response-shapes.md +++ b/docs/api/response-shapes.md @@ -150,10 +150,22 @@ Common error categories: - `unauthorized` - `forbidden` +- `rate_limit_exceeded` - `validation_error` - `not_found` - `conflict` +Representative rate-limit payload (`429 Too Many Requests`): + +```json +{ + "error": 
"rate_limit_exceeded", + "message": "Too many requests. Please retry after the specified delay." +} +``` + +Rate-limit responses include a `Retry-After` header in seconds. + Representative conflict payload (for example duplicate transit key create): ```json @@ -166,6 +178,8 @@ Representative conflict payload (for example duplicate transit key create): ## See also - [Authentication API](authentication.md) +- [API rate limiting](rate-limiting.md) +- [API error decision matrix](error-decision-matrix.md) - [Clients API](clients.md) - [Secrets API](secrets.md) - [Transit API](transit.md) diff --git a/docs/api/secrets.md b/docs/api/secrets.md index 484449e..987011c 100644 --- a/docs/api/secrets.md +++ b/docs/api/secrets.md @@ -23,9 +23,9 @@ All endpoints require Bearer authentication. | Endpoint | Success | Common error statuses | | --- | --- | --- | -| `POST /v1/secrets/*path` | `201` | `401`, `403`, `422` | -| `GET /v1/secrets/*path` | `200` | `401`, `403`, `404` | -| `DELETE /v1/secrets/*path` | `204` | `401`, `403`, `404` | +| `POST /v1/secrets/*path` | `201` | `401`, `403`, `422`, `429` | +| `GET /v1/secrets/*path` | `200` | `401`, `403`, `404`, `429` | +| `DELETE /v1/secrets/*path` | `204` | `401`, `403`, `404`, `429` | ## Create or Update Secret @@ -85,6 +85,7 @@ Delete returns `204 No Content`. 
- `403 Forbidden`: caller lacks required capability for the path - `404 Not Found`: secret path not found (or soft-deleted in current context) - `422 Unprocessable Entity`: invalid request body +- `429 Too Many Requests`: per-client rate limit exceeded ## Error Payload Examples @@ -160,6 +161,8 @@ Wildcard matcher semantics reference: ## See also - [Authentication API](authentication.md) +- [API error decision matrix](error-decision-matrix.md) +- [API rate limiting](rate-limiting.md) - [Policies cookbook](policies.md) - [Capability matrix](capability-matrix.md) - [Response shapes](response-shapes.md) diff --git a/docs/api/tokenization.md b/docs/api/tokenization.md index fb33d48..4f6b0f7 100644 --- a/docs/api/tokenization.md +++ b/docs/api/tokenization.md @@ -14,7 +14,7 @@ with optional deterministic behavior and token lifecycle management. OpenAPI coverage note: -- Tokenization endpoint coverage is included in `docs/openapi.yaml` for `v0.4.1` +- Tokenization endpoint coverage is included in `docs/openapi.yaml` for `v0.5.0` - This page remains the most detailed contract reference with examples and operational guidance All endpoints require `Authorization: Bearer <token>`. 
@@ -50,13 +50,13 @@ Capability mapping: | Endpoint | Success | Common error statuses | | --- | --- | --- | -| `POST /v1/tokenization/keys` | `201` | `401`, `403`, `409`, `422` | -| `POST /v1/tokenization/keys/:name/rotate` | `201` | `401`, `403`, `404`, `422` | -| `DELETE /v1/tokenization/keys/:id` | `204` | `401`, `403`, `404`, `422` | -| `POST /v1/tokenization/keys/:name/tokenize` | `201` | `401`, `403`, `404`, `422` | -| `POST /v1/tokenization/detokenize` | `200` | `401`, `403`, `404`, `422` | -| `POST /v1/tokenization/validate` | `200` | `401`, `403`, `422` | -| `POST /v1/tokenization/revoke` | `204` | `401`, `403`, `404`, `422` | +| `POST /v1/tokenization/keys` | `201` | `401`, `403`, `409`, `422`, `429` | +| `POST /v1/tokenization/keys/:name/rotate` | `201` | `401`, `403`, `404`, `422`, `429` | +| `DELETE /v1/tokenization/keys/:id` | `204` | `401`, `403`, `404`, `422`, `429` | +| `POST /v1/tokenization/keys/:name/tokenize` | `201` | `401`, `403`, `404`, `422`, `429` | +| `POST /v1/tokenization/detokenize` | `200` | `401`, `403`, `404`, `422`, `429` | +| `POST /v1/tokenization/validate` | `200` | `401`, `403`, `422`, `429` | +| `POST /v1/tokenization/revoke` | `204` | `401`, `403`, `404`, `422`, `429` | ## Create Tokenization Key @@ -274,6 +274,8 @@ If data must remain confidential at rest, keep it in encrypted plaintext payload ## See also - [Authentication](authentication.md) +- [API error decision matrix](error-decision-matrix.md) +- [API rate limiting](rate-limiting.md) - [Policies](policies.md) - [Capability matrix](capability-matrix.md) - [CLI Commands](../cli/commands.md) diff --git a/docs/api/transit.md b/docs/api/transit.md index 64a1139..15af2e3 100644 --- a/docs/api/transit.md +++ b/docs/api/transit.md @@ -50,11 +50,11 @@ Wildcard matcher semantics reference: | Endpoint | Success | Common error statuses | | --- | --- | --- | -| `POST /v1/transit/keys` | `201` | `401`, `403`, `409`, `422` | -| `POST /v1/transit/keys/:name/rotate` | `200` | `401`, 
`403`, `404`, `422` | -| `POST /v1/transit/keys/:name/encrypt` | `200` | `401`, `403`, `404`, `422` | -| `POST /v1/transit/keys/:name/decrypt` | `200` | `401`, `403`, `404`, `422` | -| `DELETE /v1/transit/keys/:id` | `204` | `401`, `403`, `404`, `422` | +| `POST /v1/transit/keys` | `201` | `401`, `403`, `409`, `422`, `429` | +| `POST /v1/transit/keys/:name/rotate` | `200` | `401`, `403`, `404`, `422`, `429` | +| `POST /v1/transit/keys/:name/encrypt` | `200` | `401`, `403`, `404`, `422`, `429` | +| `POST /v1/transit/keys/:name/decrypt` | `200` | `401`, `403`, `404`, `422`, `429` | +| `DELETE /v1/transit/keys/:id` | `204` | `401`, `403`, `404`, `422`, `429` | ## Create Transit Key @@ -164,16 +164,17 @@ Example decrypt response (`200 OK`): - `404 Not Found`: key missing or soft deleted - `409 Conflict`: key already exists on create - `422 Unprocessable Entity`: malformed request payload, invalid blob format, or invalid ciphertext base64 +- `429 Too Many Requests`: per-client rate limit exceeded ## Endpoint Error Matrix -| Endpoint | 401 | 403 | 404 | 409 | 422 | -| --- | --- | --- | --- | --- | --- | -| `POST /v1/transit/keys` | missing/invalid token | missing `write` capability | - | key name already initialized (`version=1`) | invalid create payload | -| `POST /v1/transit/keys/:name/rotate` | missing/invalid token | missing `rotate` capability | key name not found | - | invalid rotate payload | -| `POST /v1/transit/keys/:name/encrypt` | missing/invalid token | missing `encrypt` capability | key name not found | - | `plaintext` missing/invalid base64 | -| `POST /v1/transit/keys/:name/decrypt` | missing/invalid token | missing `decrypt` capability | key/version not found | - | malformed `:` | -| `DELETE /v1/transit/keys/:id` | missing/invalid token | missing `delete` capability | key ID not found | - | invalid UUID | +| Endpoint | 401 | 403 | 404 | 409 | 422 | 429 | +| --- | --- | --- | --- | --- | --- | --- | +| `POST /v1/transit/keys` | missing/invalid token | 
missing `write` capability | - | key name already initialized (`version=1`) | invalid create payload | per-client rate limit exceeded | +| `POST /v1/transit/keys/:name/rotate` | missing/invalid token | missing `rotate` capability | key name not found | - | invalid rotate payload | per-client rate limit exceeded | +| `POST /v1/transit/keys/:name/encrypt` | missing/invalid token | missing `encrypt` capability | key name not found | - | `plaintext` missing/invalid base64 | per-client rate limit exceeded | +| `POST /v1/transit/keys/:name/decrypt` | missing/invalid token | missing `decrypt` capability | key/version not found | - | malformed `:` | per-client rate limit exceeded | +| `DELETE /v1/transit/keys/:id` | missing/invalid token | missing `delete` capability | key ID not found | - | invalid UUID | per-client rate limit exceeded | ## Error Payload Examples @@ -276,6 +277,8 @@ Expected result: key creation returns `201 Created`; encrypt returns `200 OK` wi ## See also - [Authentication API](authentication.md) +- [API error decision matrix](error-decision-matrix.md) +- [API rate limiting](rate-limiting.md) - [Policies cookbook](policies.md) - [Capability matrix](capability-matrix.md) - [Response shapes](response-shapes.md) diff --git a/docs/api/versioning-policy.md b/docs/api/versioning-policy.md index 086ef1b..60b5d99 100644 --- a/docs/api/versioning-policy.md +++ b/docs/api/versioning-policy.md @@ -11,16 +11,17 @@ This page defines compatibility expectations for HTTP API changes. 
- Existing endpoint paths and JSON field names are treated as stable unless explicitly deprecated - OpenAPI source of truth: `docs/openapi.yaml` -## OpenAPI Coverage (v0.4.1) +## OpenAPI Coverage (v0.5.0) - `docs/openapi.yaml` is a baseline subset focused on high-traffic/common integration flows -- `docs/openapi.yaml` includes tokenization endpoint coverage in `v0.4.1` +- `docs/openapi.yaml` includes tokenization endpoint coverage in `v0.5.0` +- `docs/openapi.yaml` includes `429 Too Many Requests` response modeling for protected routes - Endpoint pages in `docs/api/*.md` define full public behavior for covered operations - Endpoints may exist in runtime before they are expanded in OpenAPI detail ## App Version vs API Version -- Application release `v0.4.1` is pre-1.0 software and may evolve quickly +- Application release `v0.5.0` is pre-1.0 software and may evolve quickly - API v1 path contract (`/v1/*`) remains the compatibility baseline for consumers - Breaking API behavior changes require explicit documentation and migration notes @@ -71,6 +72,7 @@ Non-breaking telemetry examples: ## See also - [Authentication API](authentication.md) +- [API error decision matrix](error-decision-matrix.md) - [Response shapes](response-shapes.md) - [Contributing guide](../contributing.md) - [Documentation changelog](../CHANGELOG.md) diff --git a/docs/cli/commands.md b/docs/cli/commands.md index dd6d54f..7d83e8f 100644 --- a/docs/cli/commands.md +++ b/docs/cli/commands.md @@ -12,10 +12,10 @@ Local binary: ./bin/app [flags] ``` -Docker image (v0.4.1): +Docker image (v0.5.0): ```bash -docker run --rm --env-file .env allisson/secrets:v0.4.1 [flags] +docker run --rm --env-file .env allisson/secrets:v0.5.0 [flags] ``` ## Core Runtime @@ -33,7 +33,7 @@ Local: Docker: ```bash -docker run --rm --network secrets-net --env-file .env -p 8080:8080 allisson/secrets:v0.4.1 server +docker run --rm --network secrets-net --env-file .env -p 8080:8080 allisson/secrets:v0.5.0 server ``` ### `migrate` 
@@ -49,7 +49,7 @@ Local: Docker: ```bash -docker run --rm --network secrets-net --env-file .env allisson/secrets:v0.4.1 migrate +docker run --rm --network secrets-net --env-file .env allisson/secrets:v0.5.0 migrate ``` ## Key Management @@ -71,7 +71,7 @@ Local: Docker: ```bash -docker run --rm allisson/secrets:v0.4.1 create-master-key --id default +docker run --rm allisson/secrets:v0.5.0 create-master-key --id default ``` ### `create-kek` @@ -91,7 +91,7 @@ Local: Docker: ```bash -docker run --rm --network secrets-net --env-file .env allisson/secrets:v0.4.1 create-kek --algorithm aes-gcm +docker run --rm --network secrets-net --env-file .env allisson/secrets:v0.5.0 create-kek --algorithm aes-gcm ``` ### `rotate-kek` @@ -111,7 +111,7 @@ Local: Docker: ```bash -docker run --rm --network secrets-net --env-file .env allisson/secrets:v0.4.1 rotate-kek --algorithm aes-gcm +docker run --rm --network secrets-net --env-file .env allisson/secrets:v0.5.0 rotate-kek --algorithm aes-gcm ``` After master key or KEK rotation, restart API server instances so they load updated key material. 
@@ -138,7 +138,7 @@ Examples: --deterministic \ --algorithm aes-gcm -docker run --rm --network secrets-net --env-file .env allisson/secrets:v0.4.1 \ +docker run --rm --network secrets-net --env-file .env allisson/secrets:v0.5.0 \ create-tokenization-key --name payment-cards --format luhn-preserving --deterministic --algorithm aes-gcm ``` @@ -162,7 +162,7 @@ Examples: --deterministic \ --algorithm chacha20-poly1305 -docker run --rm --network secrets-net --env-file .env allisson/secrets:v0.4.1 \ +docker run --rm --network secrets-net --env-file .env allisson/secrets:v0.5.0 \ rotate-tokenization-key --name payment-cards --format luhn-preserving --deterministic --algorithm chacha20-poly1305 ``` @@ -186,7 +186,7 @@ Examples: ./bin/app clean-expired-tokens --days 30 --format text # Docker form -docker run --rm --network secrets-net --env-file .env allisson/secrets:v0.4.1 \ +docker run --rm --network secrets-net --env-file .env allisson/secrets:v0.5.0 \ clean-expired-tokens --days 30 --dry-run --format json ``` @@ -269,7 +269,7 @@ Examples: ./bin/app clean-audit-logs --days 90 --format text # Docker form -docker run --rm --network secrets-net --env-file .env allisson/secrets:v0.4.1 \ +docker run --rm --network secrets-net --env-file .env allisson/secrets:v0.5.0 \ clean-audit-logs --days 90 --dry-run --format json ``` diff --git a/docs/concepts/security-model.md b/docs/concepts/security-model.md index f0f13a7..f65c4a2 100644 --- a/docs/concepts/security-model.md +++ b/docs/concepts/security-model.md @@ -35,13 +35,21 @@ Secrets is designed for practical defense-in-depth around secret storage and cry ## ✅ Production recommendations -- Use HTTPS/TLS everywhere -- Store master keys in KMS/HSM/secure secret manager +- Use HTTPS/TLS everywhere (run behind reverse proxy with TLS termination) +- Store master keys in KMS/HSM/secure secret manager (never in source control) - Apply least-privilege policies per client and path - Rotate KEKs and client credentials regularly - Alert on 
repeated denied authorization attempts - Separate `encrypt` and `decrypt` clients for tokenization and transit when possible - Prefer non-deterministic tokenization unless deterministic matching is an explicit requirement +- Enable rate limiting to protect against abuse and denial-of-service attacks +- Use short token expiration times appropriate for your threat model (default: 4 hours) +- Enable database SSL/TLS in production (`sslmode=require` or `sslmode=verify-full`) +- Restrict network access to `/metrics` endpoint +- Forward audit logs to SIEM/log aggregation for long-term retention +- Disable CORS unless browser-based access is explicitly required + +For comprehensive production security guidance, see [Security Hardening Guide](../operations/security-hardening.md). ## ⚠️ Known limitations @@ -58,6 +66,8 @@ Secrets is designed for practical defense-in-depth around secret storage and cry ## See also +- [Security hardening guide](../operations/security-hardening.md) +- [Production deployment](../operations/production.md) - [Architecture](architecture.md) - [Authentication API](../api/authentication.md) - [Policies cookbook](../api/policies.md) diff --git a/docs/configuration/environment-variables.md b/docs/configuration/environment-variables.md index 9217497..f92c3f2 100644 --- a/docs/configuration/environment-variables.md +++ b/docs/configuration/environment-variables.md @@ -1,41 +1,220 @@ # ⚙️ Environment Variables -> Last updated: 2026-02-16 +> Last updated: 2026-02-19 Secrets is configured through environment variables. 
## Core configuration ```dotenv +# Database configuration DB_DRIVER=postgres DB_CONNECTION_STRING=postgres://user:password@localhost:5432/mydb?sslmode=disable DB_MAX_OPEN_CONNECTIONS=25 DB_MAX_IDLE_CONNECTIONS=5 DB_CONN_MAX_LIFETIME=5 +# Server configuration SERVER_HOST=0.0.0.0 SERVER_PORT=8080 LOG_LEVEL=info +# Master key configuration MASTER_KEYS=default:BASE64_32_BYTE_KEY ACTIVE_MASTER_KEY_ID=default -AUTH_TOKEN_EXPIRATION_SECONDS=86400 +# Authentication configuration +AUTH_TOKEN_EXPIRATION_SECONDS=14400 +# Rate limiting configuration +RATE_LIMIT_ENABLED=true +RATE_LIMIT_REQUESTS_PER_SEC=10.0 +RATE_LIMIT_BURST=20 + +# CORS configuration +CORS_ENABLED=false +CORS_ALLOW_ORIGINS= + +# Metrics configuration METRICS_ENABLED=true METRICS_NAMESPACE=secrets ``` -## Notes +## Database configuration + +### DB_DRIVER + +Database driver to use. Supported values: `postgres`, `mysql`. + +### DB_CONNECTION_STRING + +Database connection string. + +**⚠️ Security Warning:** `sslmode=disable` (PostgreSQL) and `tls=false` (MySQL) are for **development only**. Production deployments **must** use encrypted connections: + +**PostgreSQL production:** + +```dotenv +# Minimum: encrypted connection +DB_CONNECTION_STRING=postgres://user:password@db.example.com:5432/secrets?sslmode=require + +# Recommended: encrypted connection with certificate verification +DB_CONNECTION_STRING=postgres://user:password@db.example.com:5432/secrets?sslmode=verify-full&sslrootcert=/path/to/ca.crt +``` + +**MySQL production:** + +```dotenv +# Minimum: encrypted connection +DB_CONNECTION_STRING=user:password@tcp(db.example.com:3306)/secrets?tls=true + +# Recommended: encrypted connection with certificate verification +DB_CONNECTION_STRING=user:password@tcp(db.example.com:3306)/secrets?tls=custom +``` + +See [Security Hardening Guide](../operations/security-hardening.md#2-database-security) for complete guidance. + +### DB_MAX_OPEN_CONNECTIONS + +Maximum number of open database connections (default: `25`). 
+ +### DB_MAX_IDLE_CONNECTIONS + +Maximum number of idle database connections (default: `5`). + +### DB_CONN_MAX_LIFETIME + +Maximum lifetime of a connection in minutes (default: `5`). + +## Server configuration + +### SERVER_HOST + +Host address to bind the HTTP server (default: `0.0.0.0`). + +### SERVER_PORT + +Port to bind the HTTP server (default: `8080`). + +### LOG_LEVEL + +Logging level. Supported values: `debug`, `info`, `warn`, `error` (default: `info`). + +## Master key configuration + +### MASTER_KEYS + +Comma-separated list of master keys in format `id1:base64key1,id2:base64key2`. -- 🔐 `MASTER_KEYS` format is `id1:base64key1,id2:base64key2` - 📏 Each master key must represent exactly 32 bytes (256 bits) -- ⭐ `ACTIVE_MASTER_KEY_ID` selects which master key encrypts new KEKs -- ⏱️ `AUTH_TOKEN_EXPIRATION_SECONDS` defaults to 24h behavior when set to `86400` -- 📊 `METRICS_ENABLED` enables/disables OpenTelemetry metrics collection (default: `true`) -- 🏷️ `METRICS_NAMESPACE` sets the prefix for all metric names (default: `secrets`) -- 📉 When `METRICS_ENABLED=false`, HTTP metrics middleware and the `/metrics` route are disabled -- 🔄 After changing `MASTER_KEYS` or `ACTIVE_MASTER_KEY_ID`, restart API servers to load new values +- 🔐 Store in secrets manager, never commit to source control +- 🔄 After changing `MASTER_KEYS`, restart API servers to load new values + +**Example:** + +```dotenv +MASTER_KEYS=default:A1B2C3D4E5F6G7H8I9J0K1L2M3N4O5P6Q7R8S9T0U1V2W3X4Y5Z6== +``` + +### ACTIVE_MASTER_KEY_ID + +ID of the master key to use for encrypting new KEKs (default: `default`). + +- ⭐ Must match one of the IDs in `MASTER_KEYS` +- 🔄 After changing `ACTIVE_MASTER_KEY_ID`, restart API servers to load new value + +## Authentication configuration + +### AUTH_TOKEN_EXPIRATION_SECONDS + +Token expiration time in seconds (default: `14400` - 4 hours). + +**⚠️ Migration Note:** Prior to v0.5.0, the default was 86400 seconds (24 hours). 
Review your token expiration settings and client refresh logic when upgrading from v0.4.x. + +**Recommended settings:** + +- High-security environments: `3600` (1 hour) +- Standard deployments: `14400` (4 hours) - **default** +- Low-security environments: `86400` (24 hours) + +## Rate limiting configuration + +### RATE_LIMIT_ENABLED + +Enable per-client rate limiting (default: `true`). + +**Security Note:** Rate limiting protects against abuse and denial-of-service attacks. Disable only for testing or if rate limiting is handled at a different layer. + +### RATE_LIMIT_REQUESTS_PER_SEC + +Maximum requests per second per authenticated client (default: `10.0`). + +**Recommended settings:** + +- High-volume API: `50.0` +- Standard application: `10.0` - **default** +- Sensitive operations: `1.0` + +### RATE_LIMIT_BURST + +Burst capacity for rate limiting (default: `20`). + +Allows clients to temporarily exceed `RATE_LIMIT_REQUESTS_PER_SEC` up to the burst limit. + +**Example:** With `RATE_LIMIT_REQUESTS_PER_SEC=10.0` and `RATE_LIMIT_BURST=20`, a client can make 20 requests instantly, then sustain 10 requests/second. + +### Production presets (starting points) + +| Profile | RATE_LIMIT_REQUESTS_PER_SEC | RATE_LIMIT_BURST | Typical use case | +| --- | --- | --- | --- | +| Conservative | `5.0` | `10` | Admin-heavy or sensitive workloads | +| Standard (default) | `10.0` | `20` | Most service-to-service integrations | +| High-throughput | `50.0` | `100` | High-volume internal API clients | + +Tune based on observed `429` rates and client retry behavior. + +## CORS configuration + +### CORS_ENABLED + +Enable Cross-Origin Resource Sharing (default: `false`). + +**⚠️ Security Warning:** CORS is **disabled by default** because Secrets is designed as a server-to-server API. Enable only if browser-based access is required (e.g., single-page applications). Consider using a backend-for-frontend (BFF) pattern instead of exposing the API directly to browsers. 
+ +### CORS_ALLOW_ORIGINS + +Comma-separated list of allowed origins for CORS requests. + +**Security Best Practices:** + +- Never use `*` (wildcard) in production +- List exact origins: `https://app.example.com,https://admin.example.com` +- Include protocol, domain, and port +- Review and prune origins quarterly + +**Example:** + +```dotenv +CORS_ENABLED=true +CORS_ALLOW_ORIGINS=https://app.example.com,https://admin.example.com +``` + +## Metrics configuration + +### METRICS_ENABLED + +Enable OpenTelemetry metrics collection (default: `true`). + +- 📊 When enabled, exposes `/metrics` endpoint in Prometheus format +- 📉 When disabled, HTTP metrics middleware and `/metrics` route are disabled + +**⚠️ Security Warning:** If metrics are enabled, restrict access to the `/metrics` endpoint using network policies or reverse proxy authentication. Never expose `/metrics` to the public internet. + +### METRICS_NAMESPACE + +Prefix for all metric names (default: `secrets`). + +**Example:** With `METRICS_NAMESPACE=secrets`, metrics are named `secrets_http_requests_total`, `secrets_http_request_duration_seconds`, etc. 
## Master key generation @@ -51,8 +230,9 @@ docker run --rm allisson/secrets:latest create-master-key --id default ## See also +- [Security hardening guide](../operations/security-hardening.md) +- [Production operations](../operations/production.md) - [Monitoring](../operations/monitoring.md) - [Docker getting started](../getting-started/docker.md) - [Local development](../getting-started/local-development.md) -- [Production operations](../operations/production.md) - [Testing guide](../development/testing.md) diff --git a/docs/contributing.md b/docs/contributing.md index e0bb6e0..eb6ac49 100644 --- a/docs/contributing.md +++ b/docs/contributing.md @@ -144,5 +144,7 @@ CI includes an API/docs guard for pull requests: - [Documentation index](README.md) - [Testing guide](development/testing.md) +- [Docs release checklist](development/docs-release-checklist.md) +- [Docs architecture map](development/docs-architecture-map.md) - [Changelog](CHANGELOG.md) - [Local development](getting-started/local-development.md) diff --git a/docs/development/docs-architecture-map.md b/docs/development/docs-architecture-map.md new file mode 100644 index 0000000..2277da8 --- /dev/null +++ b/docs/development/docs-architecture-map.md @@ -0,0 +1,47 @@ +# 🗺️ Docs Architecture Map + +> Last updated: 2026-02-19 + +This page defines canonical vs supporting docs to reduce duplication and drift. 
+ +## Canonical Sources + +| Topic | Canonical document | +| --- | --- | +| Release and API label metadata | `docs/metadata.json` | +| API contract subset | `docs/openapi.yaml` | +| Capability-to-endpoint mapping | `docs/api/capability-matrix.md` | +| Authorization path matcher semantics | `docs/api/policies.md` | +| Runtime env configuration | `docs/configuration/environment-variables.md` | +| Production security posture | `docs/operations/security-hardening.md` | +| Release narrative | `docs/releases/vX.Y.Z.md` | + +## Supporting Documents + +| Area | Supporting docs | +| --- | --- | +| Onboarding | `docs/getting-started/*.md` | +| Endpoint behavior details | `docs/api/*.md` | +| Operations runbooks | `docs/operations/*.md` | +| Integration snippets | `docs/examples/*.md` | +| Docs process and governance | `docs/contributing.md`, `docs/development/*.md` | + +## Sync Rules + +1. Update canonical source first +2. Propagate essential deltas to supporting docs +3. Update `docs/CHANGELOG.md` for significant docs updates +4. Run docs checks before merge + +## Drift Signals + +- Endpoint docs disagree with capability matrix +- Release references disagree with `docs/metadata.json` +- Examples use old response/error semantics +- Troubleshooting behavior diverges from runbooks + +## See also + +- [Documentation contributing guide](../contributing.md) +- [Docs release checklist](docs-release-checklist.md) +- [Documentation index](../README.md) diff --git a/docs/development/docs-release-checklist.md b/docs/development/docs-release-checklist.md new file mode 100644 index 0000000..8f75d59 --- /dev/null +++ b/docs/development/docs-release-checklist.md @@ -0,0 +1,65 @@ +# 🧾 Docs Release Checklist + +> Last updated: 2026-02-19 + +Use this checklist for each release (`vX.Y.Z`) to keep docs consistent and navigable. 
+ +## 1) Metadata and release labels + +- Update `docs/metadata.json`: + - `current_release` + - `last_docs_refresh` +- Ensure `README.md` and `docs/README.md` reflect the same current release + +## 2) Release pages + +- Add release notes: `docs/releases/vX.Y.Z.md` +- Add upgrade guide when behavior/defaults change: `docs/releases/vX.Y.Z-upgrade.md` +- Update release compatibility matrix: `docs/releases/compatibility-matrix.md` +- Promote new release links in docs indexes and operator runbooks + +## 3) API contract and examples + +- Update endpoint docs under `docs/api/*.md` for behavior/status changes +- Update `docs/openapi.yaml` for request/response changes +- Include `429` + `Retry-After` contract where protected routes can throttle +- Update at least curl plus one SDK/runtime example (`python`, `javascript`, or `go`) + +## 4) Operations and runbooks + +- Update `docs/getting-started/*` for default/config changes +- Update `docs/getting-started/troubleshooting.md` for new failure modes +- Update `docs/operations/*` guidance for production impact + +## 5) Changelogs and navigation + +- Update project changelog (`CHANGELOG.md`) for release-level behavior +- Update docs changelog (`docs/CHANGELOG.md`) for docs scope/process updates +- Verify links from: + - `README.md` + - `docs/README.md` + - `docs/operations/runbook-index.md` + +## 6) Validation before merge + +Run: + +```bash +make docs-lint +make docs-check-examples +make docs-check-metadata +``` + +CI should also validate: + +- markdown lint and link checks +- docs metadata consistency +- OpenAPI validity +- release docs link guard for new `docs/releases/vX.Y.Z.md` additions + +## See also + +- [Documentation contributing guide](../contributing.md) +- [Documentation changelog](../CHANGELOG.md) +- [API compatibility policy](../api/versioning-policy.md) +- [Production rollout golden path](../operations/production-rollout.md) diff --git a/docs/examples/curl.md b/docs/examples/curl.md index eb45d47..ca9a6c3 
100644 --- a/docs/examples/curl.md +++ b/docs/examples/curl.md @@ -30,6 +30,46 @@ TOKEN=$(curl -s -X POST "$BASE_URL/v1/token" \ -d "{\"client_id\":\"$CLIENT_ID\",\"client_secret\":\"$CLIENT_SECRET\"}" | jq -r .token) ``` +## 1.1) Optional retry wrapper for `429` + +```bash +request_with_retry() { + local method="$1" + local url="$2" + local body="${3:-}" + local attempt=0 + + while [ "$attempt" -lt 5 ]; do + attempt=$((attempt + 1)) + local headers_file + headers_file=$(mktemp) + + local status + if [ -n "$body" ]; then + status=$(curl -s -o /tmp/resp.json -D "$headers_file" -w "%{http_code}" -X "$method" "$url" \ + -H "Authorization: Bearer $TOKEN" -H "Content-Type: application/json" -d "$body") + else + status=$(curl -s -o /tmp/resp.json -D "$headers_file" -w "%{http_code}" -X "$method" "$url" \ + -H "Authorization: Bearer $TOKEN") + fi + + if [ "$status" != "429" ]; then + rm -f "$headers_file" + cat /tmp/resp.json + return 0 + fi + + local retry_after + retry_after=$(awk 'tolower($1)=="retry-after:" {print $2}' "$headers_file" | tr -d '\r') + rm -f "$headers_file" + sleep "${retry_after:-1}" + done + + echo "request failed after retries" >&2 + return 1 +} +``` + ## 2) Write secret ```bash @@ -143,6 +183,7 @@ Deterministic caveat: - Missing `Bearer` prefix in `Authorization` header - Using create repeatedly for same transit key name instead of rotate after `409` - Sending token in URL path for tokenization lifecycle endpoints (the API expects token in JSON body) +- Ignoring `429` and retrying immediately instead of honoring `Retry-After` ## See also @@ -150,3 +191,4 @@ Deterministic caveat: - [Secrets API](../api/secrets.md) - [Transit API](../api/transit.md) - [Clients API](../api/clients.md) +- [API rate limiting](../api/rate-limiting.md) diff --git a/docs/examples/go.md b/docs/examples/go.md index 8fc44f9..6762880 100644 --- a/docs/examples/go.md +++ b/docs/examples/go.md @@ -27,8 +27,11 @@ import ( "encoding/json" "fmt" "io" + "math/rand" "net/http" "os" + 
"strconv" + "time" ) var baseURL = envOrDefault("BASE_URL", "http://localhost:8080") @@ -107,7 +110,7 @@ func createSecret(token, path, value string) error { req.Header.Set("Content-Type", "application/json") req.Header.Set("Authorization", "Bearer "+token) - resp, err := http.DefaultClient.Do(req) + resp, err := doWithRetry(req) if err != nil { return err } @@ -120,6 +123,42 @@ func createSecret(token, path, value string) error { return nil } +func doWithRetry(req *http.Request) (*http.Response, error) { + client := http.DefaultClient + + for attempt := 0; attempt < 5; attempt++ { + cloned := req.Clone(req.Context()) + if req.GetBody != nil { + body, err := req.GetBody() + if err != nil { + return nil, err + } + cloned.Body = body + } + + resp, err := client.Do(cloned) + if err != nil { + return nil, err + } + + if resp.StatusCode != http.StatusTooManyRequests { + return resp, nil + } + + retryAfter := 1 + if value := resp.Header.Get("Retry-After"); value != "" { + if parsed, err := strconv.Atoi(value); err == nil && parsed > 0 { + retryAfter = parsed + } + } + _ = resp.Body.Close() + jitter := time.Duration(rand.Intn(500)) * time.Millisecond + time.Sleep(time.Duration(retryAfter)*time.Second + jitter) + } + + return nil, fmt.Errorf("request failed after retry budget") +} + func transitEncrypt(token, keyName, plaintext string) (string, error) { _ = createTransitKey(token, keyName) @@ -220,6 +259,10 @@ Deterministic caveat: - Keys configured as deterministic can emit the same token for the same plaintext under the same active key. - Use deterministic mode only when your workflow requires equality matching. 
+Rate-limit note: + +- For protected endpoints, retry `429` with `Retry-After` plus jittered backoff + ## Common Mistakes - Posting raw strings instead of base64-encoded fields for secrets/transit payloads @@ -227,6 +270,7 @@ Deterministic caveat: - Missing bearer token header on one request in a multi-step flow - Ignoring `409 Conflict` on transit create and not switching to rotate logic - Sending tokenization token in URL path instead of JSON body for `detokenize`, `validate`, and `revoke` +- Retrying immediately after `429` without honoring `Retry-After` ## See also @@ -235,3 +279,4 @@ Deterministic caveat: - [Transit API](../api/transit.md) - [Tokenization API](../api/tokenization.md) - [Response shapes](../api/response-shapes.md) +- [API rate limiting](../api/rate-limiting.md) diff --git a/docs/examples/javascript.md b/docs/examples/javascript.md index ee8c090..7b1f611 100644 --- a/docs/examples/javascript.md +++ b/docs/examples/javascript.md @@ -26,6 +26,29 @@ const CLIENT_SECRET = process.env.CLIENT_SECRET || ""; const toBase64 = (value) => Buffer.from(value, "utf8").toString("base64"); +async function postWithRetry(path, token, body, maxAttempts = 5) { + for (let attempt = 1; attempt <= maxAttempts; attempt += 1) { + const response = await fetch(`${BASE_URL}${path}`, { + method: "POST", + headers: { + "Content-Type": "application/json", + Authorization: `Bearer ${token}`, + }, + body: JSON.stringify(body), + }); + + if (response.status !== 429) { + return response; + } + + const retryAfter = Number(response.headers.get("Retry-After") || "1"); + const jitterMs = Math.floor(Math.random() * 500); + await new Promise((resolve) => setTimeout(resolve, retryAfter * 1000 + jitterMs)); + } + + throw new Error("request failed after retry budget"); +} + async function issueToken() { const response = await fetch(`${BASE_URL}/v1/token`, { method: "POST", @@ -147,6 +170,10 @@ Deterministic caveat: - With `is_deterministic: true`, tokenizing the same plaintext with the 
same active key can produce the same token. - Prefer non-deterministic mode unless stable equality matching is required. +Rate-limit note: + +- For protected endpoints, honor `Retry-After` on `429` with exponential backoff + jitter + ## Common Mistakes - Sending UTF-8 plaintext directly instead of base64 in transit/secrets payloads @@ -154,6 +181,7 @@ Deterministic caveat: - Missing `Authorization: Bearer ` header on protected endpoints - Reusing transit create for existing keys without fallback to rotate on `409` - Sending tokenization token in URL path instead of JSON body for `detokenize`, `validate`, and `revoke` +- Retrying immediately after `429` without delay/jitter ## See also @@ -162,3 +190,4 @@ Deterministic caveat: - [Transit API](../api/transit.md) - [Tokenization API](../api/tokenization.md) - [Response shapes](../api/response-shapes.md) +- [API rate limiting](../api/rate-limiting.md) diff --git a/docs/examples/python.md b/docs/examples/python.md index 80d5a77..1a7583d 100644 --- a/docs/examples/python.md +++ b/docs/examples/python.md @@ -22,6 +22,8 @@ export CLIENT_SECRET="" ```python import base64 import os +import random +import time import requests BASE_URL = os.getenv("BASE_URL", "http://localhost:8080") @@ -43,6 +45,19 @@ def issue_token() -> str: return response.json()["token"] +def post_with_retry(url: str, headers: dict[str, str], payload: dict, timeout: int = 10) -> requests.Response: + for attempt in range(5): + response = requests.post(url, headers=headers, json=payload, timeout=timeout) + if response.status_code != 429: + return response + + retry_after = int(response.headers.get("Retry-After", "1")) + jitter = random.uniform(0.0, 0.5) + time.sleep(retry_after + jitter) + + return response + + def create_secret(token: str) -> None: headers = {"Authorization": f"Bearer {token}"} response = requests.post( @@ -137,6 +152,10 @@ Deterministic caveat: - If you create a key with `is_deterministic=True`, repeated tokenization of identical 
plaintext can return the same token. - Use deterministic mode only when equality matching is a functional requirement. +Rate-limit note: + +- For protected endpoints, prefer retry logic that honors `Retry-After` on `429` (see `post_with_retry` helper above) + ## Common Mistakes - Passing raw plaintext instead of base64-encoded `value`/`plaintext` @@ -144,6 +163,7 @@ Deterministic caveat: - Forgetting `Bearer` prefix in `Authorization` header - Retrying transit create for an existing key name instead of handling `409` with rotate - Sending tokenization token in URL path instead of JSON body for `detokenize`, `validate`, and `revoke` +- Retrying immediately after `429` without backoff/jitter ## See also @@ -152,3 +172,4 @@ Deterministic caveat: - [Transit API](../api/transit.md) - [Tokenization API](../api/tokenization.md) - [Response shapes](../api/response-shapes.md) +- [API rate limiting](../api/rate-limiting.md) diff --git a/docs/getting-started/docker.md b/docs/getting-started/docker.md index 39f67ab..4a00d5e 100644 --- a/docs/getting-started/docker.md +++ b/docs/getting-started/docker.md @@ -4,15 +4,25 @@ This is the default way to run Secrets. -For release reproducibility, this guide uses the pinned image tag `allisson/secrets:v0.4.1`. +For release reproducibility, this guide uses the pinned image tag `allisson/secrets:v0.5.0`. You can use `allisson/secrets:latest` for fast iteration. +**⚠️ Security Warning:** This guide is for **development and testing only**. For production deployments, see [Security Hardening Guide](../operations/security-hardening.md) and [Production Deployment Guide](../operations/production.md). + +## v0.5.0 Security Defaults + +- `AUTH_TOKEN_EXPIRATION_SECONDS` default is now `14400` (4 hours) +- `RATE_LIMIT_ENABLED` default is `true` (per authenticated client) +- `CORS_ENABLED` default is `false` + +If upgrading from `v0.4.x`, review [v0.5.0 upgrade guide](../releases/v0.5.0-upgrade.md). 
+ ## ⚡ Quickstart Copy Block Use this minimal flow when you just want to get a working instance quickly: ```bash -docker pull allisson/secrets:v0.4.1 +docker pull allisson/secrets:v0.5.0 docker network create secrets-net || true docker run -d --name secrets-postgres --network secrets-net \ @@ -21,19 +31,19 @@ docker run -d --name secrets-postgres --network secrets-net \ -e POSTGRES_DB=mydb \ postgres:16-alpine -docker run --rm allisson/secrets:v0.4.1 create-master-key --id default +docker run --rm allisson/secrets:v0.5.0 create-master-key --id default # copy generated MASTER_KEYS and ACTIVE_MASTER_KEY_ID into .env -docker run --rm --network secrets-net --env-file .env allisson/secrets:v0.4.1 migrate -docker run --rm --network secrets-net --env-file .env allisson/secrets:v0.4.1 create-kek --algorithm aes-gcm +docker run --rm --network secrets-net --env-file .env allisson/secrets:v0.5.0 migrate +docker run --rm --network secrets-net --env-file .env allisson/secrets:v0.5.0 create-kek --algorithm aes-gcm docker run --rm --name secrets-api --network secrets-net --env-file .env -p 8080:8080 \ - allisson/secrets:v0.4.1 server + allisson/secrets:v0.5.0 server ``` ## 1) Pull the image ```bash -docker pull allisson/secrets:v0.4.1 +docker pull allisson/secrets:v0.5.0 ``` ## 2) Start PostgreSQL @@ -51,7 +61,7 @@ docker run -d --name secrets-postgres --network secrets-net \ ## 3) Generate a master key ```bash -docker run --rm allisson/secrets:v0.4.1 create-master-key --id default +docker run --rm allisson/secrets:v0.5.0 create-master-key --id default ``` Copy the generated values into a local `.env` file. 
@@ -73,7 +83,7 @@ LOG_LEVEL=info MASTER_KEYS=default:REPLACE_WITH_BASE64_32_BYTE_KEY ACTIVE_MASTER_KEY_ID=default -AUTH_TOKEN_EXPIRATION_SECONDS=86400 +AUTH_TOKEN_EXPIRATION_SECONDS=14400 METRICS_ENABLED=true METRICS_NAMESPACE=secrets @@ -83,15 +93,15 @@ EOF ## 5) Run migrations and bootstrap KEK ```bash -docker run --rm --network secrets-net --env-file .env allisson/secrets:v0.4.1 migrate -docker run --rm --network secrets-net --env-file .env allisson/secrets:v0.4.1 create-kek --algorithm aes-gcm +docker run --rm --network secrets-net --env-file .env allisson/secrets:v0.5.0 migrate +docker run --rm --network secrets-net --env-file .env allisson/secrets:v0.5.0 create-kek --algorithm aes-gcm ``` ## 6) Start the API server ```bash docker run --rm --name secrets-api --network secrets-net --env-file .env -p 8080:8080 \ - allisson/secrets:v0.4.1 server + allisson/secrets:v0.5.0 server ``` ## 7) Verify @@ -111,7 +121,7 @@ Expected: Use the CLI command to create your first API client and policy set: ```bash -docker run --rm --network secrets-net --env-file .env allisson/secrets:v0.4.1 create-client \ +docker run --rm --network secrets-net --env-file .env allisson/secrets:v0.5.0 create-client \ --name bootstrap-admin \ --active \ --policies '[{"path":"*","capabilities":["read","write","delete","encrypt","decrypt","rotate"]}]' \ diff --git a/docs/getting-started/local-development.md b/docs/getting-started/local-development.md index e83f450..75ae86a 100644 --- a/docs/getting-started/local-development.md +++ b/docs/getting-started/local-development.md @@ -1,9 +1,19 @@ # 💻 Run Locally (Development) -> Last updated: 2026-02-14 +> Last updated: 2026-02-19 Use this path if you want to modify the source code and run from your workstation. +**⚠️ Security Warning:** This guide is for **development and testing only**. For production deployments, see [Security Hardening Guide](../operations/security-hardening.md) and [Production Deployment Guide](../operations/production.md). 
+ +## v0.5.0 Security Defaults + +- `AUTH_TOKEN_EXPIRATION_SECONDS` default is now `14400` (4 hours) +- `RATE_LIMIT_ENABLED` default is `true` (per authenticated client) +- `CORS_ENABLED` default is `false` + +If upgrading from `v0.4.x`, review [v0.5.0 upgrade guide](../releases/v0.5.0-upgrade.md). + ## Prerequisites - Go 1.25+ diff --git a/docs/getting-started/smoke-test.md b/docs/getting-started/smoke-test.md index dff91e3..4936d23 100644 --- a/docs/getting-started/smoke-test.md +++ b/docs/getting-started/smoke-test.md @@ -56,5 +56,5 @@ If transit decrypt fails with `422`, see [Troubleshooting](troubleshooting.md#42 - [Docker getting started](docker.md) - [Local development](local-development.md) - [Troubleshooting](troubleshooting.md) -- [v0.4.1 release notes](../releases/v0.4.1.md) +- [v0.5.0 release notes](../releases/v0.5.0.md) - [Curl examples](../examples/curl.md) diff --git a/docs/getting-started/troubleshooting.md b/docs/getting-started/troubleshooting.md index 6a78361..b5c273a 100644 --- a/docs/getting-started/troubleshooting.md +++ b/docs/getting-started/troubleshooting.md @@ -12,10 +12,12 @@ Use this quick route before diving into detailed sections: 2. Token endpoint (`POST /v1/token`) returns `401`/`403` -> go to `401 Unauthorized` or `Token issuance fails with valid-looking credentials` 3. API requests return `403` with valid token -> go to `403 Forbidden` (policy/capability mismatch) 4. API requests return `422` -> go to `422 Unprocessable Entity` (payload/query format) -5. After rotating keys behavior is stale -> go to `Rotation completed but server still uses old key context` -6. Startup fails with key config errors -> go to `Missing or Invalid Master Keys` -7. Monitoring data is missing -> go to `Metrics Troubleshooting Matrix` -8. Tokenization endpoints fail after upgrade -> go to `Tokenization migration verification` +5. API requests return `429` -> go to `429 Too Many Requests` (rate limiting) +6. 
Browser calls fail before API handler -> go to `CORS and preflight failures` +7. After rotating keys behavior is stale -> go to `Rotation completed but server still uses old key context` +8. Startup fails with key config errors -> go to `Missing or Invalid Master Keys` +9. Monitoring data is missing -> go to `Metrics Troubleshooting Matrix` +10. Tokenization endpoints fail after upgrade -> go to `Tokenization migration verification` ## 📑 Table of Contents @@ -23,6 +25,9 @@ Use this quick route before diving into detailed sections: - [403 Forbidden](#403-forbidden) - [409 Conflict](#409-conflict) - [422 Unprocessable Entity](#422-unprocessable-entity) +- [429 Too Many Requests](#429-too-many-requests) +- [CORS and preflight failures](#cors-and-preflight-failures) +- [CORS smoke checks (copy/paste)](#cors-smoke-checks-copypaste) - [Database connection failure](#database-connection-failure) - [Migration failure](#migration-failure) - [Missing or Invalid Master Keys](#missing-or-invalid-master-keys) @@ -99,6 +104,72 @@ Common 422 cases: - for transit decrypt, pass `ciphertext` exactly as returned by encrypt (`:`) - validate `offset`, `limit`, and RFC3339 timestamps on audit endpoints +## 429 Too Many Requests + +- Symptom: authenticated requests return `429` +- Likely cause: per-client rate limit exceeded +- Fix: + - check `Retry-After` response header and back off before retrying + - implement exponential backoff with jitter in client retry logic + - reduce request burst/concurrency from caller + - tune `RATE_LIMIT_REQUESTS_PER_SEC` and `RATE_LIMIT_BURST` if traffic is legitimate + +Quick note: + +- Rate limiting applies to authenticated API groups (`/v1/clients`, `/v1/secrets`, `/v1/transit`, `/v1/tokenization`, `/v1/audit-logs`) +- Rate limiting does not apply to `/health`, `/ready`, `/metrics`, and token issuance (`POST /v1/token`) + +## CORS and preflight failures + +- Symptom: browser requests fail on preflight (`OPTIONS`) or show CORS errors in console +- Likely 
cause: CORS disabled (default) or origin not listed in `CORS_ALLOW_ORIGINS` +- Fix: + - keep `CORS_ENABLED=false` for server-to-server usage + - if browser access is required, set `CORS_ENABLED=true` + - configure explicit origins in `CORS_ALLOW_ORIGINS` (comma-separated, no wildcard in production) + - confirm request origin exactly matches configured origin (scheme/host/port) + +Quick checks: + +- If token call succeeds from backend but browser fails before handler, this is usually CORS, not auth policy +- `403 Forbidden` indicates authorization policy denial; CORS failures usually happen at browser layer + +### CORS behavior matrix + +| Browser scenario | Expected result | Common misconfiguration | +| --- | --- | --- | +| `CORS_ENABLED=false`, same-origin app | Works (no cross-origin checks) | N/A | +| `CORS_ENABLED=false`, cross-origin app | Browser blocks request | Expecting browser access without enabling CORS | +| `CORS_ENABLED=true`, origin listed | Preflight and request succeed | Wrong scheme/port in origin list | +| `CORS_ENABLED=true`, origin missing | Browser blocks request | Origin not included in `CORS_ALLOW_ORIGINS` | +| `CORS_ENABLED=true`, wildcard in production | Works but insecure | Overly broad origin trust | + +## CORS smoke checks (copy/paste) + +Preflight request check: + +```bash +curl -i -X OPTIONS http://localhost:8080/v1/clients \ + -H "Origin: https://app.example.com" \ + -H "Access-Control-Request-Method: GET" \ + -H "Access-Control-Request-Headers: Authorization,Content-Type" +``` + +Expected when CORS is enabled and origin is allowed: + +- `204`/`200` preflight response +- `Access-Control-Allow-Origin: https://app.example.com` +- `Access-Control-Allow-Methods` includes requested method + +Simple cross-origin request header check: + +```bash +curl -i http://localhost:8080/health \ + -H "Origin: https://app.example.com" +``` + +If CORS is disabled or origin is not allowed, browser requests can fail even if raw curl succeeds. 
+ ## Database connection failure - Symptom: app fails at startup or migration with DB connection errors @@ -149,7 +220,7 @@ Common 422 cases: - Symptom: tokenization endpoints return `404`/`500` after upgrading to `v0.4.x` - Likely cause: tokenization migration (`000002_add_tokenization`) not applied or partially applied - Fix: - - run `./bin/app migrate` (or Docker `... allisson/secrets:v0.4.1 migrate`) + - run `./bin/app migrate` (or Docker `... allisson/secrets:v0.5.0 migrate`) - verify migration logs indicate `000002_add_tokenization` applied for your DB - confirm initial KEK exists (`create-kek` if missing) - re-run smoke flow for tokenization (`tokenize -> detokenize -> validate -> revoke`) diff --git a/docs/metadata.json b/docs/metadata.json index 53a0381..ac94999 100644 --- a/docs/metadata.json +++ b/docs/metadata.json @@ -1,5 +1,5 @@ { - "current_release": "v0.4.1", + "current_release": "v0.5.0", "api_version": "v1", "last_docs_refresh": "2026-02-19" } diff --git a/docs/openapi.yaml b/docs/openapi.yaml index ec3e303..7f6ca4f 100644 --- a/docs/openapi.yaml +++ b/docs/openapi.yaml @@ -72,6 +72,8 @@ paths: $ref: "#/components/responses/Unauthorized" "403": $ref: "#/components/responses/Forbidden" + "429": + $ref: "#/components/responses/TooManyRequests" post: tags: [clients] summary: Create client @@ -101,6 +103,8 @@ paths: $ref: "#/components/responses/Forbidden" "422": $ref: "#/components/responses/ValidationError" + "429": + $ref: "#/components/responses/TooManyRequests" /v1/secrets/{path}: parameters: - name: path @@ -126,6 +130,14 @@ paths: application/json: schema: $ref: "#/components/schemas/SecretWriteResponse" + "401": + $ref: "#/components/responses/Unauthorized" + "403": + $ref: "#/components/responses/Forbidden" + "422": + $ref: "#/components/responses/ValidationError" + "429": + $ref: "#/components/responses/TooManyRequests" get: tags: [secrets] summary: Read latest secret @@ -138,6 +150,18 @@ paths: application/json: schema: $ref: 
"#/components/schemas/SecretReadResponse" + "401": + $ref: "#/components/responses/Unauthorized" + "403": + $ref: "#/components/responses/Forbidden" + "404": + description: Secret not found + content: + application/json: + schema: + $ref: "#/components/schemas/ErrorResponse" + "429": + $ref: "#/components/responses/TooManyRequests" delete: tags: [secrets] summary: Delete latest secret version @@ -146,6 +170,18 @@ paths: responses: "204": description: Deleted + "401": + $ref: "#/components/responses/Unauthorized" + "403": + $ref: "#/components/responses/Forbidden" + "404": + description: Secret not found + content: + application/json: + schema: + $ref: "#/components/schemas/ErrorResponse" + "429": + $ref: "#/components/responses/TooManyRequests" /v1/transit/keys: post: tags: [transit] @@ -180,6 +216,8 @@ paths: message: transit key already exists "422": $ref: "#/components/responses/ValidationError" + "429": + $ref: "#/components/responses/TooManyRequests" /v1/transit/keys/{name}/encrypt: post: tags: [transit] @@ -215,6 +253,20 @@ paths: type: string version: type: integer + "401": + $ref: "#/components/responses/Unauthorized" + "403": + $ref: "#/components/responses/Forbidden" + "404": + description: Transit key not found + content: + application/json: + schema: + $ref: "#/components/schemas/ErrorResponse" + "422": + $ref: "#/components/responses/ValidationError" + "429": + $ref: "#/components/responses/TooManyRequests" /v1/transit/keys/{name}/decrypt: post: tags: [transit] @@ -269,6 +321,8 @@ paths: $ref: "#/components/schemas/ErrorResponse" "422": $ref: "#/components/responses/ValidationError" + "429": + $ref: "#/components/responses/TooManyRequests" /v1/tokenization/keys: post: tags: [tokenization] @@ -300,6 +354,8 @@ paths: $ref: "#/components/schemas/ErrorResponse" "422": $ref: "#/components/responses/ValidationError" + "429": + $ref: "#/components/responses/TooManyRequests" /v1/tokenization/keys/{name}/rotate: post: tags: [tokenization] @@ -337,6 +393,8 @@ 
paths: $ref: "#/components/schemas/ErrorResponse" "422": $ref: "#/components/responses/ValidationError" + "429": + $ref: "#/components/responses/TooManyRequests" /v1/tokenization/keys/{id}: delete: tags: [tokenization] @@ -365,6 +423,8 @@ paths: $ref: "#/components/schemas/ErrorResponse" "422": $ref: "#/components/responses/ValidationError" + "429": + $ref: "#/components/responses/TooManyRequests" /v1/tokenization/keys/{name}/tokenize: post: tags: [tokenization] @@ -402,6 +462,8 @@ paths: $ref: "#/components/schemas/ErrorResponse" "422": $ref: "#/components/responses/ValidationError" + "429": + $ref: "#/components/responses/TooManyRequests" /v1/tokenization/detokenize: post: tags: [tokenization] @@ -433,6 +495,8 @@ paths: $ref: "#/components/schemas/ErrorResponse" "422": $ref: "#/components/responses/ValidationError" + "429": + $ref: "#/components/responses/TooManyRequests" /v1/tokenization/validate: post: tags: [tokenization] @@ -458,6 +522,8 @@ paths: $ref: "#/components/responses/Forbidden" "422": $ref: "#/components/responses/ValidationError" + "429": + $ref: "#/components/responses/TooManyRequests" /v1/tokenization/revoke: post: tags: [tokenization] @@ -485,6 +551,8 @@ paths: $ref: "#/components/schemas/ErrorResponse" "422": $ref: "#/components/responses/ValidationError" + "429": + $ref: "#/components/responses/TooManyRequests" /v1/audit-logs: get: tags: [audit] @@ -513,6 +581,14 @@ paths: items: type: object additionalProperties: true + "401": + $ref: "#/components/responses/Unauthorized" + "403": + $ref: "#/components/responses/Forbidden" + "422": + $ref: "#/components/responses/ValidationError" + "429": + $ref: "#/components/responses/TooManyRequests" components: securitySchemes: bearerAuth: @@ -538,6 +614,21 @@ components: application/json: schema: $ref: "#/components/schemas/ErrorResponse" + TooManyRequests: + description: Rate limit exceeded + headers: + Retry-After: + description: Seconds to wait before retrying the request + schema: + type: string + 
example: "1" + content: + application/json: + schema: + $ref: "#/components/schemas/ErrorResponse" + example: + error: rate_limit_exceeded + message: Too many requests. Please retry after the specified delay. schemas: ErrorResponse: type: object diff --git a/docs/operations/dashboards/README.md b/docs/operations/dashboards/README.md new file mode 100644 index 0000000..0eb8734 --- /dev/null +++ b/docs/operations/dashboards/README.md @@ -0,0 +1,22 @@ +# 📈 Dashboard Artifacts + +> Last updated: 2026-02-19 + +This directory contains starter Grafana dashboard JSON artifacts for local bootstrap. + +## Artifacts + +- `secrets-overview.json`: baseline request/error/latency view +- `secrets-rate-limiting.json`: `429` behavior and throttle pressure view + +## Import + +1. Open Grafana +2. Go to Dashboards -> Import +3. Upload one of the JSON files from this directory +4. Select your Prometheus datasource + +## Notes + +- Treat these dashboards as starter templates +- Adjust panel thresholds and time windows for your traffic profile diff --git a/docs/operations/dashboards/secrets-overview.json b/docs/operations/dashboards/secrets-overview.json new file mode 100644 index 0000000..25a3f7c --- /dev/null +++ b/docs/operations/dashboards/secrets-overview.json @@ -0,0 +1,51 @@ +{ + "title": "Secrets Overview", + "schemaVersion": 39, + "version": 1, + "refresh": "30s", + "time": { + "from": "now-6h", + "to": "now" + }, + "templating": { + "list": [] + }, + "panels": [ + { + "type": "timeseries", + "title": "HTTP Requests/sec", + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 0 }, + "targets": [ + { + "refId": "A", + "expr": "sum(rate(secrets_http_requests_total[5m])) by (path)", + "legendFormat": "{{path}}" + } + ] + }, + { + "type": "timeseries", + "title": "HTTP Error Rate (5xx)", + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 0 }, + "targets": [ + { + "refId": "A", + "expr": "sum(rate(secrets_http_requests_total{status_code=~\"5..\"}[5m]))", + "legendFormat": "5xx" + } + ] + }, + { + 
"type": "timeseries", + "title": "p95 Latency", + "gridPos": { "h": 8, "w": 24, "x": 0, "y": 8 }, + "targets": [ + { + "refId": "A", + "expr": "histogram_quantile(0.95, sum(rate(secrets_http_request_duration_seconds_bucket[5m])) by (le, path))", + "legendFormat": "{{path}}" + } + ] + } + ] +} diff --git a/docs/operations/dashboards/secrets-rate-limiting.json b/docs/operations/dashboards/secrets-rate-limiting.json new file mode 100644 index 0000000..df34fd8 --- /dev/null +++ b/docs/operations/dashboards/secrets-rate-limiting.json @@ -0,0 +1,56 @@ +{ + "title": "Secrets Rate Limiting", + "schemaVersion": 39, + "version": 1, + "refresh": "30s", + "time": { + "from": "now-6h", + "to": "now" + }, + "templating": { + "list": [] + }, + "panels": [ + { + "type": "timeseries", + "title": "429 Rate by Path", + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 0 }, + "targets": [ + { + "refId": "A", + "expr": "sum(rate(secrets_http_requests_total{status_code=\"429\"}[5m])) by (path)", + "legendFormat": "{{path}}" + } + ] + }, + { + "type": "timeseries", + "title": "429 Ratio Global", + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 0 }, + "targets": [ + { + "refId": "A", + "expr": "sum(rate(secrets_http_requests_total{status_code=\"429\"}[10m])) / sum(rate(secrets_http_requests_total[10m]))", + "legendFormat": "ratio" + } + ] + }, + { + "type": "timeseries", + "title": "403 vs 429", + "gridPos": { "h": 8, "w": 24, "x": 0, "y": 8 }, + "targets": [ + { + "refId": "A", + "expr": "sum(rate(secrets_http_requests_total{status_code=\"403\"}[5m]))", + "legendFormat": "403" + }, + { + "refId": "B", + "expr": "sum(rate(secrets_http_requests_total{status_code=\"429\"}[5m]))", + "legendFormat": "429" + } + ] + } + ] +} diff --git a/docs/operations/monitoring.md b/docs/operations/monitoring.md index 0059769..49efa3e 100644 --- a/docs/operations/monitoring.md +++ b/docs/operations/monitoring.md @@ -280,6 +280,34 @@ rate(secrets_operations_total{status="error"}[5m]) / rate(secrets_operations_tot 
topk(5, rate(secrets_operation_duration_seconds_sum[5m]) / rate(secrets_operation_duration_seconds_count[5m])) ``` +### Rate Limiting Observability Queries + +**429 rate by route (5m):** + +```promql +sum(rate(secrets_http_requests_total{status_code="429"}[5m])) by (path) +``` + +**429 ratio by route (5m):** + +```promql +sum(rate(secrets_http_requests_total{status_code="429"}[5m])) by (path) +/ +sum(rate(secrets_http_requests_total[5m])) by (path) +``` + +**Denied authorization rate (`403`) by route (5m):** + +```promql +sum(rate(secrets_http_requests_total{status_code="403"}[5m])) by (path) +``` + +Rate-limit interpretation notes: + +- Stable low-volume `429` can be normal under bursty workloads +- Rising `429` with rising latency usually indicates saturation or mis-tuned clients +- Tune `RATE_LIMIT_REQUESTS_PER_SEC` and `RATE_LIMIT_BURST` only after retry behavior is verified + ### Tokenization-focused Queries **Detokenize error rate (5m):** @@ -315,6 +343,12 @@ rate(secrets_operations_total{domain="tokenization",operation="cleanup_expired", ## Grafana Dashboard +Starter dashboard artifacts: + +- [Dashboard artifacts index](dashboards/README.md) +- [Secrets overview dashboard JSON](dashboards/secrets-overview.json) +- [Secrets rate-limiting dashboard JSON](dashboards/secrets-rate-limiting.json) + ### Recommended Panels 1. 
**Request Rate** - Line graph showing HTTP requests/sec @@ -333,6 +367,22 @@ sum(rate(secrets_http_requests_total[5m])) by (method, path) ### Recommended Alerts +### Ownership and Escalation Guidance + +| Alert class | Default severity | Primary owner | Escalate if unresolved | +| --- | --- | --- | --- | +| API availability / 5xx surge | `critical` | Platform/on-call | 10 minutes | +| Token issuance failures | `critical` | Platform + IAM owner | 10 minutes | +| Sustained `429` ratio | `warning` | Service owner + platform | 30 minutes | +| Elevated `403` denied rate | `warning` | Security + service owner | 30 minutes | +| Metrics scrape failures | `warning` | Observability owner | 30 minutes | + +Suggested escalation policy: + +1. Page primary owner immediately for `critical` +2. Notify secondary owner if not acknowledged in 5 minutes +3. Escalate to incident commander when SLA/SLO at risk + #### High Error Rate ```yaml @@ -359,6 +409,37 @@ sum(rate(secrets_http_requests_total[5m])) by (method, path) description: "95th percentile latency is {{ $value }}s" ``` +#### Excessive 429 Ratio + +```yaml +- alert: ExcessiveRateLimit429Ratio + expr: | + ( + sum(rate(secrets_http_requests_total{status_code="429"}[10m])) + / + sum(rate(secrets_http_requests_total[10m])) + ) > 0.05 + for: 10m + labels: + severity: warning + annotations: + summary: "High 429 ratio detected" + description: "More than 5% of requests are being throttled" +``` + +#### 429 Burst On Critical Routes + +```yaml +- alert: RateLimitBurstCriticalRoute + expr: sum(rate(secrets_http_requests_total{status_code="429",path=~"/v1/secrets/.*|/v1/transit/.*"}[5m])) > 2 + for: 5m + labels: + severity: warning + annotations: + summary: "Sustained 429 on secrets/transit routes" + description: "Critical crypto routes are being throttled above threshold" +``` + ## Disabling Metrics To disable metrics collection, set `METRICS_ENABLED=false` in your environment: @@ -405,6 +486,8 @@ When disabled: ## See Also - [Production 
Deployment](production.md) +- [Operator drills](operator-drills.md) - [Failure Playbooks](failure-playbooks.md) +- [API rate limiting](../api/rate-limiting.md) - [OpenTelemetry Documentation](https://opentelemetry.io/docs/) - [Prometheus Documentation](https://prometheus.io/docs/) diff --git a/docs/operations/operator-drills.md b/docs/operations/operator-drills.md new file mode 100644 index 0000000..69d9c4b --- /dev/null +++ b/docs/operations/operator-drills.md @@ -0,0 +1,45 @@ +# 🧯 Operator Drills (Quarterly) + +> Last updated: 2026-02-19 + +Use this page for quarterly game-day exercises that validate operational readiness. + +## Drill Catalog + +| Drill | Scenario | Primary runbooks | Evidence to collect | +| --- | --- | --- | --- | +| Credential compromise | Client secret leaked | `production.md`, `key-management.md`, `failure-playbooks.md` | revocation timeline, new client IDs, audit evidence | +| Key rotation under load | KEK/master-key rotation while traffic is active | `key-management.md`, `production-rollout.md` | rotation timestamps, restart logs, smoke checks | +| Traffic surge / throttling | Burst traffic causes `429` pressure | `monitoring.md`, `../api/rate-limiting.md` | `429` ratio, retry behavior, threshold decision | +| Database outage | DB unreachable / failover | `failure-playbooks.md`, `production.md` | outage timeline, failover duration, restore checks | + +## Quarterly Execution Template + +1. Pick one drill owner and one incident commander +2. Define blast radius and rollback boundary +3. Execute drill in staging (or prod shadow) with fixed timebox +4. Capture metrics, logs, and runbook deviations +5. 
Produce remediation actions with owners and due dates + +## Pass Criteria + +- Critical runbooks are executable without undocumented tribal knowledge +- On-call can identify root cause and containment path within target SLA +- Recovery path is validated by health checks and smoke tests +- Postmortem includes at least one docs/process improvement item + +## Evidence Checklist + +- Timeline with UTC timestamps +- Request IDs for key failure and recovery events +- Alert timeline (fired, acknowledged, resolved) +- Commands executed and operator decisions +- Follow-up tickets and target completion dates + +## See also + +- [Production rollout golden path](production-rollout.md) +- [Production deployment guide](production.md) +- [Failure playbooks](failure-playbooks.md) +- [Monitoring](monitoring.md) +- [Troubleshooting](../getting-started/troubleshooting.md) diff --git a/docs/operations/policy-smoke-tests.md b/docs/operations/policy-smoke-tests.md index 1314189..fb07c99 100644 --- a/docs/operations/policy-smoke-tests.md +++ b/docs/operations/policy-smoke-tests.md @@ -161,6 +161,13 @@ Expected: - Assert expected status pairs (allow vs deny) - Run after policy deployment but before traffic cutover +Pre-deploy automation pattern: + +1. Run static policy lint checks (JSON shape, wildcard rules, capability allow-list) +2. Deploy policy to staging +3. Run allow/deny smoke assertions from this page +4. Block production rollout on first mismatch + Optional strict CI mode: ```bash @@ -187,6 +194,24 @@ GitHub Actions example: # Run commands from this page and fail on first mismatch. 
``` +Scripted wrapper example: + +```bash +#!/usr/bin/env bash +set -euo pipefail + +echo "[1/3] issuing allow/deny tokens" +# insert token issuance block from this page + +echo "[2/3] running allow/deny assertions" +# insert capability checks from this page + +echo "[3/3] verifying denied audit events" +# insert audit verification block from this page + +echo "policy smoke suite: PASS" +``` + ## See also - [Capability matrix](../api/capability-matrix.md) diff --git a/docs/operations/production-rollout.md b/docs/operations/production-rollout.md new file mode 100644 index 0000000..329968a --- /dev/null +++ b/docs/operations/production-rollout.md @@ -0,0 +1,87 @@ +# 🚀 Production Rollout Golden Path + +> Last updated: 2026-02-19 + +Use this runbook for a standard production rollout with verification and rollback checkpoints. + +## Scope + +- Deploy target: Secrets `v0.5.0` +- Database schema changes: run migrations before traffic cutover +- Crypto bootstrap: ensure initial KEK exists for write/encrypt flows + +## Golden Path + +1. Deploy new image/binary to staging/prod environment +2. Run migrations once per environment +3. Verify KEK presence (create only if first bootstrap) +4. Start/roll API instances with health checks +5. Execute smoke checks and policy checks +6. 
Shift traffic gradually and monitor 4xx/5xx/latency + +## Copy/Paste Rollout Commands + +```bash +# 1) Pull target release +docker pull allisson/secrets:v0.5.0 + +# 2) Run migrations +docker run --rm --network secrets-net --env-file .env allisson/secrets:v0.5.0 migrate + +# 3) Bootstrap KEK only for first-time environment setup +docker run --rm --network secrets-net --env-file .env allisson/secrets:v0.5.0 create-kek --algorithm aes-gcm + +# 4) Start API +docker run --rm --name secrets-api --network secrets-net --env-file .env -p 8080:8080 \ + allisson/secrets:v0.5.0 server +``` + +## Verification Gates + +Gate A (before traffic): + +- `GET /health` returns `200` +- `GET /ready` returns `200` +- `POST /v1/token` returns `201` + +Gate B (functional): + +- Secrets flow write/read passes +- Transit encrypt/decrypt passes +- Tokenization flow (if enabled) passes + +Gate C (policy and observability): + +- Expected denied actions produce `403` +- Load behavior returns controlled `429` with `Retry-After` +- Metrics and logs ingest normally + +## Rollback Trigger Conditions + +- Sustained elevated `5xx` +- Widespread auth/token issuance failures +- Migration side effects not recoverable via config changes +- Data integrity concerns + +## Rollback Procedure (Binary/Image) + +1. Freeze rollout and stop new traffic shift +2. Roll API instances back to previous stable image +3. Keep additive migrations applied unless a validated DB rollback plan exists +4. Re-run health + smoke checks on rolled-back version +5. 
Capture incident notes and remediation actions + +## Post-Rollout Checklist + +- Confirm token expiration behavior matches configured policy +- Confirm CORS behavior matches expected browser/server mode +- Confirm rate limiting thresholds are appropriate for production traffic +- Schedule cleanup routines (`clean-audit-logs`, `clean-expired-tokens` if tokenization enabled) + +## See also + +- [Production deployment guide](production.md) +- [v0.5.0 release notes](../releases/v0.5.0.md) +- [v0.5.0 upgrade guide](../releases/v0.5.0-upgrade.md) +- [Release compatibility matrix](../releases/compatibility-matrix.md) +- [Smoke test guide](../getting-started/smoke-test.md) diff --git a/docs/operations/production.md b/docs/operations/production.md index 0394446..5d08f95 100644 --- a/docs/operations/production.md +++ b/docs/operations/production.md @@ -4,6 +4,8 @@ This guide covers baseline production hardening and operations for Secrets. +**For comprehensive security hardening, see [Security Hardening Guide](security-hardening.md).** + ## 📑 Table of Contents - [1) TLS and Reverse Proxy](#1-tls-and-reverse-proxy) @@ -14,6 +16,7 @@ This guide covers baseline production hardening and operations for Secrets. - [6) Retention Defaults](#6-retention-defaults) - [7) Incident Response Checklist](#7-incident-response-checklist) - [8) Go-Live Checklist](#8-go-live-checklist) +- [9) Golden Path Rollout (Recommended)](#9-golden-path-rollout-recommended) ## 1) TLS and Reverse Proxy @@ -156,13 +159,25 @@ Adjust retention to match your compliance and incident-response requirements. 
- [ ] Monitoring alerts configured - [ ] Incident response owner and process documented +## 9) Golden Path Rollout (Recommended) + +- Follow [Production rollout golden path](production-rollout.md) for step-by-step deployment, + verification gates, and rollback triggers +- Use [Release compatibility matrix](../releases/compatibility-matrix.md) before planning upgrades +- Keep [v0.5.0 upgrade guide](../releases/v0.5.0-upgrade.md) attached to rollout change tickets + ## See also +- [Security hardening guide](security-hardening.md) - [Key management operations](key-management.md) +- [Production rollout golden path](production-rollout.md) - [Operator runbook index](runbook-index.md) - [Monitoring](monitoring.md) +- [Operator drills (quarterly)](operator-drills.md) - [Policy smoke tests](policy-smoke-tests.md) -- [v0.4.1 release notes](../releases/v0.4.1.md) +- [v0.5.0 release notes](../releases/v0.5.0.md) +- [v0.5.0 upgrade guide](../releases/v0.5.0-upgrade.md) +- [Release compatibility matrix](../releases/compatibility-matrix.md) - [Environment variables](../configuration/environment-variables.md) - [Security model](../concepts/security-model.md) - [Troubleshooting](../getting-started/troubleshooting.md) diff --git a/docs/operations/runbook-index.md b/docs/operations/runbook-index.md index 8bd827c..0f1602d 100644 --- a/docs/operations/runbook-index.md +++ b/docs/operations/runbook-index.md @@ -6,7 +6,10 @@ Use this page as the single entry point for rollout, validation, and incident ru ## Release and Rollout -- [v0.4.1 release notes](../releases/v0.4.1.md) +- [v0.5.0 release notes](../releases/v0.5.0.md) +- [v0.5.0 upgrade guide](../releases/v0.5.0-upgrade.md) +- [Release compatibility matrix](../releases/compatibility-matrix.md) +- [Production rollout golden path](production-rollout.md) - [Production deployment guide](production.md) ## Authorization Policy Validation @@ -20,12 +23,14 @@ Use this page as the single entry point for rollout, validation, and incident ru 
## API and Access Verification - [Capability matrix](../api/capability-matrix.md) +- [API error decision matrix](../api/error-decision-matrix.md) - [Authentication API](../api/authentication.md) - [Audit logs API](../api/audit-logs.md) ## Incident and Recovery - [Failure playbooks](failure-playbooks.md) +- [Operator drills (quarterly)](operator-drills.md) - [Troubleshooting](../getting-started/troubleshooting.md) - [Key management operations](key-management.md) diff --git a/docs/operations/security-hardening.md b/docs/operations/security-hardening.md new file mode 100644 index 0000000..8f50eb0 --- /dev/null +++ b/docs/operations/security-hardening.md @@ -0,0 +1,706 @@ +# 🔒 Security Hardening Guide + +> Last updated: 2026-02-19 + +This guide covers comprehensive security hardening for production deployments of Secrets. These measures are essential for protecting sensitive data and maintaining operational security. + +## 📑 Table of Contents + +- [1) Transport Layer Security (TLS/HTTPS)](#1-transport-layer-security-tlshttps) +- [2) Database Security](#2-database-security) +- [3) Network Security](#3-network-security) +- [4) Rate Limiting](#4-rate-limiting) +- [5) Cross-Origin Resource Sharing (CORS)](#5-cross-origin-resource-sharing-cors) +- [6) Authentication and Token Management](#6-authentication-and-token-management) +- [7) Master Key Storage and Management](#7-master-key-storage-and-management) +- [8) Audit Logging and Monitoring](#8-audit-logging-and-monitoring) +- [9) Security Checklist](#9-security-checklist) + +## 1) Transport Layer Security (TLS/HTTPS) + +### Requirements + +Secrets **must** run behind a reverse proxy that handles TLS termination. The application does not provide built-in TLS/HTTPS support by design. 
+ +### Reverse Proxy Configuration + +**Supported reverse proxies:** + +- Nginx +- Envoy +- Traefik +- HAProxy +- Cloud load balancers (AWS ALB/NLB, GCP Load Balancer, Azure Application Gateway) + +**Minimum TLS configuration:** + +```nginx +# Nginx example +server { + listen 443 ssl http2; + server_name secrets.example.com; + + # TLS certificate configuration + ssl_certificate /path/to/cert.pem; + ssl_certificate_key /path/to/key.pem; + + # Modern TLS configuration + ssl_protocols TLSv1.2 TLSv1.3; + ssl_ciphers 'ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384'; + ssl_prefer_server_ciphers off; + + # Security headers + add_header Strict-Transport-Security "max-age=63072000; includeSubDomains; preload" always; + add_header X-Content-Type-Options "nosniff" always; + add_header X-Frame-Options "DENY" always; + + # Proxy to Secrets application + location / { + proxy_pass http://127.0.0.1:8080; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + proxy_set_header X-Request-Id $request_id; + + # Timeouts and limits + proxy_connect_timeout 5s; + proxy_send_timeout 15s; + proxy_read_timeout 15s; + client_max_body_size 1m; + } +} + +# Redirect HTTP to HTTPS +server { + listen 80; + server_name secrets.example.com; + return 301 https://$server_name$request_uri; +} +``` + +**TLS certificate management:** + +- Use automated certificate renewal (Let's Encrypt, cert-manager, ACM) +- Monitor certificate expiration (alert at 30 days remaining) +- Use strong private key protection (file permissions, HSM, KMS) +- Rotate certificates according to your security policy + +### TLS Best Practices + +1. **Protocol versions:** Use TLS 1.2 and TLS 1.3 only +2. **Cipher suites:** Prefer AEAD ciphers (GCM, ChaCha20-Poly1305) +3. 
**HSTS:** Enable HTTP Strict Transport Security with long max-age +4. **Certificate validation:** Use valid, non-self-signed certificates in production +5. **Forward secrecy:** Ensure cipher suites support perfect forward secrecy (PFS) + +## 2) Database Security + +### SSL/TLS Configuration + +**PostgreSQL production connection string:** + +```dotenv +# Required for production - encrypted connection +DB_CONNECTION_STRING=postgres://user:password@db.example.com:5432/secrets?sslmode=require + +# Recommended - encrypted connection with certificate verification +DB_CONNECTION_STRING=postgres://user:password@db.example.com:5432/secrets?sslmode=verify-full&sslrootcert=/path/to/ca.crt +``` + +**MySQL production connection string:** + +```dotenv +# Required for production - encrypted connection +DB_CONNECTION_STRING=user:password@tcp(db.example.com:3306)/secrets?tls=true + +# Recommended - encrypted connection with certificate verification +DB_CONNECTION_STRING=user:password@tcp(db.example.com:3306)/secrets?tls=custom +``` + +**SSL mode comparison:** + +| Mode | PostgreSQL | MySQL | Use Case | +| --- | --- | --- | --- | +| No encryption | `sslmode=disable` | `tls=false` | **Development only** | +| Encrypted | `sslmode=require` | `tls=true` | **Minimum for production** | +| Verified | `sslmode=verify-full` | `tls=custom` | **Recommended for production** | + +**Warning:** `sslmode=disable` and `tls=false` transmit credentials and data in plaintext. Never use in production. + +### Database Access Control + +1. **Network isolation:** + - Restrict database access to application servers only + - Use VPC/VNET private subnets + - Configure database firewall rules + - Disable public internet access + +2. **Authentication:** + - Use strong, unique passwords (minimum 32 characters) + - Rotate database credentials periodically + - Use IAM authentication where available (AWS RDS, GCP Cloud SQL) + - Disable default/test accounts + +3. 
**Authorization:** + - Grant minimum required privileges to application user + - Use separate users for migrations and runtime operations + - Restrict administrative access to trusted networks/users + +4. **Encryption at rest:** + - Enable database encryption at rest (LUKS, dm-crypt, cloud provider encryption) + - Verify encrypted storage for backups + - Use separate encryption keys per environment + +### Database Hardening + +```sql +-- PostgreSQL: Create application user with minimal privileges +CREATE USER secrets_app WITH PASSWORD 'strong_random_password'; +GRANT CONNECT ON DATABASE secrets TO secrets_app; +GRANT SELECT, INSERT, UPDATE, DELETE ON ALL TABLES IN SCHEMA public TO secrets_app; +GRANT USAGE, SELECT ON ALL SEQUENCES IN SCHEMA public TO secrets_app; + +-- PostgreSQL: Create migration user (separate from runtime user) +CREATE USER secrets_migrate WITH PASSWORD 'different_strong_password'; +GRANT ALL PRIVILEGES ON DATABASE secrets TO secrets_migrate; +``` + +```sql +-- MySQL: Create application user with minimal privileges +CREATE USER 'secrets_app'@'%' IDENTIFIED BY 'strong_random_password'; +GRANT SELECT, INSERT, UPDATE, DELETE ON secrets.* TO 'secrets_app'@'%'; + +-- MySQL: Create migration user (separate from runtime user) +CREATE USER 'secrets_migrate'@'%' IDENTIFIED BY 'different_strong_password'; +GRANT ALL PRIVILEGES ON secrets.* TO 'secrets_migrate'@'%'; +``` + +## 3) Network Security + +### Firewall Rules + +**Minimum ingress rules:** + +| Port | Protocol | Source | Purpose | +| --- | --- | --- | --- | +| 443 | TCP | Internet/internal | HTTPS (reverse proxy) | +| 8080 | TCP | Reverse proxy only | Application server | +| 5432/3306 | TCP | Application servers | Database | + +**Block all other inbound traffic by default.** + +### Metrics Endpoint Protection + +The `/metrics` endpoint exposes operational metrics that may contain sensitive information. + +**Security measures:** + +1. 
**Network restriction (recommended):** + + ```nginx + # Nginx: Restrict /metrics to internal monitoring network + location /metrics { + allow 10.0.0.0/8; # Internal network + allow 172.16.0.0/12; # Docker networks + deny all; + proxy_pass http://127.0.0.1:8080; + } + ``` + +2. **Authentication (alternative):** + + ```nginx + # Nginx: Require basic auth for /metrics + location /metrics { + auth_basic "Metrics"; + auth_basic_user_file /etc/nginx/.htpasswd; + proxy_pass http://127.0.0.1:8080; + } + ``` + +3. **Disable metrics (if unused):** + + ```dotenv + METRICS_ENABLED=false + ``` + +**Never expose `/metrics` to the public internet.** + +### Internal Service Communication + +1. **Use private networks:** + - Deploy application and database in private subnets + - Use security groups/network policies to restrict traffic + - Avoid exposing internal services to public networks + +2. **Service mesh (optional):** + - Consider Istio, Linkerd, or Consul for mTLS between services + - Enforce zero-trust networking policies + - Enable distributed tracing for audit trails + +## 4) Rate Limiting + +Rate limiting protects against abuse, brute force attacks, and denial of service. 
+ +### Configuration + +```dotenv +# Enable rate limiting (default: true) +RATE_LIMIT_ENABLED=true + +# Requests per second per authenticated client (default: 10.0) +RATE_LIMIT_REQUESTS_PER_SEC=10.0 + +# Burst capacity (default: 20) +RATE_LIMIT_BURST=20 +``` + +### How It Works + +- **Scope:** Per-authenticated-client (not per-IP) +- **Algorithm:** Token bucket with automatic refill +- **Enforcement:** Applied after authentication, before handler execution +- **Response:** HTTP 429 with `Retry-After` header when limit exceeded + +### Recommended Settings + +| Workload | Requests/sec | Burst | Reasoning | +| --- | --- | --- | --- | +| High-volume API | 50.0 | 100 | Batch processing, high throughput | +| Standard application | 10.0 | 20 | **Default - suitable for most use cases** | +| Sensitive operations | 1.0 | 5 | Key rotation, admin operations | + +### Excluded Endpoints + +Rate limiting does **not** apply to: + +- `/health` - Health checks +- `/ready` - Readiness probes +- `/metrics` - Metrics collection +- `/v1/token` - Token issuance (pre-authentication) + +### Tuning Guidance + +**If you observe legitimate 429 responses:** + +1. Review client request patterns in audit logs +2. Identify if requests can be batched or optimized +3. Increase `RATE_LIMIT_REQUESTS_PER_SEC` if sustained higher rates are justified +4. Increase `RATE_LIMIT_BURST` if traffic is bursty but averages within limits + +**For defense-in-depth:** + +- Combine application rate limiting with reverse proxy rate limiting +- Use reverse proxy for IP-based rate limiting +- Use application rate limiting for client-based rate limiting + +## 5) Cross-Origin Resource Sharing (CORS) + +Secrets is designed as a server-to-server API. CORS is **disabled by default** and should remain disabled for most deployments. 
+ +### When to Enable CORS + +Enable CORS **only** if you need browser-based access to the API: + +- Single-page applications (SPA) accessing Secrets directly +- Web-based admin interfaces +- Browser extensions + +**Security note:** Exposing the Secrets API to browsers increases attack surface. Consider using a backend-for-frontend (BFF) pattern instead. + +### Configuration + +```dotenv +# Disable CORS (default: false - recommended) +CORS_ENABLED=false + +# Enable CORS only if required +CORS_ENABLED=true +CORS_ALLOW_ORIGINS=https://app.example.com,https://admin.example.com +``` + +### CORS Best Practices + +1. **Explicit origins only:** + - Never use `*` (wildcard) in production + - List exact origins (protocol + domain + port) + - Validate origins match your application domains + +2. **Minimal origin list:** + - Include only origins that require access + - Remove origins when no longer needed + - Audit origin list quarterly + +3. **Combined with authentication:** + - CORS does not replace authentication + - Always require Bearer token authentication + - Use short-lived tokens (see next section) + +**Example secure configuration:** + +```dotenv +CORS_ENABLED=true +CORS_ALLOW_ORIGINS=https://admin.example.com +AUTH_TOKEN_EXPIRATION_SECONDS=3600 # 1 hour for browser-based access +``` + +## 6) Authentication and Token Management + +### Token Expiration + +#### Default token expiration: 4 hours (14400 seconds) + +```dotenv +# Default (recommended for most deployments) +AUTH_TOKEN_EXPIRATION_SECONDS=14400 + +# High-security environments (1 hour) +AUTH_TOKEN_EXPIRATION_SECONDS=3600 + +# Low-security environments (24 hours) +AUTH_TOKEN_EXPIRATION_SECONDS=86400 +``` + +**Migration note:** Prior to v0.5.0, the default was 24 hours (86400 seconds). Review your token expiration settings and client refresh logic when upgrading. + +### Token Lifecycle Best Practices + +1. 
**Token rotation:** + - Implement token refresh logic in clients + - Request new tokens before expiration + - Handle 401 responses gracefully + +2. **Token revocation:** + - Deactivate clients immediately upon compromise + - Revoke tokens when client credentials rotate + - Audit active tokens periodically + +3. **Token storage:** + - Never log tokens in plaintext + - Store tokens securely in client applications + - Use environment variables or secrets managers + - Never commit tokens to source control + +### Client Management + +1. **Least privilege policies:** + - Grant minimum required capabilities per client + - Use path restrictions to limit access scope + - Review and prune unused policies quarterly + +2. **Client credentials:** + - Generate strong random secrets when creating clients (`/v1/token` only exchanges existing credentials for a token; it does not generate secrets) + - Rotate client credentials on personnel changes + - Use separate clients per application/environment + +3. **Client lifecycle:** + - Deactivate unused clients immediately + - Monitor client usage via audit logs + - Delete obsolete clients after deactivation period + +**Example policy (least privilege):** + +```json +{ + "policies": [ + { + "path": "/v1/secrets/app/production/*", + "capabilities": ["read", "write"] + }, + { + "path": "/v1/transit/keys/payment/encrypt", + "capabilities": ["encrypt"] + } + ] +} +``` + +## 7) Master Key Storage and Management + +Master keys are the root of trust in the envelope encryption hierarchy. Protect them accordingly. 
+ +### Storage Requirements + +**Never:** + +- Commit master keys to source control +- Include master keys in container images +- Store master keys in application configuration files +- Share master keys across environments +- Log master keys in plaintext + +**Always:** + +- Use environment variables for runtime injection +- Store master keys in secrets management systems +- Use distinct master keys per environment +- Encrypt master keys at rest +- Audit master key access + +### Recommended Storage Solutions + +| Solution | Use Case | Notes | +| --- | --- | --- | +| AWS Secrets Manager | AWS deployments | Use IAM roles for access control | +| GCP Secret Manager | GCP deployments | Use workload identity for access | +| Azure Key Vault | Azure deployments | Use managed identities for access | +| HashiCorp Vault | Multi-cloud/on-prem | Use AppRole or Kubernetes auth | +| Kubernetes Secrets | Kubernetes clusters | Enable encryption at rest, use external secrets operator | + +### Master Key Rotation + +1. **Generate new master key:** + + ```bash + ./bin/app create-master-key --id master-key-2026-02 + ``` + +2. **Add new key to master key chain:** + + ```dotenv + MASTER_KEYS=master-key-2026-01:OLD_BASE64_KEY,master-key-2026-02:NEW_BASE64_KEY + ACTIVE_MASTER_KEY_ID=master-key-2026-02 + ``` + +3. **Restart all application servers:** + - Use rolling restart to avoid downtime + - Verify `/ready` endpoint after each restart + - Confirm new KEKs use new master key ID + +4. **Rotate KEKs encrypted with old master key:** + + ```bash + ./bin/app rotate-kek --algorithm aes-gcm + ``` + +5. 
**Remove old master key after migration period:** + + ```dotenv + MASTER_KEYS=master-key-2026-02:NEW_BASE64_KEY + ACTIVE_MASTER_KEY_ID=master-key-2026-02 + ``` + +**Recommended rotation schedule:** + +- Routine rotation: Annually or per organizational policy +- Immediate rotation: Upon suspected compromise +- Audit rotation: Quarterly review of master key usage + +### Master Key Generation + +**Use the built-in generator:** + +```bash +# Generate 32-byte (256-bit) master key +./bin/app create-master-key --id default + +# Output format +MASTER_KEYS=default:A1B2C3D4E5F6G7H8I9J0K1L2M3N4O5P6Q7R8S9T0U1V2W3X4Y5Z6== +``` + +**Key properties:** + +- Exactly 32 bytes (256 bits) of cryptographically secure random data +- Base64-encoded for safe environment variable storage +- Generated using `crypto/rand` (CSPRNG) + +## 8) Audit Logging and Monitoring + +### Audit Log Configuration + +Audit logs record all API operations for security analysis and compliance. + +**Coverage:** + +- All authenticated requests (success and failure) +- Client identity and request path +- Timestamp, method, status code, duration +- Capability enforcement results + +### Audit Log Retention + +#### Recommended retention: 90 days + +```bash +# Monthly cleanup routine + +# 1) Preview audit logs older than 90 days +./bin/app clean-audit-logs --days 90 --dry-run --format json + +# 2) Execute deletion +./bin/app clean-audit-logs --days 90 --format text +``` + +Adjust retention based on: + +- Compliance requirements (SOC 2, PCI-DSS, HIPAA) +- Incident response window +- Storage capacity +- Forensic analysis needs + +### Security Monitoring + +**Alert on:** + +1. **Authentication failures:** + - Repeated 401 responses from same client/IP + - Invalid token attempts + - Threshold: 5 failures in 5 minutes + +2. **Authorization failures:** + - Repeated 403 responses from same client + - Capability denied patterns + - Threshold: 10 denials in 10 minutes + +3. 
**Rate limiting:** + - Frequent 429 responses + - Potential abuse or misconfigured clients + - Threshold: 100 rate limits in 1 hour + +4. **Anomalous patterns:** + - Client accessing new paths after long idle period + - Unusual request volume from single client + - Access outside normal business hours + +5. **System health:** + - Elevated error rates (5xx responses) + - Database connection failures + - Slow response times (p95 > 1s) + +### Metrics Collection + +**Key metrics to track:** + +```promql +# Request rate by endpoint and status +rate(secrets_http_requests_total[5m]) + +# Request latency percentiles +histogram_quantile(0.95, sum(rate(secrets_http_request_duration_seconds_bucket[5m])) by (le)) + +# Error rate +rate(secrets_http_requests_total{status_code=~"5.."}[5m]) + +# Authentication failures +rate(secrets_http_requests_total{status_code="401"}[5m]) + +# Authorization failures +rate(secrets_http_requests_total{status_code="403"}[5m]) + +# Rate limit hits +rate(secrets_http_requests_total{status_code="429"}[5m]) +``` + +**Example Prometheus alerts:** + +```yaml +groups: + - name: secrets_security + rules: + - alert: HighAuthFailureRate + expr: rate(secrets_http_requests_total{status_code="401"}[5m]) > 0.1 + for: 5m + annotations: + summary: "High authentication failure rate detected" + + - alert: HighAuthzFailureRate + expr: rate(secrets_http_requests_total{status_code="403"}[5m]) > 0.2 + for: 10m + annotations: + summary: "High authorization failure rate detected" + + - alert: RateLimitExceeded + expr: rate(secrets_http_requests_total{status_code="429"}[5m]) > 1 + for: 5m + annotations: + summary: "Clients hitting rate limits frequently" +``` + +### Log Forwarding + +**Forward logs to SIEM/log aggregation:** + +- Splunk, Elasticsearch, Datadog, CloudWatch Logs +- Centralize logs from all application instances +- Correlate with network and infrastructure logs +- Enable long-term retention for compliance + +**Structured logging:** + +- Logs are JSON-formatted with consistent fields +- Request ID (`request_id`) for 
distributed tracing +- Client ID for cross-referencing with audit logs +- Timestamp in UTC for accurate correlation + +## 9) Security Checklist + +Use this checklist for production deployment validation. + +### Transport Security + +- [ ] HTTPS enforced via reverse proxy +- [ ] TLS 1.2+ configured +- [ ] HSTS header enabled +- [ ] HTTP to HTTPS redirect active +- [ ] Valid TLS certificate installed +- [ ] Certificate expiration monitoring configured + +### Database Security + +- [ ] Database SSL/TLS enabled (`sslmode=require` or `tls=true`) +- [ ] Database credentials rotated and stored securely +- [ ] Database access restricted to application network +- [ ] Encryption at rest enabled +- [ ] Database backups encrypted +- [ ] Minimal database privileges granted + +### Network Security + +- [ ] Firewall rules restrict inbound traffic +- [ ] `/metrics` endpoint not publicly accessible +- [ ] Database port not exposed to internet +- [ ] Application deployed in private subnet +- [ ] Security groups/network policies configured + +### Authentication and Authorization + +- [ ] Token expiration configured appropriately +- [ ] Client policies follow least privilege principle +- [ ] Default/test clients disabled or deleted +- [ ] Client credentials stored securely +- [ ] Rate limiting enabled +- [ ] CORS disabled (or explicitly required and configured) + +### Master Key Management + +- [ ] Master keys stored in secrets manager (not source control) +- [ ] Distinct master keys per environment +- [ ] Master key access audited +- [ ] Master key rotation schedule documented +- [ ] `MASTER_KEYS` not in container images + +### Monitoring and Logging + +- [ ] Audit log retention policy defined +- [ ] Security alerts configured +- [ ] Metrics collection enabled +- [ ] Log forwarding to SIEM configured +- [ ] Incident response runbook documented + +### Operational Security + +- [ ] Backup and restore tested +- [ ] Key rotation procedure documented and tested +- [ ] Incident response 
plan defined +- [ ] On-call contacts documented +- [ ] Security review scheduled (quarterly) + +## See also + +- [Production deployment guide](production.md) +- [Environment variables](../configuration/environment-variables.md) +- [Security model](../concepts/security-model.md) +- [Monitoring](monitoring.md) +- [Policy management](../api/policies.md) +- [Troubleshooting](../getting-started/troubleshooting.md) diff --git a/docs/releases/compatibility-matrix.md b/docs/releases/compatibility-matrix.md new file mode 100644 index 0000000..a7a64f5 --- /dev/null +++ b/docs/releases/compatibility-matrix.md @@ -0,0 +1,33 @@ +# 🔁 Release Compatibility Matrix + +> Last updated: 2026-02-19 + +Use this page to understand upgrade impact between recent releases. + +## Matrix + +| From -> To | Schema migration impact | Runtime/default changes | Required operator action | +| --- | --- | --- | --- | +| `v0.4.0 -> v0.4.1` | No new mandatory migration beyond v0.4.0 baseline | Policy matcher bugfix and docs alignment | Update image tag and validate policy wildcard behavior | +| `v0.4.x -> v0.5.0` | No new destructive schema migration required for core features | Token TTL default `24h -> 4h`; rate limiting enabled by default; CORS config introduced (disabled by default) | Set explicit `AUTH_TOKEN_EXPIRATION_SECONDS`, review `RATE_LIMIT_*`, configure `CORS_*` only if browser access is required | + +## Upgrade verification by target + +For `v0.5.0`: + +1. `GET /health` and `GET /ready` pass +2. `POST /v1/token` issues token with expected expiration behavior +3. Protected endpoints behave correctly under normal load and return controlled `429` with `Retry-After` under bursts +4. 
CORS behavior matches deployment mode (server-to-server vs browser) + +## Notes + +- Keep migrations additive and avoid destructive rollback in production unless fully validated +- Pin release tags in automation for reproducible rollouts +- Preserve historical release notes; promote only the current release in operator navigation + +## See also + +- [v0.5.0 release notes](v0.5.0.md) +- [v0.5.0 upgrade guide](v0.5.0-upgrade.md) +- [Production rollout golden path](../operations/production-rollout.md) diff --git a/docs/releases/v0.5.0-upgrade.md b/docs/releases/v0.5.0-upgrade.md new file mode 100644 index 0000000..b8ab422 --- /dev/null +++ b/docs/releases/v0.5.0-upgrade.md @@ -0,0 +1,68 @@ +# ⬆️ Upgrade Guide: v0.4.x -> v0.5.0 + +> Last updated: 2026-02-19 + +Use this guide to safely upgrade from `v0.4.x` to `v0.5.0`. + +## What changed + +- Default token expiration is now shorter (`24h` -> `4h`) +- Per-client rate limiting is enabled by default +- CORS is configurable and remains disabled by default +- Security hardening guidance expanded for production deployments + +## Env diff (copy/paste) + +```diff +- AUTH_TOKEN_EXPIRATION_SECONDS=86400 ++ AUTH_TOKEN_EXPIRATION_SECONDS=14400 + ++ RATE_LIMIT_ENABLED=true ++ RATE_LIMIT_REQUESTS_PER_SEC=10.0 ++ RATE_LIMIT_BURST=20 + ++ CORS_ENABLED=false ++ CORS_ALLOW_ORIGINS= +``` + +If your clients rely on 24-hour tokens, keep explicit configuration: + +```dotenv +AUTH_TOKEN_EXPIRATION_SECONDS=86400 +``` + +## Upgrade steps + +1. Update image/binary to `v0.5.0` +2. Review and set explicit `AUTH_TOKEN_EXPIRATION_SECONDS` +3. Add `RATE_LIMIT_*` variables with values matching your traffic profile +4. Keep `CORS_ENABLED=false` unless browser-based access is required +5. Restart API servers with updated environment + +## Post-upgrade verification + +1. Health checks pass: `GET /health`, `GET /ready` +2. Token issuance works and expiration matches expected TTL +3. 
Authenticated endpoint rate limit returns `429` with `Retry-After` when exceeded +4. Normal traffic does not hit `429` unexpectedly +5. CORS behavior is correct for your deployment mode + +## Quick checks + +```bash +# token issue still works +curl -i -X POST http://localhost:8080/v1/token \ + -H "Content-Type: application/json" \ + -d '{"client_id":"YOUR_CLIENT_ID","client_secret":"YOUR_CLIENT_SECRET"}' + +# protected route may return 200/403/429 depending on policy and load +curl -i http://localhost:8080/v1/clients \ + -H "Authorization: Bearer YOUR_ACCESS_TOKEN" +``` + +## See also + +- [v0.5.0 release notes](v0.5.0.md) +- [Release compatibility matrix](compatibility-matrix.md) +- [Environment variables](../configuration/environment-variables.md) +- [Security hardening guide](../operations/security-hardening.md) diff --git a/docs/releases/v0.5.0.md b/docs/releases/v0.5.0.md new file mode 100644 index 0000000..0403419 --- /dev/null +++ b/docs/releases/v0.5.0.md @@ -0,0 +1,75 @@ +# 🚀 Secrets v0.5.0 Release Notes + +> Release date: 2026-02-19 + +This release strengthens default runtime security by adding authenticated request rate limiting, +introducing configurable CORS controls, tightening default token lifetime, and expanding +production security documentation.
+ +## Highlights + +- Added per-client rate limiting for authenticated API routes +- Added configurable CORS middleware with secure defaults +- Reduced default token expiration from 24 hours to 4 hours +- Added comprehensive production security hardening guide + +## Runtime Changes + +- New rate limiting settings: + - `RATE_LIMIT_ENABLED` (default `true`) + - `RATE_LIMIT_REQUESTS_PER_SEC` (default `10.0`) + - `RATE_LIMIT_BURST` (default `20`) +- New CORS settings: + - `CORS_ENABLED` (default `false`) + - `CORS_ALLOW_ORIGINS` (default empty) +- Authenticated endpoints now return `429 Too Many Requests` when limits are exceeded and include + `Retry-After` response header + +## Breaking / Behavior Changes + +- **Default token expiration changed**: + - Previous default: `AUTH_TOKEN_EXPIRATION_SECONDS=86400` (24h) + - New default: `AUTH_TOKEN_EXPIRATION_SECONDS=14400` (4h) + +If your clients expected 24-hour tokens, explicitly set `AUTH_TOKEN_EXPIRATION_SECONDS=86400` +and verify refresh behavior. + +## Upgrade Notes + +1. Deploy binaries/images with `v0.5.0` +2. Review and set explicit token expiration for your environment +3. Verify rate limit thresholds for client traffic profile +4. Keep CORS disabled unless browser-based access is required +5. Restrict `/metrics` exposure to internal networks only + +## Operator Verification Checklist + +1. Confirm health endpoints: `GET /health`, `GET /ready` +2. Validate token issuance and expiration expectations after upgrade +3. Confirm authenticated API traffic is not unintentionally rate limited +4. Validate `429` behavior and `Retry-After` header with controlled load test +5. 
Confirm CORS behavior matches policy (disabled by default, explicit origins only when enabled) + +## Security Guidance + +- Use TLS termination at reverse proxy/load balancer +- Use database TLS in production (`sslmode=require` or stronger / `tls=true` or stronger) +- Store master keys in a dedicated secrets manager +- Review least-privilege client policies and rotate credentials regularly + +## Documentation Updates + +- Added [Security hardening guide](../operations/security-hardening.md) +- Updated [Environment variables](../configuration/environment-variables.md) with rate limiting, + CORS, and token expiration migration notes +- Updated [Production deployment guide](../operations/production.md) with security hardening links +- Added root-level changelog entry for `0.5.0` + +## See also + +- [v0.5.0 upgrade guide](v0.5.0-upgrade.md) +- [Release compatibility matrix](compatibility-matrix.md) +- [Security hardening guide](../operations/security-hardening.md) +- [Environment variables](../configuration/environment-variables.md) +- [Production deployment guide](../operations/production.md) +- [API compatibility policy](../api/versioning-policy.md) diff --git a/docs/tools/check_docs_metadata.py b/docs/tools/check_docs_metadata.py index da1197d..8bf35eb 100644 --- a/docs/tools/check_docs_metadata.py +++ b/docs/tools/check_docs_metadata.py @@ -22,6 +22,14 @@ def main() -> None: require_contains(Path("README.md"), current_release) require_contains(Path("docs/README.md"), current_release) + # Ensure current release docs links are present in key navigation pages. 
+ current_release_note = f"docs/releases/{current_release}.md" + require_contains(Path("README.md"), current_release_note) + require_contains(Path("docs/README.md"), f"releases/{current_release}.md") + require_contains( + Path("docs/operations/runbook-index.md"), f"../releases/{current_release}.md" + ) + openapi = Path("docs/openapi.yaml").read_text(encoding="utf-8") if f"version: {api_version}" not in openapi: raise ValueError( diff --git a/docs/tools/check_release_docs_links.py b/docs/tools/check_release_docs_links.py new file mode 100644 index 0000000..8a887a8 --- /dev/null +++ b/docs/tools/check_release_docs_links.py @@ -0,0 +1,93 @@ +#!/usr/bin/env python3 + +import os +import re +import subprocess +from pathlib import Path + + +RELEASE_RE = re.compile(r"^docs/releases/(v\d+\.\d+\.\d+)\.md$") + + +def run(cmd: list[str]) -> str: + out = subprocess.check_output(cmd, text=True) + return out.strip() + + +def changed_added_release_notes(base_sha: str, head_sha: str) -> list[str]: + output = run(["git", "diff", "--name-status", base_sha, head_sha]) + versions: list[str] = [] + if not output: + return versions + + for line in output.splitlines(): + parts = line.split("\t", 1) + if len(parts) != 2: + continue + status, path = parts + if status != "A": + continue + match = RELEASE_RE.match(path) + if not match: + continue + versions.append(match.group(1)) + return versions + + +def require_contains(path: Path, needle: str) -> None: + content = path.read_text(encoding="utf-8") + if needle not in content: + raise ValueError(f"{path} missing required link/text: {needle}") + + +def validate_release(version: str) -> None: + release_path = Path(f"docs/releases/{version}.md") + upgrade_path = Path(f"docs/releases/{version}-upgrade.md") + compatibility_path = Path("docs/releases/compatibility-matrix.md") + + if not release_path.exists(): + raise ValueError(f"Missing release notes file: {release_path}") + if not upgrade_path.exists(): + raise ValueError(f"Missing upgrade guide 
for new release notes: {upgrade_path}") + + require_contains(release_path, f"{version}-upgrade.md") + require_contains(release_path, "compatibility-matrix.md") + require_contains(compatibility_path, version) + + # Ensure entry-point navigation includes both links for this release. + require_contains(Path("docs/README.md"), f"releases/{version}.md") + require_contains(Path("docs/README.md"), f"releases/{version}-upgrade.md") + require_contains( + Path("docs/operations/runbook-index.md"), f"../releases/{version}.md" + ) + require_contains( + Path("docs/operations/runbook-index.md"), + f"../releases/{version}-upgrade.md", + ) + + +def main() -> None: + if os.getenv("GITHUB_EVENT_NAME", "") != "pull_request": + print("release docs guard skipped (non-PR)") + return + + base_sha = os.getenv("PR_BASE_SHA", "").strip() + head_sha = os.getenv("PR_HEAD_SHA", "").strip() + if not base_sha or not head_sha: + raise ValueError( + "PR_BASE_SHA and PR_HEAD_SHA must be set for release docs guard" + ) + + versions = changed_added_release_notes(base_sha, head_sha) + if not versions: + print("release docs guard passed (no new release note files)") + return + + for version in versions: + validate_release(version) + + print("release docs guard passed") + + +if __name__ == "__main__": + main() diff --git a/go.mod b/go.mod index badb122..e6d0ac5 100644 --- a/go.mod +++ b/go.mod @@ -5,6 +5,7 @@ go 1.25 require ( github.com/allisson/go-env v0.6.0 github.com/allisson/go-pwdhash v0.3.1 + github.com/gin-contrib/cors v1.7.6 github.com/gin-contrib/requestid v1.0.5 github.com/gin-gonic/gin v1.11.0 github.com/go-sql-driver/mysql v1.9.3 @@ -21,6 +22,7 @@ require ( go.opentelemetry.io/otel/metric v1.40.0 go.opentelemetry.io/otel/sdk/metric v1.40.0 golang.org/x/crypto v0.48.0 + golang.org/x/time v0.14.0 ) require ( @@ -32,7 +34,7 @@ require ( github.com/cespare/xxhash/v2 v2.3.0 // indirect github.com/cloudwego/base64x v0.1.6 // indirect github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc 
// indirect - github.com/gabriel-vasile/mimetype v1.4.8 // indirect + github.com/gabriel-vasile/mimetype v1.4.9 // indirect github.com/gin-contrib/sse v1.1.0 // indirect github.com/go-logr/logr v1.4.3 // indirect github.com/go-logr/stdr v1.2.2 // indirect diff --git a/go.sum b/go.sum index 4ab26cd..c2dc7c6 100644 --- a/go.sum +++ b/go.sum @@ -42,8 +42,10 @@ github.com/docker/go-units v0.5.0 h1:69rxXcBk27SvSaaxTtLh/8llcHD8vYHT7WSdRZ/jvr4 github.com/docker/go-units v0.5.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk= github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg= github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= -github.com/gabriel-vasile/mimetype v1.4.8 h1:FfZ3gj38NjllZIeJAmMhr+qKL8Wu+nOoI3GqacKw1NM= -github.com/gabriel-vasile/mimetype v1.4.8/go.mod h1:ByKUIKGjh1ODkGM1asKUbQZOLGrPjydw3hYPU2YU9t8= +github.com/gabriel-vasile/mimetype v1.4.9 h1:5k+WDwEsD9eTLL8Tz3L0VnmVh9QxGjRmjBvAG7U/oYY= +github.com/gabriel-vasile/mimetype v1.4.9/go.mod h1:WnSQhFKJuBlRyLiKohA/2DtIlPFAbguNaG7QCHcyGok= +github.com/gin-contrib/cors v1.7.6 h1:3gQ8GMzs1Ylpf70y8bMw4fVpycXIeX1ZemuSQIsnQQY= +github.com/gin-contrib/cors v1.7.6/go.mod h1:Ulcl+xN4jel9t1Ry8vqph23a60FwH9xVLd+3ykmTjOk= github.com/gin-contrib/requestid v1.0.5 h1:oye4jWPpTmJHLepQWzb36lFZkKzl+gf8R0K/ButxJUY= github.com/gin-contrib/requestid v1.0.5/go.mod h1:vkfMTJPx8IBXnavnuQSM9j5isaQfNja1f1hTB516ilU= github.com/gin-contrib/sse v1.1.0 h1:n0w2GMuUpWDVp7qSpvze6fAu9iRxJY4Hmj6AmBOU05w= @@ -195,6 +197,8 @@ golang.org/x/sys v0.41.0 h1:Ivj+2Cp/ylzLiEU89QhWblYnOE9zerudt9Ftecq2C6k= golang.org/x/sys v0.41.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= golang.org/x/text v0.34.0 h1:oL/Qq0Kdaqxa1KbNeMKwQq0reLCCaFtqu2eNuSeNHbk= golang.org/x/text v0.34.0/go.mod h1:homfLqTYRFyVYemLBFl5GgL/DWEiH5wcsQ5gSh1yziA= +golang.org/x/time v0.14.0 h1:MRx4UaLrDotUKUdCIqzPC48t1Y9hANFKIRpNx+Te8PI= +golang.org/x/time v0.14.0/go.mod 
h1:eL/Oa2bBBK0TkX57Fyni+NgnyQQN4LitPmob2Hjnqw4= golang.org/x/tools v0.41.0 h1:a9b8iMweWG+S0OBnlU36rzLp20z1Rp10w+IY2czHTQc= golang.org/x/tools v0.41.0/go.mod h1:XSY6eDqxVNiYgezAVqqCeihT4j1U2CCsqvH3WhQpnlg= google.golang.org/protobuf v1.36.11 h1:fV6ZwhNocDyBLK0dj+fg8ektcVegBBuEolpbTQyBNVE= diff --git a/internal/app/di.go b/internal/app/di.go index 4ce19bc..3a56c81 100644 --- a/internal/app/di.go +++ b/internal/app/di.go @@ -767,6 +767,7 @@ func (c *Container) initHTTPServer() (*http.Server, error) { // Setup router with dependencies server.SetupRouter( + c.config, clientHandler, tokenHandler, auditLogHandler, diff --git a/internal/auth/http/rate_limit_middleware.go b/internal/auth/http/rate_limit_middleware.go new file mode 100644 index 0000000..e9ddbf1 --- /dev/null +++ b/internal/auth/http/rate_limit_middleware.go @@ -0,0 +1,144 @@ +// Package http provides HTTP middleware and utilities for authentication. +package http + +import ( + "context" + "fmt" + "log/slog" + "net/http" + "sync" + "time" + + "github.com/gin-gonic/gin" + "github.com/google/uuid" + "golang.org/x/time/rate" + + apperrors "github.com/allisson/secrets/internal/errors" + "github.com/allisson/secrets/internal/httputil" +) + +// rateLimiterStore holds per-client rate limiters with automatic cleanup. +type rateLimiterStore struct { + limiters sync.Map // map[uuid.UUID]*rateLimiterEntry + rps float64 + burst int +} + +// rateLimiterEntry holds a rate limiter and last access time for cleanup. +type rateLimiterEntry struct { + limiter *rate.Limiter + lastAccess time.Time + mu sync.Mutex +} + +// RateLimitMiddleware enforces per-client rate limiting on authenticated requests. +// +// MUST be used after AuthenticationMiddleware (requires authenticated client in context). +// Uses token bucket algorithm via golang.org/x/time/rate. Each client gets independent +// rate limiter based on their client ID. 
+// +// Configuration: +// - rps: Requests per second allowed per client +// - burst: Maximum burst capacity for temporary spikes +// +// Returns: +// - 429 Too Many Requests: Rate limit exceeded (includes Retry-After header) +// - Continues: Request allowed within rate limit +func RateLimitMiddleware(rps float64, burst int, logger *slog.Logger) gin.HandlerFunc { + store := &rateLimiterStore{ + rps: rps, + burst: burst, + } + + // Start cleanup goroutine for stale limiters (every 5 minutes) + go store.cleanupStale(context.Background(), 5*time.Minute) + + return func(c *gin.Context) { + // Get authenticated client from context + client, ok := GetClient(c.Request.Context()) + if !ok || client == nil { + // Should never happen - authentication middleware should have caught this + logger.Error("rate limit middleware: no authenticated client in context") + httputil.HandleErrorGin(c, apperrors.ErrUnauthorized, logger) + c.Abort() + return + } + + // Get or create rate limiter for this client + limiter := store.getLimiter(client.ID) + + // Check if request is allowed + if !limiter.Allow() { + // Round the wait up to whole seconds (minimum 1) so Retry-After is never 0 or too short + reservation := limiter.Reserve() + retryAfter := int((reservation.Delay()-1)/time.Second) + 1 + reservation.Cancel() // Cancel the reservation + + logger.Debug("rate limit exceeded", + slog.String("client_id", client.ID.String()), + slog.Int("retry_after", retryAfter)) + + c.Header("Retry-After", fmt.Sprintf("%d", retryAfter)) + c.JSON(http.StatusTooManyRequests, gin.H{ + "error": "rate_limit_exceeded", + "message": "Too many requests. Please retry after the specified delay.", + }) + c.Abort() + return + } + + // Request allowed, continue + c.Next() + } +} + +// getLimiter retrieves or creates a rate limiter for a client.
+func (s *rateLimiterStore) getLimiter(clientID uuid.UUID) *rate.Limiter { + // Try to load existing limiter + if val, ok := s.limiters.Load(clientID); ok { + entry := val.(*rateLimiterEntry) + entry.mu.Lock() + entry.lastAccess = time.Now() + entry.mu.Unlock() + return entry.limiter + } + + // Create new limiter + limiter := rate.NewLimiter(rate.Limit(s.rps), s.burst) + entry := &rateLimiterEntry{ + limiter: limiter, + lastAccess: time.Now(), + } + + // Store atomically; if a concurrent request won the race, reuse its limiter + actual, _ := s.limiters.LoadOrStore(clientID, entry) + return actual.(*rateLimiterEntry).limiter +} + +// cleanupStale removes rate limiters that haven't been accessed recently. +// Runs periodically to prevent unbounded memory growth. +func (s *rateLimiterStore) cleanupStale(ctx context.Context, interval time.Duration) { + ticker := time.NewTicker(interval) + defer ticker.Stop() + + for { + select { + case <-ctx.Done(): + return + case <-ticker.C: + // Remove limiters not accessed in last hour + threshold := time.Now().Add(-1 * time.Hour) + s.limiters.Range(func(key, value interface{}) bool { + entry := value.(*rateLimiterEntry) + entry.mu.Lock() + shouldDelete := entry.lastAccess.Before(threshold) + entry.mu.Unlock() + + if shouldDelete { + s.limiters.Delete(key) + } + return true + }) + } + } +} diff --git a/internal/auth/http/rate_limit_middleware_test.go b/internal/auth/http/rate_limit_middleware_test.go new file mode 100644 index 0000000..6ead654 --- /dev/null +++ b/internal/auth/http/rate_limit_middleware_test.go @@ -0,0 +1,278 @@ +package http + +import ( + "log/slog" + "net/http" + "net/http/httptest" + "testing" + "time" + + "github.com/gin-gonic/gin" + "github.com/google/uuid" + "github.com/stretchr/testify/assert" + + authDomain "github.com/allisson/secrets/internal/auth/domain" +) + +func TestRateLimitMiddleware_AllowsRequestsWithinLimit(t *testing.T) { + gin.SetMode(gin.TestMode) + + // Create test client + client := &authDomain.Client{ + ID: uuid.Must(uuid.NewV7()), + Name: "test-client", + } + + // Create middleware with
generous limits + logger := slog.Default() + middleware := RateLimitMiddleware(10.0, 20, logger) + + // Create test router + router := gin.New() + router.Use(func(c *gin.Context) { + // Add client to context + ctx := WithClient(c.Request.Context(), client) + c.Request = c.Request.WithContext(ctx) + c.Next() + }) + router.Use(middleware) + router.GET("/test", func(c *gin.Context) { + c.JSON(http.StatusOK, gin.H{"status": "ok"}) + }) + + // Send requests within limit + for i := 0; i < 5; i++ { + w := httptest.NewRecorder() + req := httptest.NewRequest(http.MethodGet, "/test", nil) + router.ServeHTTP(w, req) + + assert.Equal(t, http.StatusOK, w.Code) + } +} + +func TestRateLimitMiddleware_BlocksRequestsExceedingLimit(t *testing.T) { + gin.SetMode(gin.TestMode) + + // Create test client + client := &authDomain.Client{ + ID: uuid.Must(uuid.NewV7()), + Name: "test-client", + } + + // Create middleware with very low limits + logger := slog.Default() + middleware := RateLimitMiddleware(1.0, 2, logger) + + // Create test router + router := gin.New() + router.Use(func(c *gin.Context) { + ctx := WithClient(c.Request.Context(), client) + c.Request = c.Request.WithContext(ctx) + c.Next() + }) + router.Use(middleware) + router.GET("/test", func(c *gin.Context) { + c.JSON(http.StatusOK, gin.H{"status": "ok"}) + }) + + // Send requests up to burst capacity (should succeed) + for i := 0; i < 2; i++ { + w := httptest.NewRecorder() + req := httptest.NewRequest(http.MethodGet, "/test", nil) + router.ServeHTTP(w, req) + assert.Equal(t, http.StatusOK, w.Code) + } + + // Next request should be rate limited + w := httptest.NewRecorder() + req := httptest.NewRequest(http.MethodGet, "/test", nil) + router.ServeHTTP(w, req) + + assert.Equal(t, http.StatusTooManyRequests, w.Code) + assert.NotEmpty(t, w.Header().Get("Retry-After")) +} + +func TestRateLimitMiddleware_Returns429WithRetryAfterHeader(t *testing.T) { + gin.SetMode(gin.TestMode) + + client := &authDomain.Client{ + ID:
uuid.Must(uuid.NewV7()), + Name: "test-client", + } + + logger := slog.Default() + middleware := RateLimitMiddleware(0.5, 1, logger) + + router := gin.New() + router.Use(func(c *gin.Context) { + ctx := WithClient(c.Request.Context(), client) + c.Request = c.Request.WithContext(ctx) + c.Next() + }) + router.Use(middleware) + router.GET("/test", func(c *gin.Context) { + c.JSON(http.StatusOK, gin.H{"status": "ok"}) + }) + + // Consume burst + w := httptest.NewRecorder() + req := httptest.NewRequest(http.MethodGet, "/test", nil) + router.ServeHTTP(w, req) + assert.Equal(t, http.StatusOK, w.Code) + + // Next request should be rate limited with Retry-After header + w = httptest.NewRecorder() + req = httptest.NewRequest(http.MethodGet, "/test", nil) + router.ServeHTTP(w, req) + + assert.Equal(t, http.StatusTooManyRequests, w.Code) + assert.NotEmpty(t, w.Header().Get("Retry-After")) +} + +func TestRateLimitMiddleware_IndependentLimitsPerClient(t *testing.T) { + gin.SetMode(gin.TestMode) + + // Create two different clients + client1 := &authDomain.Client{ + ID: uuid.Must(uuid.NewV7()), + Name: "client-1", + } + client2 := &authDomain.Client{ + ID: uuid.Must(uuid.NewV7()), + Name: "client-2", + } + + logger := slog.Default() + middleware := RateLimitMiddleware(1.0, 1, logger) + + router := gin.New() + router.Use(middleware) + router.GET("/test", func(c *gin.Context) { + c.JSON(http.StatusOK, gin.H{"status": "ok"}) + }) + + // Client 1 consumes its limit + w := httptest.NewRecorder() + req := httptest.NewRequest(http.MethodGet, "/test", nil) + ctx := WithClient(req.Context(), client1) + req = req.WithContext(ctx) + + router.ServeHTTP(w, req) + assert.Equal(t, http.StatusOK, w.Code) + + // Client 1 is now rate limited + w = httptest.NewRecorder() + req = httptest.NewRequest(http.MethodGet, "/test", nil) + ctx = WithClient(req.Context(), client1) + req = req.WithContext(ctx) + router.ServeHTTP(w, req) + assert.Equal(t, http.StatusTooManyRequests, w.Code) + + // Client 2 should 
still have its own independent limit + w = httptest.NewRecorder() + req = httptest.NewRequest(http.MethodGet, "/test", nil) + ctx = WithClient(req.Context(), client2) + req = req.WithContext(ctx) + router.ServeHTTP(w, req) + assert.Equal(t, http.StatusOK, w.Code) +} + +func TestRateLimitMiddleware_BurstCapacityWorks(t *testing.T) { + gin.SetMode(gin.TestMode) + + client := &authDomain.Client{ + ID: uuid.Must(uuid.NewV7()), + Name: "test-client", + } + + logger := slog.Default() + // Low rate but higher burst + middleware := RateLimitMiddleware(1.0, 5, logger) + + router := gin.New() + router.Use(func(c *gin.Context) { + ctx := WithClient(c.Request.Context(), client) + c.Request = c.Request.WithContext(ctx) + c.Next() + }) + router.Use(middleware) + router.GET("/test", func(c *gin.Context) { + c.JSON(http.StatusOK, gin.H{"status": "ok"}) + }) + + // Should be able to burst up to 5 requests + for i := 0; i < 5; i++ { + w := httptest.NewRecorder() + req := httptest.NewRequest(http.MethodGet, "/test", nil) + router.ServeHTTP(w, req) + assert.Equal(t, http.StatusOK, w.Code) + } + + // 6th request should be rate limited + w := httptest.NewRecorder() + req := httptest.NewRequest(http.MethodGet, "/test", nil) + router.ServeHTTP(w, req) + assert.Equal(t, http.StatusTooManyRequests, w.Code) +} + +func TestRateLimitMiddleware_RequiresAuthentication(t *testing.T) { + gin.SetMode(gin.TestMode) + + logger := slog.Default() + middleware := RateLimitMiddleware(10.0, 20, logger) + + router := gin.New() + router.Use(middleware) + router.GET("/test", func(c *gin.Context) { + c.JSON(http.StatusOK, gin.H{"status": "ok"}) + }) + + // Request without authenticated client should fail + w := httptest.NewRecorder() + req := httptest.NewRequest(http.MethodGet, "/test", nil) + router.ServeHTTP(w, req) + + assert.Equal(t, http.StatusUnauthorized, w.Code) +} + +func TestRateLimiterStore_CleanupStaleEntries(t *testing.T) { + store := &rateLimiterStore{ + rps: 10.0, + burst: 20, + } + + // Create 
a limiter entry + client1 := uuid.Must(uuid.NewV7()) + limiter1 := store.getLimiter(client1) + assert.NotNil(t, limiter1) + + // Verify it's stored + _, ok := store.limiters.Load(client1) + assert.True(t, ok) + + // Manually set last access to old time + if val, ok := store.limiters.Load(client1); ok { + entry := val.(*rateLimiterEntry) + entry.mu.Lock() + entry.lastAccess = time.Now().Add(-2 * time.Hour) + entry.mu.Unlock() + } + + // Run cleanup manually + threshold := time.Now().Add(-1 * time.Hour) + store.limiters.Range(func(key, value interface{}) bool { + entry := value.(*rateLimiterEntry) + entry.mu.Lock() + shouldDelete := entry.lastAccess.Before(threshold) + entry.mu.Unlock() + + if shouldDelete { + store.limiters.Delete(key) + } + return true + }) + + // Verify entry was cleaned up + _, ok = store.limiters.Load(client1) + assert.False(t, ok) +} diff --git a/internal/config/config.go b/internal/config/config.go index d259a44..15d944d 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -29,6 +29,15 @@ type Config struct { // Auth AuthTokenExpiration time.Duration + // Rate Limiting + RateLimitEnabled bool + RateLimitRequestsPerSec float64 + RateLimitBurst int + + // CORS + CORSEnabled bool + CORSAllowOrigins string + // Metrics MetricsEnabled bool MetricsNamespace string @@ -58,7 +67,16 @@ func Load() *Config { LogLevel: env.GetString("LOG_LEVEL", "info"), // Auth - AuthTokenExpiration: env.GetDuration("AUTH_TOKEN_EXPIRATION_SECONDS", 86400, time.Second), + AuthTokenExpiration: env.GetDuration("AUTH_TOKEN_EXPIRATION_SECONDS", 14400, time.Second), + + // Rate Limiting + RateLimitEnabled: env.GetBool("RATE_LIMIT_ENABLED", true), + RateLimitRequestsPerSec: env.GetFloat64("RATE_LIMIT_REQUESTS_PER_SEC", 10.0), + RateLimitBurst: env.GetInt("RATE_LIMIT_BURST", 20), + + // CORS + CORSEnabled: env.GetBool("CORS_ENABLED", false), + CORSAllowOrigins: env.GetString("CORS_ALLOW_ORIGINS", ""), // Metrics MetricsEnabled: 
env.GetBool("METRICS_ENABLED", true), diff --git a/internal/config/config_test.go b/internal/config/config_test.go index e813c94..a7beba0 100644 --- a/internal/config/config_test.go +++ b/internal/config/config_test.go @@ -31,7 +31,14 @@ func TestLoad(t *testing.T) { assert.Equal(t, 5, cfg.DBMaxIdleConnections) assert.Equal(t, 5*time.Minute, cfg.DBConnMaxLifetime) assert.Equal(t, "info", cfg.LogLevel) - assert.Equal(t, 86400*time.Second, cfg.AuthTokenExpiration) + assert.Equal(t, 14400*time.Second, cfg.AuthTokenExpiration) + assert.Equal(t, true, cfg.RateLimitEnabled) + assert.Equal(t, 10.0, cfg.RateLimitRequestsPerSec) + assert.Equal(t, 20, cfg.RateLimitBurst) + assert.Equal(t, false, cfg.CORSEnabled) + assert.Equal(t, "", cfg.CORSAllowOrigins) + assert.Equal(t, true, cfg.MetricsEnabled) + assert.Equal(t, "secrets", cfg.MetricsNamespace) }, }, { @@ -80,6 +87,41 @@ func TestLoad(t *testing.T) { assert.Equal(t, "debug", cfg.LogLevel) }, }, + { + name: "load custom rate limit configuration", + envVars: map[string]string{ + "RATE_LIMIT_ENABLED": "false", + "RATE_LIMIT_REQUESTS_PER_SEC": "5.0", + "RATE_LIMIT_BURST": "10", + }, + validate: func(t *testing.T, cfg *Config) { + assert.Equal(t, false, cfg.RateLimitEnabled) + assert.Equal(t, 5.0, cfg.RateLimitRequestsPerSec) + assert.Equal(t, 10, cfg.RateLimitBurst) + }, + }, + { + name: "load custom CORS configuration", + envVars: map[string]string{ + "CORS_ENABLED": "true", + "CORS_ALLOW_ORIGINS": "https://example.com,https://app.example.com", + }, + validate: func(t *testing.T, cfg *Config) { + assert.Equal(t, true, cfg.CORSEnabled) + assert.Equal(t, "https://example.com,https://app.example.com", cfg.CORSAllowOrigins) + }, + }, + { + name: "load custom metrics configuration", + envVars: map[string]string{ + "METRICS_ENABLED": "false", + "METRICS_NAMESPACE": "custom", + }, + validate: func(t *testing.T, cfg *Config) { + assert.Equal(t, false, cfg.MetricsEnabled) + assert.Equal(t, "custom", cfg.MetricsNamespace) + }, + }, } 
for _, tt := range tests { diff --git a/internal/http/cors.go b/internal/http/cors.go new file mode 100644 index 0000000..7ab50b0 --- /dev/null +++ b/internal/http/cors.go @@ -0,0 +1,84 @@ +package http + +import ( + "log/slog" + "strings" + "time" + + "github.com/gin-contrib/cors" + "github.com/gin-gonic/gin" +) + +// createCORSMiddleware creates a CORS middleware based on configuration. +// Returns nil if CORS is disabled or no origins configured. +// +// CORS is disabled by default since Secrets is designed as a server-to-server API. +// Enable only if browser-based applications require direct API access. +// +// Configuration: +// - enabled: Whether CORS is enabled +// - allowOriginsStr: Comma-separated list of allowed origins +// +// Returns nil if disabled or no valid origins are configured. +func createCORSMiddleware(enabled bool, allowOriginsStr string, logger *slog.Logger) gin.HandlerFunc { + if !enabled { + return nil + } + + if allowOriginsStr == "" { + logger.Warn("CORS enabled but no origins configured - CORS will not be applied") + return nil + } + + // Parse comma-separated origins + origins := parseOrigins(allowOriginsStr) + if len(origins) == 0 { + logger.Warn("CORS enabled but no valid origins found") + return nil + } + + logger.Info("CORS enabled", + slog.Int("origin_count", len(origins)), + slog.Any("origins", origins)) + + config := cors.Config{ + AllowOrigins: origins, + AllowMethods: []string{ + "GET", + "POST", + "PUT", + "DELETE", + }, + AllowHeaders: []string{ + "Authorization", + "Content-Type", + }, + ExposeHeaders: []string{ + "X-Request-Id", + }, + AllowCredentials: true, + MaxAge: 12 * time.Hour, + } + + return cors.New(config) +} + +// parseOrigins parses comma-separated origin list and trims whitespace. +// Returns empty slice if input is empty. 
+func parseOrigins(originsStr string) []string { + if originsStr == "" { + return nil + } + + parts := strings.Split(originsStr, ",") + origins := make([]string, 0, len(parts)) + + for _, part := range parts { + trimmed := strings.TrimSpace(part) + if trimmed != "" { + origins = append(origins, trimmed) + } + } + + return origins +} diff --git a/internal/http/cors_test.go b/internal/http/cors_test.go new file mode 100644 index 0000000..8d07d1e --- /dev/null +++ b/internal/http/cors_test.go @@ -0,0 +1,126 @@ +package http + +import ( + "log/slog" + "net/http" + "net/http/httptest" + "testing" + + "github.com/gin-gonic/gin" + "github.com/stretchr/testify/assert" +) + +func TestCreateCORSMiddleware_DisabledReturnsNil(t *testing.T) { + logger := slog.Default() + middleware := createCORSMiddleware(false, "https://example.com", logger) + assert.Nil(t, middleware) +} + +func TestCreateCORSMiddleware_EnabledWithoutOriginsReturnsNil(t *testing.T) { + logger := slog.Default() + middleware := createCORSMiddleware(true, "", logger) + assert.Nil(t, middleware) +} + +func TestCreateCORSMiddleware_ParsesCommaSeparatedOrigins(t *testing.T) { + logger := slog.Default() + middleware := createCORSMiddleware(true, "https://app.example.com,https://admin.example.com", logger) + assert.NotNil(t, middleware) +} + +func TestCreateCORSMiddleware_TrimsWhitespace(t *testing.T) { + logger := slog.Default() + middleware := createCORSMiddleware(true, " https://app.example.com , https://admin.example.com ", logger) + assert.NotNil(t, middleware) +} + +func TestParseOrigins_ParsesCommaSeparated(t *testing.T) { + origins := parseOrigins("https://app.example.com,https://admin.example.com") + assert.Equal(t, 2, len(origins)) + assert.Equal(t, "https://app.example.com", origins[0]) + assert.Equal(t, "https://admin.example.com", origins[1]) +} + +func TestParseOrigins_TrimsWhitespace(t *testing.T) { + origins := parseOrigins(" https://app.example.com , https://admin.example.com ") + assert.Equal(t, 2, 
len(origins)) + assert.Equal(t, "https://app.example.com", origins[0]) + assert.Equal(t, "https://admin.example.com", origins[1]) +} + +func TestParseOrigins_HandlesEmptyString(t *testing.T) { + origins := parseOrigins("") + assert.Nil(t, origins) +} + +func TestCORSIntegration_HeadersAddedWhenEnabled(t *testing.T) { + gin.SetMode(gin.TestMode) + + logger := slog.Default() + middleware := createCORSMiddleware(true, "https://app.example.com", logger) + + router := gin.New() + if middleware != nil { + router.Use(middleware) + } + router.GET("/test", func(c *gin.Context) { + c.JSON(http.StatusOK, gin.H{"status": "ok"}) + }) + + w := httptest.NewRecorder() + req := httptest.NewRequest(http.MethodGet, "/test", nil) + req.Header.Set("Origin", "https://app.example.com") + router.ServeHTTP(w, req) + + assert.Equal(t, http.StatusOK, w.Code) + assert.Equal(t, "https://app.example.com", w.Header().Get("Access-Control-Allow-Origin")) +} + +func TestCORSIntegration_NoHeadersWhenDisabled(t *testing.T) { + gin.SetMode(gin.TestMode) + + logger := slog.Default() + middleware := createCORSMiddleware(false, "https://app.example.com", logger) + + router := gin.New() + if middleware != nil { + router.Use(middleware) + } + router.GET("/test", func(c *gin.Context) { + c.JSON(http.StatusOK, gin.H{"status": "ok"}) + }) + + w := httptest.NewRecorder() + req := httptest.NewRequest(http.MethodGet, "/test", nil) + req.Header.Set("Origin", "https://app.example.com") + router.ServeHTTP(w, req) + + assert.Equal(t, http.StatusOK, w.Code) + assert.Empty(t, w.Header().Get("Access-Control-Allow-Origin")) +} + +func TestCORSIntegration_PreflightRequestHandled(t *testing.T) { + gin.SetMode(gin.TestMode) + + logger := slog.Default() + middleware := createCORSMiddleware(true, "https://app.example.com", logger) + + router := gin.New() + if middleware != nil { + router.Use(middleware) + } + router.POST("/test", func(c *gin.Context) { + c.JSON(http.StatusOK, gin.H{"status": "ok"}) + }) + + // Send preflight 
OPTIONS request + w := httptest.NewRecorder() + req := httptest.NewRequest(http.MethodOptions, "/test", nil) + req.Header.Set("Origin", "https://app.example.com") + req.Header.Set("Access-Control-Request-Method", "POST") + router.ServeHTTP(w, req) + + assert.Equal(t, http.StatusNoContent, w.Code) + assert.Equal(t, "https://app.example.com", w.Header().Get("Access-Control-Allow-Origin")) + assert.Contains(t, w.Header().Get("Access-Control-Allow-Methods"), "POST") +} diff --git a/internal/http/server.go b/internal/http/server.go index 9331f70..a5d58ed 100644 --- a/internal/http/server.go +++ b/internal/http/server.go @@ -23,6 +23,7 @@ import ( authHTTP "github.com/allisson/secrets/internal/auth/http" authService "github.com/allisson/secrets/internal/auth/service" authUseCase "github.com/allisson/secrets/internal/auth/usecase" + "github.com/allisson/secrets/internal/config" "github.com/allisson/secrets/internal/metrics" secretsHTTP "github.com/allisson/secrets/internal/secrets/http" tokenizationHTTP "github.com/allisson/secrets/internal/tokenization/http" @@ -56,6 +57,7 @@ func NewServer( // SetupRouter configures the Gin router with all routes and middleware. // This method is called during server initialization with all required dependencies. 
func (s *Server) SetupRouter( + cfg *config.Config, clientHandler *authHTTP.ClientHandler, tokenHandler *authHTTP.TokenHandler, auditLogHandler *authHTTP.AuditLogHandler, @@ -75,6 +77,16 @@ func (s *Server) SetupRouter( // Apply custom middleware router.Use(gin.Recovery()) // Gin's panic recovery + + // Add CORS middleware if enabled + if corsMiddleware := createCORSMiddleware( + cfg.CORSEnabled, + cfg.CORSAllowOrigins, + s.logger, + ); corsMiddleware != nil { + router.Use(corsMiddleware) + } + router.Use(requestid.New(requestid.WithGenerator(func() string { return uuid.Must(uuid.NewV7()).String() }))) // Request ID with UUIDv7 @@ -101,6 +113,16 @@ func (s *Server) SetupRouter( s.logger, ) + // Create rate limit middleware (applied to authenticated routes only) + var rateLimitMiddleware gin.HandlerFunc + if cfg.RateLimitEnabled { + rateLimitMiddleware = authHTTP.RateLimitMiddleware( + cfg.RateLimitRequestsPerSec, + cfg.RateLimitBurst, + s.logger, + ) + } + // API v1 routes v1 := router.Group("/v1") { @@ -110,6 +132,9 @@ func (s *Server) SetupRouter( // Client management endpoints clients := v1.Group("/clients") clients.Use(authMiddleware) // All client routes require authentication + if rateLimitMiddleware != nil { + clients.Use(rateLimitMiddleware) // Apply rate limiting to authenticated clients + } { clients.POST("", authHTTP.AuthorizationMiddleware(authDomain.WriteCapability, auditLogUseCase, s.logger), @@ -136,6 +161,9 @@ func (s *Server) SetupRouter( // Audit log endpoints auditLogs := v1.Group("/audit-logs") auditLogs.Use(authMiddleware) // All audit log routes require authentication + if rateLimitMiddleware != nil { + auditLogs.Use(rateLimitMiddleware) // Apply rate limiting to authenticated clients + } { auditLogs.GET("", authHTTP.AuthorizationMiddleware(authDomain.ReadCapability, auditLogUseCase, s.logger), @@ -146,6 +174,9 @@ func (s *Server) SetupRouter( // Secret management endpoints secrets := v1.Group("/secrets") secrets.Use(authMiddleware) // All 
secret routes require authentication + if rateLimitMiddleware != nil { + secrets.Use(rateLimitMiddleware) // Apply rate limiting to authenticated clients + } { secrets.POST("/*path", authHTTP.AuthorizationMiddleware(authDomain.EncryptCapability, auditLogUseCase, s.logger), @@ -164,6 +195,9 @@ func (s *Server) SetupRouter( // Transit encryption endpoints transit := v1.Group("/transit") transit.Use(authMiddleware) // All transit routes require authentication + if rateLimitMiddleware != nil { + transit.Use(rateLimitMiddleware) // Apply rate limiting to authenticated clients + } { keys := transit.Group("/keys") { @@ -202,6 +236,9 @@ func (s *Server) SetupRouter( // Tokenization endpoints tokenization := v1.Group("/tokenization") tokenization.Use(authMiddleware) // All tokenization routes require authentication + if rateLimitMiddleware != nil { + tokenization.Use(rateLimitMiddleware) // Apply rate limiting to authenticated clients + } { keys := tokenization.Group("/keys") {