diff --git a/.env.example b/.env.example index 2c82bf6..b490ee6 100644 --- a/.env.example +++ b/.env.example @@ -1,5 +1,5 @@ -# Copy this to .env.local and fill in your values -# NEVER commit .env.local to git +# Copy this to .env and fill in your values +# NEVER commit .env to git # Database DATABASE_URL=postgresql://pulseops:dev_password_change_in_production@localhost:5432/pulseops_dev @@ -13,6 +13,10 @@ KAFKA_BROKERS=localhost:9092 # API Configuration PORT=3001 NODE_ENV=development +API_URL=http://localhost:3001 +GRAPHQL_URL=http://localhost:3002/graphql +VITE_GRAPHQL_URL=http://localhost:3002/graphql +VITE_PULSEOPS_API_KEY=demo_key_change_this # Security (generate secure values for production) JWT_SECRET=your_jwt_secret_here_change_in_production @@ -25,7 +29,7 @@ GOOGLE_CALLBACK_URL=http://localhost:3001/auth/google/callback # Rate Limiting RATE_LIMIT_WINDOW_MS=60000 -RATE_LIMIT_MAX_REQUESTS=100 +RATE_LIMIT_MAX_REQUESTS=20000 # Monitoring LOG_LEVEL=info diff --git a/README.md b/README.md index 2e4e2c1..41841cf 100644 --- a/README.md +++ b/README.md @@ -2,83 +2,94 @@ Event-driven analytics backend with PostgreSQL time-series partitioning, Kafka event queue, Redis caching, and GraphQL API. -Designed to handle 1000+ events/second from heterogeneous sources, aggregate into daily/hourly metrics, and serve sub-second dashboard queries via cached GraphQL. +Designed to evaluate heterogeneous event ingestion, asynchronous aggregation, tenant-skew behavior, and dashboard query performance with benchmark evidence rather than hard-coded performance claims. -## Evidence +## Article Evidence -**Event Ingestion** — HTTP API accepts JSON events (any schema) and publishes to Kafka topic. Batches before writes reduce database load. +PulseOps includes architecture and benchmark artifacts for evidence-based writeups. 
The repository supports honest claims about the shape of the system: Fastify ingest, Kafka decoupling, worker aggregation, PostgreSQL raw/aggregate storage, Redis-backed GraphQL caching, synthetic skew generation, and k6 benchmark scripts. -- Max: 1000+ events/second per node (multi-instance deployment supported) -- Latency: p95 < 100ms ingestion time -- Rate limiter: 100 req/min per API key (configurable) +Do not publish specific throughput, latency, cache-hit, Kafka-lag, or availability numbers unless they are tied to a dated benchmark report under `docs/benchmarks/` with raw k6 output and environment notes. -(`services/ingest-api/src/handler.ts`) +- Evidence guide: `docs/article-evidence.md` +- Observability map: `docs/observability.md` +- Benchmark report template: `docs/benchmarks/YYYY-MM-DD-pulseops-benchmark.md` +- Clean full local benchmark: `docs/benchmarks/2026-06-16-clean-full-benchmark.md` +- Canonical local smoke report: `docs/benchmarks/2026-06-16-final-benchmark-smoke-pulseops-benchmark.md` +- Heavier ingest-scale report: `docs/benchmarks/2026-06-16-ingest-scale-pulseops-benchmark.md` +- Synthetic skew generator: `scripts/generate-skewed-events.ts` +- k6 load tests: `tests/load/` -**Event Processing** — Worker consumes from Kafka, aggregates events into hourly/daily metrics, writes to PostgreSQL. Handles out-of-order events and late arrivals via watermarking. +## Architecture Evidence -(`services/worker/src/processor.ts`) +**Event Ingestion** — HTTP API accepts validated JSON events and publishes them to Kafka topic `events-raw`. Batches reduce per-event request overhead and keep PostgreSQL writes out of the request path. + +(`services/ingest-api/src/index.ts`) + +**Event Processing** — Worker consumes from Kafka, writes raw events to PostgreSQL, and updates daily aggregates. Late-arrival behavior should be benchmarked before making correctness or freshness claims. 
+ +(`services/worker/src/index.ts`, `services/worker/src/processing.ts`, `services/worker/src/aggregators/daily.ts`) **Storage** — PostgreSQL 16 with time-series optimization: -- Partitioned tables by date (automatic rotation) -- Retention policies (30-day default, configurable) -- Indexes on (timestamp, tenant_id, metric_name) -- Vacuum tuning for write-heavy workload +- `events` partitioned by timestamp in the initial schema +- `daily_aggregates` keyed by `org_id`, `project_id`, metric, date, and dimensions +- Tenant/time indexes for raw event lookup +- Aggregate lookup indexes for dashboard queries -(`services/ingest-api/migrations/` — schema definitions) +(`scripts/init-db.sql`, `migrations/006_performance_optimizations.sql`) **Query Layer** — Apollo GraphQL API: -- Cached queries (Redis TTL: 5min for hourly, 1h for daily aggregates) -- Multi-tenant authorization (checked on resolver entry) -- OAuth 2.0 (Google integration for user auth) -- Query complexity limits (prevent DOS via deep nested queries) +- Redis cache with tenant/project cache-version keys and 300-second TTL +- Queries require `X-API-Key` and reject cross-tenant `orgId`/`projectId` +- Aggregate-backed paths when no property filters are supplied +- Raw event fallback for supported property filters (`services/graphql-api/src/schema.ts`) -**Query Performance**: -- Cache hits: p95 < 50ms -- Database queries (cache miss): p95 < 500ms -- Dashboard (typical 5-10 queries): p95 < 2s total - -(`scripts/load-test.js` — k6 benchmark script) +**Benchmark Tooling**: +- `scripts/generate-skewed-events.ts` produces hot/medium/quiet tenant workloads with late arrivals, duplicates, bursts, multiple metrics, and 7/30/90-day windows. +- `tests/load/ingest-throughput.js` measures ingest acceptance behavior. +- `tests/load/hot-tenant.js` stresses skewed tenant distribution. +- `tests/load/dashboard-query.js` measures GraphQL dashboard query behavior. 
+- `tests/load/backpressure.js` captures behavior under burst pressure. **Dashboard** — React 18 + Recharts, renders: -- Real-time event count trends (updated via GraphQL subscriptions) +- Event count trends from GraphQL queries - Custom metric dashboards (user-defined dimensions) - Anomaly detection (threshold alerts) - Timezone-aware charting -(`web/src/pages/Dashboard.tsx`) +(`web/src/components/Dashboard.tsx`) **Infrastructure** — Docker Compose local dev stack: - PostgreSQL 16 (time-series partitioned) - Redis 7 (query cache) -- Kafka + Redpanda (message queue, 3 partitions) +- Apache Kafka (message queue, 3 partitions) - Ingest API (Node.js, port 3001) - GraphQL API (Node.js, port 3002) - Worker (Node.js, Kafka consumer) -- React frontend (Vite, port 5173) +- React frontend (Vite, port 5173, run through `pnpm dev:web`) -All services configured with health checks + auto-restart. +Docker Compose health checks cover PostgreSQL, Redis, Kafka, and the app services; the Node app services also use restart policies. (`docker-compose.yml`) -**Stack** — Node.js 20, Express, Apollo GraphQL, React 18, PostgreSQL 16, Redis 7, Kafka, Playwright (E2E), k6 (load testing), pnpm workspaces. +**Stack** — Node.js 20, Fastify, Apollo GraphQL, React 18, PostgreSQL 16, Redis 7, Kafka, Playwright (E2E), k6 (load testing), pnpm workspaces. ## How It Works 1. **Event source sends JSON** → POST to ingest API -2. **API validates + publishes** → Kafka topic (with batching) -3. **Worker consumes** → Aggregates into hourly buckets by (tenant_id, metric_name, dimension_values) -4. **Aggregates stored** → PostgreSQL partitioned tables +2. **API validates + authenticates** → binds org/project from `X-API-Key` +3. **API publishes** → Kafka topic `events-raw` +4. **Worker consumes** → writes idempotent raw events and updates daily aggregates 5. **GraphQL query** → Checks cache (Redis) → if miss, queries DB → returns to dashboard -6. 
**Dashboard renders** → Real-time updates via subscriptions +6. **Dashboard renders** → React queries GraphQL and can be refreshed by the client ## Getting Started ### One-Command Setup ```bash pnpm bootstrap # Installs deps, starts Docker services, runs migrations -pnpm dev # Starts all 4 backend services + frontend concurrently +pnpm dev # Starts ingest API, GraphQL API, worker, and frontend concurrently ``` Visit `http://localhost:5173`. @@ -90,6 +101,7 @@ docker-compose up -d # Create schema pnpm db:migrate +pnpm db:verify:fresh # Seed sample data pnpm db:seed @@ -99,22 +111,25 @@ pnpm dev ``` ### Services -- **Ingest API** (port 3001): `curl -X POST http://localhost:3001/ingest -H 'Content-Type: application/json' -d '{"event":"signup","user_id":"123"}'` -- **GraphQL API** (port 3002): `http://localhost:3002/graphql` +- **Ingest API** (port 3001): `curl -X POST http://localhost:3001/api/v1/events -H 'X-API-Key: demo_key_change_this' -H 'Content-Type: application/json' -d '{"event_name":"signup","user_id":"123"}'`; metrics at `http://localhost:3001/metrics` +- **GraphQL API** (port 3002): `http://localhost:3002/graphql`; metrics at `http://localhost:3002/metrics` +- **Worker** (metrics port 3003): `http://localhost:3003/metrics` - **Frontend** (port 5173): `http://localhost:5173` ## API Examples ### Ingest Event ```bash -curl -X POST http://localhost:3001/ingest \ - -H "Authorization: Bearer API_KEY" \ +curl -X POST http://localhost:3001/api/v1/events \ + -H "X-API-Key: demo_key_change_this" \ -H "Content-Type: application/json" \ -d '{ - "event": "page_view", + "event_name": "page_view", "user_id": "u123", - "page": "/pricing", - "timestamp": 1705945000 + "properties": { + "page": "/pricing" + }, + "timestamp": "2026-06-16T12:00:00.000Z" }' ``` @@ -122,15 +137,16 @@ curl -X POST http://localhost:3001/ingest \ ```graphql query { metrics( - tenant: "acme-corp" - name: "page_views" - startTime: "2024-01-01T00:00:00Z" - endTime: "2024-01-31T23:59:59Z" - groupBy: 
["page"] + orgId: "00000000-0000-0000-0000-000000000001" + projectId: "00000000-0000-0000-0000-000000000002" + startDate: "2026-06-01" + endDate: "2026-06-16" ) { - timestamp - value - dimensions + totalEvents + topEvents { + eventName + count + } } } ``` @@ -154,18 +170,38 @@ pnpm test:e2e # Playwright ### Load Testing ```bash -pnpm test:load # k6, 1000 RPS sustained for 5 minutes +pnpm --silent benchmark:generate -- --tenants 100 --events 100000 --days 30 --hot-tenant-ratio 0.6 --late-arrival-ratio 0.05 --duplicate-ratio 0.01 --output jsonl > docs/benchmarks/evidence/events.jsonl +RUN_ID=local-smoke API_URL=http://localhost:3001 GRAPHQL_URL=http://localhost:3002/graphql API_KEY=demo_key_change_this pnpm benchmark +pnpm benchmark:report -- --run-id local-smoke --output docs/benchmarks/local-smoke-pulseops-benchmark.md +RUN_ID=local-smoke pnpm validate:evidence # writes docs/benchmarks/latest-pulseops-benchmark.md +pnpm db:verify:fresh +API_URL=http://localhost:3001 API_KEY=demo_key_change_this pnpm benchmark:ingest +pnpm benchmark:seed-tenants -- --tenants 100 --hot-tenants 1 --medium-tenants 10 --manifest tmp/benchmark-tenants.json +TENANT_KEYS_FILE=tmp/benchmark-tenants.json API_URL=http://localhost:3001 pnpm benchmark:hot-tenant +RUN_ID=local-smoke TENANT_KEYS_FILE=tmp/benchmark-tenants.json pnpm benchmark:hot-db -- --require-complete +GRAPHQL_URL=http://localhost:3002/graphql API_KEY=demo_key_change_this ORG_ID=00000000-0000-0000-0000-000000000001 PROJECT_ID=00000000-0000-0000-0000-000000000002 pnpm benchmark:dashboard +RUN_ID=cache-smoke WARM_ITERATIONS=12 pnpm benchmark:cache -- --run-id cache-smoke --warm-iterations 12 +pnpm benchmark:worker -- --run-id worker-catchup-smoke --events 1000 --batch-size 100 --poll-ms 500 --timeout-ms 60000 +docker compose stop worker +pnpm prove:worker-retry-offsets -- --timeout-ms 120000 --poll-ms 500 +docker compose start worker +API_URL=http://localhost:3001 API_KEY=demo_key_change_this pnpm benchmark:backpressure ``` ## 
Performance Characteristics -| Metric | Target | Notes | +These rows summarize local benchmark measurements and open measurement areas, not performance guarantees. + +| Metric | Status | Notes | |--------|--------|-------| -| Ingest throughput | 1000+ RPS | Single node; horizontally scalable | -| Ingest p95 latency | < 100ms | Network + Kafka publish | -| Query p95 (cached) | < 50ms | Redis hit | -| Query p95 (DB) | < 500ms | PostgreSQL + index | -| Availability | 99%+ | Demo target; prod requires multi-region | +| Ingest throughput | Measured locally | See `docs/benchmarks/2026-06-16-clean-full-benchmark.md` for the clean full local run, and `docs/benchmarks/2026-06-16-ingest-scale-pulseops-benchmark.md` for the heavier fixed-rate ingest runs. The 1000 RPS target was not sustained locally. | +| Ingest p95 latency | Measured locally | See dated benchmark reports; request acceptance latency is not aggregate visibility latency | +| Dashboard query p95 | Measured locally | See canonical smoke report; includes k6 dashboard smoke and cold/warm cache smoke | +| Worker catch-up | Measured locally | 200-event local smoke run; see canonical smoke report | +| Kafka lag | Measured locally | Smoke run returned lag to 0; heavier ingest-scale snapshot captured 10,254,305 queued messages. Do not claim a lag limit or freshness guarantee. | +| Tenant skew impact | Smoke measured locally | Canonical local smoke reconciled 249 persisted hot-test events with Kafka lag 0: hot 201, quiet 40, medium 8. 
Evidence: `docs/benchmarks/evidence/hot-tenant-db-2026-06-16-final-benchmark-smoke.json`; full long-duration skew benchmark still needed | +| Hot-tenant DB pressure | Measured locally when `benchmark:hot-db -- --require-complete` is run | Aggregate-key pressure, request/persistence/lag reconciliation, and after-run DB snapshot; not continuous lock sampling | +| Backpressure behavior | TBD | Record rate limits, errors, queue lag, and recovery | ## Deployment @@ -194,21 +230,20 @@ Event Source ↓ Worker (aggregation) ↓ -PostgreSQL 16 (time-series partitioned, auto-vacuum tuned) +PostgreSQL 16 (time-series partitioned) ↓ GraphQL API (port 3002, Redis cache layer) ↓ -React Dashboard (port 5173, subscriptions for real-time updates) +React Dashboard (port 5173, polling refresh) ``` ## Security - **No secrets in git** — `.env` files gitignored, secrets in environment -- **API key hashing** — bcrypt, stored in PostgreSQL -- **Multi-tenant isolation** — All queries filtered by `tenant_id` at resolver entry -- **OAuth 2.0** — Google sign-in for user authentication -- **Rate limiting** — Token bucket per API key (100 req/min default) -- **SQL injection prevention** — Parameterized queries via ORM +- **API key hashing** — SHA-256 fingerprint lookup plus bcrypt verification +- **Multi-tenant isolation** — API keys bind org/project context; GraphQL rejects cross-tenant args +- **Rate limiting** — Token bucket per API-key fingerprint +- **SQL injection prevention** — Parameterized PostgreSQL queries - **Automated scans** — ESLint security plugin, npm audit in CI ## License diff --git a/docker-compose.yml b/docker-compose.yml index 2d28709..ca1cea1 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -38,11 +38,12 @@ services: environment: KAFKA_NODE_ID: 1 KAFKA_PROCESS_ROLES: broker,controller - KAFKA_LISTENERS: PLAINTEXT://0.0.0.0:9092,CONTROLLER://0.0.0.0:9093 - KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://localhost:9092 + KAFKA_LISTENERS: 
INTERNAL://0.0.0.0:29092,EXTERNAL://0.0.0.0:9092,CONTROLLER://0.0.0.0:9093 + KAFKA_ADVERTISED_LISTENERS: INTERNAL://kafka:29092,EXTERNAL://localhost:9092 KAFKA_CONTROLLER_LISTENER_NAMES: CONTROLLER - KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: CONTROLLER:PLAINTEXT,PLAINTEXT:PLAINTEXT - KAFKA_CONTROLLER_QUORUM_VOTERS: 1@localhost:9093 + KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: CONTROLLER:PLAINTEXT,INTERNAL:PLAINTEXT,EXTERNAL:PLAINTEXT + KAFKA_INTER_BROKER_LISTENER_NAME: INTERNAL + KAFKA_CONTROLLER_QUORUM_VOTERS: 1@kafka:9093 KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1 KAFKA_TRANSACTION_STATE_LOG_REPLICATION_FACTOR: 1 KAFKA_TRANSACTION_STATE_LOG_MIN_ISR: 1 @@ -51,7 +52,7 @@ services: volumes: - kafka_data:/var/lib/kafka/data healthcheck: - test: ["CMD-SHELL", "kafka-metadata.sh --snapshot /var/lib/kafka/data/__cluster_metadata-0/00000000000000000000.log --print --skip-record-metadata || exit 0"] + test: ["CMD-SHELL", "/opt/kafka/bin/kafka-topics.sh --bootstrap-server localhost:9092 --list > /dev/null 2>&1"] interval: 10s timeout: 10s retries: 5 @@ -69,7 +70,9 @@ services: PORT: 3001 DATABASE_URL: postgresql://pulseops:dev_password_change_in_production@postgres:5432/pulseops_dev REDIS_URL: redis://redis:6379 - KAFKA_BROKERS: kafka:9092 + KAFKA_BROKERS: kafka:29092 + RATE_LIMIT_WINDOW_MS: 60000 + RATE_LIMIT_MAX_REQUESTS: 20000 depends_on: postgres: condition: service_healthy @@ -79,6 +82,11 @@ services: condition: service_healthy volumes: - ./services/ingest-api/src:/app/src + healthcheck: + test: ["CMD-SHELL", "node -e \"fetch('http://localhost:3001/health').then(r => process.exit(r.ok ? 0 : 1)).catch(() => process.exit(1))\""] + interval: 10s + timeout: 5s + retries: 6 restart: unless-stopped graphql-api: @@ -100,6 +108,11 @@ services: condition: service_healthy volumes: - ./services/graphql-api/src:/app/src + healthcheck: + test: ["CMD-SHELL", "node -e \"fetch('http://localhost:3002/health').then(r => process.exit(r.ok ? 
0 : 1)).catch(() => process.exit(1))\""] + interval: 10s + timeout: 5s + retries: 6 restart: unless-stopped worker: @@ -107,18 +120,29 @@ services: context: ./services/worker dockerfile: Dockerfile container_name: pulseops-worker + ports: + - "3003:3003" environment: NODE_ENV: development DATABASE_URL: postgresql://pulseops:dev_password_change_in_production@postgres:5432/pulseops_dev - KAFKA_BROKERS: kafka:9092 + REDIS_URL: redis://redis:6379 + KAFKA_BROKERS: kafka:29092 KAFKA_GROUP_ID: pulseops-aggregators + WORKER_METRICS_PORT: 3003 depends_on: postgres: condition: service_healthy + redis: + condition: service_healthy kafka: condition: service_healthy volumes: - ./services/worker/src:/app/src + healthcheck: + test: ["CMD-SHELL", "node -e \"fetch('http://localhost:3003/metrics').then(r => process.exit(r.ok ? 0 : 1)).catch(() => process.exit(1))\""] + interval: 10s + timeout: 5s + retries: 6 restart: unless-stopped volumes: diff --git a/docs/article-evidence.md b/docs/article-evidence.md new file mode 100644 index 0000000..081641a --- /dev/null +++ b/docs/article-evidence.md @@ -0,0 +1,249 @@ +# PulseOps Article Evidence + +This document separates what the repository implements from what still needs a measured benchmark run. Use it as source material for articles, demos, and portfolio writeups without inventing performance numbers. + +## Architecture Evidence + +PulseOps is an event analytics system with four main runtime paths: + +1. Event producers send JSON events to the Fastify ingest API at `POST /api/v1/events` or `POST /api/v1/events/batch`. +2. The ingest API validates payload shape, applies API-key auth, attaches `org_id` and `project_id`, and publishes accepted events to Kafka topic `events-raw`. +3. The worker consumes Kafka messages, writes raw events to PostgreSQL through an idempotency table, and updates daily aggregate rows only for new events. +4. 
The GraphQL API authenticates `X-API-Key`, serves dashboard queries from `daily_aggregates` when possible, falls back to filtered raw event queries when filters require it, and caches query responses in Redis with tenant/project cache-version keys. + +Primary code references: + +- Ingest API: `services/ingest-api/src/index.ts` +- Event schema: `services/ingest-api/src/schemas/event.ts` +- Worker: `services/worker/src/index.ts` +- Daily aggregation: `services/worker/src/aggregators/daily.ts` +- GraphQL schema/resolvers: `services/graphql-api/src/schema.ts`, `services/graphql-api/src/resolvers.ts` +- Database initialization: `scripts/init-db.sql` +- Performance migration notes: `migrations/006_performance_optimizations.sql` + +## Event Flow + +Accepted single events return `202 Accepted` after Kafka publish. Batch events return `202 Accepted` with the accepted count after validating and publishing all messages in the batch. The API does not synchronously wait for PostgreSQL writes, so ingestion acceptance and analytics availability are intentionally decoupled. + +Kafka messages are keyed by `org_id:project_id:user/session/event`, which preserves tenant context while spreading a hot tenant across more partition keys than org-only routing. Hot tenants can still concentrate aggregate updates on the same tenant/project metric rows, so Kafka partition distribution and aggregate write contention both need measurement. + +## PostgreSQL Writes + +The worker first inserts `(org_id, project_id, event_id)` into `event_dedup_keys` inside the same transaction as raw-event and aggregate writes. If that insert conflicts, the worker treats the Kafka message as a duplicate and skips aggregate updates. 
+ +For new events, the worker writes into `events`: + +- `org_id` +- `project_id` +- `event_name` +- `user_id` +- `session_id` +- `properties` +- `timestamp` + +It then increments daily aggregate rows in `daily_aggregates` for: + +- `dau`, after inserting a distinct `(org_id, project_id, date, user_id)` row into `daily_active_users` +- `event_count` grouped by `event_name` +- `total_events` + +The aggregate table has a uniqueness constraint on `(org_id, project_id, metric_name, date, dimensions)`, allowing `INSERT ... ON CONFLICT ... DO UPDATE` increments. + +## Redis Cache + +The GraphQL resolvers cache dashboard query results in Redis with keys that include `orgId`, `projectId`, cache version, date range, query type, event name where relevant, and normalized filters. Current resolver TTL is 300 seconds. The worker increments `cache_version:{org_id}:{project_id}` after new aggregate writes so subsequent reads use a fresh cache namespace. + +Cacheable paths include: + +- `dailyActiveUsers` +- `eventCounts` +- `totalEvents` +- `metrics` +- `eventCountsOverTime` + +`recentEvents` currently queries PostgreSQL directly. + +Cache invalidation correctness is covered by `tests/integration/cache-correctness.test.ts`. The integration test populates a real Redis cache entry through the GraphQL resolver, processes a real worker event for the same tenant/project, verifies `cache_version:{org_id}:{project_id}` increments, and verifies the next GraphQL read uses the fresh namespace and sees the updated aggregate. + +## Kafka Decoupling + +Kafka decouples request acceptance from database writes. This lets the ingest API absorb short bursts as long as Kafka accepts messages, while the worker controls database write pressure. Operationally, this means both ingest latency and end-to-end data freshness must be measured: + +- Ingest acceptance latency: HTTP request start to `202 Accepted`. +- Queue lag: Kafka produced offset to committed consumer offset. 
+- Analytics lag: event timestamp or ingestion time to visible aggregate/query result. + +Do not claim analytics are real-time unless a benchmark run measures acceptable lag under the stated load. + +## Observability Evidence + +The ingest API exposes Prometheus text metrics at `GET /metrics`. The endpoint currently includes HTTP request counts and duration, tenant/project-scoped ingest counters, batch ingest counters, Kafka produced/error counters, and PostgreSQL pool gauges. The live integration test `services/ingest-api/tests/integration/ingest.test.ts` verifies that the endpoint emits the core coordination series. + +The GraphQL API also exposes `GET /metrics` for resolver duration, resolver counts, API-key auth failures, Redis cache hit/miss counts by query family, and PostgreSQL pool gauges. A checked scrape after two authenticated dashboard queries is saved at `docs/benchmarks/evidence/graphql-metrics-2026-06-16.txt`. + +The worker exposes `GET /metrics` on port 3003 for processed events, processing errors, processing duration, cache invalidation count, and PostgreSQL pool gauges. A checked scrape after a 5-event worker catch-up smoke is saved at `docs/benchmarks/evidence/worker-metrics-2026-06-16.txt`. DB write duration remains in structured worker logs as `db_write_duration_ms`; do not claim it is exposed as a Prometheus DB-write histogram until that specific metric is wired and scraped. + +## Tenant Mapping + +The codebase uses `org_id` and `project_id` as the tenant isolation boundary. Some docs or articles may use the generic term `tenant_id`; in PulseOps that maps to: + +| Concept | PulseOps field | Notes | +| --- | --- | --- | +| Tenant | `org_id` | Organization-level boundary from API-key auth. | +| Workspace/app | `project_id` | Project under an organization. | +| Synthetic tenant label | `properties.tenant_id` | Useful for generated benchmark data, not an auth boundary. 
| + +Direct ingest requests cannot choose arbitrary `org_id`; the API attaches it from the authenticated API key. Project-scoped API keys also reject mismatched `project_id` values. Synthetic benchmark JSONL can include tenant metadata for analysis, but production ingest treats API-key auth as the source of truth. + +Tenant isolation evidence lives in `tests/integration/tenant-isolation.test.ts`, `tests/integration/graphql-authz.test.ts`, and the live ingest checks in `services/ingest-api/tests/integration/ingest.test.ts`. The ingest integration tests verify that the demo project-scoped API key rejects cross-project single and batch writes. + +## Partitions And Indexes + +The initial schema partitions `events` by `timestamp` with monthly partitions for the local 90-day benchmark window. Existing indexes support tenant/time and project/time query patterns: + +- `idx_events_org_time` on `(org_id, timestamp DESC)` +- `idx_events_project_time` on `(project_id, timestamp DESC)` +- `idx_events_org_project_time` on `(org_id, project_id, timestamp DESC)` +- `idx_events_event_id` on `(org_id, project_id, event_id)` +- `idx_aggregates_lookup` on `(org_id, project_id, date)` +- `idx_aggregates_metric` on `(metric_name, date)` + +`migrations/006_performance_optimizations.sql` adds additional index and materialized-view ideas, but benchmark claims should cite the actual migration state used during the run. + +## Tenant-Skew Metrics To Capture + +Tenant skew is the main stress pattern for this system because Kafka keys, aggregate rows, and cache keys are tenant/project scoped. Benchmark reports should include: + +- Tenant distribution: hot, medium, and quiet tenant event counts. +- Hottest tenant share of total events. +- Kafka partition distribution by produced messages and consumer lag. +- PostgreSQL write latency and lock/wait behavior during hot-tenant bursts. +- Aggregate row conflict/update rate for hot `(org_id, project_id, metric_name, date, dimensions)` keys. 
+- Dashboard query latency for hot tenant vs quiet tenant. +- Cache hit ratio by query family. +- Late-arrival count and duplicate count in the generated workload. + +## Benchmark Commands + +Generate synthetic JSONL without sending it: + +```bash +pnpm --silent benchmark:generate -- --tenants 100 --events 100000 --days 30 --hot-tenant-ratio 0.6 --late-arrival-ratio 0.05 --duplicate-ratio 0.01 --output jsonl > docs/benchmarks/evidence/events.jsonl +``` + +Send synthetic events directly to a local ingest API: + +```bash +API_URL=http://localhost:3001 API_KEY=demo_key_change_this pnpm benchmark:generate -- --tenants 100 --events 10000 --days 7 --hot-tenant-ratio 0.6 --late-arrival-ratio 0.05 --duplicate-ratio 0.01 --output direct +``` + +Run k6 ingest throughput: + +```bash +API_URL=http://localhost:3001 API_KEY=demo_key_change_this pnpm benchmark:ingest +``` + +Run hot-tenant skew: + +```bash +pnpm benchmark:seed-tenants -- --tenants 100 --hot-tenants 1 --medium-tenants 10 --manifest tmp/benchmark-tenants.json +TENANT_KEYS_FILE=tmp/benchmark-tenants.json API_URL=http://localhost:3001 pnpm benchmark:hot-tenant +RUN_ID=local-smoke TENANT_KEYS_FILE=tmp/benchmark-tenants.json pnpm benchmark:hot-db -- --require-complete +``` + +Run dashboard query benchmark: + +```bash +GRAPHQL_URL=http://localhost:3002/graphql API_KEY=demo_key_change_this ORG_ID=00000000-0000-0000-0000-000000000001 PROJECT_ID=00000000-0000-0000-0000-000000000002 pnpm benchmark:dashboard +``` + +Run cold/warm dashboard cache measurement: + +```bash +RUN_ID=cache-smoke WARM_ITERATIONS=12 pnpm benchmark:cache -- --run-id cache-smoke --warm-iterations 12 +``` + +Capture run-scoped PostgreSQL query plans and the GraphQL cache-path note: + +```bash +RUN_ID=2026-06-16-final-benchmark-smoke \ +ORG_ID=00000000-0000-4000-8000-0000000f4241 \ +PROJECT_ID=00000000-0000-4000-8000-0000001e8481 \ +CACHE_EVIDENCE=docs/benchmarks/evidence/dashboard-cache-2026-06-16-final-benchmark-smoke.json \ +pnpm query-plans:capture +``` + +Run worker catch-up measurement 
from HTTP acceptance through Kafka to persisted rows: + +```bash +pnpm benchmark:worker -- --run-id worker-catchup-smoke --events 1000 --batch-size 100 --poll-ms 500 --timeout-ms 60000 +``` + +Run the controlled worker retry/offset proof: + +```bash +docker compose stop worker +pnpm prove:worker-retry-offsets -- --timeout-ms 120000 --poll-ms 500 +docker compose start worker +``` + +Run backpressure benchmark: + +```bash +API_URL=http://localhost:3001 API_KEY=demo_key_change_this pnpm benchmark:backpressure +``` + +Run the full local benchmark suite and generate an evidence-backed report: + +```bash +RUN_ID=local-smoke API_URL=http://localhost:3001 GRAPHQL_URL=http://localhost:3002/graphql API_KEY=demo_key_change_this pnpm benchmark +pnpm benchmark:report -- --run-id local-smoke --output docs/benchmarks/local-smoke-pulseops-benchmark.md +``` + +Verify the fresh PostgreSQL migration path: + +```bash +pnpm db:verify:fresh +``` + +## Safe Public Claims + +These claims are supported by repository evidence: + +- PulseOps uses a Fastify ingest API, Kafka queue, worker aggregation process, PostgreSQL storage, Redis query cache, and GraphQL query layer. +- Ingestion is decoupled from PostgreSQL writes by Kafka. +- The data model scopes events and aggregates by `org_id` and `project_id`. +- Raw events are partitioned by timestamp in the initial database schema. +- Dashboard resolvers use Redis caching for several aggregate query paths. +- The repository includes synthetic skew generation and k6 benchmark scripts that can produce evidence for throughput, hot-tenant behavior, dashboard query latency, and backpressure. +- `pnpm db:verify:fresh` runs the migrator against a throwaway PostgreSQL 16 database and proves the fresh local schema path creates expected partitions, materialized dashboard evidence objects, and migration ledger rows without the known duplicate legacy index names. 
The checked evidence file is `docs/migrations/evidence/fresh-migration-2026-06-16-final-fresh-migration.txt`. +- In the clean full local benchmark report `docs/benchmarks/2026-06-16-clean-full-benchmark.md`, all benchmark suites completed after a Docker volume reset using conservative local rates. The run produced ingest, hot-tenant, hot-tenant DB reconciliation, dashboard, cache, worker catch-up, backpressure, and run-scoped query-plan evidence, and final Kafka lag was 0. +- In the canonical `2026-06-16-final-benchmark-smoke` report, the ingest smoke run accepted 226 HTTP batch requests at 14.96 requests/second with 0% HTTP request failure and p95 request latency of 7.17 ms. This is request acceptance evidence, not immediate-persistence evidence. +- The heavier `2026-06-16-ingest-scale` report is the better source for ingest stress claims. In that local run, 100 RPS for 2 minutes was clean at the HTTP layer, 500 RPS showed stress through 1,119 dropped iterations and 716.94 ms p95 latency, and the 1000 RPS target was not sustained: actual request rate was 469.37 requests/second, dropped iterations were 158,898, HTTP failure rate was 0.739%, and p99 reached 15000.34 ms. +- The same ingest-scale snapshot captured Kafka lag of 10,254,305 messages for consumer group `pulseops-aggregators`. That supports a coordination/backpressure claim, not a claim that all attempted events were persisted or aggregated. +- In the canonical `2026-06-16-final-benchmark-smoke` report, the hot-tenant DB evidence reconciled 249 successful hot-tenant k6 requests with 249 persisted events and Kafka lag 0. The hot tenant class produced 201 of those 249 persisted events. +- The canonical dashboard cache smoke measured a 32.34 ms cold GraphQL dashboard request, then a 1.54 ms warm median and 2.31 ms warm p95 across 5 warm requests after three Redis keys were created for the tenant/project/date range.
+- The canonical worker catch-up smoke accepted 200 events through HTTP/Kafka, persisted 200 raw event rows, and returned Kafka lag to 0. The measured persisted rate until caught up was 92.72 events/second for that bounded local workload. +- The 2026-06-16 controlled worker retry proof stopped the compose worker, ran a local worker with a one-shot crash hook after `processEvent` and before Kafka offset commit, and saved `docs/benchmarks/evidence/worker-retry-offsets-2026-06-16-worker-retry-proof-3.json`. In that proof, the worker exited with code 86, the probe event had exactly 1 raw event row, 1 dedupe key, and aggregate value 1 after the crash, Kafka lag was 1 after the crash, replay kept those database counts at 1, and Kafka lag returned to 0. +- `pnpm benchmark:hot-db -- --require-complete` measures persisted hot-tenant distribution, aggregate-key pressure, burst windows, partition spread, representative hot/quiet query plans, an after-run PostgreSQL lock/activity snapshot, and reconciliation between k6 requests, Kafka lag, and persisted rows for a specific `run_id`. +- In the canonical `2026-06-16-final-benchmark-smoke` hot-tenant DB evidence, `pnpm benchmark:hot-db -- --require-complete` reconciled 249 successful hot-tenant k6 requests with 249 persisted hot-test events and Kafka lag 0. The persisted events were 201 hot-class events, 40 quiet-class events, and 8 medium-class events. Evidence: `docs/benchmarks/evidence/hot-tenant-db-2026-06-16-final-benchmark-smoke.json`. +- The final 2026-06-16 smoke report now includes run-scoped query-plan evidence under `docs/query-plans/2026-06-16-final-benchmark-smoke-*.md`. Those files capture the run ID, Git commit, target hot-tenant org/project, row counts, indexes, event partitions, exact SQL, EXPLAIN ANALYZE output, and interpretation. The GraphQL cache-path file explicitly records that Redis cache hits do not have a PostgreSQL EXPLAIN plan and must be cited from the dashboard cache JSON evidence instead. 
+ +## Unsafe Public Claims Until Measured + +Do not publish these as facts without a dated benchmark report and environment details: + +- Specific events-per-second throughput beyond the exact dated run and workload. +- Specific p95 or p99 ingest latency beyond the exact dated run and workload. +- Specific p95 or p99 dashboard query latency beyond the exact dated run and workload. +- Kafka lag limits or dashboard freshness guarantees under load. The ingest-scale report captures a large lag snapshot, but it does not establish a stable upper bound or catch-up SLO. +- General exactly-once processing claims across every crash/rebalance/failure mode. The worker retry proof covers one controlled post-processing/pre-offset-commit replay window. +- PostgreSQL write capacity under hot-tenant skew. +- Production lock/wait behavior from `benchmark:hot-db`; its lock/activity data is an after-run snapshot unless a report explicitly says it sampled continuously during load. +- Redis cache hit ratio in realistic usage. The local cold/warm cache timing exists, but it is not a production cache-hit-ratio measurement. +- Horizontal scalability claims. +- Production availability or SLO claims. + +## Article-Ready Paragraph + +While building PulseOps, I saw a smaller version of this coordination problem. In the canonical local Docker smoke run on June 16, 2026, the ingest API accepted 226 batch requests at 14.96 requests per second with 0% HTTP request failure and 7.17 ms p95 request latency, while a separate worker catch-up proof accepted 200 events, persisted 200 raw event rows, and returned Kafka lag to 0. In the heavier ingest-scale run, the 1000 RPS target was not sustained locally and Kafka lag reached 10,254,305 messages, which made the coordination boundary explicit: HTTP acceptance was not the same thing as database visibility.
The dashboard cache path showed the same issue in another form: one cold GraphQL dashboard request took 32.34 ms and populated three Redis keys; the warm median over 5 repeat requests was 1.54 ms. That is the point: a single incoming event was not just one database write. It entered through a Fastify ingest API, moved through Kafka, was consumed by a worker, written idempotently into partitioned PostgreSQL, aggregated into daily buckets, invalidated Redis-backed GraphQL cache keys, and appeared later in dashboard reads. The lesson was not that PulseOps was operating at massive scale. It was not. The lesson was that even at project scale, the hard part became coordination: ingestion, batching, tenant-aware metrics, cache freshness, dashboard latency, and keeping the database predictable under uneven load. diff --git a/docs/benchmarks/2026-06-16-clean-full-benchmark.md b/docs/benchmarks/2026-06-16-clean-full-benchmark.md new file mode 100644 index 0000000..7682219 --- /dev/null +++ b/docs/benchmarks/2026-06-16-clean-full-benchmark.md @@ -0,0 +1,158 @@ +# PulseOps Benchmark Report: 2026-06-16 + +Status: evidence-backed local report for run ID `2026-06-16-clean-full-benchmark`; not production-scale + +## Environment + +| Field | Value | +| --- | --- | +| Git commit | `112ba0414fa55bb431e814441e85ebd925107347` | +| Dirty tree | yes | +| Machine | Apple M4 Pro, 12 logical CPUs, 24.00 GiB host memory | +| Docker resources | 12 CPUs, 7.65 GiB | +| OS | Darwin 25.5.0 arm64 | +| Node.js version | v25.3.0 | +| PostgreSQL version | 16.13 | +| Redis version | v=7.4.8 | +| Kafka version | 4.2.0 | +| PostgreSQL row count | 18364 raw events | +| Daily aggregate row count | 628 rows | +| Event partitions | 7 child partitions | +| k6 version | k6 v2.0.0+dirty (commit/8c3be52cc1-dirty, go1.26.3, linux/arm64) (Docker fallback image grafana/k6:2.0.0) | +| Dataset | local Docker dataset at report generation time | + +## Commands + +```bash +# Command matching the run-specific evidence 
files currently present in this report: +RUN_ID=2026-06-16-clean-full-benchmark pnpm benchmark +RUN_ID=2026-06-16-clean-full-benchmark pnpm benchmark:report -- --run-id 2026-06-16-clean-full-benchmark --force + +# Full-suite command, if you want every row populated: +RUN_ID=2026-06-16-clean-full-benchmark pnpm benchmark +``` + +The actual clean full benchmark run used conservative local settings to avoid recreating the previous multi-million-message Kafka backlog: + +```bash +RUN_ID=2026-06-16-clean-full-benchmark \ +TENANT_KEYS_FILE=tmp/clean-benchmark-tenants.json \ +API_URL=http://localhost:3001 \ +GRAPHQL_URL=http://localhost:3002/graphql \ +RATE=20 \ +DURATION=20s \ +BATCH_SIZE=20 \ +START_RATE=5 \ +PEAK_RATE=20 \ +RAMP_DURATION=5s \ +HOLD_DURATION=15s \ +RAMP_DOWN_DURATION=5s \ +VUS=10 \ +SLEEP_SECONDS=0 \ +BURST_RATE=20 \ +BURST_RAMP=5s \ +BURST_HOLD=15s \ +RECOVERY_RATE=5 \ +RECOVERY=10s \ +PREALLOCATED_VUS=30 \ +MAX_VUS=100 \ +EVENTS=200 \ +TIMEOUT_MS=120000 \ +POLL_MS=500 \ +WARM_ITERATIONS=10 \ +pnpm benchmark +``` + +Run-specific evidence files found for this report: ingest, hot, hotDb, dashboard, cache, worker, backpressure. +If only part of the suite was run, missing evidence stays marked as `not found` below. 
+ +## Final Validation Gates + +| Gate | Result | Evidence | +| --- | --- | --- | +| Pre-reset evidence preservation | passed | `docs/benchmarks/evidence/pre-reset-state-2026-06-16-final-validation.json` | +| Clean reset | passed | `docker compose down -v`, then `docker compose up -d --build` | +| Migration and seed | passed | `pnpm db:migrate && pnpm db:seed` | +| Health check | passed | `pnpm health`; PostgreSQL, Redis, Kafka, ingest API, GraphQL API, and worker healthy | +| Unit tests | passed | `pnpm test` | +| Integration tests | passed | `pnpm test:integration` | +| Load test | passed | `RUN_ID=2026-06-16-clean-test-load RATE=20 DURATION=10s BATCH_SIZE=20 pnpm test:load`; 201 requests, 0% HTTP failures, p95 4.42 ms | +| Typecheck | passed | `pnpm typecheck` | +| Lint | passed with warnings | `pnpm lint`; 0 errors, existing warnings remain | +| Full benchmark | passed | `RUN_ID=2026-06-16-clean-full-benchmark ... pnpm benchmark`; all suites produced evidence files | +| Query plans | passed | `RUN_ID=2026-06-16-clean-full-benchmark ./scripts/capture-query-plans.sh`; run-scoped files saved under `docs/query-plans/` | +| Final Kafka lag | passed | `pulseops-aggregators` lag was 0 across 3 `events-raw` partitions after validation | + +## Results + +| Test | Command | Throughput | p50 latency | p95 latency | p99 latency | Error rate | Kafka lag | DB notes | Result | +| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | +| Ingest throughput | `pnpm benchmark:ingest` | 20.00 req/s | 3.39 ms | 8.02 ms | 10.54 ms | 0.00% | not measured by this k6 row | 400 requests | Measured; docs/benchmarks/evidence/ingest-throughput-2026-06-16-clean-full-benchmark.json | +| Hot tenant | `pnpm benchmark:hot-tenant` | 16.96 req/s | 4.12 ms | 6.83 ms | 8.21 ms | 0.00% | not measured by this k6 row | 424 requests | Measured; docs/benchmarks/evidence/hot-tenant-2026-06-16-clean-full-benchmark.json | +| Hot tenant DB evidence | `pnpm benchmark:hot-db` | 424 persisted hot-test 
events | n/a | n/a | n/a | 0 unmatched requests | 0 | hot raw count 0.17 ms; quiet raw count 0.06 ms; 0 waiting locks at snapshot; hot 349/424; max hot events/key 282 | Measured; docs/benchmarks/evidence/hot-tenant-db-2026-06-16-clean-full-benchmark.json | +| Dashboard query | `pnpm benchmark:dashboard` | 2115.89 req/s | 4.15 ms | 6.79 ms | 13.49 ms | 0.00% | not measured by this k6 row | 42324 requests | Measured; docs/benchmarks/evidence/dashboard-query-2026-06-16-clean-full-benchmark.json | +| Dashboard cache | `pnpm benchmark:cache` | n/a | 1.29 ms | 3.85 ms | not captured | 0 GraphQL errors | n/a | cold 115.26 ms, 10 warm iterations | Measured; docs/benchmarks/evidence/dashboard-cache-2026-06-16-clean-full-benchmark.json | +| Worker catch-up | `pnpm benchmark:worker` | 107.48 persisted events/s | n/a | n/a | n/a | 0 lost in run | 0 | 200 accepted / 200 persisted | Measured; docs/benchmarks/evidence/worker-catchup-2026-06-16-clean-full-benchmark.json | +| Backpressure | `pnpm benchmark:backpressure` | 16.28 req/s | 2.93 ms | 4.31 ms | 7.53 ms | 0.00% | not measured by this k6 row | 487 requests | Measured; docs/benchmarks/evidence/backpressure-2026-06-16-clean-full-benchmark.json | + +## Run-Scoped Query Plans + +| Query | Plan file | Observation | +| --- | --- | --- | +| clean-full-benchmark-aggregate-daily-dashboard | `docs/query-plans/2026-06-16-clean-full-benchmark-aggregate-daily-dashboard.md` | Captured for run ID 2026-06-16-clean-full-benchmark; read file for row counts and interpretation | +| clean-full-benchmark-graphql-cache-path | `docs/query-plans/2026-06-16-clean-full-benchmark-graphql-cache-path.md` | Captured for run ID 2026-06-16-clean-full-benchmark; read file for row counts and interpretation | +| clean-full-benchmark-materialized-dashboard | `docs/query-plans/2026-06-16-clean-full-benchmark-materialized-dashboard.md` | Captured for run ID 2026-06-16-clean-full-benchmark; read file for row counts and interpretation | +| 
clean-full-benchmark-partition-pruning-24h | `docs/query-plans/2026-06-16-clean-full-benchmark-partition-pruning-24h.md` | Captured for run ID 2026-06-16-clean-full-benchmark; read file for row counts and interpretation | +| clean-full-benchmark-partition-pruning-30d | `docs/query-plans/2026-06-16-clean-full-benchmark-partition-pruning-30d.md` | Captured for run ID 2026-06-16-clean-full-benchmark; read file for row counts and interpretation | +| clean-full-benchmark-tenant-dashboard-chosen-index | `docs/query-plans/2026-06-16-clean-full-benchmark-tenant-dashboard-chosen-index.md` | Captured for run ID 2026-06-16-clean-full-benchmark; read file for row counts and interpretation | +| clean-full-benchmark-tenant-dashboard-index-disabled | `docs/query-plans/2026-06-16-clean-full-benchmark-tenant-dashboard-index-disabled.md` | Captured for run ID 2026-06-16-clean-full-benchmark; read file for row counts and interpretation | + +## Reference Query Plans + +These saved EXPLAIN ANALYZE files are repository evidence, not generated by this benchmark report unless they explicitly mention run ID `2026-06-16-clean-full-benchmark`. 
+ +| Query | Plan file | Observation | +| --- | --- | --- | +| aggregate-daily-dashboard | `docs/query-plans/2026-06-16-aggregate-daily-dashboard.md` | Reference EXPLAIN ANALYZE evidence; cite separately from this benchmark run | +| final-benchmark-smoke-aggregate-daily-dashboard | `docs/query-plans/2026-06-16-final-benchmark-smoke-aggregate-daily-dashboard.md` | Reference EXPLAIN ANALYZE evidence; cite separately from this benchmark run | +| final-benchmark-smoke-graphql-cache-path | `docs/query-plans/2026-06-16-final-benchmark-smoke-graphql-cache-path.md` | Reference EXPLAIN ANALYZE evidence; cite separately from this benchmark run | +| final-benchmark-smoke-materialized-dashboard | `docs/query-plans/2026-06-16-final-benchmark-smoke-materialized-dashboard.md` | Reference EXPLAIN ANALYZE evidence; cite separately from this benchmark run | +| final-benchmark-smoke-partition-pruning-24h | `docs/query-plans/2026-06-16-final-benchmark-smoke-partition-pruning-24h.md` | Reference EXPLAIN ANALYZE evidence; cite separately from this benchmark run | +| final-benchmark-smoke-partition-pruning-30d | `docs/query-plans/2026-06-16-final-benchmark-smoke-partition-pruning-30d.md` | Reference EXPLAIN ANALYZE evidence; cite separately from this benchmark run | +| final-benchmark-smoke-tenant-dashboard-chosen-index | `docs/query-plans/2026-06-16-final-benchmark-smoke-tenant-dashboard-chosen-index.md` | Reference EXPLAIN ANALYZE evidence; cite separately from this benchmark run | +| final-benchmark-smoke-tenant-dashboard-index-disabled | `docs/query-plans/2026-06-16-final-benchmark-smoke-tenant-dashboard-index-disabled.md` | Reference EXPLAIN ANALYZE evidence; cite separately from this benchmark run | +| materialized-dashboard | `docs/query-plans/2026-06-16-materialized-dashboard.md` | Reference EXPLAIN ANALYZE evidence; cite separately from this benchmark run | +| partition-pruning-24h | `docs/query-plans/2026-06-16-partition-pruning-24h.md` | Reference EXPLAIN ANALYZE evidence; 
cite separately from this benchmark run | +| partition-pruning-30d | `docs/query-plans/2026-06-16-partition-pruning-30d.md` | Reference EXPLAIN ANALYZE evidence; cite separately from this benchmark run | +| tenant-dashboard-chosen-index | `docs/query-plans/2026-06-16-tenant-dashboard-chosen-index.md` | Reference EXPLAIN ANALYZE evidence; cite separately from this benchmark run | +| tenant-dashboard-index-disabled | `docs/query-plans/2026-06-16-tenant-dashboard-index-disabled.md` | Reference EXPLAIN ANALYZE evidence; cite separately from this benchmark run | + +## Evidence Files + +| File | Description | +| --- | --- | +| `docs/benchmarks/evidence/ingest-throughput-2026-06-16-clean-full-benchmark.json` | Raw k6 ingest summary JSON | +| `docs/benchmarks/evidence/hot-tenant-2026-06-16-clean-full-benchmark.json` | Raw k6 hot-tenant summary JSON | +| `docs/benchmarks/evidence/hot-tenant-db-2026-06-16-clean-full-benchmark.json` | Hot-tenant PostgreSQL evidence JSON | +| `docs/benchmarks/evidence/dashboard-query-2026-06-16-clean-full-benchmark.json` | Raw k6 dashboard-query summary JSON | +| `docs/benchmarks/evidence/dashboard-cache-2026-06-16-clean-full-benchmark.json` | Cold/warm GraphQL cache JSON measurement | +| `docs/benchmarks/evidence/worker-catchup-2026-06-16-clean-full-benchmark.json` | Worker catch-up JSON measurement | +| `docs/benchmarks/evidence/backpressure-2026-06-16-clean-full-benchmark.json` | Raw k6 backpressure summary JSON | + +## Claims Allowed From This Run + +- The numbers in the table are local measurements for run ID `2026-06-16-clean-full-benchmark` only. +- Kafka decoupling can be discussed when ingest acceptance and worker catch-up or lag evidence are both present. +- Cache claims are limited to the cold/warm GraphQL measurement if the dashboard cache evidence file exists. +- Worker throughput claims are limited to the bounded worker catch-up workload if the worker evidence file exists. 
+- Hot-tenant database claims are limited to the aggregate-key pressure, representative EXPLAIN timings, reconciliation status, and after-run PostgreSQL snapshot in the hot-tenant DB evidence file if present. +- Query plan claims from this run require run-scoped files above. Otherwise cite the reference query-plan files separately. + +## Claims Not Supported By This Run + +- Do not claim production scale, production readiness, or a fixed capacity limit. +- Do not extrapolate beyond the exact workload, machine, Docker resources, and dataset above. +- Do not claim long-duration or million-event tenant-skew behavior unless that evidence file is present. +- Do not claim realistic cache hit ratio from a cold/warm smoke measurement. +- Do not claim Kafka lag limits beyond the captured lag evidence; this run's worker final lag was 0. +- The fallback k6 runner is pinned to `grafana/k6:2.0.0`; record a new exact version if you override it or use a local k6 binary. diff --git a/docs/benchmarks/2026-06-16-final-benchmark-smoke-pulseops-benchmark.md b/docs/benchmarks/2026-06-16-final-benchmark-smoke-pulseops-benchmark.md new file mode 100644 index 0000000..c4b35f0 --- /dev/null +++ b/docs/benchmarks/2026-06-16-final-benchmark-smoke-pulseops-benchmark.md @@ -0,0 +1,103 @@ +# PulseOps Benchmark Report: 2026-06-16 + +Status: evidence-backed local report for run ID `2026-06-16-final-benchmark-smoke`; not production-scale + +## Environment + +| Field | Value | +| --- | --- | +| Git commit | `112ba0414fa55bb431e814441e85ebd925107347` | +| Dirty tree | yes | +| Machine | Apple M4 Pro, 12 logical CPUs, 24.00 GiB host memory | +| Docker resources | 12 CPUs, 7.65 GiB | +| OS | Darwin 25.5.0 arm64 | +| Node.js version | v25.3.0 | +| PostgreSQL version | 16.13 | +| Redis version | v=7.4.8 | +| Kafka version | 4.2.0 | +| PostgreSQL row count | 623997 raw events | +| Daily aggregate row count | 620 rows | +| Event partitions | 7 child partitions | +| k6 version | k6 v2.0.0+dirty 
(commit/8c3be52cc1-dirty, go1.26.3, linux/arm64) (Docker fallback image grafana/k6:2.0.0) | +| Dataset | local Docker dataset at report generation time | + +## Commands + +```bash +# Command matching the run-specific evidence files currently present in this report: +RUN_ID=2026-06-16-final-benchmark-smoke pnpm benchmark +RUN_ID=2026-06-16-final-benchmark-smoke pnpm benchmark:report -- --run-id 2026-06-16-final-benchmark-smoke --force + +# Full-suite command, if you want every row populated: +RUN_ID=2026-06-16-final-benchmark-smoke pnpm benchmark +``` + +Run-specific evidence files found for this report: ingest, hot, hotDb, dashboard, cache, worker, backpressure. +If only part of the suite was run, missing evidence stays marked as `not found` below. + +## Results + +| Test | Command | Throughput | p50 latency | p95 latency | p99 latency | Error rate | Kafka lag | DB notes | Result | +| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | +| Ingest throughput | `pnpm benchmark:ingest` | 14.96 req/s | 3.44 ms | 7.17 ms | 22.09 ms | 0.00% | not measured by this k6 row | 226 requests | Measured; docs/benchmarks/evidence/ingest-throughput-2026-06-16-final-benchmark-smoke.json | +| Hot tenant | `pnpm benchmark:hot-tenant` | 12.45 req/s | 4.52 ms | 7.81 ms | 9.60 ms | 0.00% | not measured by this k6 row | 249 requests | Measured; docs/benchmarks/evidence/hot-tenant-2026-06-16-final-benchmark-smoke.json | +| Hot tenant DB evidence | `pnpm benchmark:hot-db` | 249 persisted hot-test events | n/a | n/a | n/a | 0 unmatched requests | 0 | hot raw count 0.58 ms; quiet raw count 0.12 ms; 0 waiting locks at snapshot; hot 201/249; max hot events/key 157 | Measured; docs/benchmarks/evidence/hot-tenant-db-2026-06-16-final-benchmark-smoke.json | +| Dashboard query | `pnpm benchmark:dashboard` | 44.88 req/s | 7.67 ms | 16.43 ms | 30.17 ms | 0.00% | not measured by this k6 row | 675 requests | Measured; docs/benchmarks/evidence/dashboard-query-2026-06-16-final-benchmark-smoke.json 
| +| Dashboard cache | `pnpm benchmark:cache` | n/a | 1.53 ms | 2.31 ms | not captured | 0 GraphQL errors | n/a | cold 32.34 ms, 5 warm iterations | Measured; docs/benchmarks/evidence/dashboard-cache-2026-06-16-final-benchmark-smoke.json | +| Worker catch-up | `pnpm benchmark:worker` | 92.72 persisted events/s | n/a | n/a | n/a | 0 lost in run | 0 | 200 accepted / 200 persisted | Measured; docs/benchmarks/evidence/worker-catchup-2026-06-16-final-benchmark-smoke.json | +| Backpressure | `pnpm benchmark:backpressure` | 23.79 req/s | 3.36 ms | 18.40 ms | 47.65 ms | 0.00% | not measured by this k6 row | 474 requests | Measured; docs/benchmarks/evidence/backpressure-2026-06-16-final-benchmark-smoke.json | + +## Run-Scoped Query Plans + +| Query | Plan file | Observation | +| --- | --- | --- | +| final-benchmark-smoke-aggregate-daily-dashboard | `docs/query-plans/2026-06-16-final-benchmark-smoke-aggregate-daily-dashboard.md` | Captured for run ID 2026-06-16-final-benchmark-smoke; read file for row counts and interpretation | +| final-benchmark-smoke-graphql-cache-path | `docs/query-plans/2026-06-16-final-benchmark-smoke-graphql-cache-path.md` | Captured for run ID 2026-06-16-final-benchmark-smoke; read file for row counts and interpretation | +| final-benchmark-smoke-materialized-dashboard | `docs/query-plans/2026-06-16-final-benchmark-smoke-materialized-dashboard.md` | Captured for run ID 2026-06-16-final-benchmark-smoke; read file for row counts and interpretation | +| final-benchmark-smoke-partition-pruning-24h | `docs/query-plans/2026-06-16-final-benchmark-smoke-partition-pruning-24h.md` | Captured for run ID 2026-06-16-final-benchmark-smoke; read file for row counts and interpretation | +| final-benchmark-smoke-partition-pruning-30d | `docs/query-plans/2026-06-16-final-benchmark-smoke-partition-pruning-30d.md` | Captured for run ID 2026-06-16-final-benchmark-smoke; read file for row counts and interpretation | +| final-benchmark-smoke-tenant-dashboard-chosen-index | 
`docs/query-plans/2026-06-16-final-benchmark-smoke-tenant-dashboard-chosen-index.md` | Captured for run ID 2026-06-16-final-benchmark-smoke; read file for row counts and interpretation | +| final-benchmark-smoke-tenant-dashboard-index-disabled | `docs/query-plans/2026-06-16-final-benchmark-smoke-tenant-dashboard-index-disabled.md` | Captured for run ID 2026-06-16-final-benchmark-smoke; read file for row counts and interpretation | + +## Reference Query Plans + +These saved EXPLAIN ANALYZE files are repository evidence, not generated by this benchmark report unless they explicitly mention run ID `2026-06-16-final-benchmark-smoke`. + +| Query | Plan file | Observation | +| --- | --- | --- | +| aggregate-daily-dashboard | `docs/query-plans/2026-06-16-aggregate-daily-dashboard.md` | Reference EXPLAIN ANALYZE evidence; cite separately from this benchmark run | +| materialized-dashboard | `docs/query-plans/2026-06-16-materialized-dashboard.md` | Reference EXPLAIN ANALYZE evidence; cite separately from this benchmark run | +| partition-pruning-24h | `docs/query-plans/2026-06-16-partition-pruning-24h.md` | Reference EXPLAIN ANALYZE evidence; cite separately from this benchmark run | +| partition-pruning-30d | `docs/query-plans/2026-06-16-partition-pruning-30d.md` | Reference EXPLAIN ANALYZE evidence; cite separately from this benchmark run | +| tenant-dashboard-chosen-index | `docs/query-plans/2026-06-16-tenant-dashboard-chosen-index.md` | Reference EXPLAIN ANALYZE evidence; cite separately from this benchmark run | +| tenant-dashboard-index-disabled | `docs/query-plans/2026-06-16-tenant-dashboard-index-disabled.md` | Reference EXPLAIN ANALYZE evidence; cite separately from this benchmark run | + +## Evidence Files + +| File | Description | +| --- | --- | +| `docs/benchmarks/evidence/ingest-throughput-2026-06-16-final-benchmark-smoke.json` | Raw k6 ingest summary JSON | +| `docs/benchmarks/evidence/hot-tenant-2026-06-16-final-benchmark-smoke.json` | Raw k6 hot-tenant 
summary JSON | +| `docs/benchmarks/evidence/hot-tenant-db-2026-06-16-final-benchmark-smoke.json` | Hot-tenant PostgreSQL evidence JSON | +| `docs/benchmarks/evidence/dashboard-query-2026-06-16-final-benchmark-smoke.json` | Raw k6 dashboard-query summary JSON | +| `docs/benchmarks/evidence/dashboard-cache-2026-06-16-final-benchmark-smoke.json` | Cold/warm GraphQL cache JSON measurement | +| `docs/benchmarks/evidence/worker-catchup-2026-06-16-final-benchmark-smoke.json` | Worker catch-up JSON measurement | +| `docs/benchmarks/evidence/backpressure-2026-06-16-final-benchmark-smoke.json` | Raw k6 backpressure summary JSON | + +## Claims Allowed From This Run + +- The numbers in the table are local measurements for run ID `2026-06-16-final-benchmark-smoke` only. +- Kafka decoupling can be discussed when ingest acceptance and worker catch-up or lag evidence are both present. +- Cache claims are limited to the cold/warm GraphQL measurement if the dashboard cache evidence file exists. +- Worker throughput claims are limited to the bounded worker catch-up workload if the worker evidence file exists. +- Hot-tenant database claims are limited to the aggregate-key pressure, representative EXPLAIN timings, reconciliation status, and after-run PostgreSQL snapshot in the hot-tenant DB evidence file if present. +- Query plan claims from this run require run-scoped files above. Otherwise cite the reference query-plan files separately. + +## Claims Not Supported By This Run + +- Do not claim production scale, production readiness, or a fixed capacity limit. +- Do not extrapolate beyond the exact workload, machine, Docker resources, and dataset above. +- Do not claim long-duration or million-event tenant-skew behavior unless that evidence file is present. +- Do not claim realistic cache hit ratio from a cold/warm smoke measurement. +- Do not claim Kafka lag limits beyond the captured lag evidence; this run's worker final lag was 0. 
+- The fallback k6 runner is pinned to `grafana/k6:2.0.0`; record a new exact version if you override it or use a local k6 binary. diff --git a/docs/benchmarks/2026-06-16-final-validation-benchmark.md b/docs/benchmarks/2026-06-16-final-validation-benchmark.md new file mode 100644 index 0000000..d3e050b --- /dev/null +++ b/docs/benchmarks/2026-06-16-final-validation-benchmark.md @@ -0,0 +1,116 @@ +# PulseOps Benchmark Report: 2026-06-16 + +Status: partial local validation report for run ID `2026-06-16-final-validation-benchmark`; not production-scale + +## Environment + +| Field | Value | +| --- | --- | +| Git commit | `112ba0414fa55bb431e814441e85ebd925107347` | +| Dirty tree | yes | +| Machine | Apple M4 Pro, 12 logical CPUs, 24.00 GiB host memory | +| Docker resources | 12 CPUs, 7.65 GiB | +| OS | Darwin 25.5.0 arm64 | +| Node.js version | v25.3.0 | +| PostgreSQL version | 16.13 | +| Redis version | v=7.4.8 | +| Kafka version | 4.2.0 | +| PostgreSQL row count | 1254500 raw events | +| Daily aggregate row count | 1214 rows | +| Event partitions | 7 child partitions | +| k6 version | k6 v2.0.0+dirty (commit/8c3be52cc1-dirty, go1.26.3, linux/arm64) (Docker fallback image grafana/k6:2.0.0) | +| Dataset | local Docker dataset at report generation time | + +## Commands + +```bash +# Command matching the run-specific evidence files currently present in this report: +RUN_ID=2026-06-16-final-validation-benchmark node scripts/run-benchmark.js ingest hot dashboard cache +RUN_ID=2026-06-16-final-validation-benchmark pnpm benchmark:report -- --run-id 2026-06-16-final-validation-benchmark --force + +# Full-suite command, if you want every row populated: +RUN_ID=2026-06-16-final-validation-benchmark pnpm benchmark +``` + +Run-specific evidence files found for this report: ingest, hot, dashboard, cache. +If only part of the suite was run, missing evidence stays marked as `not found` below. 
+ +Validation note: the full `RUN_ID=2026-06-16-final-validation-benchmark pnpm benchmark` command was attempted. It completed the ingest and hot-tenant k6 suites, then failed at `benchmark:hot-db` because `scripts/measure-hot-tenant-db.ts` waits for Kafka lag to reach zero before measuring persisted hot-tenant rows. The observed lag at failure was 10,663,074 messages across 3 `events-raw` partitions. Dashboard and cache suites were run separately afterward for the same run ID. This report is therefore evidence for the completed k6/cache suites and for the validation blocker, not a complete full-suite benchmark. + +## Validation Gates + +| Gate | Result | Evidence | +| --- | --- | --- | +| `pnpm test` | passed | Unit tests passed across GraphQL API, ingest API, and worker | +| `pnpm test:integration` | passed | Root integration tests plus ingest API and worker integration tests passed | +| `pnpm test:load` | passed | `docs/benchmarks/evidence/ingest-throughput-2026-06-16-final-validation-test-load.json` | +| `pnpm db:verify:fresh` | passed | Fresh PostgreSQL 16 migration proof completed with expected partitions, indexes, materialized view, and migration ledger | +| `pnpm typecheck` | passed | TypeScript check passed across packages, services, and web | +| `pnpm lint` | passed with warnings | No lint errors; warnings remain for existing `any`/console usage | +| `pnpm health` | passed | PostgreSQL, Redis, Kafka, ingest API, GraphQL API, and worker reported healthy | +| `pnpm benchmark` | partial/fail | Ingest and hot-tenant k6 suites completed; `benchmark:hot-db` failed waiting for Kafka lag to reach zero | + +## Results + +| Test | Command | Throughput | p50 latency | p95 latency | p99 latency | Error rate | Kafka lag | DB notes | Result | +| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | +| Ingest throughput | `pnpm benchmark:ingest` | 100.03 req/s | 2.76 ms | 4.50 ms | 6.75 ms | 0.00% | not measured by this k6 row | 1501 requests | Measured; 
docs/benchmarks/evidence/ingest-throughput-2026-06-16-final-validation-benchmark.json | +| Hot tenant | `pnpm benchmark:hot-tenant` | 8.75 req/s | 1.82 ms | 3.65 ms | 7.89 ms | 0.00% | not measured by this k6 row | 175 requests | Measured; docs/benchmarks/evidence/hot-tenant-2026-06-16-final-validation-benchmark.json | +| Hot tenant DB evidence | `pnpm benchmark:hot-db` | not found | n/a | n/a | n/a | not found | not found | not found | Missing evidence file | +| Dashboard query | `pnpm benchmark:dashboard` | 1830.61 req/s | 4.50 ms | 6.67 ms | 12.17 ms | 0.00% | not measured by this k6 row | 27464 requests | Measured; docs/benchmarks/evidence/dashboard-query-2026-06-16-final-validation-benchmark.json | +| Dashboard cache | `pnpm benchmark:cache` | n/a | 2.07 ms | 10.83 ms | not captured | 0 GraphQL errors | n/a | cold 36.99 ms, 10 warm iterations | Measured; docs/benchmarks/evidence/dashboard-cache-2026-06-16-final-validation-benchmark.json | +| Worker catch-up | `pnpm benchmark:worker` | not found | not found | not found | not found | not found | not found | not found | Missing evidence file | +| Backpressure | `pnpm benchmark:backpressure` | not found | not found | not found | not found | not found | not measured by this k6 row | not found | Missing evidence file | + +## Run-Scoped Query Plans + +| Query | Plan file | Observation | +| --- | --- | --- | +| not found | not found | not found | + +## Reference Query Plans + +These saved EXPLAIN ANALYZE files are repository evidence, not generated by this benchmark report unless they explicitly mention run ID `2026-06-16-final-validation-benchmark`. 
+ +| Query | Plan file | Observation | +| --- | --- | --- | +| aggregate-daily-dashboard | `docs/query-plans/2026-06-16-aggregate-daily-dashboard.md` | Reference EXPLAIN ANALYZE evidence; cite separately from this benchmark run | +| final-benchmark-smoke-aggregate-daily-dashboard | `docs/query-plans/2026-06-16-final-benchmark-smoke-aggregate-daily-dashboard.md` | Reference EXPLAIN ANALYZE evidence; cite separately from this benchmark run | +| final-benchmark-smoke-graphql-cache-path | `docs/query-plans/2026-06-16-final-benchmark-smoke-graphql-cache-path.md` | Reference EXPLAIN ANALYZE evidence; cite separately from this benchmark run | +| final-benchmark-smoke-materialized-dashboard | `docs/query-plans/2026-06-16-final-benchmark-smoke-materialized-dashboard.md` | Reference EXPLAIN ANALYZE evidence; cite separately from this benchmark run | +| final-benchmark-smoke-partition-pruning-24h | `docs/query-plans/2026-06-16-final-benchmark-smoke-partition-pruning-24h.md` | Reference EXPLAIN ANALYZE evidence; cite separately from this benchmark run | +| final-benchmark-smoke-partition-pruning-30d | `docs/query-plans/2026-06-16-final-benchmark-smoke-partition-pruning-30d.md` | Reference EXPLAIN ANALYZE evidence; cite separately from this benchmark run | +| final-benchmark-smoke-tenant-dashboard-chosen-index | `docs/query-plans/2026-06-16-final-benchmark-smoke-tenant-dashboard-chosen-index.md` | Reference EXPLAIN ANALYZE evidence; cite separately from this benchmark run | +| final-benchmark-smoke-tenant-dashboard-index-disabled | `docs/query-plans/2026-06-16-final-benchmark-smoke-tenant-dashboard-index-disabled.md` | Reference EXPLAIN ANALYZE evidence; cite separately from this benchmark run | +| materialized-dashboard | `docs/query-plans/2026-06-16-materialized-dashboard.md` | Reference EXPLAIN ANALYZE evidence; cite separately from this benchmark run | +| partition-pruning-24h | `docs/query-plans/2026-06-16-partition-pruning-24h.md` | Reference EXPLAIN ANALYZE evidence; 
cite separately from this benchmark run | +| partition-pruning-30d | `docs/query-plans/2026-06-16-partition-pruning-30d.md` | Reference EXPLAIN ANALYZE evidence; cite separately from this benchmark run | +| tenant-dashboard-chosen-index | `docs/query-plans/2026-06-16-tenant-dashboard-chosen-index.md` | Reference EXPLAIN ANALYZE evidence; cite separately from this benchmark run | +| tenant-dashboard-index-disabled | `docs/query-plans/2026-06-16-tenant-dashboard-index-disabled.md` | Reference EXPLAIN ANALYZE evidence; cite separately from this benchmark run | + +## Evidence Files + +| File | Description | +| --- | --- | +| `docs/benchmarks/evidence/ingest-throughput-2026-06-16-final-validation-benchmark.json` | Raw k6 ingest summary JSON | +| `docs/benchmarks/evidence/hot-tenant-2026-06-16-final-validation-benchmark.json` | Raw k6 hot-tenant summary JSON | +| `docs/benchmarks/evidence/dashboard-query-2026-06-16-final-validation-benchmark.json` | Raw k6 dashboard-query summary JSON | +| `docs/benchmarks/evidence/dashboard-cache-2026-06-16-final-validation-benchmark.json` | Cold/warm GraphQL cache JSON measurement | + +## Claims Allowed From This Run + +- The numbers in the table are local measurements for run ID `2026-06-16-final-validation-benchmark` only. +- Kafka decoupling can be discussed when ingest acceptance and worker catch-up or lag evidence are both present. +- Cache claims are limited to the cold/warm GraphQL measurement if the dashboard cache evidence file exists. +- Worker throughput claims are limited to the bounded worker catch-up workload if the worker evidence file exists. +- Hot-tenant database claims are limited to the aggregate-key pressure, representative EXPLAIN timings, reconciliation status, and after-run PostgreSQL snapshot in the hot-tenant DB evidence file if present. +- Query plan claims from this run require run-scoped files above. Otherwise cite the reference query-plan files separately. 
+ +## Claims Not Supported By This Run + +- Do not claim production scale, production readiness, or a fixed capacity limit. +- Do not extrapolate beyond the exact workload, machine, Docker resources, and dataset above. +- Do not claim long-duration or million-event tenant-skew behavior unless that evidence file is present. +- Do not claim realistic cache hit ratio from a cold/warm smoke measurement. +- Do not claim Kafka lag limits beyond the captured lag evidence. +- The fallback k6 runner is pinned to `grafana/k6:2.0.0`; record a new exact version if you override it or use a local k6 binary. diff --git a/docs/benchmarks/2026-06-16-hot-db-smoke-pulseops-benchmark.md b/docs/benchmarks/2026-06-16-hot-db-smoke-pulseops-benchmark.md new file mode 100644 index 0000000..8b4c275 --- /dev/null +++ b/docs/benchmarks/2026-06-16-hot-db-smoke-pulseops-benchmark.md @@ -0,0 +1,99 @@ +# PulseOps Benchmark Report: 2026-06-16 + +Status: evidence-backed local report for run ID `2026-06-16-hot-db-smoke`; not production-scale + +## Environment + +| Field | Value | +| --- | --- | +| Git commit | `112ba0414fa55bb431e814441e85ebd925107347` | +| Dirty tree | yes | +| Machine | Apple M4 Pro, 12 logical CPUs, 24.00 GiB host memory | +| Docker resources | 12 CPUs, 7.65 GiB | +| OS | Darwin 25.5.0 arm64 | +| Node.js version | v25.3.0 | +| PostgreSQL version | 16.13 | +| Redis version | v=7.4.8 | +| Kafka version | 4.2.0 | +| PostgreSQL row count | 623997 raw events | +| Daily aggregate row count | 620 rows | +| Event partitions | 7 child partitions | +| k6 version | k6 v2.0.0+dirty (commit/8c3be52cc1-dirty, go1.26.3, linux/arm64) (Docker fallback image grafana/k6:2.0.0) | +| Dataset | local Docker dataset at report generation time | + +## Commands + +```bash +# Command matching the run-specific evidence files currently present in this report: +RUN_ID=2026-06-16-hot-db-smoke node scripts/run-benchmark.js hot hotDb +RUN_ID=2026-06-16-hot-db-smoke pnpm benchmark:report -- --run-id 
2026-06-16-hot-db-smoke --force + +# Full-suite command, if you want every row populated: +RUN_ID=2026-06-16-hot-db-smoke pnpm benchmark +``` + +Run-specific evidence files found for this report: hot, hotDb. +If only part of the suite was run, missing evidence stays marked as `not found` below. + +## Results + +| Test | Command | Throughput | p50 latency | p95 latency | p99 latency | Error rate | Kafka lag | DB notes | Result | +| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | +| Ingest throughput | `pnpm benchmark:ingest` | not found | not found | not found | not found | not found | not measured by this k6 row | not found | Missing evidence file | +| Hot tenant | `pnpm benchmark:hot-tenant` | 16.20 req/s | 3.76 ms | 9.10 ms | 11.69 ms | 0.00% | not measured by this k6 row | 324 requests | Measured; docs/benchmarks/evidence/hot-tenant-2026-06-16-hot-db-smoke.json | +| Hot tenant DB evidence | `pnpm benchmark:hot-db` | 324 persisted hot-test events | n/a | n/a | n/a | 0 unmatched requests | 0 | hot raw count 0.29 ms; quiet raw count 0.07 ms; 0 waiting locks at snapshot; hot 243/324; max hot events/key 188 | Measured; docs/benchmarks/evidence/hot-tenant-db-2026-06-16-hot-db-smoke.json | +| Dashboard query | `pnpm benchmark:dashboard` | not found | not found | not found | not found | not found | not measured by this k6 row | not found | Missing evidence file | +| Dashboard cache | `pnpm benchmark:cache` | not found | not found | not found | not found | not found | not found | not found | Missing evidence file | +| Worker catch-up | `pnpm benchmark:worker` | not found | not found | not found | not found | not found | not found | not found | Missing evidence file | +| Backpressure | `pnpm benchmark:backpressure` | not found | not found | not found | not found | not found | not measured by this k6 row | not found | Missing evidence file | + +## Run-Scoped Query Plans + +| Query | Plan file | Observation | +| --- | --- | --- | +| not found | not found | not 
found | + +## Reference Query Plans + +These saved EXPLAIN ANALYZE files are repository evidence, not generated by this benchmark report unless they explicitly mention run ID `2026-06-16-hot-db-smoke`. + +| Query | Plan file | Observation | +| --- | --- | --- | +| aggregate-daily-dashboard | `docs/query-plans/2026-06-16-aggregate-daily-dashboard.md` | Reference EXPLAIN ANALYZE evidence; cite separately from this benchmark run | +| final-benchmark-smoke-aggregate-daily-dashboard | `docs/query-plans/2026-06-16-final-benchmark-smoke-aggregate-daily-dashboard.md` | Reference EXPLAIN ANALYZE evidence; cite separately from this benchmark run | +| final-benchmark-smoke-graphql-cache-path | `docs/query-plans/2026-06-16-final-benchmark-smoke-graphql-cache-path.md` | Reference EXPLAIN ANALYZE evidence; cite separately from this benchmark run | +| final-benchmark-smoke-materialized-dashboard | `docs/query-plans/2026-06-16-final-benchmark-smoke-materialized-dashboard.md` | Reference EXPLAIN ANALYZE evidence; cite separately from this benchmark run | +| final-benchmark-smoke-partition-pruning-24h | `docs/query-plans/2026-06-16-final-benchmark-smoke-partition-pruning-24h.md` | Reference EXPLAIN ANALYZE evidence; cite separately from this benchmark run | +| final-benchmark-smoke-partition-pruning-30d | `docs/query-plans/2026-06-16-final-benchmark-smoke-partition-pruning-30d.md` | Reference EXPLAIN ANALYZE evidence; cite separately from this benchmark run | +| final-benchmark-smoke-tenant-dashboard-chosen-index | `docs/query-plans/2026-06-16-final-benchmark-smoke-tenant-dashboard-chosen-index.md` | Reference EXPLAIN ANALYZE evidence; cite separately from this benchmark run | +| final-benchmark-smoke-tenant-dashboard-index-disabled | `docs/query-plans/2026-06-16-final-benchmark-smoke-tenant-dashboard-index-disabled.md` | Reference EXPLAIN ANALYZE evidence; cite separately from this benchmark run | +| materialized-dashboard | `docs/query-plans/2026-06-16-materialized-dashboard.md` | 
Reference EXPLAIN ANALYZE evidence; cite separately from this benchmark run | +| partition-pruning-24h | `docs/query-plans/2026-06-16-partition-pruning-24h.md` | Reference EXPLAIN ANALYZE evidence; cite separately from this benchmark run | +| partition-pruning-30d | `docs/query-plans/2026-06-16-partition-pruning-30d.md` | Reference EXPLAIN ANALYZE evidence; cite separately from this benchmark run | +| tenant-dashboard-chosen-index | `docs/query-plans/2026-06-16-tenant-dashboard-chosen-index.md` | Reference EXPLAIN ANALYZE evidence; cite separately from this benchmark run | +| tenant-dashboard-index-disabled | `docs/query-plans/2026-06-16-tenant-dashboard-index-disabled.md` | Reference EXPLAIN ANALYZE evidence; cite separately from this benchmark run | + +## Evidence Files + +| File | Description | +| --- | --- | +| `docs/benchmarks/evidence/hot-tenant-2026-06-16-hot-db-smoke.json` | Raw k6 hot-tenant summary JSON | +| `docs/benchmarks/evidence/hot-tenant-db-2026-06-16-hot-db-smoke.json` | Hot-tenant PostgreSQL evidence JSON | + +## Claims Allowed From This Run + +- The numbers in the table are local measurements for run ID `2026-06-16-hot-db-smoke` only. +- Kafka decoupling can be discussed when ingest acceptance and worker catch-up or lag evidence are both present. +- Cache claims are limited to the cold/warm GraphQL measurement if the dashboard cache evidence file exists. +- Worker throughput claims are limited to the bounded worker catch-up workload if the worker evidence file exists. +- Hot-tenant database claims are limited to the aggregate-key pressure, representative EXPLAIN timings, reconciliation status, and after-run PostgreSQL snapshot in the hot-tenant DB evidence file if present. +- Query plan claims from this run require run-scoped files above. Otherwise cite the reference query-plan files separately. + +## Claims Not Supported By This Run + +- Do not claim production scale, production readiness, or a fixed capacity limit. 
+- Do not extrapolate beyond the exact workload, machine, Docker resources, and dataset above. +- Do not claim long-duration or million-event tenant-skew behavior unless that evidence file is present. +- Do not claim realistic cache hit ratio from a cold/warm smoke measurement. +- Do not claim Kafka lag limits beyond the captured lag evidence. +- The fallback k6 runner is pinned to `grafana/k6:2.0.0`; record a new exact version if you override it or use a local k6 binary. diff --git a/docs/benchmarks/2026-06-16-ingest-scale-pulseops-benchmark.md b/docs/benchmarks/2026-06-16-ingest-scale-pulseops-benchmark.md new file mode 100644 index 0000000..c8f295b --- /dev/null +++ b/docs/benchmarks/2026-06-16-ingest-scale-pulseops-benchmark.md @@ -0,0 +1,77 @@ +# PulseOps Ingest-Scale Benchmark Report: 2026-06-16 + +Status: local fixed-rate ingest evidence; not production-scale + +This report records the heavier ingest runs executed after the canonical end-to-end smoke benchmark. It should be read together with: + +- `docs/benchmarks/2026-06-16-final-benchmark-smoke-pulseops-benchmark.md` +- `docs/benchmarks/evidence/ingest-scale-snapshot-2026-06-16.json` + +## Environment + +| Field | Value | +| --- | --- | +| Git commit | `112ba0414fa55bb431e814441e85ebd925107347` | +| Dirty tree | yes | +| Machine | local macOS arm64 host | +| OS | Darwin 25.5.0 arm64 | +| Node.js version | v25.3.0 | +| Docker resources | 12 CPUs, 7.65 GiB | +| Services | Postgres, Redis, Kafka, ingest API, GraphQL API, and worker were running healthy before inspection | +| PostgreSQL rows at snapshot | 918,158 raw events | +| Kafka lag at snapshot | 10,254,305 messages for consumer group `pulseops-aggregators` | +| Snapshot evidence | `docs/benchmarks/evidence/ingest-scale-snapshot-2026-06-16.json` | + +## Commands Run + +```bash +pnpm benchmark:seed-tenants -- --tenants 100 --hot-tenants 1 --medium-tenants 10 --manifest tmp/bench-tenants-load.json + +RUN_ID=2026-06-16-ingest-100rps 
TENANT_KEYS_FILE=tmp/bench-tenants-load.json API_URL=http://localhost:3001 RATE=100 DURATION=2m PREALLOCATED_VUS=100 MAX_VUS=200 pnpm benchmark:ingest + +RUN_ID=2026-06-16-ingest-500rps TENANT_KEYS_FILE=tmp/bench-tenants-load.json API_URL=http://localhost:3001 RATE=500 DURATION=2m PREALLOCATED_VUS=200 MAX_VUS=600 pnpm benchmark:ingest + +RUN_ID=2026-06-16-ingest-1000rps TENANT_KEYS_FILE=tmp/bench-tenants-load.json API_URL=http://localhost:3001 RATE=1000 DURATION=5m PREALLOCATED_VUS=500 MAX_VUS=1400 pnpm benchmark:ingest +``` + +Each request used the batch ingest endpoint with 50 events per request. + +## Results + +| Test | Target | Actual request rate | Requests | Attempted events from completed requests | HTTP failed rate | Dropped iterations | p50 | p95 | p99 | Evidence file | +| --- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | --- | +| Ingest 100 RPS | 100 req/s for 2m | 100.00 req/s | 12,000 | 600,000 | 0.000% | 0 | 3.17 ms | 10.95 ms | 86.58 ms | `docs/benchmarks/evidence/ingest-throughput-2026-06-16-ingest-100rps.json` | +| Ingest 500 RPS | 500 req/s for 2m | 490.36 req/s | 58,882 | 2,944,100 | 0.000% | 1,119 | 7.74 ms | 716.94 ms | 1491.47 ms | `docs/benchmarks/evidence/ingest-throughput-2026-06-16-ingest-500rps.json` | +| Ingest 1000 RPS | 1000 req/s for 5m | 469.37 req/s | 141,102 | 7,055,100 | 0.739% | 158,898 | 2684.79 ms | 4511.56 ms | 15000.34 ms | `docs/benchmarks/evidence/ingest-throughput-2026-06-16-ingest-1000rps.json` | + +## Persistence And Lag Snapshot + +At `2026-06-16T20:30:46Z`, PostgreSQL contained 918,158 total raw event rows. Of the three fixed-rate ingest run IDs, only `2026-06-16-ingest-100rps` had visible rows at that point: 293,630 persisted events. The 500 RPS and 1000 RPS run IDs had not appeared in PostgreSQL yet. + +Kafka lag for `pulseops-aggregators` on `events-raw` was 10,254,305 messages across 3 partitions. 
This means these runs are evidence of HTTP ingest pressure and queue backpressure, not evidence that every attempted event had already been written to PostgreSQL or rolled into aggregates. + +## Interpretation + +The 100 RPS run was clean at the HTTP layer: no dropped iterations, no k6 HTTP failures, and p95 under 11 ms. + +The 500 RPS run completed without k6 HTTP failures, but it missed 1,119 scheduled iterations and p95 climbed to about 717 ms. That is a useful local saturation signal, not a capacity claim. + +The 1000 RPS run did not sustain the requested rate. It averaged about 469 req/s, dropped 158,898 scheduled iterations, and had a 0.739% HTTP failure rate. p99 reached about 15 seconds. This is evidence that the local stack broke down before it could reach the requested 1000 req/s target. + +The important article-relevant observation is that accepting events through HTTP is only the first stage. Under high batch load, the bottleneck moved into Kafka lag and worker/database catch-up. A single incoming event still had to be published, consumed, written, aggregated, and eventually served through dashboard paths. + +## Safe Claims + +- In this local run, PulseOps handled the 100 RPS batch-ingest test cleanly at the HTTP layer: 12,000 requests over 2 minutes, 0 k6 HTTP failures, p95 10.95 ms. +- In this local run, the 500 RPS batch-ingest test showed stress: 58,882 completed requests, 1,119 dropped iterations, and p95 716.94 ms. +- In this local run, the 1000 RPS target was not sustained: actual request rate was 469.37 req/s, dropped iterations were 158,898, HTTP failure rate was 0.739%, and p99 was 15000.34 ms. +- The benchmark demonstrates coordination pressure between HTTP ingest, Kafka, worker processing, and PostgreSQL persistence. Kafka lag reached 10,254,305 messages at the captured snapshot. +- These results are project-scale, local Docker measurements on the recorded machine and commit. + +## Unsafe Claims + +- Do not claim PulseOps supports 1000 RPS end-to-end.
This run did not sustain that target. +- Do not claim all attempted events were persisted or aggregated. The snapshot shows a large Kafka backlog. +- Do not claim production readiness, production scale, or a universal Postgres capacity limit. +- Do not report the HTTP request rate as database write throughput; the worker and PostgreSQL write path were still catching up when the snapshot was taken. +- Do not extrapolate these numbers to other machines, cloud environments, or longer durations. diff --git a/docs/benchmarks/2026-06-16-pulseops-benchmark.md b/docs/benchmarks/2026-06-16-pulseops-benchmark.md new file mode 100644 index 0000000..5e89eb9 --- /dev/null +++ b/docs/benchmarks/2026-06-16-pulseops-benchmark.md @@ -0,0 +1,9 @@ +# PulseOps Benchmark Report: 2026-06-16 + +Status: superseded local smoke report; not production-scale + +This earlier report has been superseded by the canonical smoke report for run ID `2026-06-16-final-benchmark-smoke`: + +- `docs/benchmarks/2026-06-16-final-benchmark-smoke-pulseops-benchmark.md` + +The raw evidence files from earlier smoke runs remain in `docs/benchmarks/evidence/` for auditability, but public article claims should cite the canonical report above unless they explicitly discuss a named historical run.
diff --git a/docs/benchmarks/YYYY-MM-DD-pulseops-benchmark.md b/docs/benchmarks/YYYY-MM-DD-pulseops-benchmark.md new file mode 100644 index 0000000..5e9468d --- /dev/null +++ b/docs/benchmarks/YYYY-MM-DD-pulseops-benchmark.md @@ -0,0 +1,79 @@ +# PulseOps Benchmark Report: YYYY-MM-DD + +Status: TBD / not run + +## Environment + +| Field | Value | +| --- | --- | +| Git commit | TBD | +| Dirty tree | TBD | +| Machine/container limits | TBD | +| Node.js version | TBD | +| PostgreSQL version | TBD | +| Redis version | TBD | +| Kafka version | TBD | +| k6 version | TBD | +| Dataset | TBD | + +## Workload + +| Parameter | Value | +| --- | --- | +| Tenants | TBD | +| Events | TBD | +| Days | TBD | +| Hot tenant ratio | TBD | +| Late arrival ratio | TBD | +| Duplicate ratio | TBD | +| Burst pattern | TBD | +| Generator command | TBD | + +## Results + +| Test | Command | Throughput | p50 latency | p95 latency | p99 latency | Error rate | Kafka lag | DB notes | Result | +| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | +| Ingest throughput | `pnpm benchmark:ingest` | TBD | TBD | TBD | TBD | TBD | TBD | TBD | Not run | +| Hot tenant | `pnpm benchmark:hot-tenant` | TBD | TBD | TBD | TBD | TBD | TBD | TBD | Not run | +| Hot tenant DB evidence | `pnpm benchmark:hot-db -- --require-complete` | TBD | n/a | n/a | n/a | TBD | TBD | TBD | Not run | +| Dashboard query | `pnpm benchmark:dashboard` | TBD | TBD | TBD | TBD | TBD | TBD | TBD | Not run | +| Dashboard cache | `pnpm benchmark:cache` | n/a | TBD | TBD | TBD | TBD | n/a | TBD | Not run | +| Worker catch-up | `pnpm benchmark:worker` | TBD | n/a | n/a | n/a | TBD | TBD | TBD | Not run | +| Backpressure | `pnpm benchmark:backpressure` | TBD | TBD | TBD | TBD | TBD | TBD | TBD | Not run | + +## Tenant Skew + +| Tenant class | Tenant count | Event share | Observed throughput | p95 ingest latency | p95 dashboard latency | Notes | +| --- | --- | --- | --- | --- | --- | --- | +| Hot | TBD | TBD | TBD | TBD | TBD | TBD | +| 
Medium | TBD | TBD | TBD | TBD | TBD | TBD | +| Quiet | TBD | TBD | TBD | TBD | TBD | TBD | + +## Query Plans + +| Query | Plan file | Dataset notes | Observation | +| --- | --- | --- | --- | +| `metrics` | TBD | TBD | TBD | +| `eventCountsOverTime` | TBD | TBD | TBD | +| `recentEvents` | TBD | TBD | TBD | + +## Evidence Files + +| File | Description | +| --- | --- | +| TBD | Raw k6 summary JSON | +| TBD | Generator metadata | +| TBD | PostgreSQL query plan | +| TBD | Kafka lag snapshot | + +## Notes + +- TBD + +## Claims Allowed From This Run + +- TBD + +## Claims Not Supported By This Run + +- TBD diff --git a/docs/benchmarks/evidence/.gitkeep b/docs/benchmarks/evidence/.gitkeep new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/docs/benchmarks/evidence/.gitkeep @@ -0,0 +1 @@ + diff --git a/docs/benchmarks/evidence/backpressure-2026-06-16-clean-full-benchmark.json b/docs/benchmarks/evidence/backpressure-2026-06-16-clean-full-benchmark.json new file mode 100644 index 0000000..b5635db --- /dev/null +++ b/docs/benchmarks/evidence/backpressure-2026-06-16-clean-full-benchmark.json @@ -0,0 +1,234 @@ +{ + "options": { + "summaryTrendStats": [ + "avg", + "min", + "med", + "p(90)", + "p(95)", + "p(99)", + "max" + ], + "summaryTimeUnit": "", + "noColor": false + }, + "state": { + "isStdErrTTY": false, + "testRunDurationMs": 29912.847667, + "isStdOutTTY": false + }, + "metrics": { + "http_req_connecting": { + "type": "trend", + "contains": "time", + "values": { + "p(90)": 0, + "p(95)": 0.9482205999999999, + "p(99)": 1.3305969999999996, + "max": 7.496041, + "avg": 0.08317154209445586, + "min": 0, + "med": 0 + } + }, + "vus_max": { + "type": "gauge", + "contains": "default", + "values": { + "value": 30, + "min": 30, + "max": 30 + } + }, + "data_received": { + "values": { + "count": 162658, + "rate": 5437.730362911756 + }, + "type": "counter", + "contains": "data" + }, + "http_reqs": { + "type": "counter", + "contains": "default", + "values": { + "count": 487, + 
"rate": 16.280629829077114 + } + }, + "checks": { + "contains": "default", + "values": { + "rate": 1, + "passes": 974, + "fails": 0 + }, + "thresholds": { + "rate>0.80": { + "ok": true + } + }, + "type": "rate" + }, + "iteration_duration": { + "type": "trend", + "contains": "time", + "values": { + "min": 2.6235, + "med": 3.332375, + "p(90)": 4.556650000000001, + "p(95)": 5.448616999999999, + "p(99)": 8.631620639999994, + "max": 66.971042, + "avg": 3.8013426694045163 + } + }, + "iterations": { + "type": "counter", + "contains": "default", + "values": { + "count": 487, + "rate": 16.280629829077114 + } + }, + "http_req_sending": { + "type": "trend", + "contains": "time", + "values": { + "med": 0.076208, + "p(90)": 0.10151660000000001, + "p(95)": 0.11880419999999996, + "p(99)": 0.2062933799999999, + "max": 0.940917, + "avg": 0.08373614168377824, + "min": 0.050292 + } + }, + "http_req_failed": { + "type": "rate", + "contains": "default", + "values": { + "fails": 487, + "rate": 0, + "passes": 0 + } + }, + "http_req_duration": { + "values": { + "p(95)": 4.311312499999998, + "p(99)": 7.53126824, + "max": 65.593334, + "avg": 3.2748486632443528, + "min": 2.272, + "med": 2.9345, + "p(90)": 3.7153166 + }, + "type": "trend", + "contains": "time" + }, + "http_req_duration{expected_response:true}": { + "type": "trend", + "contains": "time", + "values": { + "avg": 3.2748486632443528, + "min": 2.272, + "med": 2.9345, + "p(90)": 3.7153166, + "p(95)": 4.311312499999998, + "p(99)": 7.53126824, + "max": 65.593334 + } + }, + "data_sent": { + "type": "counter", + "contains": "data", + "values": { + "count": 3738592, + "rate": 124982.81813952581 + } + }, + "http_req_tls_handshaking": { + "contains": "time", + "values": { + "p(99)": 0, + "max": 0, + "avg": 0, + "min": 0, + "med": 0, + "p(90)": 0, + "p(95)": 0 + }, + "type": "trend" + }, + "http_req_waiting": { + "type": "trend", + "contains": "time", + "values": { + "min": 2.179375, + "med": 2.816834, + "p(90)": 3.5538418000000003, + 
"p(95)": 4.111575599999999, + "p(99)": 7.27933524, + "max": 64.551667, + "avg": 3.1380662484599573 + } + }, + "http_req_receiving": { + "type": "trend", + "contains": "time", + "values": { + "max": 0.300792, + "avg": 0.05304627310061599, + "min": 0.029, + "med": 0.048875, + "p(90)": 0.072975, + "p(95)": 0.08757129999999999, + "p(99)": 0.14017276 + } + }, + "http_req_blocked": { + "type": "trend", + "contains": "time", + "values": { + "p(99)": 1.5136236199999995, + "max": 7.596833, + "avg": 0.09211209856262816, + "min": 0.002708, + "med": 0.003667, + "p(90)": 0.006792, + "p(95)": 0.9940625999999998 + } + }, + "vus": { + "contains": "default", + "values": { + "value": 0, + "min": 0, + "max": 0 + }, + "type": "gauge" + } + }, + "root_group": { + "groups": [], + "checks": [ + { + "name": "accepted, limited, or saturated", + "path": "::accepted, limited, or saturated", + "id": "7e8df65fd154c2d3bd8d977f0ef1cf22", + "passes": 487, + "fails": 0 + }, + { + "fails": 0, + "name": "not a validation failure", + "path": "::not a validation failure", + "id": "f384cc847cef144de5644bbb7040d223", + "passes": 487 + } + ], + "name": "", + "path": "", + "id": "d41d8cd98f00b204e9800998ecf8427e" + } +} \ No newline at end of file diff --git a/docs/benchmarks/evidence/backpressure-2026-06-16-final-benchmark-smoke.json b/docs/benchmarks/evidence/backpressure-2026-06-16-final-benchmark-smoke.json new file mode 100644 index 0000000..4361d9a --- /dev/null +++ b/docs/benchmarks/evidence/backpressure-2026-06-16-final-benchmark-smoke.json @@ -0,0 +1,234 @@ +{ + "state": { + "isStdOutTTY": false, + "isStdErrTTY": false, + "testRunDurationMs": 19922.694676 + }, + "metrics": { + "http_req_duration{expected_response:true}": { + "values": { + "med": 3.3619589999999997, + "p(90)": 13.4699206, + "p(95)": 18.40053979999999, + "p(99)": 47.64538066, + "max": 57.422042, + "avg": 6.224787793248941, + "min": 1.991333 + }, + "type": "trend", + "contains": "time" + }, + "iterations": { + "type": "counter", + 
"contains": "default", + "values": { + "count": 474, + "rate": 23.79196226758457 + } + }, + "checks": { + "contains": "default", + "values": { + "rate": 1, + "passes": 948, + "fails": 0 + }, + "thresholds": { + "rate>0.80": { + "ok": true + } + }, + "type": "rate" + }, + "http_req_blocked": { + "type": "trend", + "contains": "time", + "values": { + "min": 0.001667, + "med": 0.004417, + "p(90)": 0.0127661, + "p(95)": 0.04003584999999992, + "p(99)": 1.3593387499999996, + "max": 12.503458, + "avg": 0.08828672573839647 + } + }, + "http_reqs": { + "type": "counter", + "contains": "default", + "values": { + "count": 474, + "rate": 23.79196226758457 + } + }, + "vus_max": { + "contains": "default", + "values": { + "value": 20, + "min": 20, + "max": 20 + }, + "type": "gauge" + }, + "http_req_waiting": { + "values": { + "max": 54.420125, + "avg": 6.088843510548529, + "min": 1.934, + "med": 3.255125, + "p(90)": 13.159383700000001, + "p(95)": 18.09756885, + "p(99)": 47.329381819999995 + }, + "type": "trend", + "contains": "time" + }, + "http_req_receiving": { + "contains": "time", + "values": { + "max": 1.178917, + "avg": 0.0862038544303798, + "min": 0.0275, + "med": 0.0634585, + "p(90)": 0.13185000000000002, + "p(95)": 0.19049559999999996, + "p(99)": 0.47439681999999966 + }, + "type": "trend" + }, + "http_req_sending": { + "type": "trend", + "contains": "time", + "values": { + "med": 0.0269375, + "p(90)": 0.06376240000000001, + "p(95)": 0.0923438499999999, + "p(99)": 0.38329566, + "max": 2.789083, + "avg": 0.04974042827004218, + "min": 0.013083 + } + }, + "data_sent": { + "type": "counter", + "contains": "data", + "values": { + "count": 1874067, + "rate": 94066.94377832366 + } + }, + "data_received": { + "type": "counter", + "contains": "data", + "values": { + "count": 158308, + "rate": 7946.113845267465 + } + }, + "http_req_failed": { + "values": { + "passes": 0, + "fails": 474, + "rate": 0 + }, + "type": "rate", + "contains": "default" + }, + "iteration_duration": { + 
"values": { + "med": 104.83883349999999, + "p(90)": 117.4736706, + "p(95)": 122.81039024999998, + "p(99)": 150.44044866, + "max": 168.86275, + "avg": 108.05362107805904, + "min": 102.3855 + }, + "type": "trend", + "contains": "time" + }, + "http_req_duration": { + "type": "trend", + "contains": "time", + "values": { + "p(99)": 47.64538066, + "max": 57.422042, + "avg": 6.224787793248941, + "min": 1.991333, + "med": 3.3619589999999997, + "p(90)": 13.4699206, + "p(95)": 18.40053979999999 + } + }, + "vus": { + "type": "gauge", + "contains": "default", + "values": { + "min": 1, + "max": 4, + "value": 1 + } + }, + "http_req_connecting": { + "type": "trend", + "contains": "time", + "values": { + "min": 0, + "med": 0, + "p(90)": 0, + "p(95)": 0, + "p(99)": 1.2892674999999993, + "max": 12.40875, + "avg": 0.07564504008438819 + } + }, + "http_req_tls_handshaking": { + "type": "trend", + "contains": "time", + "values": { + "max": 0, + "avg": 0, + "min": 0, + "med": 0, + "p(90)": 0, + "p(95)": 0, + "p(99)": 0 + } + } + }, + "root_group": { + "path": "", + "id": "d41d8cd98f00b204e9800998ecf8427e", + "groups": [], + "checks": [ + { + "name": "accepted, limited, or saturated", + "path": "::accepted, limited, or saturated", + "id": "7e8df65fd154c2d3bd8d977f0ef1cf22", + "passes": 474, + "fails": 0 + }, + { + "id": "f384cc847cef144de5644bbb7040d223", + "passes": 474, + "fails": 0, + "name": "not a validation failure", + "path": "::not a validation failure" + } + ], + "name": "" + }, + "options": { + "summaryTimeUnit": "", + "noColor": false, + "summaryTrendStats": [ + "avg", + "min", + "med", + "p(90)", + "p(95)", + "p(99)", + "max" + ] + } +} \ No newline at end of file diff --git a/docs/benchmarks/evidence/backpressure-2026-06-16-harness-smoke.json b/docs/benchmarks/evidence/backpressure-2026-06-16-harness-smoke.json new file mode 100644 index 0000000..1450bd0 --- /dev/null +++ b/docs/benchmarks/evidence/backpressure-2026-06-16-harness-smoke.json @@ -0,0 +1,234 @@ +{ + "state": { + 
"isStdOutTTY": false, + "isStdErrTTY": false, + "testRunDurationMs": 3001.501085 + }, + "metrics": { + "http_req_tls_handshaking": { + "contains": "time", + "values": { + "p(99)": 0, + "max": 0, + "avg": 0, + "min": 0, + "med": 0, + "p(90)": 0, + "p(95)": 0 + }, + "type": "trend" + }, + "http_req_blocked": { + "type": "trend", + "contains": "time", + "values": { + "min": 0.004333, + "med": 2.2908125, + "p(90)": 4.1199961, + "p(95)": 4.34864405, + "p(99)": 4.53156241, + "max": 4.577292, + "avg": 2.2908125 + } + }, + "iterations": { + "type": "counter", + "contains": "default", + "values": { + "count": 2, + "rate": 0.6663332590466147 + } + }, + "http_req_receiving": { + "type": "trend", + "contains": "time", + "values": { + "p(90)": 0.2009959, + "p(95)": 0.20899795, + "p(99)": 0.21539959, + "max": 0.217, + "avg": 0.1369795, + "min": 0.056959, + "med": 0.1369795 + } + }, + "data_received": { + "type": "counter", + "contains": "data", + "values": { + "count": 668, + "rate": 222.5553085215693 + } + }, + "checks": { + "contains": "default", + "values": { + "rate": 1, + "passes": 4, + "fails": 0 + }, + "thresholds": { + "rate>0.80": { + "ok": true + } + }, + "type": "rate" + }, + "http_req_connecting": { + "type": "trend", + "contains": "time", + "values": { + "med": 1.8042085, + "p(90)": 3.2475753000000003, + "p(95)": 3.4279961500000002, + "p(99)": 3.57233283, + "max": 3.608417, + "avg": 1.8042085, + "min": 0 + } + }, + "http_req_sending": { + "type": "trend", + "contains": "time", + "values": { + "avg": 0.286271, + "min": 0.029917, + "med": 0.28627100000000005, + "p(90)": 0.4913542000000001, + "p(95)": 0.5169896, + "p(99)": 0.5374979200000001, + "max": 0.542625 + } + }, + "http_req_duration{expected_response:true}": { + "type": "trend", + "contains": "time", + "values": { + "p(99)": 9.78118617, + "max": 9.839167, + "avg": 6.9401255, + "min": 4.041084, + "med": 6.9401255, + "p(90)": 9.2593587, + "p(95)": 9.54926285 + } + }, + "http_req_failed": { + "type": "rate", + 
"contains": "default", + "values": { + "fails": 2, + "rate": 0, + "passes": 0 + } + }, + "vus": { + "type": "gauge", + "contains": "default", + "values": { + "value": 0, + "min": 0, + "max": 0 + } + }, + "http_reqs": { + "type": "counter", + "contains": "default", + "values": { + "rate": 0.6663332590466147, + "count": 2 + } + }, + "http_req_duration": { + "type": "trend", + "contains": "time", + "values": { + "avg": 6.9401255, + "min": 4.041084, + "med": 6.9401255, + "p(90)": 9.2593587, + "p(95)": 9.54926285, + "p(99)": 9.78118617, + "max": 9.839167 + } + }, + "http_req_waiting": { + "type": "trend", + "contains": "time", + "values": { + "avg": 6.516875, + "min": 3.954208, + "med": 6.516875000000001, + "p(90)": 8.567008600000001, + "p(95)": 8.8232753, + "p(99)": 9.028288660000001, + "max": 9.079542 + } + }, + "vus_max": { + "type": "gauge", + "contains": "default", + "values": { + "value": 1, + "min": 1, + "max": 1 + } + }, + "iteration_duration": { + "type": "trend", + "contains": "time", + "values": { + "max": 16.842333, + "avg": 10.6680205, + "min": 4.493708, + "med": 10.6680205, + "p(90)": 15.6074705, + "p(95)": 16.22490175, + "p(99)": 16.71884675 + } + }, + "data_sent": { + "values": { + "count": 6626, + "rate": 2207.5620872214345 + }, + "type": "counter", + "contains": "data" + } + }, + "root_group": { + "name": "", + "path": "", + "id": "d41d8cd98f00b204e9800998ecf8427e", + "groups": [], + "checks": [ + { + "name": "accepted, limited, or saturated", + "path": "::accepted, limited, or saturated", + "id": "7e8df65fd154c2d3bd8d977f0ef1cf22", + "passes": 2, + "fails": 0 + }, + { + "name": "not a validation failure", + "path": "::not a validation failure", + "id": "f384cc847cef144de5644bbb7040d223", + "passes": 2, + "fails": 0 + } + ] + }, + "options": { + "noColor": false, + "summaryTrendStats": [ + "avg", + "min", + "med", + "p(90)", + "p(95)", + "p(99)", + "max" + ], + "summaryTimeUnit": "" + } +} \ No newline at end of file diff --git 
a/docs/benchmarks/evidence/backpressure-2026-06-16-harness-smoke2.json b/docs/benchmarks/evidence/backpressure-2026-06-16-harness-smoke2.json new file mode 100644 index 0000000..3b1485d --- /dev/null +++ b/docs/benchmarks/evidence/backpressure-2026-06-16-harness-smoke2.json @@ -0,0 +1,234 @@ +{ + "root_group": { + "name": "", + "path": "", + "id": "d41d8cd98f00b204e9800998ecf8427e", + "groups": [], + "checks": [ + { + "path": "::accepted, limited, or saturated", + "id": "7e8df65fd154c2d3bd8d977f0ef1cf22", + "passes": 2, + "fails": 0, + "name": "accepted, limited, or saturated" + }, + { + "name": "not a validation failure", + "path": "::not a validation failure", + "id": "f384cc847cef144de5644bbb7040d223", + "passes": 2, + "fails": 0 + } + ] + }, + "options": { + "summaryTrendStats": [ + "avg", + "min", + "med", + "p(90)", + "p(95)", + "p(99)", + "max" + ], + "summaryTimeUnit": "", + "noColor": false + }, + "state": { + "isStdErrTTY": false, + "testRunDurationMs": 3001.770668, + "isStdOutTTY": false + }, + "metrics": { + "vus": { + "type": "gauge", + "contains": "default", + "values": { + "value": 0, + "min": 0, + "max": 1 + } + }, + "http_req_blocked": { + "values": { + "avg": 1.2876875, + "min": 0.028833, + "med": 1.2876875, + "p(90)": 2.2947711, + "p(95)": 2.42065655, + "p(99)": 2.52136491, + "max": 2.546542 + }, + "type": "trend", + "contains": "time" + }, + "checks": { + "type": "rate", + "contains": "default", + "values": { + "rate": 1, + "passes": 4, + "fails": 0 + }, + "thresholds": { + "rate>0.80": { + "ok": true + } + } + }, + "vus_max": { + "type": "gauge", + "contains": "default", + "values": { + "value": 1, + "min": 1, + "max": 1 + } + }, + "http_req_duration": { + "type": "trend", + "contains": "time", + "values": { + "p(99)": 8.99447042, + "max": 9.028375, + "avg": 7.333146, + "min": 5.637917, + "med": 7.333146, + "p(90)": 8.689329200000001, + "p(95)": 8.8588521 + } + }, + "http_req_connecting": { + "type": "trend", + "contains": "time", + "values": { 
+ "avg": 0.8693125, + "min": 0, + "med": 0.8693125, + "p(90)": 1.5647625, + "p(95)": 1.65169375, + "p(99)": 1.7212387500000002, + "max": 1.738625 + } + }, + "http_reqs": { + "type": "counter", + "contains": "default", + "values": { + "count": 2, + "rate": 0.6662734169937595 + } + }, + "http_req_waiting": { + "type": "trend", + "contains": "time", + "values": { + "p(90)": 8.430120800000001, + "p(95)": 8.6007479, + "p(99)": 8.73724958, + "max": 8.771375, + "avg": 7.065104, + "min": 5.358833, + "med": 7.065104 + } + }, + "iteration_duration": { + "type": "trend", + "contains": "time", + "values": { + "min": 10.616375, + "med": 11.3432085, + "p(90)": 11.9246753, + "p(95)": 11.99735865, + "p(99)": 12.05550533, + "max": 12.070042, + "avg": 11.3432085 + } + }, + "http_req_failed": { + "type": "rate", + "contains": "default", + "values": { + "fails": 2, + "rate": 0, + "passes": 0 + } + }, + "http_req_receiving": { + "values": { + "p(95)": 0.1770771, + "p(99)": 0.18121542, + "max": 0.18225, + "avg": 0.130521, + "min": 0.078792, + "med": 0.130521, + "p(90)": 0.1719042 + }, + "type": "trend", + "contains": "time" + }, + "data_received": { + "type": "counter", + "contains": "data", + "values": { + "count": 668, + "rate": 222.5353212759157 + } + }, + "iterations": { + "type": "counter", + "contains": "default", + "values": { + "count": 2, + "rate": 0.6662734169937595 + } + }, + "http_req_sending": { + "type": "trend", + "contains": "time", + "values": { + "avg": 0.137521, + "min": 0.07475, + "med": 0.137521, + "p(90)": 0.18773779999999998, + "p(95)": 0.1940149, + "p(99)": 0.19903658, + "max": 0.200292 + } + }, + "http_req_tls_handshaking": { + "contains": "time", + "values": { + "min": 0, + "med": 0, + "p(90)": 0, + "p(95)": 0, + "p(99)": 0, + "max": 0, + "avg": 0 + }, + "type": "trend" + }, + "http_req_duration{expected_response:true}": { + "type": "trend", + "contains": "time", + "values": { + "p(90)": 8.689329200000001, + "p(95)": 8.8588521, + "p(99)": 8.99447042, + "max": 
9.028375, + "avg": 7.333146, + "min": 5.637917, + "med": 7.333146 + } + }, + "data_sent": { + "type": "counter", + "contains": "data", + "values": { + "rate": 2214.026564670263, + "count": 6646 + } + } + } +} \ No newline at end of file diff --git a/docs/benchmarks/evidence/backpressure-2026-06-16-harness-smoke3.json b/docs/benchmarks/evidence/backpressure-2026-06-16-harness-smoke3.json new file mode 100644 index 0000000..2a4e7ed --- /dev/null +++ b/docs/benchmarks/evidence/backpressure-2026-06-16-harness-smoke3.json @@ -0,0 +1,234 @@ +{ + "state": { + "isStdOutTTY": false, + "isStdErrTTY": false, + "testRunDurationMs": 3005.784584 + }, + "metrics": { + "vus": { + "type": "gauge", + "contains": "default", + "values": { + "value": 0, + "min": 0, + "max": 1 + } + }, + "http_req_waiting": { + "type": "trend", + "contains": "time", + "values": { + "p(95)": 9.7667708, + "p(99)": 10.15575416, + "max": 10.253, + "avg": 6.336527666666666, + "min": 3.365875, + "med": 5.390708, + "p(90)": 9.280541600000001 + } + }, + "http_reqs": { + "type": "counter", + "contains": "default", + "values": { + "count": 3, + "rate": 0.9980755161129005 + } + }, + "http_req_duration{expected_response:true}": { + "type": "trend", + "contains": "time", + "values": { + "p(99)": 10.57197782, + "max": 10.672542, + "avg": 6.587666666666666, + "min": 3.446125, + "med": 5.644333, + "p(90)": 9.6669002, + "p(95)": 10.1697211 + } + }, + "data_received": { + "values": { + "rate": 333.35722238170877, + "count": 1002 + }, + "type": "counter", + "contains": "data" + }, + "http_req_blocked": { + "values": { + "p(95)": 3.2751499999999996, + "p(99)": 3.5654299999999997, + "max": 3.638, + "avg": 1.2180833333333332, + "min": 0.00675, + "med": 0.0095, + "p(90)": 2.9123 + }, + "type": "trend", + "contains": "time" + }, + "http_req_connecting": { + "type": "trend", + "contains": "time", + "values": { + "p(95)": 2.0165246999999997, + "p(99)": 2.19577134, + "max": 2.240583, + "avg": 0.746861, + "min": 0, + "med": 0, + 
"p(90)": 1.7924664000000001 + } + }, + "checks": { + "values": { + "rate": 1, + "passes": 6, + "fails": 0 + }, + "thresholds": { + "rate>0.80": { + "ok": true + } + }, + "type": "rate", + "contains": "default" + }, + "vus_max": { + "type": "gauge", + "contains": "default", + "values": { + "value": 1, + "min": 1, + "max": 1 + } + }, + "http_req_tls_handshaking": { + "type": "trend", + "contains": "time", + "values": { + "max": 0, + "avg": 0, + "min": 0, + "med": 0, + "p(90)": 0, + "p(95)": 0, + "p(99)": 0 + } + }, + "iteration_duration": { + "type": "trend", + "contains": "time", + "values": { + "p(99)": 17.07175384, + "max": 17.290708, + "avg": 9.205805666666667, + "min": 3.983709, + "med": 6.343, + "p(90)": 15.101166399999999, + "p(95)": 16.1959372 + } + }, + "http_req_duration": { + "contains": "time", + "values": { + "p(99)": 10.57197782, + "max": 10.672542, + "avg": 6.587666666666666, + "min": 3.446125, + "med": 5.644333, + "p(90)": 9.6669002, + "p(95)": 10.1697211 + }, + "type": "trend" + }, + "http_req_receiving": { + "contains": "time", + "values": { + "avg": 0.128153, + "min": 0.062292, + "med": 0.149125, + "p(90)": 0.1682586, + "p(95)": 0.1706503, + "p(99)": 0.17256366, + "max": 0.173042 + }, + "type": "trend" + }, + "data_sent": { + "type": "counter", + "contains": "data", + "values": { + "count": 10003, + "rate": 3327.9164625591143 + } + }, + "http_req_sending": { + "type": "trend", + "contains": "time", + "values": { + "p(99)": 0.24366000000000002, + "max": 0.2465, + "avg": 0.12298599999999998, + "min": 0.017958, + "med": 0.1045, + "p(90)": 0.21810000000000002, + "p(95)": 0.2323 + } + }, + "http_req_failed": { + "type": "rate", + "contains": "default", + "values": { + "rate": 0, + "passes": 0, + "fails": 3 + } + }, + "iterations": { + "type": "counter", + "contains": "default", + "values": { + "count": 3, + "rate": 0.9980755161129005 + } + } + }, + "root_group": { + "groups": [], + "checks": [ + { + "name": "accepted, limited, or saturated", + "path": 
"::accepted, limited, or saturated", + "id": "7e8df65fd154c2d3bd8d977f0ef1cf22", + "passes": 3, + "fails": 0 + }, + { + "id": "f384cc847cef144de5644bbb7040d223", + "passes": 3, + "fails": 0, + "name": "not a validation failure", + "path": "::not a validation failure" + } + ], + "name": "", + "path": "", + "id": "d41d8cd98f00b204e9800998ecf8427e" + }, + "options": { + "summaryTrendStats": [ + "avg", + "min", + "med", + "p(90)", + "p(95)", + "p(99)", + "max" + ], + "summaryTimeUnit": "", + "noColor": false + } +} \ No newline at end of file diff --git a/docs/benchmarks/evidence/backpressure-2026-06-16-post-docs-benchmark.json b/docs/benchmarks/evidence/backpressure-2026-06-16-post-docs-benchmark.json new file mode 100644 index 0000000..c1ccab6 --- /dev/null +++ b/docs/benchmarks/evidence/backpressure-2026-06-16-post-docs-benchmark.json @@ -0,0 +1,234 @@ +{ + "state": { + "isStdOutTTY": false, + "isStdErrTTY": false, + "testRunDurationMs": 7111.285462 + }, + "metrics": { + "vus": { + "values": { + "value": 0, + "min": 0, + "max": 1 + }, + "type": "gauge", + "contains": "default" + }, + "checks": { + "type": "rate", + "contains": "default", + "values": { + "fails": 0, + "rate": 1, + "passes": 64 + }, + "thresholds": { + "rate>0.80": { + "ok": true + } + } + }, + "http_req_duration": { + "type": "trend", + "contains": "time", + "values": { + "p(95)": 8.51678995, + "p(99)": 8.61908375, + "max": 8.65175, + "avg": 6.038065187500001, + "min": 2.890834, + "med": 6.5285005, + "p(90)": 8.1496416 + } + }, + "data_received": { + "type": "counter", + "contains": "data", + "values": { + "count": 10639, + "rate": 1496.0726941494281 + } + }, + "http_req_connecting": { + "type": "trend", + "contains": "time", + "values": { + "med": 0, + "p(90)": 1.1848708000000001, + "p(95)": 2.0596187499999994, + "p(99)": 2.27143546, + "max": 2.285334, + "avg": 0.27344271875000004, + "min": 0 + } + }, + "http_req_receiving": { + "type": "trend", + "contains": "time", + "values": { + "p(99)": 
0.32398252, + "max": 0.324667, + "avg": 0.13714325, + "min": 0.035375, + "med": 0.11418700000000001, + "p(90)": 0.2620544, + "p(95)": 0.3052484 + } + }, + "data_sent": { + "type": "counter", + "contains": "data", + "values": { + "rate": 9295.928331557674, + "count": 66106 + } + }, + "iteration_duration": { + "values": { + "p(99)": 112.86605048, + "max": 113.154751, + "avg": 108.49288806250001, + "min": 103.392042, + "med": 109.1583545, + "p(90)": 110.8835786, + "p(95)": 111.53350655 + }, + "type": "trend", + "contains": "time" + }, + "http_req_sending": { + "values": { + "p(90)": 0.13019650000000002, + "p(95)": 0.17163939999999994, + "p(99)": 0.19683523, + "max": 0.200917, + "avg": 0.06386212499999999, + "min": 0.020459, + "med": 0.051271 + }, + "type": "trend", + "contains": "time" + }, + "vus_max": { + "values": { + "value": 5, + "min": 5, + "max": 5 + }, + "type": "gauge", + "contains": "default" + }, + "http_req_duration{expected_response:true}": { + "type": "trend", + "contains": "time", + "values": { + "p(95)": 8.51678995, + "p(99)": 8.61908375, + "max": 8.65175, + "avg": 6.038065187500001, + "min": 2.890834, + "med": 6.5285005, + "p(90)": 8.1496416 + } + }, + "http_req_blocked": { + "type": "trend", + "contains": "time", + "values": { + "avg": 0.33386446875000003, + "min": 0.003666, + "med": 0.0083955, + "p(90)": 1.9580709000000007, + "p(95)": 2.3666020499999996, + "p(99)": 2.4800650600000003, + "max": 2.505291 + } + }, + "http_req_tls_handshaking": { + "type": "trend", + "contains": "time", + "values": { + "med": 0, + "p(90)": 0, + "p(95)": 0, + "p(99)": 0, + "max": 0, + "avg": 0, + "min": 0 + } + }, + "http_reqs": { + "type": "counter", + "contains": "default", + "values": { + "count": 32, + "rate": 4.499889671283175 + } + }, + "http_req_waiting": { + "values": { + "p(99)": 8.44010977, + "max": 8.498958, + "avg": 5.8370598125000015, + "min": 2.832708, + "med": 6.192646, + "p(90)": 7.9741253, + "p(95)": 8.29310625 + }, + "type": "trend", + "contains": 
"time" + }, + "http_req_failed": { + "type": "rate", + "contains": "default", + "values": { + "rate": 0, + "passes": 0, + "fails": 32 + } + }, + "iterations": { + "type": "counter", + "contains": "default", + "values": { + "count": 32, + "rate": 4.499889671283175 + } + } + }, + "root_group": { + "checks": [ + { + "fails": 0, + "name": "accepted, limited, or saturated", + "path": "::accepted, limited, or saturated", + "id": "7e8df65fd154c2d3bd8d977f0ef1cf22", + "passes": 32 + }, + { + "name": "not a validation failure", + "path": "::not a validation failure", + "id": "f384cc847cef144de5644bbb7040d223", + "passes": 32, + "fails": 0 + } + ], + "name": "", + "path": "", + "id": "d41d8cd98f00b204e9800998ecf8427e", + "groups": [] + }, + "options": { + "summaryTrendStats": [ + "avg", + "min", + "med", + "p(90)", + "p(95)", + "p(99)", + "max" + ], + "summaryTimeUnit": "", + "noColor": false + } +} \ No newline at end of file diff --git a/docs/benchmarks/evidence/backpressure-2026-06-16-smoke2.json b/docs/benchmarks/evidence/backpressure-2026-06-16-smoke2.json new file mode 100644 index 0000000..02722d4 --- /dev/null +++ b/docs/benchmarks/evidence/backpressure-2026-06-16-smoke2.json @@ -0,0 +1,234 @@ +{ + "root_group": { + "checks": [ + { + "passes": 124, + "fails": 0, + "name": "accepted, limited, or saturated", + "path": "::accepted, limited, or saturated", + "id": "7e8df65fd154c2d3bd8d977f0ef1cf22" + }, + { + "name": "not a validation failure", + "path": "::not a validation failure", + "id": "f384cc847cef144de5644bbb7040d223", + "passes": 124, + "fails": 0 + } + ], + "name": "", + "path": "", + "id": "d41d8cd98f00b204e9800998ecf8427e", + "groups": [] + }, + "options": { + "summaryTrendStats": [ + "avg", + "min", + "med", + "p(90)", + "p(95)", + "p(99)", + "max" + ], + "summaryTimeUnit": "", + "noColor": false + }, + "state": { + "isStdOutTTY": false, + "isStdErrTTY": false, + "testRunDurationMs": 15001.508284 + }, + "metrics": { + "http_req_receiving": { + "type": 
"trend", + "contains": "time", + "values": { + "med": 0.0572085, + "p(90)": 0.1218874, + "p(95)": 0.18054995, + "p(99)": 0.22828859, + "max": 0.267792, + "avg": 0.07190695161290324, + "min": 0.026125 + } + }, + "http_req_failed": { + "type": "rate", + "contains": "default", + "values": { + "fails": 124, + "rate": 0, + "passes": 0 + } + }, + "vus_max": { + "type": "gauge", + "contains": "default", + "values": { + "min": 5, + "max": 5, + "value": 5 + } + }, + "http_req_waiting": { + "contains": "time", + "values": { + "p(95)": 7.082547349999996, + "p(99)": 9.844391069999999, + "max": 24.96025, + "avg": 2.964021838709677, + "min": 1.537833, + "med": 2.2519790000000004, + "p(90)": 4.642550100000001 + }, + "type": "trend" + }, + "http_req_sending": { + "type": "trend", + "contains": "time", + "values": { + "med": 0.0281045, + "p(90)": 0.0633375, + "p(95)": 0.09022859999999998, + "p(99)": 0.14861674999999996, + "max": 0.805, + "avg": 0.04327216129032258, + "min": 0.018625 + } + }, + "http_req_blocked": { + "type": "trend", + "contains": "time", + "values": { + "med": 0.004375, + "p(90)": 0.014420500000000004, + "p(95)": 0.0928124999999996, + "p(99)": 1.05630536, + "max": 1.2595, + "avg": 0.05595465322580644, + "min": 0.002834 + } + }, + "http_req_tls_handshaking": { + "values": { + "p(90)": 0, + "p(95)": 0, + "p(99)": 0, + "max": 0, + "avg": 0, + "min": 0, + "med": 0 + }, + "type": "trend", + "contains": "time" + }, + "checks": { + "type": "rate", + "contains": "default", + "values": { + "rate": 1, + "passes": 248, + "fails": 0 + }, + "thresholds": { + "rate>0.80": { + "ok": true + } + } + }, + "iteration_duration": { + "type": "trend", + "contains": "time", + "values": { + "med": 2.68825, + "p(90)": 5.795971400000002, + "p(95)": 8.0803937, + "p(99)": 13.211988249999996, + "max": 25.879333, + "avg": 3.5491895322580658, + "min": 1.781584 + } + }, + "http_req_connecting": { + "contains": "time", + "values": { + "avg": 0.03682996774193548, + "min": 0, + "med": 0, + "p(90)": 
0, + "p(95)": 0, + "p(99)": 0.9562419299999999, + "max": 0.970875 + }, + "type": "trend" + }, + "data_received": { + "type": "counter", + "contains": "data", + "values": { + "count": 41282, + "rate": 2751.856627911855 + } + }, + "http_reqs": { + "type": "counter", + "contains": "default", + "values": { + "count": 124, + "rate": 8.265835518169421 + } + }, + "vus": { + "type": "gauge", + "contains": "default", + "values": { + "value": 0, + "min": 0, + "max": 0 + } + }, + "data_sent": { + "values": { + "count": 62685, + "rate": 4178.579834326211 + }, + "type": "counter", + "contains": "data" + }, + "http_req_duration": { + "contains": "time", + "values": { + "p(99)": 10.726800569999998, + "max": 25.052709, + "avg": 3.0792009516129024, + "min": 1.596083, + "med": 2.332646, + "p(90)": 4.8163287000000015, + "p(95)": 7.369626899999996 + }, + "type": "trend" + }, + "http_req_duration{expected_response:true}": { + "type": "trend", + "contains": "time", + "values": { + "avg": 3.0792009516129024, + "min": 1.596083, + "med": 2.332646, + "p(90)": 4.8163287000000015, + "p(95)": 7.369626899999996, + "p(99)": 10.726800569999998, + "max": 25.052709 + } + }, + "iterations": { + "type": "counter", + "contains": "default", + "values": { + "count": 124, + "rate": 8.265835518169421 + } + } + } +} \ No newline at end of file diff --git a/docs/benchmarks/evidence/dashboard-cache-2026-06-16-cache-smoke.json b/docs/benchmarks/evidence/dashboard-cache-2026-06-16-cache-smoke.json new file mode 100644 index 0000000..fbbe987 --- /dev/null +++ b/docs/benchmarks/evidence/dashboard-cache-2026-06-16-cache-smoke.json @@ -0,0 +1,122 @@ +{ + "run_id": "2026-06-16-cache-smoke", + "captured_at": "2026-06-16T19:07:55.533Z", + "graphql_url": "http://localhost:3002/graphql", + "org_id": "00000000-0000-0000-0000-000000000001", + "project_id": "00000000-0000-0000-0000-000000000002", + "date_range": { + "start": "2026-06-01", + "end": "2026-06-30" + }, + "cache_namespace": { + "version": "610634", + 
"deleted_keys_before_cold_run": [], + "keys_after_cold_run": [ + "events:00000000-0000-0000-0000-000000000001:00000000-0000-0000-0000-000000000002:v610634:Mon Jun 01 2026 00:00:00 GMT+0000 (Coordinated Universal Time):Tue Jun 30 2026 00:00:00 GMT+0000 (Coordinated Universal Time):all:{}", + "total:00000000-0000-0000-0000-000000000001:00000000-0000-0000-0000-000000000002:v610634:Mon Jun 01 2026 00:00:00 GMT+0000 (Coordinated Universal Time):Tue Jun 30 2026 00:00:00 GMT+0000 (Coordinated Universal Time):{}", + "dau:00000000-0000-0000-0000-000000000001:00000000-0000-0000-0000-000000000002:v610634:Mon Jun 01 2026 00:00:00 GMT+0000 (Coordinated Universal Time):Tue Jun 30 2026 00:00:00 GMT+0000 (Coordinated Universal Time):{}" + ] + }, + "cold": { + "label": "cold-cache-miss", + "duration_ms": 74.53, + "status": 200, + "graphql_errors": 0, + "total_events": 128010 + }, + "warm": [ + { + "label": "warm-cache-hit-1", + "duration_ms": 3.948, + "status": 200, + "graphql_errors": 0, + "total_events": 128010 + }, + { + "label": "warm-cache-hit-2", + "duration_ms": 6.295, + "status": 200, + "graphql_errors": 0, + "total_events": 128010 + }, + { + "label": "warm-cache-hit-3", + "duration_ms": 4.162, + "status": 200, + "graphql_errors": 0, + "total_events": 128010 + }, + { + "label": "warm-cache-hit-4", + "duration_ms": 3.687, + "status": 200, + "graphql_errors": 0, + "total_events": 128010 + }, + { + "label": "warm-cache-hit-5", + "duration_ms": 2.84, + "status": 200, + "graphql_errors": 0, + "total_events": 128010 + }, + { + "label": "warm-cache-hit-6", + "duration_ms": 1.831, + "status": 200, + "graphql_errors": 0, + "total_events": 128010 + }, + { + "label": "warm-cache-hit-7", + "duration_ms": 3.768, + "status": 200, + "graphql_errors": 0, + "total_events": 128010 + }, + { + "label": "warm-cache-hit-8", + "duration_ms": 1.98, + "status": 200, + "graphql_errors": 0, + "total_events": 128010 + }, + { + "label": "warm-cache-hit-9", + "duration_ms": 3.645, + "status": 200, + 
"graphql_errors": 0, + "total_events": 128010 + }, + { + "label": "warm-cache-hit-10", + "duration_ms": 1.534, + "status": 200, + "graphql_errors": 0, + "total_events": 128010 + }, + { + "label": "warm-cache-hit-11", + "duration_ms": 2.071, + "status": 200, + "graphql_errors": 0, + "total_events": 128010 + }, + { + "label": "warm-cache-hit-12", + "duration_ms": 1.254, + "status": 200, + "graphql_errors": 0, + "total_events": 128010 + } + ], + "summary": { + "warm_iterations": 12, + "warm_min_ms": 1.254, + "warm_median_ms": 2.84, + "warm_p95_ms": 6.295, + "warm_max_ms": 6.295, + "cold_to_warm_median_ratio": 26.24 + }, + "safe_claim_note": "This is a local cold-vs-warm GraphQL dashboard cache measurement. It is not a production cache-hit-ratio benchmark." +} diff --git a/docs/benchmarks/evidence/dashboard-cache-2026-06-16-clean-full-benchmark.json b/docs/benchmarks/evidence/dashboard-cache-2026-06-16-clean-full-benchmark.json new file mode 100644 index 0000000..ace5d80 --- /dev/null +++ b/docs/benchmarks/evidence/dashboard-cache-2026-06-16-clean-full-benchmark.json @@ -0,0 +1,108 @@ +{ + "run_id": "2026-06-16-clean-full-benchmark", + "captured_at": "2026-06-16T20:46:25.155Z", + "graphql_url": "http://localhost:3002/graphql", + "org_id": "00000000-0000-0000-0000-000000000001", + "project_id": "00000000-0000-0000-0000-000000000002", + "date_range": { + "start": "2026-06-01", + "end": "2026-06-30" + }, + "cache_namespace": { + "version": "0", + "deleted_keys_before_cold_run": [], + "keys_after_cold_run": [ + "events:00000000-0000-0000-0000-000000000001:00000000-0000-0000-0000-000000000002:v0:Mon Jun 01 2026 00:00:00 GMT+0000 (Coordinated Universal Time):Tue Jun 30 2026 00:00:00 GMT+0000 (Coordinated Universal Time):all:{}", + "total:00000000-0000-0000-0000-000000000001:00000000-0000-0000-0000-000000000002:v0:Mon Jun 01 2026 00:00:00 GMT+0000 (Coordinated Universal Time):Tue Jun 30 2026 00:00:00 GMT+0000 (Coordinated Universal Time):{}", + 
"dau:00000000-0000-0000-0000-000000000001:00000000-0000-0000-0000-000000000002:v0:Mon Jun 01 2026 00:00:00 GMT+0000 (Coordinated Universal Time):Tue Jun 30 2026 00:00:00 GMT+0000 (Coordinated Universal Time):{}" + ] + }, + "cold": { + "label": "cold-cache-miss", + "duration_ms": 115.258, + "status": 200, + "graphql_errors": 0, + "total_events": 0 + }, + "warm": [ + { + "label": "warm-cache-hit-1", + "duration_ms": 2.163, + "status": 200, + "graphql_errors": 0, + "total_events": 0 + }, + { + "label": "warm-cache-hit-2", + "duration_ms": 3.852, + "status": 200, + "graphql_errors": 0, + "total_events": 0 + }, + { + "label": "warm-cache-hit-3", + "duration_ms": 1.703, + "status": 200, + "graphql_errors": 0, + "total_events": 0 + }, + { + "label": "warm-cache-hit-4", + "duration_ms": 1.27, + "status": 200, + "graphql_errors": 0, + "total_events": 0 + }, + { + "label": "warm-cache-hit-5", + "duration_ms": 1.086, + "status": 200, + "graphql_errors": 0, + "total_events": 0 + }, + { + "label": "warm-cache-hit-6", + "duration_ms": 1.288, + "status": 200, + "graphql_errors": 0, + "total_events": 0 + }, + { + "label": "warm-cache-hit-7", + "duration_ms": 1.873, + "status": 200, + "graphql_errors": 0, + "total_events": 0 + }, + { + "label": "warm-cache-hit-8", + "duration_ms": 1.388, + "status": 200, + "graphql_errors": 0, + "total_events": 0 + }, + { + "label": "warm-cache-hit-9", + "duration_ms": 1.233, + "status": 200, + "graphql_errors": 0, + "total_events": 0 + }, + { + "label": "warm-cache-hit-10", + "duration_ms": 0.927, + "status": 200, + "graphql_errors": 0, + "total_events": 0 + } + ], + "summary": { + "warm_iterations": 10, + "warm_min_ms": 0.927, + "warm_median_ms": 1.288, + "warm_p95_ms": 3.852, + "warm_max_ms": 3.852, + "cold_to_warm_median_ratio": 89.49 + }, + "safe_claim_note": "This is a local cold-vs-warm GraphQL dashboard cache measurement. It is not a production cache-hit-ratio benchmark." 
+} diff --git a/docs/benchmarks/evidence/dashboard-cache-2026-06-16-final-benchmark-smoke.json b/docs/benchmarks/evidence/dashboard-cache-2026-06-16-final-benchmark-smoke.json new file mode 100644 index 0000000..a956dea --- /dev/null +++ b/docs/benchmarks/evidence/dashboard-cache-2026-06-16-final-benchmark-smoke.json @@ -0,0 +1,73 @@ +{ + "run_id": "2026-06-16-final-benchmark-smoke", + "captured_at": "2026-06-16T19:58:33.960Z", + "graphql_url": "http://localhost:3002/graphql", + "org_id": "00000000-0000-0000-0000-000000000001", + "project_id": "00000000-0000-0000-0000-000000000002", + "date_range": { + "start": "2026-06-01", + "end": "2026-06-30" + }, + "cache_namespace": { + "version": "611889", + "deleted_keys_before_cold_run": [], + "keys_after_cold_run": [ + "events:00000000-0000-0000-0000-000000000001:00000000-0000-0000-0000-000000000002:v611889:Mon Jun 01 2026 00:00:00 GMT+0000 (Coordinated Universal Time):Tue Jun 30 2026 00:00:00 GMT+0000 (Coordinated Universal Time):all:{}", + "total:00000000-0000-0000-0000-000000000001:00000000-0000-0000-0000-000000000002:v611889:Mon Jun 01 2026 00:00:00 GMT+0000 (Coordinated Universal Time):Tue Jun 30 2026 00:00:00 GMT+0000 (Coordinated Universal Time):{}", + "dau:00000000-0000-0000-0000-000000000001:00000000-0000-0000-0000-000000000002:v611889:Mon Jun 01 2026 00:00:00 GMT+0000 (Coordinated Universal Time):Tue Jun 30 2026 00:00:00 GMT+0000 (Coordinated Universal Time):{}" + ] + }, + "cold": { + "label": "cold-cache-miss", + "duration_ms": 32.337, + "status": 200, + "graphql_errors": 0, + "total_events": 128014 + }, + "warm": [ + { + "label": "warm-cache-hit-1", + "duration_ms": 1.812, + "status": 200, + "graphql_errors": 0, + "total_events": 128014 + }, + { + "label": "warm-cache-hit-2", + "duration_ms": 2.308, + "status": 200, + "graphql_errors": 0, + "total_events": 128014 + }, + { + "label": "warm-cache-hit-3", + "duration_ms": 1.397, + "status": 200, + "graphql_errors": 0, + "total_events": 128014 + }, + { + "label": 
"warm-cache-hit-4", + "duration_ms": 1.535, + "status": 200, + "graphql_errors": 0, + "total_events": 128014 + }, + { + "label": "warm-cache-hit-5", + "duration_ms": 1.395, + "status": 200, + "graphql_errors": 0, + "total_events": 128014 + } + ], + "summary": { + "warm_iterations": 5, + "warm_min_ms": 1.395, + "warm_median_ms": 1.535, + "warm_p95_ms": 2.308, + "warm_max_ms": 2.308, + "cold_to_warm_median_ratio": 21.07 + }, + "safe_claim_note": "This is a local cold-vs-warm GraphQL dashboard cache measurement. It is not a production cache-hit-ratio benchmark." +} diff --git a/docs/benchmarks/evidence/dashboard-cache-2026-06-16-final-validation-benchmark.json b/docs/benchmarks/evidence/dashboard-cache-2026-06-16-final-validation-benchmark.json new file mode 100644 index 0000000..da1e792 --- /dev/null +++ b/docs/benchmarks/evidence/dashboard-cache-2026-06-16-final-validation-benchmark.json @@ -0,0 +1,117 @@ +{ + "run_id": "2026-06-16-final-validation-benchmark", + "captured_at": "2026-06-16T20:40:29.231Z", + "graphql_url": "http://localhost:3002/graphql", + "org_id": "00000000-0000-0000-0000-000000000001", + "project_id": "00000000-0000-0000-0000-000000000002", + "date_range": { + "start": "2026-06-01", + "end": "2026-06-30" + }, + "cache_namespace": { + "version": "612120", + "deleted_keys_before_cold_run": [ + "events:00000000-0000-0000-0000-000000000001:00000000-0000-0000-0000-000000000002:v612120:Mon Jun 01 2026 00:00:00 GMT+0000 (Coordinated Universal Time):Tue Jun 30 2026 00:00:00 GMT+0000 (Coordinated Universal Time):all:{}", + "events-series:00000000-0000-0000-0000-000000000001:00000000-0000-0000-0000-000000000002:v612120:Mon Jun 01 2026 00:00:00 GMT+0000 (Coordinated Universal Time):Tue Jun 30 2026 00:00:00 GMT+0000 (Coordinated Universal Time):all:{}", + "events:00000000-0000-0000-0000-000000000001:00000000-0000-0000-0000-000000000002:v612120:Mon Jun 01 2026 00:00:00 GMT+0000 (Coordinated Universal Time):Tue Jun 30 2026 00:00:00 GMT+0000 (Coordinated 
Universal Time):all:{\"segment\":\"pro\"}", + "events-series:00000000-0000-0000-0000-000000000001:00000000-0000-0000-0000-000000000002:v612120:Mon Jun 01 2026 00:00:00 GMT+0000 (Coordinated Universal Time):Tue Jun 30 2026 00:00:00 GMT+0000 (Coordinated Universal Time):all:{\"segment\":\"pro\"}", + "total:00000000-0000-0000-0000-000000000001:00000000-0000-0000-0000-000000000002:v612120:Mon Jun 01 2026 00:00:00 GMT+0000 (Coordinated Universal Time):Tue Jun 30 2026 00:00:00 GMT+0000 (Coordinated Universal Time):{\"segment\":\"pro\"}", + "total:00000000-0000-0000-0000-000000000001:00000000-0000-0000-0000-000000000002:v612120:Mon Jun 01 2026 00:00:00 GMT+0000 (Coordinated Universal Time):Tue Jun 30 2026 00:00:00 GMT+0000 (Coordinated Universal Time):{}", + "dau:00000000-0000-0000-0000-000000000001:00000000-0000-0000-0000-000000000002:v612120:Mon Jun 01 2026 00:00:00 GMT+0000 (Coordinated Universal Time):Tue Jun 30 2026 00:00:00 GMT+0000 (Coordinated Universal Time):{\"segment\":\"pro\"}", + "dau:00000000-0000-0000-0000-000000000001:00000000-0000-0000-0000-000000000002:v612120:Mon Jun 01 2026 00:00:00 GMT+0000 (Coordinated Universal Time):Tue Jun 30 2026 00:00:00 GMT+0000 (Coordinated Universal Time):{}" + ], + "keys_after_cold_run": [ + "events:00000000-0000-0000-0000-000000000001:00000000-0000-0000-0000-000000000002:v612120:Mon Jun 01 2026 00:00:00 GMT+0000 (Coordinated Universal Time):Tue Jun 30 2026 00:00:00 GMT+0000 (Coordinated Universal Time):all:{}", + "total:00000000-0000-0000-0000-000000000001:00000000-0000-0000-0000-000000000002:v612120:Mon Jun 01 2026 00:00:00 GMT+0000 (Coordinated Universal Time):Tue Jun 30 2026 00:00:00 GMT+0000 (Coordinated Universal Time):{}", + "dau:00000000-0000-0000-0000-000000000001:00000000-0000-0000-0000-000000000002:v612120:Mon Jun 01 2026 00:00:00 GMT+0000 (Coordinated Universal Time):Tue Jun 30 2026 00:00:00 GMT+0000 (Coordinated Universal Time):{}" + ] + }, + "cold": { + "label": "cold-cache-miss", + "duration_ms": 36.989, + 
"status": 200, + "graphql_errors": 0, + "total_events": 128003 + }, + "warm": [ + { + "label": "warm-cache-hit-1", + "duration_ms": 2.656, + "status": 200, + "graphql_errors": 0, + "total_events": 128003 + }, + { + "label": "warm-cache-hit-2", + "duration_ms": 3.853, + "status": 200, + "graphql_errors": 0, + "total_events": 128003 + }, + { + "label": "warm-cache-hit-3", + "duration_ms": 10.834, + "status": 200, + "graphql_errors": 0, + "total_events": 128003 + }, + { + "label": "warm-cache-hit-4", + "duration_ms": 2.074, + "status": 200, + "graphql_errors": 0, + "total_events": 128003 + }, + { + "label": "warm-cache-hit-5", + "duration_ms": 1.498, + "status": 200, + "graphql_errors": 0, + "total_events": 128003 + }, + { + "label": "warm-cache-hit-6", + "duration_ms": 1.947, + "status": 200, + "graphql_errors": 0, + "total_events": 128003 + }, + { + "label": "warm-cache-hit-7", + "duration_ms": 2.452, + "status": 200, + "graphql_errors": 0, + "total_events": 128003 + }, + { + "label": "warm-cache-hit-8", + "duration_ms": 3.336, + "status": 200, + "graphql_errors": 0, + "total_events": 128003 + }, + { + "label": "warm-cache-hit-9", + "duration_ms": 1.57, + "status": 200, + "graphql_errors": 0, + "total_events": 128003 + }, + { + "label": "warm-cache-hit-10", + "duration_ms": 1.959, + "status": 200, + "graphql_errors": 0, + "total_events": 128003 + } + ], + "summary": { + "warm_iterations": 10, + "warm_min_ms": 1.498, + "warm_median_ms": 2.074, + "warm_p95_ms": 10.834, + "warm_max_ms": 10.834, + "cold_to_warm_median_ratio": 17.83 + }, + "safe_claim_note": "This is a local cold-vs-warm GraphQL dashboard cache measurement. It is not a production cache-hit-ratio benchmark." 
+} diff --git a/docs/benchmarks/evidence/dashboard-cache-2026-06-16-harness-smoke2.json b/docs/benchmarks/evidence/dashboard-cache-2026-06-16-harness-smoke2.json new file mode 100644 index 0000000..3c1d68e --- /dev/null +++ b/docs/benchmarks/evidence/dashboard-cache-2026-06-16-harness-smoke2.json @@ -0,0 +1,61 @@ +{ + "run_id": "2026-06-16-harness-smoke2", + "captured_at": "2026-06-16T19:17:12.654Z", + "graphql_url": "http://localhost:3002/graphql", + "org_id": "00000000-0000-0000-0000-000000000001", + "project_id": "00000000-0000-0000-0000-000000000002", + "date_range": { + "start": "2026-06-01", + "end": "2026-06-30" + }, + "cache_namespace": { + "version": "611719", + "deleted_keys_before_cold_run": [ + "events:00000000-0000-0000-0000-000000000001:00000000-0000-0000-0000-000000000002:v611719:Mon Jun 01 2026 00:00:00 GMT+0000 (Coordinated Universal Time):Tue Jun 30 2026 00:00:00 GMT+0000 (Coordinated Universal Time):all:{\"segment\":\"pro\"}", + "events-series:00000000-0000-0000-0000-000000000001:00000000-0000-0000-0000-000000000002:v611719:Mon Jun 01 2026 00:00:00 GMT+0000 (Coordinated Universal Time):Tue Jun 30 2026 00:00:00 GMT+0000 (Coordinated Universal Time):all:{}", + "dau:00000000-0000-0000-0000-000000000001:00000000-0000-0000-0000-000000000002:v611719:Mon Jun 01 2026 00:00:00 GMT+0000 (Coordinated Universal Time):Tue Jun 30 2026 00:00:00 GMT+0000 (Coordinated Universal Time):{}", + "events:00000000-0000-0000-0000-000000000001:00000000-0000-0000-0000-000000000002:v611719:Mon Jun 01 2026 00:00:00 GMT+0000 (Coordinated Universal Time):Tue Jun 30 2026 00:00:00 GMT+0000 (Coordinated Universal Time):all:{}", + "total:00000000-0000-0000-0000-000000000001:00000000-0000-0000-0000-000000000002:v611719:Mon Jun 01 2026 00:00:00 GMT+0000 (Coordinated Universal Time):Tue Jun 30 2026 00:00:00 GMT+0000 (Coordinated Universal Time):{}", + "dau:00000000-0000-0000-0000-000000000001:00000000-0000-0000-0000-000000000002:v611719:Mon Jun 01 2026 00:00:00 GMT+0000 (Coordinated 
Universal Time):Tue Jun 30 2026 00:00:00 GMT+0000 (Coordinated Universal Time):{\"segment\":\"pro\"}", + "total:00000000-0000-0000-0000-000000000001:00000000-0000-0000-0000-000000000002:v611719:Mon Jun 01 2026 00:00:00 GMT+0000 (Coordinated Universal Time):Tue Jun 30 2026 00:00:00 GMT+0000 (Coordinated Universal Time):{\"segment\":\"pro\"}", + "events-series:00000000-0000-0000-0000-000000000001:00000000-0000-0000-0000-000000000002:v611719:Mon Jun 01 2026 00:00:00 GMT+0000 (Coordinated Universal Time):Tue Jun 30 2026 00:00:00 GMT+0000 (Coordinated Universal Time):all:{\"segment\":\"pro\"}" + ], + "keys_after_cold_run": [ + "dau:00000000-0000-0000-0000-000000000001:00000000-0000-0000-0000-000000000002:v611719:Mon Jun 01 2026 00:00:00 GMT+0000 (Coordinated Universal Time):Tue Jun 30 2026 00:00:00 GMT+0000 (Coordinated Universal Time):{}", + "events:00000000-0000-0000-0000-000000000001:00000000-0000-0000-0000-000000000002:v611719:Mon Jun 01 2026 00:00:00 GMT+0000 (Coordinated Universal Time):Tue Jun 30 2026 00:00:00 GMT+0000 (Coordinated Universal Time):all:{}", + "total:00000000-0000-0000-0000-000000000001:00000000-0000-0000-0000-000000000002:v611719:Mon Jun 01 2026 00:00:00 GMT+0000 (Coordinated Universal Time):Tue Jun 30 2026 00:00:00 GMT+0000 (Coordinated Universal Time):{}" + ] + }, + "cold": { + "label": "cold-cache-miss", + "duration_ms": 34.01, + "status": 200, + "graphql_errors": 0, + "total_events": 129095 + }, + "warm": [ + { + "label": "warm-cache-hit-1", + "duration_ms": 1.633, + "status": 200, + "graphql_errors": 0, + "total_events": 129095 + }, + { + "label": "warm-cache-hit-2", + "duration_ms": 2.094, + "status": 200, + "graphql_errors": 0, + "total_events": 129095 + } + ], + "summary": { + "warm_iterations": 2, + "warm_min_ms": 1.633, + "warm_median_ms": 1.633, + "warm_p95_ms": 2.094, + "warm_max_ms": 2.094, + "cold_to_warm_median_ratio": 20.83 + }, + "safe_claim_note": "This is a local cold-vs-warm GraphQL dashboard cache measurement. 
It is not a production cache-hit-ratio benchmark." +} diff --git a/docs/benchmarks/evidence/dashboard-cache-2026-06-16-harness-smoke3.json b/docs/benchmarks/evidence/dashboard-cache-2026-06-16-harness-smoke3.json new file mode 100644 index 0000000..aafb557 --- /dev/null +++ b/docs/benchmarks/evidence/dashboard-cache-2026-06-16-harness-smoke3.json @@ -0,0 +1,61 @@ +{ + "run_id": "2026-06-16-harness-smoke3", + "captured_at": "2026-06-16T19:17:39.744Z", + "graphql_url": "http://localhost:3002/graphql", + "org_id": "00000000-0000-0000-0000-000000000001", + "project_id": "00000000-0000-0000-0000-000000000002", + "date_range": { + "start": "2026-06-01", + "end": "2026-06-30" + }, + "cache_namespace": { + "version": "611772", + "deleted_keys_before_cold_run": [ + "events-series:00000000-0000-0000-0000-000000000001:00000000-0000-0000-0000-000000000002:v611772:Mon Jun 01 2026 00:00:00 GMT+0000 (Coordinated Universal Time):Tue Jun 30 2026 00:00:00 GMT+0000 (Coordinated Universal Time):all:{}", + "total:00000000-0000-0000-0000-000000000001:00000000-0000-0000-0000-000000000002:v611772:Mon Jun 01 2026 00:00:00 GMT+0000 (Coordinated Universal Time):Tue Jun 30 2026 00:00:00 GMT+0000 (Coordinated Universal Time):{\"segment\":\"pro\"}", + "total:00000000-0000-0000-0000-000000000001:00000000-0000-0000-0000-000000000002:v611772:Mon Jun 01 2026 00:00:00 GMT+0000 (Coordinated Universal Time):Tue Jun 30 2026 00:00:00 GMT+0000 (Coordinated Universal Time):{}", + "events-series:00000000-0000-0000-0000-000000000001:00000000-0000-0000-0000-000000000002:v611772:Mon Jun 01 2026 00:00:00 GMT+0000 (Coordinated Universal Time):Tue Jun 30 2026 00:00:00 GMT+0000 (Coordinated Universal Time):all:{\"segment\":\"pro\"}", + "dau:00000000-0000-0000-0000-000000000001:00000000-0000-0000-0000-000000000002:v611772:Mon Jun 01 2026 00:00:00 GMT+0000 (Coordinated Universal Time):Tue Jun 30 2026 00:00:00 GMT+0000 (Coordinated Universal Time):{\"segment\":\"pro\"}", + 
"dau:00000000-0000-0000-0000-000000000001:00000000-0000-0000-0000-000000000002:v611772:Mon Jun 01 2026 00:00:00 GMT+0000 (Coordinated Universal Time):Tue Jun 30 2026 00:00:00 GMT+0000 (Coordinated Universal Time):{}", + "events:00000000-0000-0000-0000-000000000001:00000000-0000-0000-0000-000000000002:v611772:Mon Jun 01 2026 00:00:00 GMT+0000 (Coordinated Universal Time):Tue Jun 30 2026 00:00:00 GMT+0000 (Coordinated Universal Time):all:{\"segment\":\"pro\"}", + "events:00000000-0000-0000-0000-000000000001:00000000-0000-0000-0000-000000000002:v611772:Mon Jun 01 2026 00:00:00 GMT+0000 (Coordinated Universal Time):Tue Jun 30 2026 00:00:00 GMT+0000 (Coordinated Universal Time):all:{}" + ], + "keys_after_cold_run": [ + "total:00000000-0000-0000-0000-000000000001:00000000-0000-0000-0000-000000000002:v611772:Mon Jun 01 2026 00:00:00 GMT+0000 (Coordinated Universal Time):Tue Jun 30 2026 00:00:00 GMT+0000 (Coordinated Universal Time):{}", + "dau:00000000-0000-0000-0000-000000000001:00000000-0000-0000-0000-000000000002:v611772:Mon Jun 01 2026 00:00:00 GMT+0000 (Coordinated Universal Time):Tue Jun 30 2026 00:00:00 GMT+0000 (Coordinated Universal Time):{}", + "events:00000000-0000-0000-0000-000000000001:00000000-0000-0000-0000-000000000002:v611772:Mon Jun 01 2026 00:00:00 GMT+0000 (Coordinated Universal Time):Tue Jun 30 2026 00:00:00 GMT+0000 (Coordinated Universal Time):all:{}" + ] + }, + "cold": { + "label": "cold-cache-miss", + "duration_ms": 33.37, + "status": 200, + "graphql_errors": 0, + "total_events": 129148 + }, + "warm": [ + { + "label": "warm-cache-hit-1", + "duration_ms": 2.324, + "status": 200, + "graphql_errors": 0, + "total_events": 129148 + }, + { + "label": "warm-cache-hit-2", + "duration_ms": 2.74, + "status": 200, + "graphql_errors": 0, + "total_events": 129148 + } + ], + "summary": { + "warm_iterations": 2, + "warm_min_ms": 2.324, + "warm_median_ms": 2.324, + "warm_p95_ms": 2.74, + "warm_max_ms": 2.74, + "cold_to_warm_median_ratio": 14.36 + }, + 
"safe_claim_note": "This is a local cold-vs-warm GraphQL dashboard cache measurement. It is not a production cache-hit-ratio benchmark." +} diff --git a/docs/benchmarks/evidence/dashboard-cache-2026-06-16-post-docs-benchmark.json b/docs/benchmarks/evidence/dashboard-cache-2026-06-16-post-docs-benchmark.json new file mode 100644 index 0000000..8fac5e2 --- /dev/null +++ b/docs/benchmarks/evidence/dashboard-cache-2026-06-16-post-docs-benchmark.json @@ -0,0 +1,52 @@ +{ + "run_id": "2026-06-16-post-docs-benchmark", + "captured_at": "2026-06-16T20:14:49.273Z", + "graphql_url": "http://localhost:3002/graphql", + "org_id": "00000000-0000-0000-0000-000000000001", + "project_id": "00000000-0000-0000-0000-000000000002", + "date_range": { + "start": "2026-06-01", + "end": "2026-06-30" + }, + "cache_namespace": { + "version": "612100", + "deleted_keys_before_cold_run": [], + "keys_after_cold_run": [ + "dau:00000000-0000-0000-0000-000000000001:00000000-0000-0000-0000-000000000002:v612100:Mon Jun 01 2026 00:00:00 GMT+0000 (Coordinated Universal Time):Tue Jun 30 2026 00:00:00 GMT+0000 (Coordinated Universal Time):{}", + "events:00000000-0000-0000-0000-000000000001:00000000-0000-0000-0000-000000000002:v612100:Mon Jun 01 2026 00:00:00 GMT+0000 (Coordinated Universal Time):Tue Jun 30 2026 00:00:00 GMT+0000 (Coordinated Universal Time):all:{}", + "total:00000000-0000-0000-0000-000000000001:00000000-0000-0000-0000-000000000002:v612100:Mon Jun 01 2026 00:00:00 GMT+0000 (Coordinated Universal Time):Tue Jun 30 2026 00:00:00 GMT+0000 (Coordinated Universal Time):{}" + ] + }, + "cold": { + "label": "cold-cache-miss", + "duration_ms": 28.824, + "status": 200, + "graphql_errors": 0, + "total_events": 128014 + }, + "warm": [ + { + "label": "warm-cache-hit-1", + "duration_ms": 2.286, + "status": 200, + "graphql_errors": 0, + "total_events": 128014 + }, + { + "label": "warm-cache-hit-2", + "duration_ms": 2.816, + "status": 200, + "graphql_errors": 0, + "total_events": 128014 + } + ], + 
"summary": { + "warm_iterations": 2, + "warm_min_ms": 2.286, + "warm_median_ms": 2.286, + "warm_p95_ms": 2.816, + "warm_max_ms": 2.816, + "cold_to_warm_median_ratio": 12.61 + }, + "safe_claim_note": "This is a local cold-vs-warm GraphQL dashboard cache measurement. It is not a production cache-hit-ratio benchmark." +} diff --git a/docs/benchmarks/evidence/dashboard-cache-2026-06-16T19-16-42-486Z.json b/docs/benchmarks/evidence/dashboard-cache-2026-06-16T19-16-42-486Z.json new file mode 100644 index 0000000..07ed83f --- /dev/null +++ b/docs/benchmarks/evidence/dashboard-cache-2026-06-16T19-16-42-486Z.json @@ -0,0 +1,61 @@ +{ + "run_id": "2026-06-16T19-16-42-486Z", + "captured_at": "2026-06-16T19:16:42.534Z", + "graphql_url": "http://localhost:3002/graphql", + "org_id": "00000000-0000-0000-0000-000000000001", + "project_id": "00000000-0000-0000-0000-000000000002", + "date_range": { + "start": "2026-06-01", + "end": "2026-06-30" + }, + "cache_namespace": { + "version": "611667", + "deleted_keys_before_cold_run": [ + "dau:00000000-0000-0000-0000-000000000001:00000000-0000-0000-0000-000000000002:v611667:Mon Jun 01 2026 00:00:00 GMT+0000 (Coordinated Universal Time):Tue Jun 30 2026 00:00:00 GMT+0000 (Coordinated Universal Time):{\"segment\":\"pro\"}", + "total:00000000-0000-0000-0000-000000000001:00000000-0000-0000-0000-000000000002:v611667:Mon Jun 01 2026 00:00:00 GMT+0000 (Coordinated Universal Time):Tue Jun 30 2026 00:00:00 GMT+0000 (Coordinated Universal Time):{\"segment\":\"pro\"}", + "events:00000000-0000-0000-0000-000000000001:00000000-0000-0000-0000-000000000002:v611667:Mon Jun 01 2026 00:00:00 GMT+0000 (Coordinated Universal Time):Tue Jun 30 2026 00:00:00 GMT+0000 (Coordinated Universal Time):all:{\"segment\":\"pro\"}", + "dau:00000000-0000-0000-0000-000000000001:00000000-0000-0000-0000-000000000002:v611667:Mon Jun 01 2026 00:00:00 GMT+0000 (Coordinated Universal Time):Tue Jun 30 2026 00:00:00 GMT+0000 (Coordinated Universal Time):{}", + 
"events-series:00000000-0000-0000-0000-000000000001:00000000-0000-0000-0000-000000000002:v611667:Mon Jun 01 2026 00:00:00 GMT+0000 (Coordinated Universal Time):Tue Jun 30 2026 00:00:00 GMT+0000 (Coordinated Universal Time):all:{\"segment\":\"pro\"}", + "total:00000000-0000-0000-0000-000000000001:00000000-0000-0000-0000-000000000002:v611667:Mon Jun 01 2026 00:00:00 GMT+0000 (Coordinated Universal Time):Tue Jun 30 2026 00:00:00 GMT+0000 (Coordinated Universal Time):{}", + "events:00000000-0000-0000-0000-000000000001:00000000-0000-0000-0000-000000000002:v611667:Mon Jun 01 2026 00:00:00 GMT+0000 (Coordinated Universal Time):Tue Jun 30 2026 00:00:00 GMT+0000 (Coordinated Universal Time):all:{}", + "events-series:00000000-0000-0000-0000-000000000001:00000000-0000-0000-0000-000000000002:v611667:Mon Jun 01 2026 00:00:00 GMT+0000 (Coordinated Universal Time):Tue Jun 30 2026 00:00:00 GMT+0000 (Coordinated Universal Time):all:{}" + ], + "keys_after_cold_run": [ + "dau:00000000-0000-0000-0000-000000000001:00000000-0000-0000-0000-000000000002:v611667:Mon Jun 01 2026 00:00:00 GMT+0000 (Coordinated Universal Time):Tue Jun 30 2026 00:00:00 GMT+0000 (Coordinated Universal Time):{}", + "total:00000000-0000-0000-0000-000000000001:00000000-0000-0000-0000-000000000002:v611667:Mon Jun 01 2026 00:00:00 GMT+0000 (Coordinated Universal Time):Tue Jun 30 2026 00:00:00 GMT+0000 (Coordinated Universal Time):{}", + "events:00000000-0000-0000-0000-000000000001:00000000-0000-0000-0000-000000000002:v611667:Mon Jun 01 2026 00:00:00 GMT+0000 (Coordinated Universal Time):Tue Jun 30 2026 00:00:00 GMT+0000 (Coordinated Universal Time):all:{}" + ] + }, + "cold": { + "label": "cold-cache-miss", + "duration_ms": 26.706, + "status": 200, + "graphql_errors": 0, + "total_events": 129043 + }, + "warm": [ + { + "label": "warm-cache-hit-1", + "duration_ms": 2.854, + "status": 200, + "graphql_errors": 0, + "total_events": 129043 + }, + { + "label": "warm-cache-hit-2", + "duration_ms": 4.463, + "status": 200, + 
"graphql_errors": 0, + "total_events": 129043 + } + ], + "summary": { + "warm_iterations": 2, + "warm_min_ms": 2.854, + "warm_median_ms": 2.854, + "warm_p95_ms": 4.463, + "warm_max_ms": 4.463, + "cold_to_warm_median_ratio": 9.36 + }, + "safe_claim_note": "This is a local cold-vs-warm GraphQL dashboard cache measurement. It is not a production cache-hit-ratio benchmark." +} diff --git a/docs/benchmarks/evidence/dashboard-query-2026-06-16-clean-full-benchmark.json b/docs/benchmarks/evidence/dashboard-query-2026-06-16-clean-full-benchmark.json new file mode 100644 index 0000000..785d428 --- /dev/null +++ b/docs/benchmarks/evidence/dashboard-query-2026-06-16-clean-full-benchmark.json @@ -0,0 +1,234 @@ +{ + "root_group": { + "id": "d41d8cd98f00b204e9800998ecf8427e", + "groups": [], + "checks": [ + { + "passes": 42324, + "fails": 0, + "name": "graphql status ok", + "path": "::graphql status ok", + "id": "5ebf21a86bad85f2506224456f9905d1" + }, + { + "path": "::graphql has no errors", + "id": "a51792f93b5c8e532ce92a5e740eaace", + "passes": 42324, + "fails": 0, + "name": "graphql has no errors" + } + ], + "name": "", + "path": "" + }, + "options": { + "summaryTrendStats": [ + "avg", + "min", + "med", + "p(90)", + "p(95)", + "p(99)", + "max" + ], + "summaryTimeUnit": "", + "noColor": false + }, + "state": { + "isStdOutTTY": false, + "isStdErrTTY": false, + "testRunDurationMs": 20002.906787 + }, + "metrics": { + "http_req_failed": { + "type": "rate", + "contains": "default", + "values": { + "passes": 0, + "fails": 42324, + "rate": 0 + }, + "thresholds": { + "rate<0.05": { + "ok": true + } + } + }, + "http_req_duration{expected_response:true}": { + "type": "trend", + "contains": "time", + "values": { + "p(99)": 13.492146229999996, + "max": 187.0225, + "avg": 4.5905229968812025, + "min": 1.959625, + "med": 4.1537705, + "p(90)": 5.8140663, + "p(95)": 6.793154400000001 + } + }, + "checks": { + "contains": "default", + "values": { + "rate": 1, + "passes": 84648, + "fails": 0 + }, 
+ "type": "rate" + }, + "http_req_sending": { + "type": "trend", + "contains": "time", + "values": { + "p(99)": 0.04393024999999995, + "max": 4.135584, + "avg": 0.00708520558075772, + "min": 0.001292, + "med": 0.0035, + "p(90)": 0.0075, + "p(95)": 0.011333849999999998 + } + }, + "vus": { + "type": "gauge", + "contains": "default", + "values": { + "value": 10, + "min": 10, + "max": 10 + } + }, + "iteration_duration": { + "type": "trend", + "contains": "time", + "values": { + "avg": 4.721434055807551, + "min": 2.060917, + "med": 4.272270499999999, + "p(90)": 5.9811039, + "p(95)": 6.983606249999999, + "p(99)": 13.868168729999988, + "max": 188.822208 + } + }, + "http_req_tls_handshaking": { + "type": "trend", + "contains": "time", + "values": { + "p(99)": 0, + "max": 0, + "avg": 0, + "min": 0, + "med": 0, + "p(90)": 0, + "p(95)": 0 + } + }, + "data_sent": { + "type": "counter", + "contains": "data", + "values": { + "count": 42714080, + "rate": 2135393.643275892 + } + }, + "vus_max": { + "type": "gauge", + "contains": "default", + "values": { + "value": 10, + "min": 10, + "max": 10 + } + }, + "http_reqs": { + "type": "counter", + "contains": "default", + "values": { + "rate": 2115.892477562641, + "count": 42324 + } + }, + "http_req_waiting": { + "type": "trend", + "contains": "time", + "values": { + "avg": 4.560021254701874, + "min": 1.947791, + "med": 4.1266665, + "p(90)": 5.7752454, + "p(95)": 6.742798199999998, + "p(99)": 13.444725339999998, + "max": 186.953875 + } + }, + "data_received": { + "type": "counter", + "contains": "data", + "values": { + "count": 56682105, + "rate": 2833693.4028427317 + } + }, + "http_req_connecting": { + "type": "trend", + "contains": "time", + "values": { + "avg": 0.0003741228853605519, + "min": 0, + "med": 0, + "p(90)": 0, + "p(95)": 0, + "p(99)": 0, + "max": 2.459375 + } + }, + "http_req_duration": { + "type": "trend", + "contains": "time", + "values": { + "min": 1.959625, + "med": 4.1537705, + "p(90)": 5.8140663, + "p(95)": 
6.793154400000001, + "p(99)": 13.492146229999996, + "max": 187.0225, + "avg": 4.5905229968812025 + } + }, + "http_req_receiving": { + "type": "trend", + "contains": "time", + "values": { + "med": 0.013834, + "p(90)": 0.039292, + "p(95)": 0.059125, + "p(99)": 0.1426087299999999, + "max": 4.935667, + "avg": 0.023416536598620166, + "min": 0.004 + } + }, + "iterations": { + "type": "counter", + "contains": "default", + "values": { + "rate": 2115.892477562641, + "count": 42324 + } + }, + "http_req_blocked": { + "type": "trend", + "contains": "time", + "values": { + "avg": 0.0019306113552593632, + "min": 0.000375, + "med": 0.000917, + "p(90)": 0.001791, + "p(95)": 0.00225, + "p(99)": 0.0059587699999999995, + "max": 2.555916 + } + } + } +} \ No newline at end of file diff --git a/docs/benchmarks/evidence/dashboard-query-2026-06-16-final-benchmark-smoke.json b/docs/benchmarks/evidence/dashboard-query-2026-06-16-final-benchmark-smoke.json new file mode 100644 index 0000000..03207fb --- /dev/null +++ b/docs/benchmarks/evidence/dashboard-query-2026-06-16-final-benchmark-smoke.json @@ -0,0 +1,234 @@ +{ + "options": { + "summaryTrendStats": [ + "avg", + "min", + "med", + "p(90)", + "p(95)", + "p(99)", + "max" + ], + "summaryTimeUnit": "", + "noColor": false + }, + "state": { + "isStdOutTTY": false, + "isStdErrTTY": false, + "testRunDurationMs": 15041.254715 + }, + "metrics": { + "data_received": { + "contains": "data", + "values": { + "count": 919859, + "rate": 61155.735836496664 + }, + "type": "counter" + }, + "vus_max": { + "values": { + "value": 5, + "min": 5, + "max": 5 + }, + "type": "gauge", + "contains": "default" + }, + "http_reqs": { + "type": "counter", + "contains": "default", + "values": { + "count": 675, + "rate": 44.87657531168934 + } + }, + "http_req_tls_handshaking": { + "type": "trend", + "contains": "time", + "values": { + "max": 0, + "avg": 0, + "min": 0, + "med": 0, + "p(90)": 0, + "p(95)": 0, + "p(99)": 0 + } + }, + "checks": { + "type": "rate", + 
"contains": "default", + "values": { + "rate": 1, + "passes": 1350, + "fails": 0 + } + }, + "vus": { + "type": "gauge", + "contains": "default", + "values": { + "value": 5, + "min": 5, + "max": 5 + } + }, + "iterations": { + "type": "counter", + "contains": "default", + "values": { + "count": 675, + "rate": 44.87657531168934 + } + }, + "http_req_duration{expected_response:true}": { + "contains": "time", + "values": { + "avg": 9.878141422222214, + "min": 1.749375, + "med": 7.666125, + "p(90)": 14.621808, + "p(95)": 16.429596099999998, + "p(99)": 30.174868919999977, + "max": 168.916083 + }, + "type": "trend" + }, + "http_req_blocked": { + "type": "trend", + "contains": "time", + "values": { + "p(99)": 0.1320360799999994, + "max": 2.0355, + "avg": 0.019115202962963013, + "min": 0.000792, + "med": 0.004709, + "p(90)": 0.013366400000000002, + "p(95)": 0.016470399999999986 + } + }, + "iteration_duration": { + "type": "trend", + "contains": "time", + "values": { + "avg": 111.36970575555549, + "min": 102.726791, + "med": 109.457583, + "p(90)": 116.11631679999999, + "p(95)": 117.5569081, + "p(99)": 131.09011391999996, + "max": 272.38425 + } + }, + "http_req_duration": { + "values": { + "p(90)": 14.621808, + "p(95)": 16.429596099999998, + "p(99)": 30.174868919999977, + "max": 168.916083, + "avg": 9.878141422222214, + "min": 1.749375, + "med": 7.666125 + }, + "type": "trend", + "contains": "time" + }, + "http_req_waiting": { + "type": "trend", + "contains": "time", + "values": { + "p(95)": 16.283833199999997, + "p(99)": 29.75020857999998, + "max": 168.87225, + "avg": 9.783067399999997, + "min": 1.716083, + "med": 7.563334, + "p(90)": 14.561299799999999 + } + }, + "http_req_receiving": { + "type": "trend", + "contains": "time", + "values": { + "med": 0.045334, + "p(90)": 0.1163668, + "p(95)": 0.138458, + "p(99)": 0.22964391999999986, + "max": 0.418541, + "avg": 0.05870605481481483, + "min": 0.008459 + } + }, + "http_req_failed": { + "type": "rate", + "contains": "default", + 
"values": { + "passes": 0, + "fails": 675, + "rate": 0 + }, + "thresholds": { + "rate<0.05": { + "ok": true + } + } + }, + "http_req_sending": { + "type": "trend", + "contains": "time", + "values": { + "p(90)": 0.06259160000000001, + "p(95)": 0.10013369999999994, + "p(99)": 0.29772483999999994, + "max": 1.14575, + "avg": 0.036367967407407414, + "min": 0.002459, + "med": 0.019916 + } + }, + "data_sent": { + "type": "counter", + "contains": "data", + "values": { + "count": 680766, + "rate": 45259.920990574086 + } + }, + "http_req_connecting": { + "values": { + "max": 1.589958, + "avg": 0.010103210370370371, + "min": 0, + "med": 0, + "p(90)": 0, + "p(95)": 0, + "p(99)": 0 + }, + "type": "trend", + "contains": "time" + } + }, + "root_group": { + "name": "", + "path": "", + "id": "d41d8cd98f00b204e9800998ecf8427e", + "groups": [], + "checks": [ + { + "id": "5ebf21a86bad85f2506224456f9905d1", + "passes": 675, + "fails": 0, + "name": "graphql status ok", + "path": "::graphql status ok" + }, + { + "path": "::graphql has no errors", + "id": "a51792f93b5c8e532ce92a5e740eaace", + "passes": 675, + "fails": 0, + "name": "graphql has no errors" + } + ] + } +} \ No newline at end of file diff --git a/docs/benchmarks/evidence/dashboard-query-2026-06-16-final-validation-benchmark.json b/docs/benchmarks/evidence/dashboard-query-2026-06-16-final-validation-benchmark.json new file mode 100644 index 0000000..54fef6a --- /dev/null +++ b/docs/benchmarks/evidence/dashboard-query-2026-06-16-final-validation-benchmark.json @@ -0,0 +1,234 @@ +{ + "root_group": { + "name": "", + "path": "", + "id": "d41d8cd98f00b204e9800998ecf8427e", + "groups": [], + "checks": [ + { + "name": "graphql status ok", + "path": "::graphql status ok", + "id": "5ebf21a86bad85f2506224456f9905d1", + "passes": 27464, + "fails": 0 + }, + { + "name": "graphql has no errors", + "path": "::graphql has no errors", + "id": "a51792f93b5c8e532ce92a5e740eaace", + "passes": 27429, + "fails": 35 + } + ] + }, + "options": { + 
"summaryTrendStats": [ + "avg", + "min", + "med", + "p(90)", + "p(95)", + "p(99)", + "max" + ], + "summaryTimeUnit": "", + "noColor": false + }, + "state": { + "isStdOutTTY": false, + "isStdErrTTY": false, + "testRunDurationMs": 15002.62609 + }, + "metrics": { + "http_req_failed": { + "type": "rate", + "contains": "default", + "values": { + "rate": 0, + "passes": 0, + "fails": 27464 + }, + "thresholds": { + "rate<0.05": { + "ok": true + } + } + }, + "http_req_duration": { + "type": "trend", + "contains": "time", + "values": { + "p(95)": 6.668620549999999, + "p(99)": 12.172302079999996, + "max": 948.948167, + "avg": 5.302242337896887, + "min": 1.639875, + "med": 4.5048335, + "p(90)": 5.9598123 + } + }, + "http_req_blocked": { + "type": "trend", + "contains": "time", + "values": { + "p(95)": 0.0025, + "p(99)": 0.005958, + "max": 6.830708, + "avg": 0.003301870812700155, + "min": 0.000375, + "med": 0.001042, + "p(90)": 0.001959 + } + }, + "http_req_receiving": { + "type": "trend", + "contains": "time", + "values": { + "p(99)": 0.19483120999999992, + "max": 3.433, + "avg": 0.027984046278765, + "min": 0.004584, + "med": 0.016125, + "p(90)": 0.044083, + "p(95)": 0.0679036999999999 + } + }, + "http_req_tls_handshaking": { + "type": "trend", + "contains": "time", + "values": { + "p(99)": 0, + "max": 0, + "avg": 0, + "min": 0, + "med": 0, + "p(90)": 0, + "p(95)": 0 + } + }, + "checks": { + "type": "rate", + "contains": "default", + "values": { + "rate": 0.9993628022138071, + "passes": 54893, + "fails": 35 + } + }, + "vus_max": { + "type": "gauge", + "contains": "default", + "values": { + "value": 10, + "min": 10, + "max": 10 + } + }, + "http_req_sending": { + "type": "trend", + "contains": "time", + "values": { + "min": 0.001583, + "med": 0.004125, + "p(90)": 0.008958, + "p(95)": 0.014166, + "p(99)": 0.06482695999999995, + "max": 2.682834, + "avg": 0.008512481721526225 + } + }, + "data_received": { + "type": "counter", + "contains": "data", + "values": { + "count": 46484378, 
+ "rate": 3098416.085366825 + } + }, + "data_sent": { + "values": { + "count": 27360373, + "rate": 1823705.5856665692 + }, + "type": "counter", + "contains": "data" + }, + "iteration_duration": { + "type": "trend", + "contains": "time", + "values": { + "med": 4.646125, + "p(90)": 6.1478749, + "p(95)": 6.8870938, + "p(99)": 12.46574395999999, + "max": 949.563418, + "avg": 5.457138392076881, + "min": 1.8785 + } + }, + "vus": { + "type": "gauge", + "contains": "default", + "values": { + "value": 10, + "min": 10, + "max": 10 + } + }, + "http_req_duration{expected_response:true}": { + "type": "trend", + "contains": "time", + "values": { + "avg": 5.302242337896887, + "min": 1.639875, + "med": 4.5048335, + "p(90)": 5.9598123, + "p(95)": 6.668620549999999, + "p(99)": 12.172302079999996, + "max": 948.948167 + } + }, + "iterations": { + "values": { + "count": 27464, + "rate": 1830.6128430612644 + }, + "type": "counter", + "contains": "default" + }, + "http_req_connecting": { + "type": "trend", + "contains": "time", + "values": { + "med": 0, + "p(90)": 0, + "p(95)": 0, + "p(99)": 0, + "max": 3.737917, + "avg": 0.0008809381371977864, + "min": 0 + } + }, + "http_reqs": { + "contains": "default", + "values": { + "count": 27464, + "rate": 1830.6128430612644 + }, + "type": "counter" + }, + "http_req_waiting": { + "contains": "time", + "values": { + "max": 948.833876, + "avg": 5.265745809896575, + "min": 1.559417, + "med": 4.4737705000000005, + "p(90)": 5.9123624, + "p(95)": 6.607218699999999, + "p(99)": 12.103400539999997 + }, + "type": "trend" + } + } +} \ No newline at end of file diff --git a/docs/benchmarks/evidence/dashboard-query-2026-06-16-harness-smoke.json b/docs/benchmarks/evidence/dashboard-query-2026-06-16-harness-smoke.json new file mode 100644 index 0000000..79aa489 --- /dev/null +++ b/docs/benchmarks/evidence/dashboard-query-2026-06-16-harness-smoke.json @@ -0,0 +1,234 @@ +{ + "root_group": { + "name": "", + "path": "", + "id": "d41d8cd98f00b204e9800998ecf8427e", + 
"groups": [], + "checks": [ + { + "name": "graphql status ok", + "path": "::graphql status ok", + "id": "5ebf21a86bad85f2506224456f9905d1", + "passes": 1199, + "fails": 0 + }, + { + "id": "a51792f93b5c8e532ce92a5e740eaace", + "passes": 1199, + "fails": 0, + "name": "graphql has no errors", + "path": "::graphql has no errors" + } + ] + }, + "options": { + "summaryTrendStats": [ + "avg", + "min", + "med", + "p(90)", + "p(95)", + "p(99)", + "max" + ], + "summaryTimeUnit": "", + "noColor": false + }, + "state": { + "isStdOutTTY": false, + "isStdErrTTY": false, + "testRunDurationMs": 2001.500459 + }, + "metrics": { + "http_req_duration{expected_response:true}": { + "type": "trend", + "contains": "time", + "values": { + "avg": 1.531978262718932, + "min": 0.957583, + "med": 1.259666, + "p(90)": 1.6388496000000001, + "p(95)": 1.7993585999999997, + "p(99)": 2.301286839999999, + "max": 218.457917 + } + }, + "http_req_blocked": { + "type": "trend", + "contains": "time", + "values": { + "p(95)": 0.002042, + "p(99)": 0.0038348199999999996, + "max": 1.117083, + "avg": 0.00219199332777314, + "min": 0.0005, + "med": 0.001042, + "p(90)": 0.001708 + } + }, + "data_received": { + "type": "counter", + "contains": "data", + "values": { + "count": 2160710, + "rate": 1079545.0934243328 + } + }, + "iterations": { + "type": "counter", + "contains": "default", + "values": { + "count": 1199, + "rate": 599.050574586953 + } + }, + "vus": { + "type": "gauge", + "contains": "default", + "values": { + "value": 1, + "min": 1, + "max": 1 + } + }, + "vus_max": { + "type": "gauge", + "contains": "default", + "values": { + "value": 1, + "min": 1, + "max": 1 + } + }, + "http_req_failed": { + "type": "rate", + "contains": "default", + "values": { + "rate": 0, + "passes": 0, + "fails": 1199 + }, + "thresholds": { + "rate<0.05": { + "ok": true + } + } + }, + "http_req_duration": { + "type": "trend", + "contains": "time", + "values": { + "avg": 1.531978262718932, + "min": 0.957583, + "med": 1.259666, + 
"p(90)": 1.6388496000000001, + "p(95)": 1.7993585999999997, + "p(99)": 2.301286839999999, + "max": 218.457917 + } + }, + "http_req_connecting": { + "contains": "time", + "values": { + "p(95)": 0, + "p(99)": 0, + "max": 0.88225, + "avg": 0.0007358215179316097, + "min": 0, + "med": 0, + "p(90)": 0 + }, + "type": "trend" + }, + "http_req_tls_handshaking": { + "type": "trend", + "contains": "time", + "values": { + "max": 0, + "avg": 0, + "min": 0, + "med": 0, + "p(90)": 0, + "p(95)": 0, + "p(99)": 0 + } + }, + "http_req_sending": { + "type": "trend", + "contains": "time", + "values": { + "p(99)": 0.04507199999999998, + "max": 0.172583, + "avg": 0.005646801501251041, + "min": 0.001917, + "med": 0.004125, + "p(90)": 0.007042, + "p(95)": 0.009133399999999995 + } + }, + "http_req_receiving": { + "type": "trend", + "contains": "time", + "values": { + "max": 0.14175, + "avg": 0.023329655546288602, + "min": 0.008166, + "med": 0.0205, + "p(90)": 0.03425, + "p(95)": 0.0425831, + "p(99)": 0.06459365999999997 + } + }, + "http_req_waiting": { + "type": "trend", + "contains": "time", + "values": { + "avg": 1.5030018056713907, + "min": 0.941333, + "med": 1.236666, + "p(90)": 1.6039166, + "p(95)": 1.7576543999999996, + "p(99)": 2.26141934, + "max": 218.38575 + } + }, + "iteration_duration": { + "type": "trend", + "contains": "time", + "values": { + "avg": 1.6646851651376149, + "min": 1.040584, + "med": 1.373875, + "p(90)": 1.8073670000000002, + "p(95)": 1.9921041999999998, + "p(99)": 2.6853446599999997, + "max": 218.64425 + } + }, + "checks": { + "type": "rate", + "contains": "default", + "values": { + "rate": 1, + "passes": 2398, + "fails": 0 + } + }, + "data_sent": { + "type": "counter", + "contains": "data", + "values": { + "count": 1194338, + "rate": 596721.322060911 + } + }, + "http_reqs": { + "type": "counter", + "contains": "default", + "values": { + "count": 1199, + "rate": 599.050574586953 + } + } + } +} \ No newline at end of file diff --git 
a/docs/benchmarks/evidence/dashboard-query-2026-06-16-harness-smoke2.json b/docs/benchmarks/evidence/dashboard-query-2026-06-16-harness-smoke2.json new file mode 100644 index 0000000..a8eed03 --- /dev/null +++ b/docs/benchmarks/evidence/dashboard-query-2026-06-16-harness-smoke2.json @@ -0,0 +1,234 @@ +{ + "root_group": { + "path": "", + "id": "d41d8cd98f00b204e9800998ecf8427e", + "groups": [], + "checks": [ + { + "name": "graphql status ok", + "path": "::graphql status ok", + "id": "5ebf21a86bad85f2506224456f9905d1", + "passes": 821, + "fails": 0 + }, + { + "name": "graphql has no errors", + "path": "::graphql has no errors", + "id": "a51792f93b5c8e532ce92a5e740eaace", + "passes": 821, + "fails": 0 + } + ], + "name": "" + }, + "options": { + "noColor": false, + "summaryTrendStats": [ + "avg", + "min", + "med", + "p(90)", + "p(95)", + "p(99)", + "max" + ], + "summaryTimeUnit": "" + }, + "state": { + "isStdOutTTY": false, + "isStdErrTTY": false, + "testRunDurationMs": 2002.024417 + }, + "metrics": { + "http_req_tls_handshaking": { + "type": "trend", + "contains": "time", + "values": { + "med": 0, + "p(90)": 0, + "p(95)": 0, + "p(99)": 0, + "max": 0, + "avg": 0, + "min": 0 + } + }, + "http_req_duration{expected_response:true}": { + "type": "trend", + "contains": "time", + "values": { + "p(99)": 7.023399999999995, + "max": 243.4245, + "avg": 2.2334401802679684, + "min": 0.967125, + "med": 1.549042, + "p(90)": 2.634334, + "p(95)": 3.548583 + } + }, + "vus_max": { + "type": "gauge", + "contains": "default", + "values": { + "value": 1, + "min": 1, + "max": 1 + } + }, + "http_reqs": { + "type": "counter", + "contains": "default", + "values": { + "count": 821, + "rate": 410.08490856982394 + } + }, + "iterations": { + "type": "counter", + "contains": "default", + "values": { + "count": 821, + "rate": 410.08490856982394 + } + }, + "data_sent": { + "type": "counter", + "contains": "data", + "values": { + "count": 817918, + "rate": 408545.46680586267 + } + }, + 
"http_req_failed": { + "type": "rate", + "contains": "default", + "values": { + "rate": 0, + "passes": 0, + "fails": 821 + }, + "thresholds": { + "rate<0.05": { + "ok": true + } + } + }, + "data_received": { + "values": { + "count": 1472126, + "rate": 735318.7041574429 + }, + "type": "counter", + "contains": "data" + }, + "vus": { + "type": "gauge", + "contains": "default", + "values": { + "min": 1, + "max": 1, + "value": 1 + } + }, + "checks": { + "type": "rate", + "contains": "default", + "values": { + "fails": 0, + "rate": 1, + "passes": 1642 + } + }, + "http_req_receiving": { + "type": "trend", + "contains": "time", + "values": { + "avg": 0.036840191230207, + "min": 0.010333, + "med": 0.028459, + "p(90)": 0.060834, + "p(95)": 0.077291, + "p(99)": 0.16314199999999976, + "max": 0.513584 + } + }, + "http_req_duration": { + "type": "trend", + "contains": "time", + "values": { + "max": 243.4245, + "avg": 2.2334401802679684, + "min": 0.967125, + "med": 1.549042, + "p(90)": 2.634334, + "p(95)": 3.548583, + "p(99)": 7.023399999999995 + } + }, + "http_req_blocked": { + "type": "trend", + "contains": "time", + "values": { + "max": 1.931583, + "avg": 0.005625806333739335, + "min": 0.000542, + "med": 0.001458, + "p(90)": 0.002583, + "p(95)": 0.003959, + "p(99)": 0.013749799999999984 + } + }, + "http_req_connecting": { + "type": "trend", + "contains": "time", + "values": { + "avg": 0.0009370170523751522, + "min": 0, + "med": 0, + "p(90)": 0, + "p(95)": 0, + "p(99)": 0, + "max": 0.769291 + } + }, + "http_req_sending": { + "type": "trend", + "contains": "time", + "values": { + "p(95)": 0.019416, + "p(99)": 0.07591719999999988, + "max": 14.867042, + "avg": 0.03181293544457972, + "min": 0.001958, + "med": 0.006084, + "p(90)": 0.012791 + } + }, + "iteration_duration": { + "contains": "time", + "values": { + "med": 1.700417, + "p(90)": 2.946041, + "p(95)": 3.931042, + "p(99)": 8.146558399999996, + "max": 243.662583, + "avg": 2.432673583434834, + "min": 1.079541 + }, + "type": 
"trend" + }, + "http_req_waiting": { + "type": "trend", + "contains": "time", + "values": { + "avg": 2.1647870535931815, + "min": 0.937834, + "med": 1.504583, + "p(90)": 2.53375, + "p(95)": 3.487125, + "p(99)": 6.914557799999995, + "max": 243.328917 + } + } + } +} \ No newline at end of file diff --git a/docs/benchmarks/evidence/dashboard-query-2026-06-16-harness-smoke3.json b/docs/benchmarks/evidence/dashboard-query-2026-06-16-harness-smoke3.json new file mode 100644 index 0000000..dedc3d1 --- /dev/null +++ b/docs/benchmarks/evidence/dashboard-query-2026-06-16-harness-smoke3.json @@ -0,0 +1,234 @@ +{ + "root_group": { + "name": "", + "path": "", + "id": "d41d8cd98f00b204e9800998ecf8427e", + "groups": [], + "checks": [ + { + "fails": 0, + "name": "graphql status ok", + "path": "::graphql status ok", + "id": "5ebf21a86bad85f2506224456f9905d1", + "passes": 1339 + }, + { + "passes": 1339, + "fails": 0, + "name": "graphql has no errors", + "path": "::graphql has no errors", + "id": "a51792f93b5c8e532ce92a5e740eaace" + } + ] + }, + "options": { + "summaryTrendStats": [ + "avg", + "min", + "med", + "p(90)", + "p(95)", + "p(99)", + "max" + ], + "summaryTimeUnit": "", + "noColor": false + }, + "state": { + "isStdOutTTY": false, + "isStdErrTTY": false, + "testRunDurationMs": 2000.894501 + }, + "metrics": { + "http_req_failed": { + "thresholds": { + "rate<0.05": { + "ok": true + } + }, + "type": "rate", + "contains": "default", + "values": { + "fails": 1339, + "rate": 0, + "passes": 0 + } + }, + "checks": { + "values": { + "rate": 1, + "passes": 2678, + "fails": 0 + }, + "type": "rate", + "contains": "default" + }, + "http_req_waiting": { + "type": "trend", + "contains": "time", + "values": { + "med": 1.053375, + "p(90)": 1.3132918, + "p(95)": 1.5250211999999999, + "p(99)": 2.0060605000000002, + "max": 255.550292, + "avg": 1.3350437274085158, + "min": 0.863334 + } + }, + "iteration_duration": { + "type": "trend", + "contains": "time", + "values": { + "p(99)": 2.74303904, + 
"max": 255.877125, + "avg": 1.4905709902912598, + "min": 0.95325, + "med": 1.174209, + "p(90)": 1.5760666000000003, + "p(95)": 1.8632083999999998 + } + }, + "http_req_tls_handshaking": { + "type": "trend", + "contains": "time", + "values": { + "p(99)": 0, + "max": 0, + "avg": 0, + "min": 0, + "med": 0, + "p(90)": 0, + "p(95)": 0 + } + }, + "data_received": { + "type": "counter", + "contains": "data", + "values": { + "count": 2399723, + "rate": 1199325.101248804 + } + }, + "http_req_sending": { + "type": "trend", + "contains": "time", + "values": { + "med": 0.002958, + "p(90)": 0.0071334000000000015, + "p(95)": 0.01001669999999999, + "p(99)": 0.04830208, + "max": 1.657708, + "avg": 0.009596103808812567, + "min": 0.001458 + } + }, + "http_req_blocked": { + "type": "trend", + "contains": "time", + "values": { + "p(95)": 0.0020410999999999997, + "p(99)": 0.006898499999999996, + "max": 1.089625, + "avg": 0.0020174301717699626, + "min": 0.000416, + "med": 0.000833, + "p(90)": 0.0015 + } + }, + "data_sent": { + "type": "counter", + "contains": "data", + "values": { + "count": 1334216, + "rate": 666809.768997411 + } + }, + "http_req_connecting": { + "type": "trend", + "contains": "time", + "values": { + "avg": 0.0006400612397311426, + "min": 0, + "med": 0, + "p(90)": 0, + "p(95)": 0, + "p(99)": 0, + "max": 0.857042 + } + }, + "vus_max": { + "type": "gauge", + "contains": "default", + "values": { + "value": 1, + "min": 1, + "max": 1 + } + }, + "http_req_duration": { + "type": "trend", + "contains": "time", + "values": { + "max": 255.672084, + "avg": 1.3643372143390615, + "min": 0.877417, + "med": 1.076875, + "p(90)": 1.3452828000000001, + "p(95)": 1.573583, + "p(99)": 2.3653558799999983 + } + }, + "vus": { + "type": "gauge", + "contains": "default", + "values": { + "value": 1, + "min": 1, + "max": 1 + } + }, + "http_reqs": { + "type": "counter", + "contains": "default", + "values": { + "count": 1339, + "rate": 669.2006996524801 + } + }, + "http_req_receiving": { + "type": 
"trend", + "contains": "time", + "values": { + "p(90)": 0.0286752, + "p(95)": 0.03522529999999996, + "p(99)": 0.064715, + "max": 0.32825, + "avg": 0.01969738312173266, + "min": 0.006333, + "med": 0.016292 + } + }, + "http_req_duration{expected_response:true}": { + "type": "trend", + "contains": "time", + "values": { + "max": 255.672084, + "avg": 1.3643372143390615, + "min": 0.877417, + "med": 1.076875, + "p(90)": 1.3452828000000001, + "p(95)": 1.573583, + "p(99)": 2.3653558799999983 + } + }, + "iterations": { + "values": { + "count": 1339, + "rate": 669.2006996524801 + }, + "type": "counter", + "contains": "default" + } + } +} \ No newline at end of file diff --git a/docs/benchmarks/evidence/dashboard-query-2026-06-16-post-docs-benchmark.json b/docs/benchmarks/evidence/dashboard-query-2026-06-16-post-docs-benchmark.json new file mode 100644 index 0000000..4dfd8e9 --- /dev/null +++ b/docs/benchmarks/evidence/dashboard-query-2026-06-16-post-docs-benchmark.json @@ -0,0 +1,234 @@ +{ + "root_group": { + "name": "", + "path": "", + "id": "d41d8cd98f00b204e9800998ecf8427e", + "groups": [], + "checks": [ + { + "id": "5ebf21a86bad85f2506224456f9905d1", + "passes": 90, + "fails": 0, + "name": "graphql status ok", + "path": "::graphql status ok" + }, + { + "id": "a51792f93b5c8e532ce92a5e740eaace", + "passes": 90, + "fails": 0, + "name": "graphql has no errors", + "path": "::graphql has no errors" + } + ] + }, + "options": { + "summaryTrendStats": [ + "avg", + "min", + "med", + "p(90)", + "p(95)", + "p(99)", + "max" + ], + "summaryTimeUnit": "", + "noColor": false + }, + "state": { + "isStdOutTTY": false, + "isStdErrTTY": false, + "testRunDurationMs": 5004.682752 + }, + "metrics": { + "http_req_tls_handshaking": { + "type": "trend", + "contains": "time", + "values": { + "p(90)": 0, + "p(95)": 0, + "p(99)": 0, + "max": 0, + "avg": 0, + "min": 0, + "med": 0 + } + }, + "http_req_failed": { + "thresholds": { + "rate<0.05": { + "ok": true + } + }, + "type": "rate", + "contains": 
"default", + "values": { + "rate": 0, + "passes": 0, + "fails": 90 + } + }, + "data_received": { + "type": "counter", + "contains": "data", + "values": { + "count": 121123, + "rate": 24201.933669341204 + } + }, + "http_reqs": { + "values": { + "count": 90, + "rate": 17.983157866307046 + }, + "type": "counter", + "contains": "default" + }, + "http_req_duration": { + "type": "trend", + "contains": "time", + "values": { + "p(90)": 13.699004500000001, + "p(95)": 18.162377599999992, + "p(99)": 76.70065387, + "max": 77.693375, + "avg": 9.573676377777778, + "min": 3.140958, + "med": 6.8149795 + } + }, + "data_sent": { + "type": "counter", + "contains": "data", + "values": { + "count": 90761, + "rate": 18135.21545670993 + } + }, + "http_req_sending": { + "type": "trend", + "contains": "time", + "values": { + "avg": 0.02840651111111112, + "min": 0.008166, + "med": 0.0208335, + "p(90)": 0.0377375, + "p(95)": 0.06172484999999997, + "p(99)": 0.11966987999999978, + "max": 0.358375 + } + }, + "http_req_blocked": { + "type": "trend", + "contains": "time", + "values": { + "med": 0.00475, + "p(90)": 0.007954100000000002, + "p(95)": 0.012868899999999982, + "p(99)": 1.5866737499999997, + "max": 1.78125, + "avg": 0.049813511111111076, + "min": 0.001959 + } + }, + "iteration_duration": { + "type": "trend", + "contains": "time", + "values": { + "med": 108.0916665, + "p(90)": 115.4806587, + "p(95)": 120.23208735, + "p(99)": 181.10995564, + "max": 182.892291, + "avg": 111.14414636666669, + "min": 103.84925 + } + }, + "iterations": { + "type": "counter", + "contains": "default", + "values": { + "count": 90, + "rate": 17.983157866307046 + } + }, + "http_req_waiting": { + "type": "trend", + "contains": "time", + "values": { + "avg": 9.470638855555556, + "min": 3.058458, + "med": 6.753458, + "p(90)": 13.5734042, + "p(95)": 18.068683149999995, + "p(99)": 76.49947374999999, + "max": 77.577375 + } + }, + "vus": { + "type": "gauge", + "contains": "default", + "values": { + "max": 2, + "value": 2, 
+ "min": 2 + } + }, + "http_req_duration{expected_response:true}": { + "type": "trend", + "contains": "time", + "values": { + "p(95)": 18.162377599999992, + "p(99)": 76.70065387, + "max": 77.693375, + "avg": 9.573676377777778, + "min": 3.140958, + "med": 6.8149795, + "p(90)": 13.699004500000001 + } + }, + "http_req_receiving": { + "type": "trend", + "contains": "time", + "values": { + "med": 0.0698125, + "p(90)": 0.107758, + "p(95)": 0.11835409999999998, + "p(99)": 0.19382337, + "max": 0.194417, + "avg": 0.07463101111111113, + "min": 0.015334 + } + }, + "vus_max": { + "type": "gauge", + "contains": "default", + "values": { + "value": 2, + "min": 2, + "max": 2 + } + }, + "checks": { + "values": { + "rate": 1, + "passes": 180, + "fails": 0 + }, + "type": "rate", + "contains": "default" + }, + "http_req_connecting": { + "contains": "time", + "values": { + "min": 0, + "med": 0, + "p(90)": 0, + "p(95)": 0, + "p(99)": 1.1756901199999998, + "max": 1.305, + "avg": 0.027385644444444444 + }, + "type": "trend" + } + } +} \ No newline at end of file diff --git a/docs/benchmarks/evidence/dashboard-query-2026-06-16-smoke2.json b/docs/benchmarks/evidence/dashboard-query-2026-06-16-smoke2.json new file mode 100644 index 0000000..1fe0ffc --- /dev/null +++ b/docs/benchmarks/evidence/dashboard-query-2026-06-16-smoke2.json @@ -0,0 +1,234 @@ +{ + "metrics": { + "data_received": { + "type": "counter", + "contains": "data", + "values": { + "count": 627763, + "rate": 62282.31454246815 + } + }, + "vus_max": { + "type": "gauge", + "contains": "default", + "values": { + "min": 2, + "max": 2, + "value": 2 + } + }, + "http_req_duration": { + "type": "trend", + "contains": "time", + "values": { + "max": 305.201083, + "avg": 45.24255841950113, + "min": 1.479458, + "med": 4.053375, + "p(90)": 171.269458, + "p(95)": 197.346458, + "p(99)": 247.26915019999996 + } + }, + "vus": { + "type": "gauge", + "contains": "default", + "values": { + "value": 2, + "min": 2, + "max": 2 + } + }, + 
"http_req_receiving": { + "type": "trend", + "contains": "time", + "values": { + "med": 0.055125, + "p(90)": 0.119208, + "p(95)": 0.158417, + "p(99)": 0.2966999999999998, + "max": 1.231208, + "avg": 0.07260938321995472, + "min": 0.010417 + } + }, + "iterations": { + "contains": "default", + "values": { + "count": 441, + "rate": 43.752977976128655 + }, + "type": "counter" + }, + "http_req_blocked": { + "type": "trend", + "contains": "time", + "values": { + "avg": 0.01326002494331068, + "min": 0.001041, + "med": 0.002208, + "p(90)": 0.005083, + "p(95)": 0.006583, + "p(99)": 0.02098319999999999, + "max": 2.340542 + } + }, + "http_req_connecting": { + "type": "trend", + "contains": "time", + "values": { + "avg": 0.00464191156462585, + "min": 0, + "med": 0, + "p(90)": 0, + "p(95)": 0, + "p(99)": 0, + "max": 1.134833 + } + }, + "http_req_failed": { + "type": "rate", + "contains": "default", + "values": { + "rate": 0, + "passes": 0, + "fails": 441 + }, + "thresholds": { + "rate<0.05": { + "ok": true + } + } + }, + "iteration_duration": { + "type": "trend", + "contains": "time", + "values": { + "min": 1.610458, + "med": 4.35775, + "p(90)": 171.577791, + "p(95)": 197.507625, + "p(99)": 247.47069199999996, + "max": 305.413958, + "avg": 45.52902857142855 + } + }, + "http_req_tls_handshaking": { + "values": { + "p(99)": 0, + "max": 0, + "avg": 0, + "min": 0, + "med": 0, + "p(90)": 0, + "p(95)": 0 + }, + "type": "trend", + "contains": "time" + }, + "http_reqs": { + "type": "counter", + "contains": "default", + "values": { + "count": 441, + "rate": 43.752977976128655 + } + }, + "http_req_duration{expected_response:true}": { + "type": "trend", + "contains": "time", + "values": { + "p(90)": 171.269458, + "p(95)": 197.346458, + "p(99)": 247.26915019999996, + "max": 305.201083, + "avg": 45.24255841950113, + "min": 1.479458, + "med": 4.053375 + } + }, + "http_req_waiting": { + "values": { + "p(99)": 247.16208379999998, + "max": 305.052666, + "avg": 45.15429712471654, + "min": 
1.446542, + "med": 3.935292, + "p(90)": 171.176583, + "p(95)": 197.2945 + }, + "type": "trend", + "contains": "time" + }, + "http_req_sending": { + "contains": "time", + "values": { + "p(99)": 0.12118359999999995, + "max": 0.202875, + "avg": 0.015651911564625864, + "min": 0.00375, + "med": 0.010375, + "p(90)": 0.027083, + "p(95)": 0.038625 + }, + "type": "trend" + }, + "checks": { + "type": "rate", + "contains": "default", + "values": { + "rate": 1, + "passes": 882, + "fails": 0 + } + }, + "data_sent": { + "type": "counter", + "contains": "data", + "values": { + "count": 439330, + "rate": 43587.29209581089 + } + } + }, + "root_group": { + "groups": [], + "checks": [ + { + "name": "graphql status ok", + "path": "::graphql status ok", + "id": "5ebf21a86bad85f2506224456f9905d1", + "passes": 441, + "fails": 0 + }, + { + "name": "graphql has no errors", + "path": "::graphql has no errors", + "id": "a51792f93b5c8e532ce92a5e740eaace", + "passes": 441, + "fails": 0 + } + ], + "name": "", + "path": "", + "id": "d41d8cd98f00b204e9800998ecf8427e" + }, + "options": { + "noColor": false, + "summaryTrendStats": [ + "avg", + "min", + "med", + "p(90)", + "p(95)", + "p(99)", + "max" + ], + "summaryTimeUnit": "" + }, + "state": { + "isStdOutTTY": false, + "isStdErrTTY": false, + "testRunDurationMs": 10079.313921 + } +} \ No newline at end of file diff --git a/docs/benchmarks/evidence/graphql-metrics-2026-06-16.txt b/docs/benchmarks/evidence/graphql-metrics-2026-06-16.txt new file mode 100644 index 0000000..1296cb7 --- /dev/null +++ b/docs/benchmarks/evidence/graphql-metrics-2026-06-16.txt @@ -0,0 +1,20 @@ +# GraphQL Metrics Scrape: 2026-06-16 + +Command: two authenticated dashboard GraphQL queries followed by GET /metrics + +graphql_query_duration_seconds_count{operation="dailyActiveUsers"} 2 +graphql_query_duration_seconds_count{operation="totalEvents"} 2 +graphql_query_duration_seconds_count{operation="eventCounts"} 2 +graphql_query_duration_seconds_count{operation="metrics"} 2 
+graphql_queries_total{operation="dailyActiveUsers"} 2 +graphql_queries_total{operation="totalEvents"} 2 +graphql_queries_total{operation="eventCounts"} 2 +graphql_queries_total{operation="metrics"} 2 +graphql_redis_cache_hits_total{query_type="total"} 1 +graphql_redis_cache_hits_total{query_type="dau"} 1 +graphql_redis_cache_hits_total{query_type="events"} 1 +graphql_redis_cache_misses_total{query_type="total"} 1 +graphql_redis_cache_misses_total{query_type="dau"} 1 +graphql_redis_cache_misses_total{query_type="events"} 1 +graphql_db_connections_active 0 +graphql_db_connections_idle 0 diff --git a/docs/benchmarks/evidence/hot-tenant-2026-06-16-clean-full-benchmark.json b/docs/benchmarks/evidence/hot-tenant-2026-06-16-clean-full-benchmark.json new file mode 100644 index 0000000..26c543f --- /dev/null +++ b/docs/benchmarks/evidence/hot-tenant-2026-06-16-clean-full-benchmark.json @@ -0,0 +1,227 @@ +{ + "root_group": { + "name": "", + "path": "", + "id": "d41d8cd98f00b204e9800998ecf8427e", + "groups": [], + "checks": [ + { + "name": "accepted or rate limited", + "path": "::accepted or rate limited", + "id": "f582c15494cc8beb1c5d5e90e17b3cef", + "passes": 424, + "fails": 0 + } + ] + }, + "options": { + "summaryTimeUnit": "", + "noColor": false, + "summaryTrendStats": [ + "avg", + "min", + "med", + "p(90)", + "p(95)", + "p(99)", + "max" + ] + }, + "state": { + "isStdErrTTY": false, + "testRunDurationMs": 25002.394804, + "isStdOutTTY": false + }, + "metrics": { + "http_req_sending": { + "type": "trend", + "contains": "time", + "values": { + "max": 0.244417, + "avg": 0.04063621226415099, + "min": 0.016334, + "med": 0.0345205, + "p(90)": 0.06343750000000004, + "p(95)": 0.1104313, + "p(99)": 0.18743783999999997 + } + }, + "http_req_receiving": { + "values": { + "avg": 0.07860304716981134, + "min": 0.02975, + "med": 0.06475, + "p(90)": 0.1285793, + "p(95)": 0.1525965, + "p(99)": 0.3115144299999995, + "max": 0.648458 + }, + "type": "trend", + "contains": "time" + }, + 
"http_req_failed": { + "contains": "default", + "values": { + "fails": 424, + "rate": 0, + "passes": 0 + }, + "thresholds": { + "rate<0.10": { + "ok": true + } + }, + "type": "rate" + }, + "data_sent": { + "type": "counter", + "contains": "data", + "values": { + "rate": 9259.87297676623, + "count": 231519 + } + }, + "iterations": { + "type": "counter", + "contains": "default", + "values": { + "rate": 16.95837552057879, + "count": 424 + } + }, + "iteration_duration": { + "type": "trend", + "contains": "time", + "values": { + "p(95)": 8.0313348, + "p(99)": 9.950255839999997, + "max": 47.869542, + "avg": 4.684405827830188, + "min": 2.089333, + "med": 4.5166249999999994, + "p(90)": 7.0729249 + } + }, + "http_reqs": { + "contains": "default", + "values": { + "count": 424, + "rate": 16.95837552057879 + }, + "type": "counter" + }, + "data_received": { + "type": "counter", + "contains": "data", + "values": { + "count": 158152, + "rate": 6325.474069175889 + } + }, + "http_req_duration{expected_response:true}": { + "contains": "time", + "values": { + "p(99)": 8.21078766, + "max": 37.358625, + "avg": 4.169799617924523, + "min": 1.935375, + "med": 4.1242505000000005, + "p(90)": 6.4160249, + "p(95)": 6.833537399999999 + }, + "type": "trend" + }, + "http_req_tls_handshaking": { + "type": "trend", + "contains": "time", + "values": { + "p(95)": 0, + "p(99)": 0, + "max": 0, + "avg": 0, + "min": 0, + "med": 0, + "p(90)": 0 + } + }, + "http_req_connecting": { + "values": { + "med": 0, + "p(90)": 0, + "p(95)": 1.1567338499999986, + "p(99)": 2.21384948, + "max": 3.671875, + "avg": 0.1173511179245283, + "min": 0 + }, + "type": "trend", + "contains": "time" + }, + "vus_max": { + "values": { + "value": 30, + "min": 30, + "max": 30 + }, + "type": "gauge", + "contains": "default" + }, + "http_req_blocked": { + "type": "trend", + "contains": "time", + "values": { + "max": 3.843042, + "avg": 0.13189324764150942, + "min": 0.002584, + "med": 0.006792, + "p(90)": 0.016583900000000006, + "p(95)": 
1.2255186499999984, + "p(99)": 2.498090839999999 + } + }, + "http_req_waiting": { + "type": "trend", + "contains": "time", + "values": { + "p(90)": 6.3261082, + "p(95)": 6.7037749, + "p(99)": 8.1073255, + "max": 37.181917, + "avg": 4.050560358490564, + "min": 1.885542, + "med": 3.981563 + } + }, + "checks": { + "type": "rate", + "contains": "default", + "values": { + "passes": 424, + "fails": 0, + "rate": 1 + } + }, + "vus": { + "type": "gauge", + "contains": "default", + "values": { + "value": 0, + "min": 0, + "max": 1 + } + }, + "http_req_duration": { + "type": "trend", + "contains": "time", + "values": { + "avg": 4.169799617924523, + "min": 1.935375, + "med": 4.1242505000000005, + "p(90)": 6.4160249, + "p(95)": 6.833537399999999, + "p(99)": 8.21078766, + "max": 37.358625 + } + } + } +} \ No newline at end of file diff --git a/docs/benchmarks/evidence/hot-tenant-2026-06-16-final-benchmark-smoke.json b/docs/benchmarks/evidence/hot-tenant-2026-06-16-final-benchmark-smoke.json new file mode 100644 index 0000000..6b38103 --- /dev/null +++ b/docs/benchmarks/evidence/hot-tenant-2026-06-16-final-benchmark-smoke.json @@ -0,0 +1,227 @@ +{ + "root_group": { + "checks": [ + { + "passes": 249, + "fails": 0, + "name": "accepted or rate limited", + "path": "::accepted or rate limited", + "id": "f582c15494cc8beb1c5d5e90e17b3cef" + } + ], + "name": "", + "path": "", + "id": "d41d8cd98f00b204e9800998ecf8427e", + "groups": [] + }, + "options": { + "summaryTrendStats": [ + "avg", + "min", + "med", + "p(90)", + "p(95)", + "p(99)", + "max" + ], + "summaryTimeUnit": "", + "noColor": false + }, + "state": { + "isStdOutTTY": false, + "isStdErrTTY": false, + "testRunDurationMs": 19999.91937 + }, + "metrics": { + "vus": { + "type": "gauge", + "contains": "default", + "values": { + "value": 0, + "min": 0, + "max": 0 + } + }, + "http_req_duration": { + "type": "trend", + "contains": "time", + "values": { + "max": 30.186375, + "avg": 4.7443509076305235, + "min": 1.887792, + "med": 4.516417, 
+ "p(90)": 7.0776498000000005, + "p(95)": 7.808541199999999, + "p(99)": 9.60227616 + } + }, + "http_req_duration{expected_response:true}": { + "type": "trend", + "contains": "time", + "values": { + "avg": 4.7443509076305235, + "min": 1.887792, + "med": 4.516417, + "p(90)": 7.0776498000000005, + "p(95)": 7.808541199999999, + "p(99)": 9.60227616, + "max": 30.186375 + } + }, + "http_reqs": { + "values": { + "rate": 12.45005019237735, + "count": 249 + }, + "type": "counter", + "contains": "default" + }, + "http_req_blocked": { + "type": "trend", + "contains": "time", + "values": { + "max": 4.387834, + "avg": 0.25892785140562247, + "min": 0.003208, + "med": 0.008916, + "p(90)": 0.04310840000000003, + "p(95)": 2.6982163999999993, + "p(99)": 3.7592218399999995 + } + }, + "http_req_waiting": { + "type": "trend", + "contains": "time", + "values": { + "p(99)": 9.34558016, + "max": 28.378458, + "avg": 4.573244289156626, + "min": 1.832125, + "med": 4.352292, + "p(90)": 6.8243744, + "p(95)": 7.599225399999999 + } + }, + "http_req_receiving": { + "type": "trend", + "contains": "time", + "values": { + "max": 1.77475, + "avg": 0.10519412449799209, + "min": 0.035125, + "med": 0.088125, + "p(90)": 0.1600334, + "p(95)": 0.18559119999999998, + "p(99)": 0.28794284 + } + }, + "vus_max": { + "values": { + "value": 20, + "min": 20, + "max": 20 + }, + "type": "gauge", + "contains": "default" + }, + "http_req_sending": { + "type": "trend", + "contains": "time", + "values": { + "med": 0.046042, + "p(90)": 0.1381414, + "p(95)": 0.18029099999999998, + "p(99)": 0.3299068399999999, + "max": 0.39925, + "avg": 0.06591249397590361, + "min": 0.017042 + } + }, + "iteration_duration": { + "type": "trend", + "contains": "time", + "values": { + "max": 38.248, + "avg": 5.49430787148594, + "min": 2.073541, + "med": 5.002666, + "p(90)": 9.218325, + "p(95)": 10.1415086, + "p(99)": 12.138978159999999 + } + }, + "http_req_tls_handshaking": { + "type": "trend", + "contains": "time", + "values": { + "avg": 0, + 
"min": 0, + "med": 0, + "p(90)": 0, + "p(95)": 0, + "p(99)": 0, + "max": 0 + } + }, + "checks": { + "contains": "default", + "values": { + "passes": 249, + "fails": 0, + "rate": 1 + }, + "type": "rate" + }, + "data_sent": { + "type": "counter", + "contains": "data", + "values": { + "count": 136143, + "rate": 6807.177443135862 + } + }, + "data_received": { + "values": { + "count": 92754, + "rate": 4637.7186969629265 + }, + "type": "counter", + "contains": "data" + }, + "http_req_failed": { + "type": "rate", + "contains": "default", + "values": { + "passes": 0, + "fails": 249, + "rate": 0 + }, + "thresholds": { + "rate<0.10": { + "ok": true + } + } + }, + "http_req_connecting": { + "type": "trend", + "contains": "time", + "values": { + "p(99)": 3.6229551599999996, + "max": 4.199792, + "avg": 0.2275706144578313, + "min": 0, + "med": 0, + "p(90)": 0, + "p(95)": 2.5278245999999993 + } + }, + "iterations": { + "type": "counter", + "contains": "default", + "values": { + "count": 249, + "rate": 12.45005019237735 + } + } + } +} \ No newline at end of file diff --git a/docs/benchmarks/evidence/hot-tenant-2026-06-16-final-validation-benchmark.json b/docs/benchmarks/evidence/hot-tenant-2026-06-16-final-validation-benchmark.json new file mode 100644 index 0000000..52119e3 --- /dev/null +++ b/docs/benchmarks/evidence/hot-tenant-2026-06-16-final-validation-benchmark.json @@ -0,0 +1,227 @@ +{ + "root_group": { + "name": "", + "path": "", + "id": "d41d8cd98f00b204e9800998ecf8427e", + "groups": [], + "checks": [ + { + "name": "accepted or rate limited", + "path": "::accepted or rate limited", + "id": "f582c15494cc8beb1c5d5e90e17b3cef", + "passes": 175, + "fails": 0 + } + ] + }, + "options": { + "summaryTrendStats": [ + "avg", + "min", + "med", + "p(90)", + "p(95)", + "p(99)", + "max" + ], + "summaryTimeUnit": "", + "noColor": false + }, + "state": { + "isStdOutTTY": false, + "isStdErrTTY": false, + "testRunDurationMs": 20003.916953 + }, + "metrics": { + "http_reqs": { + "type": 
"counter", + "contains": "default", + "values": { + "count": 175, + "rate": 8.748286668614424 + } + }, + "http_req_connecting": { + "type": "trend", + "contains": "time", + "values": { + "p(99)": 1.3399426599999997, + "max": 16.095125, + "avg": 0.20503213714285717, + "min": 0, + "med": 0, + "p(90)": 0.878333, + "p(95)": 1.026233 + } + }, + "iterations": { + "type": "counter", + "contains": "default", + "values": { + "count": 175, + "rate": 8.748286668614424 + } + }, + "data_sent": { + "type": "counter", + "contains": "data", + "values": { + "count": 85185, + "rate": 4258.415999233827 + } + }, + "data_received": { + "type": "counter", + "contains": "data", + "values": { + "count": 65275, + "rate": 3263.1109273931806 + } + }, + "http_req_sending": { + "type": "trend", + "contains": "time", + "values": { + "p(95)": 0.08742929999999999, + "p(99)": 0.21577357999999996, + "max": 0.2445, + "avg": 0.03589500571428571, + "min": 0.0135, + "med": 0.025417, + "p(90)": 0.06115 + } + }, + "http_req_waiting": { + "type": "trend", + "contains": "time", + "values": { + "p(99)": 7.6341553399999995, + "max": 26.791625, + "avg": 2.152609291428573, + "min": 1.357917, + "med": 1.750875, + "p(90)": 2.9228754, + "p(95)": 3.322858299999998 + } + }, + "http_req_tls_handshaking": { + "type": "trend", + "contains": "time", + "values": { + "avg": 0, + "min": 0, + "med": 0, + "p(90)": 0, + "p(95)": 0, + "p(99)": 0, + "max": 0 + } + }, + "vus_max": { + "values": { + "value": 20, + "min": 20, + "max": 20 + }, + "type": "gauge", + "contains": "default" + }, + "http_req_duration": { + "type": "trend", + "contains": "time", + "values": { + "p(90)": 3.0706088, + "p(95)": 3.6478167999999993, + "p(99)": 7.89493624, + "max": 26.947542, + "avg": 2.2392426114285726, + "min": 1.476875, + "med": 1.818792 + } + }, + "http_req_receiving": { + "values": { + "p(90)": 0.07023360000000001, + "p(95)": 0.09624189999999999, + "p(99)": 0.15995199999999998, + "max": 0.180708, + "avg": 0.05073831428571429, + "min": 
0.0275, + "med": 0.044625 + }, + "type": "trend", + "contains": "time" + }, + "checks": { + "type": "rate", + "contains": "default", + "values": { + "passes": 175, + "fails": 0, + "rate": 1 + } + }, + "iteration_duration": { + "type": "trend", + "contains": "time", + "values": { + "p(99)": 12.467814999999973, + "max": 27.401708, + "avg": 2.734527354285715, + "min": 1.659542, + "med": 2.095292, + "p(90)": 3.6484416, + "p(95)": 4.974108199999999 + } + }, + "http_req_failed": { + "thresholds": { + "rate<0.10": { + "ok": true + } + }, + "type": "rate", + "contains": "default", + "values": { + "passes": 0, + "fails": 175, + "rate": 0 + } + }, + "http_req_duration{expected_response:true}": { + "type": "trend", + "contains": "time", + "values": { + "min": 1.476875, + "med": 1.818792, + "p(90)": 3.0706088, + "p(95)": 3.6478167999999993, + "p(99)": 7.89493624, + "max": 26.947542, + "avg": 2.2392426114285726 + } + }, + "vus": { + "type": "gauge", + "contains": "default", + "values": { + "min": 0, + "max": 0, + "value": 0 + } + }, + "http_req_blocked": { + "type": "trend", + "contains": "time", + "values": { + "avg": 0.22220331999999993, + "min": 0.002709, + "med": 0.004, + "p(90)": 0.9303586, + "p(95)": 1.0986874999999998, + "p(99)": 1.6389570799999997, + "max": 16.29075 + } + } + } +} \ No newline at end of file diff --git a/docs/benchmarks/evidence/hot-tenant-2026-06-16-harness-smoke.json b/docs/benchmarks/evidence/hot-tenant-2026-06-16-harness-smoke.json new file mode 100644 index 0000000..3b7f8be --- /dev/null +++ b/docs/benchmarks/evidence/hot-tenant-2026-06-16-harness-smoke.json @@ -0,0 +1,227 @@ +{ + "root_group": { + "id": "d41d8cd98f00b204e9800998ecf8427e", + "groups": [], + "checks": [ + { + "fails": 0, + "name": "accepted or rate limited", + "path": "::accepted or rate limited", + "id": "f582c15494cc8beb1c5d5e90e17b3cef", + "passes": 3 + } + ], + "name": "", + "path": "" + }, + "options": { + "summaryTrendStats": [ + "avg", + "min", + "med", + "p(90)", + "p(95)", 
+ "p(99)", + "max" + ], + "summaryTimeUnit": "", + "noColor": false + }, + "state": { + "isStdOutTTY": false, + "isStdErrTTY": false, + "testRunDurationMs": 3009.54471 + }, + "metrics": { + "http_req_receiving": { + "type": "trend", + "contains": "time", + "values": { + "p(99)": 0.16678018, + "max": 0.167416, + "avg": 0.115458, + "min": 0.043333, + "med": 0.135625, + "p(90)": 0.1610578, + "p(95)": 0.1642369 + } + }, + "data_received": { + "type": "counter", + "contains": "data", + "values": { + "count": 1119, + "rate": 371.8170380661997 + } + }, + "iteration_duration": { + "values": { + "p(90)": 13.816392, + "p(95)": 14.587966999999999, + "p(99)": 15.205226999999999, + "max": 15.359542, + "avg": 8.610611333333333, + "min": 2.8285, + "med": 7.643792 + }, + "type": "trend", + "contains": "time" + }, + "iterations": { + "values": { + "count": 3, + "rate": 0.9968285202847177 + }, + "type": "counter", + "contains": "default" + }, + "http_req_connecting": { + "type": "trend", + "contains": "time", + "values": { + "p(90)": 2.6170000000000004, + "p(95)": 2.944125, + "p(99)": 3.2058250000000004, + "max": 3.27125, + "avg": 1.0904166666666668, + "min": 0, + "med": 0 + } + }, + "data_sent": { + "type": "counter", + "contains": "data", + "values": { + "count": 1414, + "rate": 469.83850922753027 + } + }, + "http_req_blocked": { + "contains": "time", + "values": { + "avg": 2.038917, + "min": 0.003542, + "med": 0.034042, + "p(90)": 4.870142, + "p(95)": 5.474654499999999, + "p(99)": 5.958264499999999, + "max": 6.079167 + }, + "type": "trend" + }, + "http_req_tls_handshaking": { + "values": { + "p(95)": 0, + "p(99)": 0, + "max": 0, + "avg": 0, + "min": 0, + "med": 0, + "p(90)": 0 + }, + "type": "trend", + "contains": "time" + }, + "http_req_sending": { + "type": "trend", + "contains": "time", + "values": { + "p(90)": 0.1880588, + "p(95)": 0.19600889999999999, + "p(99)": 0.20236898, + "max": 0.203959, + "avg": 0.12266666666666666, + "min": 0.039583, + "med": 0.124458 + } + }, + 
"http_req_failed": { + "type": "rate", + "contains": "default", + "values": { + "passes": 0, + "fails": 3, + "rate": 0 + }, + "thresholds": { + "rate<0.10": { + "ok": true + } + } + }, + "vus_max": { + "type": "gauge", + "contains": "default", + "values": { + "value": 1, + "min": 1, + "max": 1 + } + }, + "http_req_duration": { + "contains": "time", + "values": { + "avg": 4.779999666666667, + "min": 2.607291, + "med": 5.692458, + "p(90)": 5.9706916, + "p(95)": 6.0054708, + "p(99)": 6.0332941600000005, + "max": 6.04025 + }, + "type": "trend" + }, + "http_req_waiting": { + "values": { + "p(95)": 5.6706578, + "p(99)": 5.69466436, + "max": 5.700666, + "avg": 4.541875, + "min": 2.524375, + "med": 5.400584, + "p(90)": 5.6406496 + }, + "type": "trend", + "contains": "time" + }, + "vus": { + "type": "gauge", + "contains": "default", + "values": { + "value": 0, + "min": 0, + "max": 1 + } + }, + "http_req_duration{expected_response:true}": { + "type": "trend", + "contains": "time", + "values": { + "max": 6.04025, + "avg": 4.779999666666667, + "min": 2.607291, + "med": 5.692458, + "p(90)": 5.9706916, + "p(95)": 6.0054708, + "p(99)": 6.0332941600000005 + } + }, + "checks": { + "type": "rate", + "contains": "default", + "values": { + "rate": 1, + "passes": 3, + "fails": 0 + } + }, + "http_reqs": { + "contains": "default", + "values": { + "count": 3, + "rate": 0.9968285202847177 + }, + "type": "counter" + } + } +} \ No newline at end of file diff --git a/docs/benchmarks/evidence/hot-tenant-2026-06-16-harness-smoke2.json b/docs/benchmarks/evidence/hot-tenant-2026-06-16-harness-smoke2.json new file mode 100644 index 0000000..6f87869 --- /dev/null +++ b/docs/benchmarks/evidence/hot-tenant-2026-06-16-harness-smoke2.json @@ -0,0 +1,227 @@ +{ + "root_group": { + "name": "", + "path": "", + "id": "d41d8cd98f00b204e9800998ecf8427e", + "groups": [], + "checks": [ + { + "name": "accepted or rate limited", + "path": "::accepted or rate limited", + "id": "f582c15494cc8beb1c5d5e90e17b3cef", + 
"passes": 2, + "fails": 0 + } + ] + }, + "options": { + "summaryTrendStats": [ + "avg", + "min", + "med", + "p(90)", + "p(95)", + "p(99)", + "max" + ], + "summaryTimeUnit": "", + "noColor": false + }, + "state": { + "isStdOutTTY": false, + "isStdErrTTY": false, + "testRunDurationMs": 3001.897751 + }, + "metrics": { + "http_req_blocked": { + "values": { + "p(95)": 3.1804021, + "p(99)": 3.31378042, + "max": 3.347125, + "avg": 1.679896, + "min": 0.012667, + "med": 1.679896, + "p(90)": 3.0136792000000003 + }, + "type": "trend", + "contains": "time" + }, + "checks": { + "contains": "default", + "values": { + "rate": 1, + "passes": 2, + "fails": 0 + }, + "type": "rate" + }, + "vus_max": { + "type": "gauge", + "contains": "default", + "values": { + "min": 1, + "max": 1, + "value": 1 + } + }, + "http_reqs": { + "type": "counter", + "contains": "default", + "values": { + "count": 2, + "rate": 0.6662452108283018 + } + }, + "iterations": { + "type": "counter", + "contains": "default", + "values": { + "count": 2, + "rate": 0.6662452108283018 + } + }, + "http_req_duration": { + "type": "trend", + "contains": "time", + "values": { + "p(95)": 8.605968449999999, + "p(99)": 8.78726009, + "max": 8.832583, + "avg": 6.566437499999999, + "min": 4.300292, + "med": 6.566437499999999, + "p(90)": 8.3793539 + } + }, + "data_sent": { + "type": "counter", + "contains": "data", + "values": { + "count": 937, + "rate": 312.1358812730594 + } + }, + "iteration_duration": { + "contains": "time", + "values": { + "p(95)": 16.042227750000002, + "p(99)": 16.51911275, + "max": 16.638334, + "avg": 10.6772715, + "min": 4.716209, + "med": 10.6772715, + "p(90)": 15.446121500000002 + }, + "type": "trend" + }, + "http_req_duration{expected_response:true}": { + "values": { + "avg": 6.566437499999999, + "min": 4.300292, + "med": 6.566437499999999, + "p(90)": 8.3793539, + "p(95)": 8.605968449999999, + "p(99)": 8.78726009, + "max": 8.832583 + }, + "type": "trend", + "contains": "time" + }, + 
"http_req_tls_handshaking": { + "type": "trend", + "contains": "time", + "values": { + "avg": 0, + "min": 0, + "med": 0, + "p(90)": 0, + "p(95)": 0, + "p(99)": 0, + "max": 0 + } + }, + "http_req_receiving": { + "type": "trend", + "contains": "time", + "values": { + "p(95)": 0.17920595, + "p(99)": 0.18060759, + "max": 0.180958, + "avg": 0.1634375, + "min": 0.145917, + "med": 0.1634375, + "p(90)": 0.1774539 + } + }, + "http_req_failed": { + "type": "rate", + "contains": "default", + "values": { + "passes": 0, + "fails": 2, + "rate": 0 + }, + "thresholds": { + "rate<0.10": { + "ok": true + } + } + }, + "vus": { + "type": "gauge", + "contains": "default", + "values": { + "value": 0, + "min": 0, + "max": 0 + } + }, + "http_req_waiting": { + "type": "trend", + "contains": "time", + "values": { + "avg": 6.203229, + "min": 4.093625, + "med": 6.203229, + "p(90)": 7.8909122, + "p(95)": 8.1018726, + "p(99)": 8.27064092, + "max": 8.312833 + } + }, + "data_received": { + "type": "counter", + "contains": "data", + "values": { + "count": 746, + "rate": 248.50946363895656 + } + }, + "http_req_connecting": { + "type": "trend", + "contains": "time", + "values": { + "med": 0.870229, + "p(90)": 1.5664122, + "p(95)": 1.6534351, + "p(99)": 1.72305342, + "max": 1.740458, + "avg": 0.870229, + "min": 0 + } + }, + "http_req_sending": { + "type": "trend", + "contains": "time", + "values": { + "max": 0.338792, + "avg": 0.19977099999999998, + "min": 0.06075, + "med": 0.199771, + "p(90)": 0.31098780000000004, + "p(95)": 0.3248899, + "p(99)": 0.33601158000000003 + } + } + } +} \ No newline at end of file diff --git a/docs/benchmarks/evidence/hot-tenant-2026-06-16-harness-smoke3.json b/docs/benchmarks/evidence/hot-tenant-2026-06-16-harness-smoke3.json new file mode 100644 index 0000000..51582da --- /dev/null +++ b/docs/benchmarks/evidence/hot-tenant-2026-06-16-harness-smoke3.json @@ -0,0 +1,227 @@ +{ + "state": { + "isStdOutTTY": false, + "isStdErrTTY": false, + "testRunDurationMs": 3003.905126 + 
}, + "metrics": { + "http_req_blocked": { + "type": "trend", + "contains": "time", + "values": { + "avg": 0.6532083333333334, + "min": 0.004917, + "med": 0.012958, + "p(90)": 1.5559916, + "p(95)": 1.7488708, + "p(99)": 1.90317416, + "max": 1.94175 + } + }, + "iteration_duration": { + "values": { + "avg": 6.903430666666668, + "min": 2.4615, + "med": 8.083083, + "p(90)": 9.749183799999999, + "p(95)": 9.9574464, + "p(99)": 10.12405648, + "max": 10.165709 + }, + "type": "trend", + "contains": "time" + }, + "http_req_duration{expected_response:true}": { + "values": { + "p(90)": 8.8378832, + "p(95)": 9.2513166, + "p(99)": 9.58206332, + "max": 9.66475, + "avg": 5.807319, + "min": 2.226791, + "med": 5.530416 + }, + "type": "trend", + "contains": "time" + }, + "http_reqs": { + "values": { + "count": 3, + "rate": 0.9986999835759793 + }, + "type": "counter", + "contains": "default" + }, + "http_req_sending": { + "type": "trend", + "contains": "time", + "values": { + "p(95)": 0.10087439999999999, + "p(99)": 0.10210767999999999, + "max": 0.102416, + "avg": 0.073222, + "min": 0.03025, + "med": 0.087, + "p(90)": 0.0993328 + } + }, + "http_req_connecting": { + "type": "trend", + "contains": "time", + "values": { + "p(90)": 1.1482336, + "p(95)": 1.2917627999999999, + "p(99)": 1.40658616, + "max": 1.435292, + "avg": 0.47843066666666667, + "min": 0, + "med": 0 + } + }, + "checks": { + "type": "rate", + "contains": "default", + "values": { + "rate": 1, + "passes": 3, + "fails": 0 + } + }, + "data_received": { + "type": "counter", + "contains": "data", + "values": { + "count": 1119, + "rate": 372.51509387384027 + } + }, + "http_req_failed": { + "type": "rate", + "contains": "default", + "values": { + "rate": 0, + "passes": 0, + "fails": 3 + }, + "thresholds": { + "rate<0.10": { + "ok": true + } + } + }, + "vus_max": { + "type": "gauge", + "contains": "default", + "values": { + "value": 1, + "min": 1, + "max": 1 + } + }, + "http_req_tls_handshaking": { + "contains": "time", + "values": 
{ + "avg": 0, + "min": 0, + "med": 0, + "p(90)": 0, + "p(95)": 0, + "p(99)": 0, + "max": 0 + }, + "type": "trend" + }, + "http_req_waiting": { + "type": "trend", + "contains": "time", + "values": { + "p(99)": 9.389026340000001, + "max": 9.473708, + "avg": 5.620416333333334, + "min": 2.147916, + "med": 5.239625, + "p(90)": 8.6268914, + "p(95)": 9.0502997 + } + }, + "http_req_duration": { + "type": "trend", + "contains": "time", + "values": { + "med": 5.530416, + "p(90)": 8.8378832, + "p(95)": 9.2513166, + "p(99)": 9.58206332, + "max": 9.66475, + "avg": 5.807319, + "min": 2.226791 + } + }, + "http_req_receiving": { + "contains": "time", + "values": { + "min": 0.048625, + "med": 0.104042, + "p(90)": 0.1715084, + "p(95)": 0.17994169999999998, + "p(99)": 0.18668833999999998, + "max": 0.188375, + "avg": 0.11368066666666665 + }, + "type": "trend" + }, + "iterations": { + "type": "counter", + "contains": "default", + "values": { + "count": 3, + "rate": 0.9986999835759793 + } + }, + "vus": { + "type": "gauge", + "contains": "default", + "values": { + "value": 0, + "min": 0, + "max": 0 + } + }, + "data_sent": { + "type": "counter", + "contains": "data", + "values": { + "count": 1422, + "rate": 473.38379221501424 + } + } + }, + "root_group": { + "name": "", + "path": "", + "id": "d41d8cd98f00b204e9800998ecf8427e", + "groups": [], + "checks": [ + { + "path": "::accepted or rate limited", + "id": "f582c15494cc8beb1c5d5e90e17b3cef", + "passes": 3, + "fails": 0, + "name": "accepted or rate limited" + } + ] + }, + "options": { + "summaryTrendStats": [ + "avg", + "min", + "med", + "p(90)", + "p(95)", + "p(99)", + "max" + ], + "summaryTimeUnit": "", + "noColor": false + } +} \ No newline at end of file diff --git a/docs/benchmarks/evidence/hot-tenant-2026-06-16-hot-db-smoke.json b/docs/benchmarks/evidence/hot-tenant-2026-06-16-hot-db-smoke.json new file mode 100644 index 0000000..55d8680 --- /dev/null +++ b/docs/benchmarks/evidence/hot-tenant-2026-06-16-hot-db-smoke.json @@ -0,0 
+1,227 @@ +{ + "root_group": { + "name": "", + "path": "", + "id": "d41d8cd98f00b204e9800998ecf8427e", + "groups": [], + "checks": [ + { + "name": "accepted or rate limited", + "path": "::accepted or rate limited", + "id": "f582c15494cc8beb1c5d5e90e17b3cef", + "passes": 324, + "fails": 0 + } + ] + }, + "options": { + "summaryTimeUnit": "", + "noColor": false, + "summaryTrendStats": [ + "avg", + "min", + "med", + "p(90)", + "p(95)", + "p(99)", + "max" + ] + }, + "state": { + "isStdErrTTY": false, + "testRunDurationMs": 20001.112245, + "isStdOutTTY": false + }, + "metrics": { + "http_req_waiting": { + "type": "trend", + "contains": "time", + "values": { + "min": 1.80175, + "med": 3.610625, + "p(90)": 6.8191796, + "p(95)": 8.87331875, + "p(99)": 11.573043409999999, + "max": 19.438208, + "avg": 4.288128481481477 + } + }, + "iterations": { + "type": "counter", + "contains": "default", + "values": { + "count": 324, + "rate": 16.199099131649316 + } + }, + "iteration_duration": { + "type": "trend", + "contains": "time", + "values": { + "avg": 4.845608697530864, + "min": 2.026458, + "med": 4.1495, + "p(90)": 7.8070412000000005, + "p(95)": 9.781131249999998, + "p(99)": 13.677105749999999, + "max": 25.197459 + } + }, + "vus_max": { + "type": "gauge", + "contains": "default", + "values": { + "max": 10, + "value": 10, + "min": 10 + } + }, + "http_req_duration": { + "values": { + "p(90)": 7.0184788000000005, + "p(95)": 9.10211245, + "p(99)": 11.691400179999997, + "max": 20.598583, + "avg": 4.418162941358025, + "min": 1.872417, + "med": 3.7598960000000003 + }, + "type": "trend", + "contains": "time" + }, + "http_req_blocked": { + "contains": "time", + "values": { + "avg": 0.06853741666666666, + "min": 0.002834, + "med": 0.005667, + "p(90)": 0.014925200000000005, + "p(95)": 0.024266849999999986, + "p(99)": 2.408551589999999, + "max": 3.743959 + }, + "type": "trend" + }, + "http_req_connecting": { + "contains": "time", + "values": { + "avg": 0.05528446604938272, + "min": 0, + 
"med": 0, + "p(90)": 0, + "p(95)": 0, + "p(99)": 2.16027107, + "max": 3.613375 + }, + "type": "trend" + }, + "http_req_sending": { + "type": "trend", + "contains": "time", + "values": { + "p(90)": 0.07613730000000009, + "p(95)": 0.11405354999999998, + "p(99)": 0.21667567999999993, + "max": 2.163459, + "avg": 0.051418475308641964, + "min": 0.016584, + "med": 0.03225 + } + }, + "http_req_duration{expected_response:true}": { + "type": "trend", + "contains": "time", + "values": { + "avg": 4.418162941358025, + "min": 1.872417, + "med": 3.7598960000000003, + "p(90)": 7.0184788000000005, + "p(95)": 9.10211245, + "p(99)": 11.691400179999997, + "max": 20.598583 + } + }, + "vus": { + "type": "gauge", + "contains": "default", + "values": { + "value": 0, + "min": 0, + "max": 0 + } + }, + "data_sent": { + "values": { + "count": 174179, + "rate": 8708.465702628228 + }, + "type": "counter", + "contains": "data" + }, + "http_req_tls_handshaking": { + "values": { + "p(99)": 0, + "max": 0, + "avg": 0, + "min": 0, + "med": 0, + "p(90)": 0, + "p(95)": 0 + }, + "type": "trend", + "contains": "time" + }, + "http_req_receiving": { + "type": "trend", + "contains": "time", + "values": { + "avg": 0.0786159845679012, + "min": 0.031375, + "med": 0.06785450000000001, + "p(90)": 0.1322082, + "p(95)": 0.1511894, + "p(99)": 0.20908499999999997, + "max": 0.262875 + } + }, + "checks": { + "type": "rate", + "contains": "default", + "values": { + "rate": 1, + "passes": 324, + "fails": 0 + } + }, + "http_reqs": { + "contains": "default", + "values": { + "count": 324, + "rate": 16.199099131649316 + }, + "type": "counter" + }, + "data_received": { + "values": { + "count": 120852, + "rate": 6042.263976105194 + }, + "type": "counter", + "contains": "data" + }, + "http_req_failed": { + "thresholds": { + "rate<0.10": { + "ok": true + } + }, + "type": "rate", + "contains": "default", + "values": { + "rate": 0, + "passes": 0, + "fails": 324 + } + } + } +} \ No newline at end of file diff --git 
a/docs/benchmarks/evidence/hot-tenant-2026-06-16-post-docs-benchmark.json b/docs/benchmarks/evidence/hot-tenant-2026-06-16-post-docs-benchmark.json new file mode 100644 index 0000000..f1a80c7 --- /dev/null +++ b/docs/benchmarks/evidence/hot-tenant-2026-06-16-post-docs-benchmark.json @@ -0,0 +1,227 @@ +{ + "root_group": { + "name": "", + "path": "", + "id": "d41d8cd98f00b204e9800998ecf8427e", + "groups": [], + "checks": [ + { + "name": "accepted or rate limited", + "path": "::accepted or rate limited", + "id": "f582c15494cc8beb1c5d5e90e17b3cef", + "passes": 35, + "fails": 0 + } + ] + }, + "options": { + "summaryTrendStats": [ + "avg", + "min", + "med", + "p(90)", + "p(95)", + "p(99)", + "max" + ], + "summaryTimeUnit": "", + "noColor": false + }, + "state": { + "testRunDurationMs": 7013.070739, + "isStdOutTTY": false, + "isStdErrTTY": false + }, + "metrics": { + "http_req_waiting": { + "type": "trend", + "contains": "time", + "values": { + "max": 23.501667, + "avg": 6.345325057142858, + "min": 2.0465, + "med": 5.379375, + "p(90)": 10.941133200000001, + "p(95)": 11.846296099999998, + "p(99)": 19.738631659999996 + } + }, + "http_req_tls_handshaking": { + "type": "trend", + "contains": "time", + "values": { + "p(95)": 0, + "p(99)": 0, + "max": 0, + "avg": 0, + "min": 0, + "med": 0, + "p(90)": 0 + } + }, + "http_req_duration{expected_response:true}": { + "type": "trend", + "contains": "time", + "values": { + "med": 5.700792, + "p(90)": 11.100108200000001, + "p(95)": 12.093958099999998, + "p(99)": 19.877425219999996, + "max": 23.650292, + "avg": 6.512058371428572, + "min": 2.103209 + } + }, + "http_req_duration": { + "type": "trend", + "contains": "time", + "values": { + "max": 23.650292, + "avg": 6.512058371428572, + "min": 2.103209, + "med": 5.700792, + "p(90)": 11.100108200000001, + "p(95)": 12.093958099999998, + "p(99)": 19.877425219999996 + } + }, + "data_received": { + "values": { + "count": 13055, + "rate": 1861.5240721016205 + }, + "type": "counter", + "contains": 
"data" + }, + "http_req_blocked": { + "type": "trend", + "contains": "time", + "values": { + "avg": 0.6411928285714286, + "min": 0.003125, + "med": 0.009708, + "p(90)": 1.4528166000000002, + "p(95)": 2.181225099999998, + "p(99)": 10.564761779999996, + "max": 14.462125 + } + }, + "http_req_failed": { + "type": "rate", + "contains": "default", + "values": { + "rate": 0, + "passes": 0, + "fails": 35 + }, + "thresholds": { + "rate<0.10": { + "ok": true + } + } + }, + "checks": { + "contains": "default", + "values": { + "rate": 1, + "passes": 35, + "fails": 0 + }, + "type": "rate" + }, + "iteration_duration": { + "type": "trend", + "contains": "time", + "values": { + "min": 2.2875, + "med": 6.618167, + "p(90)": 12.395167200000001, + "p(95)": 14.93542959999999, + "p(99)": 22.506913559999997, + "max": 24.066125, + "avg": 7.667306114285712 + } + }, + "iterations": { + "type": "counter", + "contains": "default", + "values": { + "count": 35, + "rate": 4.990681158449385 + } + }, + "http_req_connecting": { + "type": "trend", + "contains": "time", + "values": { + "avg": 0.6095857142857143, + "min": 0, + "med": 0, + "p(90)": 1.2807164000000004, + "p(95)": 2.0428626999999984, + "p(99)": 10.436936059999995, + "max": 14.36225 + } + }, + "http_req_receiving": { + "type": "trend", + "contains": "time", + "values": { + "min": 0.035792, + "med": 0.0975, + "p(90)": 0.19648320000000002, + "p(95)": 0.2065463, + "p(99)": 0.22339522, + "max": 0.230167, + "avg": 0.10771308571428573 + } + }, + "vus": { + "type": "gauge", + "contains": "default", + "values": { + "value": 0, + "min": 0, + "max": 0 + } + }, + "vus_max": { + "contains": "default", + "values": { + "value": 5, + "min": 5, + "max": 5 + }, + "type": "gauge" + }, + "data_sent": { + "type": "counter", + "contains": "data", + "values": { + "count": 19027, + "rate": 2713.076868623327 + } + }, + "http_reqs": { + "type": "counter", + "contains": "default", + "values": { + "count": 35, + "rate": 4.990681158449385 + } + }, + 
"http_req_sending": { + "contains": "time", + "values": { + "p(99)": 0.16351, + "max": 0.173625, + "avg": 0.05902022857142858, + "min": 0.01875, + "med": 0.048583, + "p(90)": 0.10703340000000001, + "p(95)": 0.13343309999999997 + }, + "type": "trend" + } + } +} \ No newline at end of file diff --git a/docs/benchmarks/evidence/hot-tenant-2026-06-16-real-tenants-smoke.json b/docs/benchmarks/evidence/hot-tenant-2026-06-16-real-tenants-smoke.json new file mode 100644 index 0000000..e792eab --- /dev/null +++ b/docs/benchmarks/evidence/hot-tenant-2026-06-16-real-tenants-smoke.json @@ -0,0 +1,227 @@ +{ + "state": { + "isStdErrTTY": false, + "testRunDurationMs": 20005.012343, + "isStdOutTTY": false + }, + "metrics": { + "vus_max": { + "type": "gauge", + "contains": "default", + "values": { + "min": 10, + "max": 10, + "value": 10 + } + }, + "http_req_failed": { + "type": "rate", + "contains": "default", + "values": { + "rate": 0, + "passes": 0, + "fails": 325 + }, + "thresholds": { + "rate<0.10": { + "ok": true + } + } + }, + "http_req_duration{expected_response:true}": { + "contains": "time", + "values": { + "min": 1.854292, + "med": 3.856292, + "p(90)": 7.7962582000000005, + "p(95)": 8.936066199999999, + "p(99)": 15.24563007999999, + "max": 87.767333, + "avg": 4.791810667692304 + }, + "type": "trend" + }, + "http_req_duration": { + "type": "trend", + "contains": "time", + "values": { + "max": 87.767333, + "avg": 4.791810667692304, + "min": 1.854292, + "med": 3.856292, + "p(90)": 7.7962582000000005, + "p(95)": 8.936066199999999, + "p(99)": 15.24563007999999 + } + }, + "iterations": { + "contains": "default", + "values": { + "rate": 16.245928491702305, + "count": 325 + }, + "type": "counter" + }, + "http_req_blocked": { + "type": "trend", + "contains": "time", + "values": { + "p(99)": 2.4947049199999993, + "max": 5.224041, + "avg": 0.08705191076923066, + "min": 0.00275, + "med": 0.007125, + "p(90)": 0.018167, + "p(95)": 0.03309959999999994 + } + }, + "http_req_connecting": { 
+ "type": "trend", + "contains": "time", + "values": { + "avg": 0.06793410153846155, + "min": 0, + "med": 0, + "p(90)": 0, + "p(95)": 0, + "p(99)": 2.309133919999999, + "max": 4.301458 + } + }, + "http_req_tls_handshaking": { + "type": "trend", + "contains": "time", + "values": { + "min": 0, + "med": 0, + "p(90)": 0, + "p(95)": 0, + "p(99)": 0, + "max": 0, + "avg": 0 + } + }, + "data_sent": { + "type": "counter", + "contains": "data", + "values": { + "count": 176686, + "rate": 8832.086527645888 + } + }, + "iteration_duration": { + "type": "trend", + "contains": "time", + "values": { + "p(90)": 8.3611248, + "p(95)": 9.608308399999999, + "p(99)": 16.99046684, + "max": 88.402542, + "avg": 5.239723455384617, + "min": 2.059667, + "med": 4.195292 + } + }, + "http_req_sending": { + "type": "trend", + "contains": "time", + "values": { + "med": 0.03725, + "p(90)": 0.0817332, + "p(95)": 0.11294179999999994, + "p(99)": 0.23398623999999993, + "max": 1.130542, + "avg": 0.0520829723076923, + "min": 0.017917 + } + }, + "vus": { + "type": "gauge", + "contains": "default", + "values": { + "value": 0, + "min": 0, + "max": 0 + } + }, + "http_req_receiving": { + "type": "trend", + "contains": "time", + "values": { + "p(99)": 0.2783568399999999, + "max": 0.51025, + "avg": 0.08604179384615385, + "min": 0.029417, + "med": 0.072375, + "p(90)": 0.1401582, + "p(95)": 0.16205859999999997 + } + }, + "http_reqs": { + "type": "counter", + "contains": "default", + "values": { + "count": 325, + "rate": 16.245928491702305 + } + }, + "checks": { + "type": "rate", + "contains": "default", + "values": { + "rate": 1, + "passes": 325, + "fails": 0 + } + }, + "data_received": { + "type": "counter", + "contains": "data", + "values": { + "count": 121225, + "rate": 6059.73132740496 + } + }, + "http_req_waiting": { + "contains": "time", + "values": { + "p(95)": 8.7407162, + "p(99)": 15.05072699999999, + "max": 87.438542, + "avg": 4.653685901538463, + "min": 1.788417, + "med": 3.699959, + "p(90)": 7.5616082 
+ }, + "type": "trend" + } + }, + "root_group": { + "name": "", + "path": "", + "id": "d41d8cd98f00b204e9800998ecf8427e", + "groups": [], + "checks": [ + { + "name": "accepted or rate limited", + "path": "::accepted or rate limited", + "id": "f582c15494cc8beb1c5d5e90e17b3cef", + "passes": 325, + "fails": 0 + } + ] + }, + "options": { + "summaryTrendStats": [ + "avg", + "min", + "med", + "p(90)", + "p(95)", + "p(99)", + "max" + ], + "summaryTimeUnit": "", + "noColor": false + } +} \ No newline at end of file diff --git a/docs/benchmarks/evidence/hot-tenant-2026-06-16-smoke2.json b/docs/benchmarks/evidence/hot-tenant-2026-06-16-smoke2.json new file mode 100644 index 0000000..e4f8201 --- /dev/null +++ b/docs/benchmarks/evidence/hot-tenant-2026-06-16-smoke2.json @@ -0,0 +1,227 @@ +{ + "options": { + "summaryTrendStats": [ + "avg", + "min", + "med", + "p(90)", + "p(95)", + "p(99)", + "max" + ], + "summaryTimeUnit": "", + "noColor": false + }, + "state": { + "isStdOutTTY": false, + "isStdErrTTY": false, + "testRunDurationMs": 15006.771951 + }, + "metrics": { + "data_received": { + "contains": "data", + "values": { + "count": 46625, + "rate": 3106.9306678504613 + }, + "type": "counter" + }, + "http_req_failed": { + "type": "rate", + "contains": "default", + "values": { + "rate": 0, + "passes": 0, + "fails": 125 + }, + "thresholds": { + "rate<0.10": { + "ok": true + } + } + }, + "http_reqs": { + "type": "counter", + "contains": "default", + "values": { + "count": 125, + "rate": 8.329572836060219 + } + }, + "checks": { + "type": "rate", + "contains": "default", + "values": { + "rate": 1, + "passes": 125, + "fails": 0 + } + }, + "data_sent": { + "contains": "data", + "values": { + "count": 58308, + "rate": 3885.445863399994 + }, + "type": "counter" + }, + "iterations": { + "type": "counter", + "contains": "default", + "values": { + "count": 125, + "rate": 8.329572836060219 + } + }, + "http_req_receiving": { + "type": "trend", + "contains": "time", + "values": { + "med": 
0.053708, + "p(90)": 0.12110000000000003, + "p(95)": 0.13920899999999997, + "p(99)": 1.2458018399999997, + "max": 1.5835, + "avg": 0.095643976, + "min": 0.027833 + } + }, + "http_req_waiting": { + "type": "trend", + "contains": "time", + "values": { + "avg": 3.931447352, + "min": 1.511584, + "med": 2.04525, + "p(90)": 9.258525200000001, + "p(95)": 14.345825199999984, + "p(99)": 21.20610984, + "max": 23.234208 + } + }, + "http_req_tls_handshaking": { + "type": "trend", + "contains": "time", + "values": { + "max": 0, + "avg": 0, + "min": 0, + "med": 0, + "p(90)": 0, + "p(95)": 0, + "p(99)": 0 + } + }, + "http_req_blocked": { + "type": "trend", + "contains": "time", + "values": { + "med": 0.004417, + "p(90)": 0.0123336, + "p(95)": 0.02676659999999994, + "p(99)": 1.12147516, + "max": 1.627667, + "avg": 0.05032932, + "min": 0.002125 + } + }, + "vus_max": { + "contains": "default", + "values": { + "value": 5, + "min": 5, + "max": 5 + }, + "type": "gauge" + }, + "vus": { + "type": "gauge", + "contains": "default", + "values": { + "value": 0, + "min": 0, + "max": 0 + } + }, + "http_req_duration{expected_response:true}": { + "type": "trend", + "contains": "time", + "values": { + "min": 1.56225, + "med": 2.113833, + "p(90)": 9.655517, + "p(95)": 15.669758599999975, + "p(99)": 21.33895184, + "max": 23.32875, + "avg": 4.062943703999998 + } + }, + "iteration_duration": { + "type": "trend", + "contains": "time", + "values": { + "avg": 4.395770735999998, + "min": 1.723375, + "med": 2.35425, + "p(90)": 10.073233400000003, + "p(95)": 16.213108199999983, + "p(99)": 22.08461692, + "max": 23.698834 + } + }, + "http_req_duration": { + "type": "trend", + "contains": "time", + "values": { + "p(95)": 15.669758599999975, + "p(99)": 21.33895184, + "max": 23.32875, + "avg": 4.062943703999998, + "min": 1.56225, + "med": 2.113833, + "p(90)": 9.655517 + } + }, + "http_req_connecting": { + "type": "trend", + "contains": "time", + "values": { + "p(99)": 1.0581101599999998, + "max": 1.202167, + 
"avg": 0.039363336000000006, + "min": 0, + "med": 0, + "p(90)": 0, + "p(95)": 0 + } + }, + "http_req_sending": { + "values": { + "min": 0.014042, + "med": 0.02425, + "p(90)": 0.0549168, + "p(95)": 0.06417479999999998, + "p(99)": 0.2327050799999999, + "max": 0.426542, + "avg": 0.035852376 + }, + "type": "trend", + "contains": "time" + } + }, + "root_group": { + "name": "", + "path": "", + "id": "d41d8cd98f00b204e9800998ecf8427e", + "groups": [], + "checks": [ + { + "name": "accepted or rate limited", + "path": "::accepted or rate limited", + "id": "f582c15494cc8beb1c5d5e90e17b3cef", + "passes": 125, + "fails": 0 + } + ] + } +} \ No newline at end of file diff --git a/docs/benchmarks/evidence/hot-tenant-db-2026-06-16-clean-full-benchmark.json b/docs/benchmarks/evidence/hot-tenant-db-2026-06-16-clean-full-benchmark.json new file mode 100644 index 0000000..8a2133a --- /dev/null +++ b/docs/benchmarks/evidence/hot-tenant-db-2026-06-16-clean-full-benchmark.json @@ -0,0 +1,541 @@ +{ + "run_id": "2026-06-16-clean-full-benchmark", + "captured_at": "2026-06-16T20:46:03.517Z", + "manifest": { + "path": "tmp/clean-benchmark-tenants.json", + "counts": { + "total": 100, + "hot": 1, + "medium": 10, + "quiet": 89 + } + }, + "total_events": 424, + "tenant_distribution": [ + { + "tenant_class": "hot", + "events": 349, + "orgs": 1, + "projects": 1, + "synthetic_tenants": 1, + "first_event_timestamp": "2026-06-16T20:45:35.516Z", + "last_event_timestamp": "2026-06-16T20:46:00.140Z", + "first_received_at": "2026-06-16T20:45:35.523Z", + "last_received_at": "2026-06-16T20:46:00.181Z" + }, + { + "tenant_class": "quiet", + "events": 49, + "orgs": 38, + "projects": 38, + "synthetic_tenants": 38, + "first_event_timestamp": "2026-06-16T20:45:35.993Z", + "last_event_timestamp": "2026-06-16T20:45:59.520Z", + "first_received_at": "2026-06-16T20:45:36.001Z", + "last_received_at": "2026-06-16T20:45:59.524Z" + }, + { + "tenant_class": "medium", + "events": 26, + "orgs": 9, + "projects": 9, + 
"synthetic_tenants": 9, + "first_event_timestamp": "2026-06-16T20:45:38.703Z", + "last_event_timestamp": "2026-06-16T20:45:59.808Z", + "first_received_at": "2026-06-16T20:45:38.706Z", + "last_received_at": "2026-06-16T20:45:59.813Z" + } + ], + "top_tenants": [ + { + "tenant_class": "hot", + "tenant_id": "tenant_001", + "org_id": "00000000-0000-4000-8000-0000000f4241", + "project_id": "00000000-0000-4000-8000-0000001e8481", + "events": 349, + "users": 349, + "event_names": 2, + "first_event_timestamp": "2026-06-16T20:45:35.516Z", + "last_event_timestamp": "2026-06-16T20:46:00.140Z" + }, + { + "tenant_class": "medium", + "tenant_id": "tenant_007", + "org_id": "00000000-0000-4000-8000-0000000f4247", + "project_id": "00000000-0000-4000-8000-0000001e8487", + "events": 5, + "users": 5, + "event_names": 1, + "first_event_timestamp": "2026-06-16T20:45:44.503Z", + "last_event_timestamp": "2026-06-16T20:45:58.062Z" + }, + { + "tenant_class": "quiet", + "tenant_id": "tenant_015", + "org_id": "00000000-0000-4000-8000-0000000f424f", + "project_id": "00000000-0000-4000-8000-0000001e848f", + "events": 4, + "users": 4, + "event_names": 2, + "first_event_timestamp": "2026-06-16T20:45:35.993Z", + "last_event_timestamp": "2026-06-16T20:45:50.752Z" + }, + { + "tenant_class": "medium", + "tenant_id": "tenant_006", + "org_id": "00000000-0000-4000-8000-0000000f4246", + "project_id": "00000000-0000-4000-8000-0000001e8486", + "events": 4, + "users": 4, + "event_names": 1, + "first_event_timestamp": "2026-06-16T20:45:46.952Z", + "last_event_timestamp": "2026-06-16T20:45:53.952Z" + }, + { + "tenant_class": "medium", + "tenant_id": "tenant_010", + "org_id": "00000000-0000-4000-8000-0000000f424a", + "project_id": "00000000-0000-4000-8000-0000001e848a", + "events": 4, + "users": 4, + "event_names": 1, + "first_event_timestamp": "2026-06-16T20:45:42.153Z", + "last_event_timestamp": "2026-06-16T20:45:51.651Z" + }, + { + "tenant_class": "quiet", + "tenant_id": "tenant_041", + "org_id": 
"00000000-0000-4000-8000-0000000f4269", + "project_id": "00000000-0000-4000-8000-0000001e84a9", + "events": 3, + "users": 3, + "event_names": 2, + "first_event_timestamp": "2026-06-16T20:45:38.432Z", + "last_event_timestamp": "2026-06-16T20:45:56.628Z" + }, + { + "tenant_class": "medium", + "tenant_id": "tenant_002", + "org_id": "00000000-0000-4000-8000-0000000f4242", + "project_id": "00000000-0000-4000-8000-0000001e8482", + "events": 3, + "users": 3, + "event_names": 1, + "first_event_timestamp": "2026-06-16T20:45:41.251Z", + "last_event_timestamp": "2026-06-16T20:45:54.254Z" + }, + { + "tenant_class": "medium", + "tenant_id": "tenant_009", + "org_id": "00000000-0000-4000-8000-0000000f4249", + "project_id": "00000000-0000-4000-8000-0000001e8489", + "events": 3, + "users": 3, + "event_names": 1, + "first_event_timestamp": "2026-06-16T20:45:46.603Z", + "last_event_timestamp": "2026-06-16T20:45:56.153Z" + }, + { + "tenant_class": "medium", + "tenant_id": "tenant_004", + "org_id": "00000000-0000-4000-8000-0000000f4244", + "project_id": "00000000-0000-4000-8000-0000001e8484", + "events": 3, + "users": 3, + "event_names": 1, + "first_event_timestamp": "2026-06-16T20:45:40.854Z", + "last_event_timestamp": "2026-06-16T20:45:57.082Z" + }, + { + "tenant_class": "quiet", + "tenant_id": "tenant_012", + "org_id": "00000000-0000-4000-8000-0000000f424c", + "project_id": "00000000-0000-4000-8000-0000001e848c", + "events": 2, + "users": 2, + "event_names": 1, + "first_event_timestamp": "2026-06-16T20:45:46.151Z", + "last_event_timestamp": "2026-06-16T20:45:55.052Z" + }, + { + "tenant_class": "quiet", + "tenant_id": "tenant_064", + "org_id": "00000000-0000-4000-8000-0000000f4280", + "project_id": "00000000-0000-4000-8000-0000001e84c0", + "events": 2, + "users": 2, + "event_names": 1, + "first_event_timestamp": "2026-06-16T20:45:39.833Z", + "last_event_timestamp": "2026-06-16T20:45:58.326Z" + }, + { + "tenant_class": "quiet", + "tenant_id": "tenant_039", + "org_id": 
"00000000-0000-4000-8000-0000000f4267", + "project_id": "00000000-0000-4000-8000-0000001e84a7", + "events": 2, + "users": 2, + "event_names": 2, + "first_event_timestamp": "2026-06-16T20:45:47.502Z", + "last_event_timestamp": "2026-06-16T20:45:51.952Z" + }, + { + "tenant_class": "medium", + "tenant_id": "tenant_005", + "org_id": "00000000-0000-4000-8000-0000000f4245", + "project_id": "00000000-0000-4000-8000-0000001e8485", + "events": 2, + "users": 2, + "event_names": 1, + "first_event_timestamp": "2026-06-16T20:45:38.703Z", + "last_event_timestamp": "2026-06-16T20:45:48.902Z" + }, + { + "tenant_class": "quiet", + "tenant_id": "tenant_042", + "org_id": "00000000-0000-4000-8000-0000000f426a", + "project_id": "00000000-0000-4000-8000-0000001e84aa", + "events": 2, + "users": 2, + "event_names": 1, + "first_event_timestamp": "2026-06-16T20:45:43.203Z", + "last_event_timestamp": "2026-06-16T20:45:56.691Z" + }, + { + "tenant_class": "quiet", + "tenant_id": "tenant_096", + "org_id": "00000000-0000-4000-8000-0000000f42a0", + "project_id": "00000000-0000-4000-8000-0000001e84e0", + "events": 2, + "users": 2, + "event_names": 1, + "first_event_timestamp": "2026-06-16T20:45:45.902Z", + "last_event_timestamp": "2026-06-16T20:45:54.802Z" + }, + { + "tenant_class": "quiet", + "tenant_id": "tenant_024", + "org_id": "00000000-0000-4000-8000-0000000f4258", + "project_id": "00000000-0000-4000-8000-0000001e8498", + "events": 2, + "users": 2, + "event_names": 2, + "first_event_timestamp": "2026-06-16T20:45:46.752Z", + "last_event_timestamp": "2026-06-16T20:45:55.661Z" + }, + { + "tenant_class": "quiet", + "tenant_id": "tenant_017", + "org_id": "00000000-0000-4000-8000-0000000f4251", + "project_id": "00000000-0000-4000-8000-0000001e8491", + "events": 1, + "users": 1, + "event_names": 1, + "first_event_timestamp": "2026-06-16T20:45:46.401Z", + "last_event_timestamp": "2026-06-16T20:45:46.401Z" + }, + { + "tenant_class": "quiet", + "tenant_id": "tenant_014", + "org_id": 
"00000000-0000-4000-8000-0000000f424e", + "project_id": "00000000-0000-4000-8000-0000001e848e", + "events": 1, + "users": 1, + "event_names": 1, + "first_event_timestamp": "2026-06-16T20:45:50.702Z", + "last_event_timestamp": "2026-06-16T20:45:50.702Z" + }, + { + "tenant_class": "quiet", + "tenant_id": "tenant_013", + "org_id": "00000000-0000-4000-8000-0000000f424d", + "project_id": "00000000-0000-4000-8000-0000001e848d", + "events": 1, + "users": 1, + "event_names": 1, + "first_event_timestamp": "2026-06-16T20:45:41.752Z", + "last_event_timestamp": "2026-06-16T20:45:41.752Z" + }, + { + "tenant_class": "quiet", + "tenant_id": "tenant_031", + "org_id": "00000000-0000-4000-8000-0000000f425f", + "project_id": "00000000-0000-4000-8000-0000001e849f", + "events": 1, + "users": 1, + "event_names": 1, + "first_event_timestamp": "2026-06-16T20:45:47.102Z", + "last_event_timestamp": "2026-06-16T20:45:47.102Z" + } + ], + "aggregate_pressure": [ + { + "tenant_class": "hot", + "event_count_aggregate_keys": 2, + "events": 349, + "avg_events_per_event_count_key": 174.5, + "max_events_per_event_count_key": 282, + "p95_events_per_event_count_key": 271.25 + }, + { + "tenant_class": "quiet", + "event_count_aggregate_keys": 42, + "events": 49, + "avg_events_per_event_count_key": 1.17, + "max_events_per_event_count_key": 3, + "p95_events_per_event_count_key": 2 + }, + { + "tenant_class": "medium", + "event_count_aggregate_keys": 9, + "events": 26, + "avg_events_per_event_count_key": 2.89, + "max_events_per_event_count_key": 5, + "p95_events_per_event_count_key": 4.6 + } + ], + "active_user_pressure": [ + { + "tenant_class": "hot", + "active_user_keys": 349, + "events_with_user": 349, + "avg_events_per_user_day_key": 1, + "max_events_per_user_day_key": 1 + }, + { + "tenant_class": "quiet", + "active_user_keys": 49, + "events_with_user": 49, + "avg_events_per_user_day_key": 1, + "max_events_per_user_day_key": 1 + }, + { + "tenant_class": "medium", + "active_user_keys": 26, + 
"events_with_user": 26, + "avg_events_per_user_day_key": 1, + "max_events_per_user_day_key": 1 + } + ], + "burst_seconds": [ + { + "second": "2026-06-16T20:45:41.000Z", + "events": 20 + }, + { + "second": "2026-06-16T20:45:42.000Z", + "events": 20 + }, + { + "second": "2026-06-16T20:45:43.000Z", + "events": 20 + }, + { + "second": "2026-06-16T20:45:44.000Z", + "events": 20 + }, + { + "second": "2026-06-16T20:45:45.000Z", + "events": 20 + }, + { + "second": "2026-06-16T20:45:46.000Z", + "events": 20 + }, + { + "second": "2026-06-16T20:45:47.000Z", + "events": 20 + }, + { + "second": "2026-06-16T20:45:48.000Z", + "events": 20 + }, + { + "second": "2026-06-16T20:45:49.000Z", + "events": 20 + }, + { + "second": "2026-06-16T20:45:50.000Z", + "events": 20 + } + ], + "partitions": [ + { + "partition": "events_2026_06", + "events": 424 + } + ], + "reconciliation": { + "status": "complete", + "k6_summary_path": "docs/benchmarks/evidence/hot-tenant-2026-06-16-clean-full-benchmark.json", + "k6_request_count": 424, + "k6_http_req_failed_rate": 0, + "persisted_events": 424, + "kafka_lag_after": { + "captured_at": "2026-06-16T20:46:03.510Z", + "partitions": [ + { + "topic": "events-raw", + "partition": 0, + "current_offset": 2816, + "log_end_offset": 2816, + "lag": 0 + }, + { + "topic": "events-raw", + "partition": 1, + "current_offset": 2814, + "log_end_offset": 2814, + "lag": 0 + }, + { + "topic": "events-raw", + "partition": 2, + "current_offset": 2794, + "log_end_offset": 2794, + "lag": 0 + } + ], + "total_lag": 0 + }, + "notes": [] + }, + "postgres_snapshot": { + "locks": { + "locks_total": 2, + "waiting_locks": 0, + "waiting_pids": 0 + }, + "activity": { + "connections": 2, + "active_connections": 1, + "waiting_connections": 1, + "lock_wait_connections": 0 + }, + "database": { + "xact_commit": "9022", + "xact_rollback": "0", + "deadlocks": "0", + "conflicts": "0", + "temp_files": "0", + "temp_bytes": "0", + "blk_read_time": 0, + "blk_write_time": 0 + }, + "table_stats": [ 
+ { + "relname": "daily_active_users", + "n_tup_ins": "8418", + "n_tup_upd": "0", + "n_tup_del": "0", + "n_dead_tup": "0", + "seq_scan": "3", + "idx_scan": "8418" + }, + { + "relname": "daily_aggregates", + "n_tup_ins": "600", + "n_tup_upd": "24654", + "n_tup_del": "0", + "n_dead_tup": "88", + "seq_scan": "8", + "idx_scan": "25254" + }, + { + "relname": "event_dedup_keys", + "n_tup_ins": "8418", + "n_tup_upd": "0", + "n_tup_del": "0", + "n_dead_tup": "0", + "seq_scan": "2", + "idx_scan": "8418" + }, + { + "relname": "events", + "n_tup_ins": "0", + "n_tup_upd": "0", + "n_tup_del": "0", + "n_dead_tup": "0", + "seq_scan": "0", + "idx_scan": "0" + } + ], + "note": "PostgreSQL lock/activity/stat snapshots are captured after the run, not sampled continuously during load." + }, + "representative_query_plans": [ + { + "label": "hot-raw-event-count", + "planning_ms": 0.49, + "execution_ms": 0.169, + "plan_summary": { + "node_types": { + "Aggregate": 1, + "Index Only Scan": 1 + }, + "relations": [ + "events_2026_06" + ], + "actual_rows_sum": 350, + "plan_rows_sum": 2, + "shared_hit_blocks": 54, + "shared_read_blocks": 0 + } + }, + { + "label": "hot-aggregate-total-events", + "planning_ms": 0.862, + "execution_ms": 0.086, + "plan_summary": { + "node_types": { + "Aggregate": 1, + "Index Scan": 1 + }, + "relations": [ + "daily_aggregates" + ], + "actual_rows_sum": 2, + "plan_rows_sum": 2, + "shared_hit_blocks": 6, + "shared_read_blocks": 0 + } + }, + { + "label": "quiet-raw-event-count", + "planning_ms": 0.175, + "execution_ms": 0.056, + "plan_summary": { + "node_types": { + "Aggregate": 1, + "Index Only Scan": 1 + }, + "relations": [ + "events_2026_06" + ], + "actual_rows_sum": 5, + "plan_rows_sum": 2, + "shared_hit_blocks": 14, + "shared_read_blocks": 0 + } + }, + { + "label": "quiet-aggregate-total-events", + "planning_ms": 0.057, + "execution_ms": 0.08, + "plan_summary": { + "node_types": { + "Aggregate": 1, + "Index Scan": 1 + }, + "relations": [ + "daily_aggregates" + ], 
+ "actual_rows_sum": 2, + "plan_rows_sum": 2, + "shared_hit_blocks": 10, + "shared_read_blocks": 0 + } + } + ], + "safe_claim_note": "Local hot-tenant PostgreSQL evidence for one run_id. Aggregate pressure is derived from raw events per aggregate key, not from a Postgres ON CONFLICT counter.", + "unsafe_claim_note": "Do not claim production lock behavior or long-duration contention from this after-run snapshot." +} diff --git a/docs/benchmarks/evidence/hot-tenant-db-2026-06-16-final-benchmark-smoke.json b/docs/benchmarks/evidence/hot-tenant-db-2026-06-16-final-benchmark-smoke.json new file mode 100644 index 0000000..728d405 --- /dev/null +++ b/docs/benchmarks/evidence/hot-tenant-db-2026-06-16-final-benchmark-smoke.json @@ -0,0 +1,541 @@ +{ + "run_id": "2026-06-16-final-benchmark-smoke", + "captured_at": "2026-06-16T19:58:17.861Z", + "manifest": { + "path": "tmp/benchmark-tenants-final-smoke.json", + "counts": { + "total": 100, + "hot": 1, + "medium": 10, + "quiet": 89 + } + }, + "total_events": 249, + "tenant_distribution": [ + { + "tenant_class": "hot", + "events": 201, + "orgs": 1, + "projects": 1, + "synthetic_tenants": 1, + "first_event_timestamp": "2026-06-16T19:57:54.588Z", + "last_event_timestamp": "2026-06-16T19:58:14.203Z", + "first_received_at": "2026-06-16T19:57:54.600Z", + "last_received_at": "2026-06-16T19:58:14.210Z" + }, + { + "tenant_class": "quiet", + "events": 40, + "orgs": 34, + "projects": 34, + "synthetic_tenants": 34, + "first_event_timestamp": "2026-06-16T19:57:54.768Z", + "last_event_timestamp": "2026-06-16T19:58:12.739Z", + "first_received_at": "2026-06-16T19:57:54.779Z", + "last_received_at": "2026-06-16T19:58:12.746Z" + }, + { + "tenant_class": "medium", + "events": 8, + "orgs": 5, + "projects": 5, + "synthetic_tenants": 5, + "first_event_timestamp": "2026-06-16T19:57:56.283Z", + "last_event_timestamp": "2026-06-16T19:58:11.969Z", + "first_received_at": "2026-06-16T19:57:56.293Z", + "last_received_at": "2026-06-16T19:58:11.976Z" + } + ], 
+ "top_tenants": [ + { + "tenant_class": "hot", + "tenant_id": "tenant_001", + "org_id": "00000000-0000-4000-8000-0000000f4241", + "project_id": "00000000-0000-4000-8000-0000001e8481", + "events": 201, + "users": 201, + "event_names": 2, + "first_event_timestamp": "2026-06-16T19:57:54.588Z", + "last_event_timestamp": "2026-06-16T19:58:14.203Z" + }, + { + "tenant_class": "medium", + "tenant_id": "tenant_009", + "org_id": "00000000-0000-4000-8000-0000000f4249", + "project_id": "00000000-0000-4000-8000-0000001e8489", + "events": 3, + "users": 3, + "event_names": 1, + "first_event_timestamp": "2026-06-16T19:57:57.748Z", + "last_event_timestamp": "2026-06-16T19:58:03.929Z" + }, + { + "tenant_class": "quiet", + "tenant_id": "tenant_026", + "org_id": "00000000-0000-4000-8000-0000000f425a", + "project_id": "00000000-0000-4000-8000-0000001e849a", + "events": 2, + "users": 2, + "event_names": 1, + "first_event_timestamp": "2026-06-16T19:57:56.505Z", + "last_event_timestamp": "2026-06-16T19:58:08.927Z" + }, + { + "tenant_class": "quiet", + "tenant_id": "tenant_087", + "org_id": "00000000-0000-4000-8000-0000000f4297", + "project_id": "00000000-0000-4000-8000-0000001e84d7", + "events": 2, + "users": 2, + "event_names": 2, + "first_event_timestamp": "2026-06-16T19:58:01.128Z", + "last_event_timestamp": "2026-06-16T19:58:07.060Z" + }, + { + "tenant_class": "quiet", + "tenant_id": "tenant_091", + "org_id": "00000000-0000-4000-8000-0000000f429b", + "project_id": "00000000-0000-4000-8000-0000001e84db", + "events": 2, + "users": 2, + "event_names": 1, + "first_event_timestamp": "2026-06-16T19:58:01.395Z", + "last_event_timestamp": "2026-06-16T19:58:07.326Z" + }, + { + "tenant_class": "quiet", + "tenant_id": "tenant_053", + "org_id": "00000000-0000-4000-8000-0000000f4275", + "project_id": "00000000-0000-4000-8000-0000001e84b5", + "events": 2, + "users": 2, + "event_names": 1, + "first_event_timestamp": "2026-06-16T19:57:58.842Z", + "last_event_timestamp": "2026-06-16T19:58:10.872Z" + 
}, + { + "tenant_class": "quiet", + "tenant_id": "tenant_054", + "org_id": "00000000-0000-4000-8000-0000000f4276", + "project_id": "00000000-0000-4000-8000-0000001e84b6", + "events": 2, + "users": 2, + "event_names": 2, + "first_event_timestamp": "2026-06-16T19:57:58.914Z", + "last_event_timestamp": "2026-06-16T19:58:10.956Z" + }, + { + "tenant_class": "quiet", + "tenant_id": "tenant_097", + "org_id": "00000000-0000-4000-8000-0000000f42a1", + "project_id": "00000000-0000-4000-8000-0000001e84e1", + "events": 2, + "users": 2, + "event_names": 2, + "first_event_timestamp": "2026-06-16T19:58:01.795Z", + "last_event_timestamp": "2026-06-16T19:58:07.727Z" + }, + { + "tenant_class": "medium", + "tenant_id": "tenant_008", + "org_id": "00000000-0000-4000-8000-0000000f4248", + "project_id": "00000000-0000-4000-8000-0000001e8488", + "events": 2, + "users": 2, + "event_names": 1, + "first_event_timestamp": "2026-06-16T19:58:01.195Z", + "last_event_timestamp": "2026-06-16T19:58:05.192Z" + }, + { + "tenant_class": "quiet", + "tenant_id": "tenant_020", + "org_id": "00000000-0000-4000-8000-0000000f4254", + "project_id": "00000000-0000-4000-8000-0000001e8494", + "events": 1, + "users": 1, + "event_names": 1, + "first_event_timestamp": "2026-06-16T19:58:02.596Z", + "last_event_timestamp": "2026-06-16T19:58:02.596Z" + }, + { + "tenant_class": "quiet", + "tenant_id": "tenant_021", + "org_id": "00000000-0000-4000-8000-0000000f4255", + "project_id": "00000000-0000-4000-8000-0000001e8495", + "events": 1, + "users": 1, + "event_names": 1, + "first_event_timestamp": "2026-06-16T19:57:55.926Z", + "last_event_timestamp": "2026-06-16T19:57:55.926Z" + }, + { + "tenant_class": "quiet", + "tenant_id": "tenant_024", + "org_id": "00000000-0000-4000-8000-0000000f4258", + "project_id": "00000000-0000-4000-8000-0000001e8498", + "events": 1, + "users": 1, + "event_names": 1, + "first_event_timestamp": "2026-06-16T19:58:02.861Z", + "last_event_timestamp": "2026-06-16T19:58:02.861Z" + }, + { + 
"tenant_class": "quiet", + "tenant_id": "tenant_033", + "org_id": "00000000-0000-4000-8000-0000000f4261", + "project_id": "00000000-0000-4000-8000-0000001e84a1", + "events": 1, + "users": 1, + "event_names": 1, + "first_event_timestamp": "2026-06-16T19:58:09.393Z", + "last_event_timestamp": "2026-06-16T19:58:09.393Z" + }, + { + "tenant_class": "quiet", + "tenant_id": "tenant_034", + "org_id": "00000000-0000-4000-8000-0000000f4262", + "project_id": "00000000-0000-4000-8000-0000001e84a2", + "events": 1, + "users": 1, + "event_names": 1, + "first_event_timestamp": "2026-06-16T19:58:03.529Z", + "last_event_timestamp": "2026-06-16T19:58:03.529Z" + }, + { + "tenant_class": "quiet", + "tenant_id": "tenant_035", + "org_id": "00000000-0000-4000-8000-0000000f4263", + "project_id": "00000000-0000-4000-8000-0000001e84a3", + "events": 1, + "users": 1, + "event_names": 1, + "first_event_timestamp": "2026-06-16T19:58:09.528Z", + "last_event_timestamp": "2026-06-16T19:58:09.528Z" + }, + { + "tenant_class": "quiet", + "tenant_id": "tenant_036", + "org_id": "00000000-0000-4000-8000-0000000f4264", + "project_id": "00000000-0000-4000-8000-0000001e84a4", + "events": 1, + "users": 1, + "event_names": 1, + "first_event_timestamp": "2026-06-16T19:58:03.661Z", + "last_event_timestamp": "2026-06-16T19:58:03.661Z" + }, + { + "tenant_class": "quiet", + "tenant_id": "tenant_041", + "org_id": "00000000-0000-4000-8000-0000000f4269", + "project_id": "00000000-0000-4000-8000-0000001e84a9", + "events": 1, + "users": 1, + "event_names": 1, + "first_event_timestamp": "2026-06-16T19:58:09.950Z", + "last_event_timestamp": "2026-06-16T19:58:09.950Z" + }, + { + "tenant_class": "quiet", + "tenant_id": "tenant_043", + "org_id": "00000000-0000-4000-8000-0000000f426b", + "project_id": "00000000-0000-4000-8000-0000001e84ab", + "events": 1, + "users": 1, + "event_names": 1, + "first_event_timestamp": "2026-06-16T19:58:04.127Z", + "last_event_timestamp": "2026-06-16T19:58:04.127Z" + }, + { + "tenant_class": 
"quiet", + "tenant_id": "tenant_044", + "org_id": "00000000-0000-4000-8000-0000000f426c", + "project_id": "00000000-0000-4000-8000-0000001e84ac", + "events": 1, + "users": 1, + "event_names": 1, + "first_event_timestamp": "2026-06-16T19:58:10.167Z", + "last_event_timestamp": "2026-06-16T19:58:10.167Z" + }, + { + "tenant_class": "quiet", + "tenant_id": "tenant_050", + "org_id": "00000000-0000-4000-8000-0000000f4272", + "project_id": "00000000-0000-4000-8000-0000001e84b2", + "events": 1, + "users": 1, + "event_names": 1, + "first_event_timestamp": "2026-06-16T19:57:58.624Z", + "last_event_timestamp": "2026-06-16T19:57:58.624Z" + } + ], + "aggregate_pressure": [ + { + "tenant_class": "hot", + "event_count_aggregate_keys": 2, + "events": 201, + "avg_events_per_event_count_key": 100.5, + "max_events_per_event_count_key": 157, + "p95_events_per_event_count_key": 151.35 + }, + { + "tenant_class": "quiet", + "event_count_aggregate_keys": 37, + "events": 40, + "avg_events_per_event_count_key": 1.08, + "max_events_per_event_count_key": 2, + "p95_events_per_event_count_key": 2 + }, + { + "tenant_class": "medium", + "event_count_aggregate_keys": 5, + "events": 8, + "avg_events_per_event_count_key": 1.6, + "max_events_per_event_count_key": 3, + "p95_events_per_event_count_key": 2.8 + } + ], + "active_user_pressure": [ + { + "tenant_class": "hot", + "active_user_keys": 201, + "events_with_user": 201, + "avg_events_per_user_day_key": 1, + "max_events_per_user_day_key": 1 + }, + { + "tenant_class": "quiet", + "active_user_keys": 40, + "events_with_user": 40, + "avg_events_per_user_day_key": 1, + "max_events_per_user_day_key": 1 + }, + { + "tenant_class": "medium", + "active_user_keys": 8, + "events_with_user": 8, + "avg_events_per_user_day_key": 1, + "max_events_per_user_day_key": 1 + } + ], + "burst_seconds": [ + { + "second": "2026-06-16T19:58:00.000Z", + "events": 16 + }, + { + "second": "2026-06-16T19:58:03.000Z", + "events": 16 + }, + { + "second": "2026-06-16T19:58:02.000Z", 
+ "events": 15 + }, + { + "second": "2026-06-16T19:58:04.000Z", + "events": 15 + }, + { + "second": "2026-06-16T19:58:05.000Z", + "events": 15 + }, + { + "second": "2026-06-16T19:58:06.000Z", + "events": 15 + }, + { + "second": "2026-06-16T19:58:07.000Z", + "events": 15 + }, + { + "second": "2026-06-16T19:58:08.000Z", + "events": 15 + }, + { + "second": "2026-06-16T19:57:58.000Z", + "events": 14 + }, + { + "second": "2026-06-16T19:57:59.000Z", + "events": 14 + } + ], + "partitions": [ + { + "partition": "events_2026_06", + "events": 249 + } + ], + "reconciliation": { + "status": "complete", + "k6_summary_path": "docs/benchmarks/evidence/hot-tenant-2026-06-16-final-benchmark-smoke.json", + "k6_request_count": 249, + "k6_http_req_failed_rate": 0, + "persisted_events": 249, + "kafka_lag_after": { + "captured_at": "2026-06-16T19:58:17.850Z", + "partitions": [ + { + "topic": "events-raw", + "partition": 0, + "current_offset": 206025, + "log_end_offset": 206025, + "lag": 0 + }, + { + "topic": "events-raw", + "partition": 1, + "current_offset": 206955, + "log_end_offset": 206955, + "lag": 0 + }, + { + "topic": "events-raw", + "partition": 2, + "current_offset": 206077, + "log_end_offset": 206077, + "lag": 0 + } + ], + "total_lag": 0 + }, + "notes": [] + }, + "postgres_snapshot": { + "locks": { + "locks_total": 2, + "waiting_locks": 0, + "waiting_pids": 0 + }, + "activity": { + "connections": 2, + "active_connections": 1, + "waiting_connections": 1, + "lock_wait_connections": 0 + }, + "database": { + "xact_commit": "628768", + "xact_rollback": "27", + "deadlocks": "0", + "conflicts": "0", + "temp_files": "224", + "temp_bytes": "644382720", + "blk_read_time": 0, + "blk_write_time": 0 + }, + "table_stats": [ + { + "relname": "daily_active_users", + "n_tup_ins": "35181", + "n_tup_upd": "0", + "n_tup_del": "20659", + "n_dead_tup": "0", + "seq_scan": "3", + "idx_scan": "619169" + }, + { + "relname": "daily_aggregates", + "n_tup_ins": "761", + "n_tup_upd": "1272612", + 
"n_tup_del": "147", + "n_dead_tup": "45", + "seq_scan": "42", + "idx_scan": "1274843" + }, + { + "relname": "event_dedup_keys", + "n_tup_ins": "619075", + "n_tup_upd": "0", + "n_tup_del": "22", + "n_dead_tup": "22", + "seq_scan": "32", + "idx_scan": "619161" + }, + { + "relname": "events", + "n_tup_ins": "0", + "n_tup_upd": "0", + "n_tup_del": "0", + "n_dead_tup": "0", + "seq_scan": "0", + "idx_scan": "0" + } + ], + "note": "PostgreSQL lock/activity/stat snapshots are captured after the run, not sampled continuously during load." + }, + "representative_query_plans": [ + { + "label": "hot-raw-event-count", + "planning_ms": 0.708, + "execution_ms": 0.581, + "plan_summary": { + "node_types": { + "Aggregate": 1, + "Index Scan": 1 + }, + "relations": [ + "events_2026_06" + ], + "actual_rows_sum": 202, + "plan_rows_sum": 2, + "shared_hit_blocks": 116, + "shared_read_blocks": 0 + } + }, + { + "label": "hot-aggregate-total-events", + "planning_ms": 1.102, + "execution_ms": 0.08, + "plan_summary": { + "node_types": { + "Aggregate": 1, + "Index Scan": 1 + }, + "relations": [ + "daily_aggregates" + ], + "actual_rows_sum": 2, + "plan_rows_sum": 2, + "shared_hit_blocks": 8, + "shared_read_blocks": 0 + } + }, + { + "label": "quiet-raw-event-count", + "planning_ms": 0.326, + "execution_ms": 0.121, + "plan_summary": { + "node_types": { + "Aggregate": 1, + "Index Scan": 1 + }, + "relations": [ + "events_2026_06" + ], + "actual_rows_sum": 3, + "plan_rows_sum": 2, + "shared_hit_blocks": 40, + "shared_read_blocks": 0 + } + }, + { + "label": "quiet-aggregate-total-events", + "planning_ms": 0.093, + "execution_ms": 0.066, + "plan_summary": { + "node_types": { + "Aggregate": 1, + "Index Scan": 1 + }, + "relations": [ + "daily_aggregates" + ], + "actual_rows_sum": 2, + "plan_rows_sum": 2, + "shared_hit_blocks": 10, + "shared_read_blocks": 0 + } + } + ], + "safe_claim_note": "Local hot-tenant PostgreSQL evidence for one run_id. 
Aggregate pressure is derived from raw events per aggregate key, not from a Postgres ON CONFLICT counter.", + "unsafe_claim_note": "Do not claim production lock behavior or long-duration contention from this after-run snapshot." +} diff --git a/docs/benchmarks/evidence/hot-tenant-db-2026-06-16-hot-db-smoke.json b/docs/benchmarks/evidence/hot-tenant-db-2026-06-16-hot-db-smoke.json new file mode 100644 index 0000000..2da3588 --- /dev/null +++ b/docs/benchmarks/evidence/hot-tenant-db-2026-06-16-hot-db-smoke.json @@ -0,0 +1,541 @@ +{ + "run_id": "2026-06-16-hot-db-smoke", + "captured_at": "2026-06-16T19:49:59.974Z", + "manifest": { + "path": "tmp/benchmark-tenants.json", + "counts": { + "total": 100, + "hot": 1, + "medium": 10, + "quiet": 89 + } + }, + "total_events": 324, + "tenant_distribution": [ + { + "tenant_class": "hot", + "events": 243, + "orgs": 1, + "projects": 1, + "synthetic_tenants": 1, + "first_event_timestamp": "2026-06-16T19:39:21.484Z", + "last_event_timestamp": "2026-06-16T19:39:40.933Z", + "first_received_at": "2026-06-16T19:39:21.491Z", + "last_received_at": "2026-06-16T19:39:40.935Z" + }, + { + "tenant_class": "quiet", + "events": 66, + "orgs": 52, + "projects": 52, + "synthetic_tenants": 52, + "first_event_timestamp": "2026-06-16T19:39:21.316Z", + "last_event_timestamp": "2026-06-16T19:39:39.609Z", + "first_received_at": "2026-06-16T19:39:21.345Z", + "last_received_at": "2026-06-16T19:39:39.620Z" + }, + { + "tenant_class": "medium", + "events": 15, + "orgs": 8, + "projects": 8, + "synthetic_tenants": 8, + "first_event_timestamp": "2026-06-16T19:39:26.000Z", + "last_event_timestamp": "2026-06-16T19:39:40.185Z", + "first_received_at": "2026-06-16T19:39:26.007Z", + "last_received_at": "2026-06-16T19:39:40.188Z" + } + ], + "top_tenants": [ + { + "tenant_class": "hot", + "tenant_id": "tenant_001", + "org_id": "00000000-0000-4000-8000-0000000f4241", + "project_id": "00000000-0000-4000-8000-0000001e8481", + "events": 243, + "users": 243, + "event_names": 
2, + "first_event_timestamp": "2026-06-16T19:39:21.484Z", + "last_event_timestamp": "2026-06-16T19:39:40.933Z" + }, + { + "tenant_class": "medium", + "tenant_id": "tenant_010", + "org_id": "00000000-0000-4000-8000-0000000f424a", + "project_id": "00000000-0000-4000-8000-0000001e848a", + "events": 4, + "users": 4, + "event_names": 1, + "first_event_timestamp": "2026-06-16T19:39:27.447Z", + "last_event_timestamp": "2026-06-16T19:39:40.185Z" + }, + { + "tenant_class": "quiet", + "tenant_id": "tenant_026", + "org_id": "00000000-0000-4000-8000-0000000f425a", + "project_id": "00000000-0000-4000-8000-0000001e849a", + "events": 3, + "users": 3, + "event_names": 1, + "first_event_timestamp": "2026-06-16T19:39:28.198Z", + "last_event_timestamp": "2026-06-16T19:39:37.181Z" + }, + { + "tenant_class": "quiet", + "tenant_id": "tenant_036", + "org_id": "00000000-0000-4000-8000-0000000f4264", + "project_id": "00000000-0000-4000-8000-0000001e84a4", + "events": 3, + "users": 3, + "event_names": 1, + "first_event_timestamp": "2026-06-16T19:39:23.865Z", + "last_event_timestamp": "2026-06-16T19:39:33.148Z" + }, + { + "tenant_class": "quiet", + "tenant_id": "tenant_082", + "org_id": "00000000-0000-4000-8000-0000000f4292", + "project_id": "00000000-0000-4000-8000-0000001e84d2", + "events": 2, + "users": 2, + "event_names": 2, + "first_event_timestamp": "2026-06-16T19:39:26.548Z", + "last_event_timestamp": "2026-06-16T19:39:30.998Z" + }, + { + "tenant_class": "quiet", + "tenant_id": "tenant_090", + "org_id": "00000000-0000-4000-8000-0000000f429a", + "project_id": "00000000-0000-4000-8000-0000001e84da", + "events": 2, + "users": 2, + "event_names": 1, + "first_event_timestamp": "2026-06-16T19:39:31.397Z", + "last_event_timestamp": "2026-06-16T19:39:35.847Z" + }, + { + "tenant_class": "medium", + "tenant_id": "tenant_005", + "org_id": "00000000-0000-4000-8000-0000000f4245", + "project_id": "00000000-0000-4000-8000-0000001e8485", + "events": 2, + "users": 2, + "event_names": 1, + 
"first_event_timestamp": "2026-06-16T19:39:26.197Z", + "last_event_timestamp": "2026-06-16T19:39:33.699Z" + }, + { + "tenant_class": "quiet", + "tenant_id": "tenant_035", + "org_id": "00000000-0000-4000-8000-0000000f4263", + "project_id": "00000000-0000-4000-8000-0000001e84a3", + "events": 2, + "users": 2, + "event_names": 1, + "first_event_timestamp": "2026-06-16T19:39:23.791Z", + "last_event_timestamp": "2026-06-16T19:39:33.097Z" + }, + { + "tenant_class": "quiet", + "tenant_id": "tenant_046", + "org_id": "00000000-0000-4000-8000-0000000f426e", + "project_id": "00000000-0000-4000-8000-0000001e84ae", + "events": 2, + "users": 2, + "event_names": 1, + "first_event_timestamp": "2026-06-16T19:39:24.565Z", + "last_event_timestamp": "2026-06-16T19:39:29.197Z" + }, + { + "tenant_class": "quiet", + "tenant_id": "tenant_028", + "org_id": "00000000-0000-4000-8000-0000000f425c", + "project_id": "00000000-0000-4000-8000-0000001e849c", + "events": 2, + "users": 2, + "event_names": 1, + "first_event_timestamp": "2026-06-16T19:39:23.213Z", + "last_event_timestamp": "2026-06-16T19:39:37.302Z" + }, + { + "tenant_class": "medium", + "tenant_id": "tenant_002", + "org_id": "00000000-0000-4000-8000-0000000f4242", + "project_id": "00000000-0000-4000-8000-0000001e8482", + "events": 2, + "users": 2, + "event_names": 1, + "first_event_timestamp": "2026-06-16T19:39:27.550Z", + "last_event_timestamp": "2026-06-16T19:39:34.050Z" + }, + { + "tenant_class": "quiet", + "tenant_id": "tenant_016", + "org_id": "00000000-0000-4000-8000-0000000f4250", + "project_id": "00000000-0000-4000-8000-0000001e8490", + "events": 2, + "users": 2, + "event_names": 1, + "first_event_timestamp": "2026-06-16T19:39:21.929Z", + "last_event_timestamp": "2026-06-16T19:39:32.147Z" + }, + { + "tenant_class": "medium", + "tenant_id": "tenant_009", + "org_id": "00000000-0000-4000-8000-0000000f4249", + "project_id": "00000000-0000-4000-8000-0000001e8489", + "events": 2, + "users": 2, + "event_names": 1, + 
"first_event_timestamp": "2026-06-16T19:39:29.898Z", + "last_event_timestamp": "2026-06-16T19:39:31.897Z" + }, + { + "tenant_class": "quiet", + "tenant_id": "tenant_089", + "org_id": "00000000-0000-4000-8000-0000000f4299", + "project_id": "00000000-0000-4000-8000-0000001e84d9", + "events": 2, + "users": 2, + "event_names": 2, + "first_event_timestamp": "2026-06-16T19:39:26.897Z", + "last_event_timestamp": "2026-06-16T19:39:35.797Z" + }, + { + "tenant_class": "quiet", + "tenant_id": "tenant_024", + "org_id": "00000000-0000-4000-8000-0000000f4258", + "project_id": "00000000-0000-4000-8000-0000001e8498", + "events": 2, + "users": 2, + "event_names": 2, + "first_event_timestamp": "2026-06-16T19:39:32.549Z", + "last_event_timestamp": "2026-06-16T19:39:37.065Z" + }, + { + "tenant_class": "medium", + "tenant_id": "tenant_011", + "org_id": "00000000-0000-4000-8000-0000000f424b", + "project_id": "00000000-0000-4000-8000-0000001e848b", + "events": 2, + "users": 2, + "event_names": 1, + "first_event_timestamp": "2026-06-16T19:39:26.000Z", + "last_event_timestamp": "2026-06-16T19:39:34.997Z" + }, + { + "tenant_class": "quiet", + "tenant_id": "tenant_034", + "org_id": "00000000-0000-4000-8000-0000000f4262", + "project_id": "00000000-0000-4000-8000-0000001e84a2", + "events": 2, + "users": 2, + "event_names": 1, + "first_event_timestamp": "2026-06-16T19:39:23.712Z", + "last_event_timestamp": "2026-06-16T19:39:28.597Z" + }, + { + "tenant_class": "quiet", + "tenant_id": "tenant_066", + "org_id": "00000000-0000-4000-8000-0000000f4282", + "project_id": "00000000-0000-4000-8000-0000001e84c2", + "events": 2, + "users": 2, + "event_names": 1, + "first_event_timestamp": "2026-06-16T19:39:30.197Z", + "last_event_timestamp": "2026-06-16T19:39:34.649Z" + }, + { + "tenant_class": "quiet", + "tenant_id": "tenant_020", + "org_id": "00000000-0000-4000-8000-0000000f4254", + "project_id": "00000000-0000-4000-8000-0000001e8494", + "events": 1, + "users": 1, + "event_names": 1, + 
"first_event_timestamp": "2026-06-16T19:39:32.348Z", + "last_event_timestamp": "2026-06-16T19:39:32.348Z" + }, + { + "tenant_class": "quiet", + "tenant_id": "tenant_018", + "org_id": "00000000-0000-4000-8000-0000000f4252", + "project_id": "00000000-0000-4000-8000-0000001e8492", + "events": 1, + "users": 1, + "event_names": 1, + "first_event_timestamp": "2026-06-16T19:39:36.726Z", + "last_event_timestamp": "2026-06-16T19:39:36.726Z" + } + ], + "aggregate_pressure": [ + { + "tenant_class": "hot", + "event_count_aggregate_keys": 2, + "events": 243, + "avg_events_per_event_count_key": 121.5, + "max_events_per_event_count_key": 188, + "p95_events_per_event_count_key": 181.35 + }, + { + "tenant_class": "quiet", + "event_count_aggregate_keys": 55, + "events": 66, + "avg_events_per_event_count_key": 1.2, + "max_events_per_event_count_key": 3, + "p95_events_per_event_count_key": 2 + }, + { + "tenant_class": "medium", + "event_count_aggregate_keys": 8, + "events": 15, + "avg_events_per_event_count_key": 1.88, + "max_events_per_event_count_key": 4, + "p95_events_per_event_count_key": 3.2999999999999994 + } + ], + "active_user_pressure": [ + { + "tenant_class": "hot", + "active_user_keys": 243, + "events_with_user": 243, + "avg_events_per_user_day_key": 1, + "max_events_per_user_day_key": 1 + }, + { + "tenant_class": "quiet", + "active_user_keys": 66, + "events_with_user": 66, + "avg_events_per_user_day_key": 1, + "max_events_per_user_day_key": 1 + }, + { + "tenant_class": "medium", + "active_user_keys": 15, + "events_with_user": 15, + "avg_events_per_user_day_key": 1, + "max_events_per_user_day_key": 1 + } + ], + "burst_seconds": [ + { + "second": "2026-06-16T19:39:26.000Z", + "events": 20 + }, + { + "second": "2026-06-16T19:39:27.000Z", + "events": 20 + }, + { + "second": "2026-06-16T19:39:28.000Z", + "events": 20 + }, + { + "second": "2026-06-16T19:39:29.000Z", + "events": 20 + }, + { + "second": "2026-06-16T19:39:30.000Z", + "events": 20 + }, + { + "second": 
"2026-06-16T19:39:31.000Z", + "events": 20 + }, + { + "second": "2026-06-16T19:39:32.000Z", + "events": 20 + }, + { + "second": "2026-06-16T19:39:33.000Z", + "events": 20 + }, + { + "second": "2026-06-16T19:39:34.000Z", + "events": 20 + }, + { + "second": "2026-06-16T19:39:35.000Z", + "events": 20 + } + ], + "partitions": [ + { + "partition": "events_2026_06", + "events": 324 + } + ], + "reconciliation": { + "status": "complete", + "k6_summary_path": "docs/benchmarks/evidence/hot-tenant-2026-06-16-hot-db-smoke.json", + "k6_request_count": 324, + "k6_http_req_failed_rate": 0, + "persisted_events": 324, + "kafka_lag_after": { + "captured_at": "2026-06-16T19:49:59.961Z", + "partitions": [ + { + "topic": "events-raw", + "partition": 0, + "current_offset": 203883, + "log_end_offset": 203883, + "lag": 0 + }, + { + "topic": "events-raw", + "partition": 1, + "current_offset": 204742, + "log_end_offset": 204742, + "lag": 0 + }, + { + "topic": "events-raw", + "partition": 2, + "current_offset": 203896, + "log_end_offset": 203896, + "lag": 0 + } + ], + "total_lag": 0 + }, + "notes": [] + }, + "postgres_snapshot": { + "locks": { + "locks_total": 2, + "waiting_locks": 0, + "waiting_pids": 0 + }, + "activity": { + "connections": 1, + "active_connections": 1, + "waiting_connections": 0, + "lock_wait_connections": 0 + }, + "database": { + "xact_commit": "621463", + "xact_rollback": "17", + "deadlocks": "0", + "conflicts": "0", + "temp_files": "224", + "temp_bytes": "644382720", + "blk_read_time": 0, + "blk_write_time": 0 + }, + "table_stats": [ + { + "relname": "daily_active_users", + "n_tup_ins": "31112", + "n_tup_upd": "0", + "n_tup_del": "20640", + "n_dead_tup": "366", + "seq_scan": "3", + "idx_scan": "612617" + }, + { + "relname": "daily_aggregates", + "n_tup_ins": "377", + "n_tup_upd": "1255849", + "n_tup_del": "120", + "n_dead_tup": "0", + "seq_scan": "32", + "idx_scan": "1257669" + }, + { + "relname": "event_dedup_keys", + "n_tup_ins": "612539", + "n_tup_upd": "0", + 
"n_tup_del": "18", + "n_dead_tup": "18", + "seq_scan": "2", + "idx_scan": "612603" + }, + { + "relname": "events", + "n_tup_ins": "0", + "n_tup_upd": "0", + "n_tup_del": "0", + "n_dead_tup": "0", + "seq_scan": "0", + "idx_scan": "0" + } + ], + "note": "PostgreSQL lock/activity/stat snapshots are captured after the run, not sampled continuously during load." + }, + "representative_query_plans": [ + { + "label": "hot-raw-event-count", + "planning_ms": 0.944, + "execution_ms": 0.294, + "plan_summary": { + "node_types": { + "Aggregate": 1, + "Index Scan": 1 + }, + "relations": [ + "events_2026_06" + ], + "actual_rows_sum": 244, + "plan_rows_sum": 2, + "shared_hit_blocks": 74, + "shared_read_blocks": 0 + } + }, + { + "label": "hot-aggregate-total-events", + "planning_ms": 1.13, + "execution_ms": 0.197, + "plan_summary": { + "node_types": { + "Aggregate": 1, + "Index Scan": 1 + }, + "relations": [ + "daily_aggregates" + ], + "actual_rows_sum": 2, + "plan_rows_sum": 2, + "shared_hit_blocks": 6, + "shared_read_blocks": 0 + } + }, + { + "label": "quiet-raw-event-count", + "planning_ms": 0.407, + "execution_ms": 0.067, + "plan_summary": { + "node_types": { + "Aggregate": 1, + "Index Scan": 1 + }, + "relations": [ + "events_2026_06" + ], + "actual_rows_sum": 4, + "plan_rows_sum": 2, + "shared_hit_blocks": 16, + "shared_read_blocks": 0 + } + }, + { + "label": "quiet-aggregate-total-events", + "planning_ms": 0.125, + "execution_ms": 0.042, + "plan_summary": { + "node_types": { + "Aggregate": 1, + "Index Scan": 1 + }, + "relations": [ + "daily_aggregates" + ], + "actual_rows_sum": 2, + "plan_rows_sum": 2, + "shared_hit_blocks": 4, + "shared_read_blocks": 0 + } + } + ], + "safe_claim_note": "Local hot-tenant PostgreSQL evidence for one run_id. Aggregate pressure is derived from raw events per aggregate key, not from a Postgres ON CONFLICT counter.", + "unsafe_claim_note": "Do not claim production lock behavior or long-duration contention from this after-run snapshot." 
+} diff --git a/docs/benchmarks/evidence/hot-tenant-db-2026-06-16-post-docs-benchmark.json b/docs/benchmarks/evidence/hot-tenant-db-2026-06-16-post-docs-benchmark.json new file mode 100644 index 0000000..19c720d --- /dev/null +++ b/docs/benchmarks/evidence/hot-tenant-db-2026-06-16-post-docs-benchmark.json @@ -0,0 +1,412 @@ +{ + "run_id": "2026-06-16-post-docs-benchmark", + "captured_at": "2026-06-16T20:14:43.312Z", + "manifest": { + "path": "tmp/benchmark-tenants-final-smoke.json", + "counts": { + "total": 100, + "hot": 1, + "medium": 10, + "quiet": 89 + } + }, + "total_events": 35, + "tenant_distribution": [ + { + "tenant_class": "hot", + "events": 27, + "orgs": 1, + "projects": 1, + "synthetic_tenants": 1, + "first_event_timestamp": "2026-06-16T20:14:32.991Z", + "last_event_timestamp": "2026-06-16T20:14:39.590Z", + "first_received_at": "2026-06-16T20:14:32.995Z", + "last_received_at": "2026-06-16T20:14:39.598Z" + }, + { + "tenant_class": "quiet", + "events": 5, + "orgs": 5, + "projects": 5, + "synthetic_tenants": 5, + "first_event_timestamp": "2026-06-16T20:14:35.590Z", + "last_event_timestamp": "2026-06-16T20:14:39.190Z", + "first_received_at": "2026-06-16T20:14:35.595Z", + "last_received_at": "2026-06-16T20:14:39.201Z" + }, + { + "tenant_class": "medium", + "events": 3, + "orgs": 3, + "projects": 3, + "synthetic_tenants": 3, + "first_event_timestamp": "2026-06-16T20:14:38.191Z", + "last_event_timestamp": "2026-06-16T20:14:39.790Z", + "first_received_at": "2026-06-16T20:14:38.196Z", + "last_received_at": "2026-06-16T20:14:39.802Z" + } + ], + "top_tenants": [ + { + "tenant_class": "hot", + "tenant_id": "tenant_001", + "org_id": "00000000-0000-4000-8000-0000000f4241", + "project_id": "00000000-0000-4000-8000-0000001e8481", + "events": 27, + "users": 27, + "event_names": 2, + "first_event_timestamp": "2026-06-16T20:14:32.991Z", + "last_event_timestamp": "2026-06-16T20:14:39.590Z" + }, + { + "tenant_class": "medium", + "tenant_id": "tenant_002", + "org_id": 
"00000000-0000-4000-8000-0000000f4242", + "project_id": "00000000-0000-4000-8000-0000001e8482", + "events": 1, + "users": 1, + "event_names": 1, + "first_event_timestamp": "2026-06-16T20:14:38.990Z", + "last_event_timestamp": "2026-06-16T20:14:38.990Z" + }, + { + "tenant_class": "medium", + "tenant_id": "tenant_006", + "org_id": "00000000-0000-4000-8000-0000000f4246", + "project_id": "00000000-0000-4000-8000-0000001e8486", + "events": 1, + "users": 1, + "event_names": 1, + "first_event_timestamp": "2026-06-16T20:14:39.790Z", + "last_event_timestamp": "2026-06-16T20:14:39.790Z" + }, + { + "tenant_class": "medium", + "tenant_id": "tenant_008", + "org_id": "00000000-0000-4000-8000-0000000f4248", + "project_id": "00000000-0000-4000-8000-0000001e8488", + "events": 1, + "users": 1, + "event_names": 1, + "first_event_timestamp": "2026-06-16T20:14:38.191Z", + "last_event_timestamp": "2026-06-16T20:14:38.191Z" + }, + { + "tenant_class": "quiet", + "tenant_id": "tenant_025", + "org_id": "00000000-0000-4000-8000-0000000f4259", + "project_id": "00000000-0000-4000-8000-0000001e8499", + "events": 1, + "users": 1, + "event_names": 1, + "first_event_timestamp": "2026-06-16T20:14:35.590Z", + "last_event_timestamp": "2026-06-16T20:14:35.590Z" + }, + { + "tenant_class": "quiet", + "tenant_id": "tenant_027", + "org_id": "00000000-0000-4000-8000-0000000f425b", + "project_id": "00000000-0000-4000-8000-0000001e849b", + "events": 1, + "users": 1, + "event_names": 1, + "first_event_timestamp": "2026-06-16T20:14:35.990Z", + "last_event_timestamp": "2026-06-16T20:14:35.990Z" + }, + { + "tenant_class": "quiet", + "tenant_id": "tenant_028", + "org_id": "00000000-0000-4000-8000-0000000f425c", + "project_id": "00000000-0000-4000-8000-0000001e849c", + "events": 1, + "users": 1, + "event_names": 1, + "first_event_timestamp": "2026-06-16T20:14:36.190Z", + "last_event_timestamp": "2026-06-16T20:14:36.190Z" + }, + { + "tenant_class": "quiet", + "tenant_id": "tenant_031", + "org_id": 
"00000000-0000-4000-8000-0000000f425f", + "project_id": "00000000-0000-4000-8000-0000001e849f", + "events": 1, + "users": 1, + "event_names": 1, + "first_event_timestamp": "2026-06-16T20:14:36.790Z", + "last_event_timestamp": "2026-06-16T20:14:36.790Z" + }, + { + "tenant_class": "quiet", + "tenant_id": "tenant_043", + "org_id": "00000000-0000-4000-8000-0000000f426b", + "project_id": "00000000-0000-4000-8000-0000001e84ab", + "events": 1, + "users": 1, + "event_names": 1, + "first_event_timestamp": "2026-06-16T20:14:39.190Z", + "last_event_timestamp": "2026-06-16T20:14:39.190Z" + } + ], + "aggregate_pressure": [ + { + "tenant_class": "hot", + "event_count_aggregate_keys": 2, + "events": 27, + "avg_events_per_event_count_key": 13.5, + "max_events_per_event_count_key": 22, + "p95_events_per_event_count_key": 21.15 + }, + { + "tenant_class": "quiet", + "event_count_aggregate_keys": 5, + "events": 5, + "avg_events_per_event_count_key": 1, + "max_events_per_event_count_key": 1, + "p95_events_per_event_count_key": 1 + }, + { + "tenant_class": "medium", + "event_count_aggregate_keys": 3, + "events": 3, + "avg_events_per_event_count_key": 1, + "max_events_per_event_count_key": 1, + "p95_events_per_event_count_key": 1 + } + ], + "active_user_pressure": [ + { + "tenant_class": "hot", + "active_user_keys": 27, + "events_with_user": 27, + "avg_events_per_user_day_key": 1, + "max_events_per_user_day_key": 1 + }, + { + "tenant_class": "quiet", + "active_user_keys": 5, + "events_with_user": 5, + "avg_events_per_user_day_key": 1, + "max_events_per_user_day_key": 1 + }, + { + "tenant_class": "medium", + "active_user_keys": 3, + "events_with_user": 3, + "avg_events_per_user_day_key": 1, + "max_events_per_user_day_key": 1 + } + ], + "burst_seconds": [ + { + "second": "2026-06-16T20:14:36.000Z", + "events": 6 + }, + { + "second": "2026-06-16T20:14:33.000Z", + "events": 5 + }, + { + "second": "2026-06-16T20:14:34.000Z", + "events": 5 + }, + { + "second": "2026-06-16T20:14:37.000Z", + 
"events": 5 + }, + { + "second": "2026-06-16T20:14:38.000Z", + "events": 5 + }, + { + "second": "2026-06-16T20:14:35.000Z", + "events": 4 + }, + { + "second": "2026-06-16T20:14:39.000Z", + "events": 4 + }, + { + "second": "2026-06-16T20:14:32.000Z", + "events": 1 + } + ], + "partitions": [ + { + "partition": "events_2026_06", + "events": 35 + } + ], + "reconciliation": { + "status": "complete", + "k6_summary_path": "docs/benchmarks/evidence/hot-tenant-2026-06-16-post-docs-benchmark.json", + "k6_request_count": 35, + "k6_http_req_failed_rate": 0, + "persisted_events": 35, + "kafka_lag_after": { + "captured_at": "2026-06-16T20:14:43.302Z", + "partitions": [ + { + "topic": "events-raw", + "partition": 0, + "current_offset": 207752, + "log_end_offset": 207752, + "lag": 0 + }, + { + "topic": "events-raw", + "partition": 1, + "current_offset": 208729, + "log_end_offset": 208729, + "lag": 0 + }, + { + "topic": "events-raw", + "partition": 2, + "current_offset": 207822, + "log_end_offset": 207822, + "lag": 0 + } + ], + "total_lag": 0 + }, + "notes": [] + }, + "postgres_snapshot": { + "locks": { + "locks_total": 2, + "waiting_locks": 0, + "waiting_pids": 0 + }, + "activity": { + "connections": 3, + "active_connections": 1, + "waiting_connections": 2, + "lock_wait_connections": 0 + }, + "database": { + "xact_commit": "636300", + "xact_rollback": "33", + "deadlocks": "0", + "conflicts": "0", + "temp_files": "224", + "temp_bytes": "644382720", + "blk_read_time": 0, + "blk_write_time": 0 + }, + "table_stats": [ + { + "relname": "daily_active_users", + "n_tup_ins": "40266", + "n_tup_upd": "0", + "n_tup_del": "20875", + "n_dead_tup": "216", + "seq_scan": "3", + "idx_scan": "624435" + }, + { + "relname": "daily_aggregates", + "n_tup_ins": "788", + "n_tup_upd": "1288176", + "n_tup_del": "170", + "n_dead_tup": "96", + "seq_scan": "78", + "idx_scan": "1290868" + }, + { + "relname": "event_dedup_keys", + "n_tup_ins": "624325", + "n_tup_upd": "0", + "n_tup_del": "26", + "n_dead_tup": 
"26", + "seq_scan": "32", + "idx_scan": "624425" + }, + { + "relname": "events", + "n_tup_ins": "0", + "n_tup_upd": "0", + "n_tup_del": "0", + "n_dead_tup": "0", + "seq_scan": "0", + "idx_scan": "0" + } + ], + "note": "PostgreSQL lock/activity/stat snapshots are captured after the run, not sampled continuously during load." + }, + "representative_query_plans": [ + { + "label": "hot-raw-event-count", + "planning_ms": 0.889, + "execution_ms": 1.193, + "plan_summary": { + "node_types": { + "Aggregate": 1, + "Index Scan": 1 + }, + "relations": [ + "events_2026_06" + ], + "actual_rows_sum": 28, + "plan_rows_sum": 2, + "shared_hit_blocks": 612, + "shared_read_blocks": 0 + } + }, + { + "label": "hot-aggregate-total-events", + "planning_ms": 0.797, + "execution_ms": 0.078, + "plan_summary": { + "node_types": { + "Aggregate": 1, + "Index Scan": 1 + }, + "relations": [ + "daily_aggregates" + ], + "actual_rows_sum": 2, + "plan_rows_sum": 2, + "shared_hit_blocks": 8, + "shared_read_blocks": 0 + } + }, + { + "label": "quiet-raw-event-count", + "planning_ms": 0.218, + "execution_ms": 0.123, + "plan_summary": { + "node_types": { + "Aggregate": 1, + "Index Scan": 1 + }, + "relations": [ + "events_2026_06" + ], + "actual_rows_sum": 2, + "plan_rows_sum": 2, + "shared_hit_blocks": 36, + "shared_read_blocks": 0 + } + }, + { + "label": "quiet-aggregate-total-events", + "planning_ms": 0.104, + "execution_ms": 0.074, + "plan_summary": { + "node_types": { + "Aggregate": 1, + "Index Scan": 1 + }, + "relations": [ + "daily_aggregates" + ], + "actual_rows_sum": 2, + "plan_rows_sum": 2, + "shared_hit_blocks": 8, + "shared_read_blocks": 0 + } + } + ], + "safe_claim_note": "Local hot-tenant PostgreSQL evidence for one run_id. Aggregate pressure is derived from raw events per aggregate key, not from a Postgres ON CONFLICT counter.", + "unsafe_claim_note": "Do not claim production lock behavior or long-duration contention from this after-run snapshot." 
+} diff --git a/docs/benchmarks/evidence/ingest-scale-snapshot-2026-06-16.json b/docs/benchmarks/evidence/ingest-scale-snapshot-2026-06-16.json new file mode 100644 index 0000000..43c7efb --- /dev/null +++ b/docs/benchmarks/evidence/ingest-scale-snapshot-2026-06-16.json @@ -0,0 +1,109 @@ +{ + "captured_at_utc": "2026-06-16T20:30:46Z", + "repo_commit": "112ba0414fa55bb431e814441e85ebd925107347", + "dirty_tree": true, + "environment": { + "os": "Darwin 25.5.0 arm64", + "node": "v25.3.0", + "docker_resources": { + "cpus": 12, + "memory_bytes": 8217165824 + } + }, + "ingest_runs": [ + { + "run_id": "2026-06-16-ingest-100rps", + "evidence_file": "docs/benchmarks/evidence/ingest-throughput-2026-06-16-ingest-100rps.json", + "command": "RUN_ID=2026-06-16-ingest-100rps TENANT_KEYS_FILE=tmp/bench-tenants-load.json API_URL=http://localhost:3001 RATE=100 DURATION=2m PREALLOCATED_VUS=100 MAX_VUS=200 pnpm benchmark:ingest", + "target_request_rate_per_second": 100, + "duration": "2m", + "batch_size": 50, + "http_requests": 12000, + "actual_request_rate_per_second": 99.99836898493587, + "attempted_events_from_completed_requests": 600000, + "http_req_failed_rate": 0, + "checks_rate": 1, + "dropped_iterations": 0, + "http_req_duration_ms": { + "median": 3.1737705, + "p95": 10.950743749999999, + "p99": 86.58365791999998 + } + }, + { + "run_id": "2026-06-16-ingest-500rps", + "evidence_file": "docs/benchmarks/evidence/ingest-throughput-2026-06-16-ingest-500rps.json", + "command": "RUN_ID=2026-06-16-ingest-500rps TENANT_KEYS_FILE=tmp/bench-tenants-load.json API_URL=http://localhost:3001 RATE=500 DURATION=2m PREALLOCATED_VUS=200 MAX_VUS=600 pnpm benchmark:ingest", + "target_request_rate_per_second": 500, + "duration": "2m", + "batch_size": 50, + "http_requests": 58882, + "actual_request_rate_per_second": 490.36287472599633, + "attempted_events_from_completed_requests": 2944100, + "http_req_failed_rate": 0, + "checks_rate": 1, + "dropped_iterations": 1119, + "http_req_duration_ms": { + 
"median": 7.7443124999999995, + "p95": 716.9442645999999, + "p99": 1491.4699039999998 + } + }, + { + "run_id": "2026-06-16-ingest-1000rps", + "evidence_file": "docs/benchmarks/evidence/ingest-throughput-2026-06-16-ingest-1000rps.json", + "command": "RUN_ID=2026-06-16-ingest-1000rps TENANT_KEYS_FILE=tmp/bench-tenants-load.json API_URL=http://localhost:3001 RATE=1000 DURATION=5m PREALLOCATED_VUS=500 MAX_VUS=1400 pnpm benchmark:ingest", + "target_request_rate_per_second": 1000, + "duration": "5m", + "batch_size": 50, + "http_requests": 141102, + "actual_request_rate_per_second": 469.3658658253587, + "attempted_events_from_completed_requests": 7055100, + "http_req_failed_rate": 0.007391815849527293, + "checks_rate": 0.9926081841504727, + "dropped_iterations": 158898, + "http_req_duration_ms": { + "median": 2684.787085, + "p95": 4511.5618967, + "p99": 15000.34296126 + } + } + ], + "postgres_snapshot": { + "captured_at_postgres": "2026-06-16 20:30:46.148874+00", + "total_events_table_rows": 918158, + "persisted_events_by_ingest_run_id": [ + { + "run_id": "2026-06-16-ingest-100rps", + "persisted_events": 293630 + } + ], + "note": "At snapshot time, only the 100 RPS run had partially appeared in PostgreSQL. The 500 RPS and 1000 RPS run IDs were not visible yet because the worker was still consuming Kafka backlog." + }, + "kafka_lag_snapshot": { + "consumer_group": "pulseops-aggregators", + "topic": "events-raw", + "partitions": [ + { + "partition": 0, + "current_offset": 305774, + "log_end_offset": 3722428, + "lag": 3416654 + }, + { + "partition": 1, + "current_offset": 306765, + "log_end_offset": 3714078, + "lag": 3407313 + }, + { + "partition": 2, + "current_offset": 305939, + "log_end_offset": 3736277, + "lag": 3430338 + } + ], + "total_lag": 10254305 + }, + "interpretation": "These fixed-rate ingest runs prove local HTTP ingest behavior and show worker/database catch-up lag under high batch load. 
They do not prove that all attempted events were persisted or aggregated by the snapshot time." +} diff --git a/docs/benchmarks/evidence/ingest-throughput-2026-06-16-clean-full-benchmark.json b/docs/benchmarks/evidence/ingest-throughput-2026-06-16-clean-full-benchmark.json new file mode 100644 index 0000000..bbb94d5 --- /dev/null +++ b/docs/benchmarks/evidence/ingest-throughput-2026-06-16-clean-full-benchmark.json @@ -0,0 +1,227 @@ +{ + "metrics": { + "http_req_duration": { + "type": "trend", + "contains": "time", + "values": { + "min": 2.528291, + "med": 3.3937495, + "p(90)": 6.555958600000001, + "p(95)": 8.02049825, + "p(99)": 10.541285919999996, + "max": 56.95025, + "avg": 4.340055807499997 + } + }, + "http_req_connecting": { + "type": "trend", + "contains": "time", + "values": { + "min": 0, + "med": 0, + "p(90)": 0, + "p(95)": 0.9915190499999998, + "p(99)": 1.21418041, + "max": 1.419791, + "avg": 0.079472495 + } + }, + "data_sent": { + "type": "counter", + "contains": "data", + "values": { + "count": 2911391, + "rate": 145564.5734383633 + } + }, + "http_req_failed": { + "type": "rate", + "contains": "default", + "values": { + "rate": 0, + "passes": 0, + "fails": 400 + }, + "thresholds": { + "rate<0.05": { + "ok": true + } + } + }, + "http_req_blocked": { + "type": "trend", + "contains": "time", + "values": { + "p(99)": 1.3333974099999997, + "max": 4.526959, + "avg": 0.0961014949999997, + "min": 0.002667, + "med": 0.003667, + "p(90)": 0.0080494, + "p(95)": 1.04615175 + } + }, + "http_reqs": { + "type": "counter", + "contains": "default", + "values": { + "rate": 19.999316263375587, + "count": 400 + } + }, + "http_req_sending": { + "type": "trend", + "contains": "time", + "values": { + "p(99)": 0.22900166999999985, + "max": 1.699459, + "avg": 0.08047219500000002, + "min": 0.046375, + "med": 0.06764600000000001, + "p(90)": 0.09757110000000001, + "p(95)": 0.13362114999999997 + } + }, + "vus": { + "type": "gauge", + "contains": "default", + "values": { + "value": 0, 
+ "min": 0, + "max": 0 + } + }, + "checks": { + "type": "rate", + "contains": "default", + "values": { + "rate": 1, + "passes": 400, + "fails": 0 + } + }, + "vus_max": { + "type": "gauge", + "contains": "default", + "values": { + "value": 30, + "min": 30, + "max": 30 + } + }, + "http_req_receiving": { + "type": "trend", + "contains": "time", + "values": { + "p(95)": 0.0985042, + "p(99)": 0.1583162499999999, + "max": 0.34725, + "avg": 0.05981103500000001, + "min": 0.029083, + "med": 0.0539375, + "p(90)": 0.0839705 + } + }, + "http_req_waiting": { + "type": "trend", + "contains": "time", + "values": { + "avg": 4.199772577500003, + "min": 2.437708, + "med": 3.2683125000000004, + "p(90)": 6.3998256, + "p(95)": 7.662535449999997, + "p(99)": 10.415361999999996, + "max": 56.725042 + } + }, + "iteration_duration": { + "type": "trend", + "contains": "time", + "values": { + "avg": 4.852550012499995, + "min": 2.854708, + "med": 3.803292, + "p(90)": 7.584628900000001, + "p(95)": 9.2944354, + "p(99)": 12.248684589999998, + "max": 59.125167 + } + }, + "http_req_tls_handshaking": { + "type": "trend", + "contains": "time", + "values": { + "p(99)": 0, + "max": 0, + "avg": 0, + "min": 0, + "med": 0, + "p(90)": 0, + "p(95)": 0 + } + }, + "http_req_duration{expected_response:true}": { + "type": "trend", + "contains": "time", + "values": { + "p(90)": 6.555958600000001, + "p(95)": 8.02049825, + "p(99)": 10.541285919999996, + "max": 56.95025, + "avg": 4.340055807499997, + "min": 2.528291, + "med": 3.3937495 + } + }, + "data_received": { + "values": { + "count": 133600, + "rate": 6679.7716319674455 + }, + "type": "counter", + "contains": "data" + }, + "iterations": { + "type": "counter", + "contains": "default", + "values": { + "count": 400, + "rate": 19.999316263375587 + } + } + }, + "root_group": { + "checks": [ + { + "name": "accepted or rate limited", + "path": "::accepted or rate limited", + "id": "f582c15494cc8beb1c5d5e90e17b3cef", + "passes": 400, + "fails": 0 + } + ], + "name": 
"", + "path": "", + "id": "d41d8cd98f00b204e9800998ecf8427e", + "groups": [] + }, + "options": { + "summaryTrendStats": [ + "avg", + "min", + "med", + "p(90)", + "p(95)", + "p(99)", + "max" + ], + "summaryTimeUnit": "", + "noColor": false + }, + "state": { + "isStdOutTTY": false, + "isStdErrTTY": false, + "testRunDurationMs": 20000.68376 + } +} \ No newline at end of file diff --git a/docs/benchmarks/evidence/ingest-throughput-2026-06-16-clean-test-load.json b/docs/benchmarks/evidence/ingest-throughput-2026-06-16-clean-test-load.json new file mode 100644 index 0000000..4bb3169 --- /dev/null +++ b/docs/benchmarks/evidence/ingest-throughput-2026-06-16-clean-test-load.json @@ -0,0 +1,227 @@ +{ + "root_group": { + "checks": [ + { + "path": "::accepted or rate limited", + "id": "f582c15494cc8beb1c5d5e90e17b3cef", + "passes": 201, + "fails": 0, + "name": "accepted or rate limited" + } + ], + "name": "", + "path": "", + "id": "d41d8cd98f00b204e9800998ecf8427e", + "groups": [] + }, + "options": { + "summaryTrendStats": [ + "avg", + "min", + "med", + "p(90)", + "p(95)", + "p(99)", + "max" + ], + "summaryTimeUnit": "", + "noColor": false + }, + "state": { + "testRunDurationMs": 10004.610797, + "isStdOutTTY": false, + "isStdErrTTY": false + }, + "metrics": { + "http_req_failed": { + "values": { + "rate": 0, + "passes": 0, + "fails": 201 + }, + "thresholds": { + "rate<0.05": { + "ok": true + } + }, + "type": "rate", + "contains": "default" + }, + "http_req_sending": { + "type": "trend", + "contains": "time", + "values": { + "max": 1.263834, + "avg": 0.09087830845771151, + "min": 0.043417, + "med": 0.075208, + "p(90)": 0.096208, + "p(95)": 0.105291, + "p(99)": 0.461958 + } + }, + "http_req_receiving": { + "type": "trend", + "contains": "time", + "values": { + "med": 0.055166, + "p(90)": 0.081042, + "p(95)": 0.101875, + "p(99)": 0.134959, + "max": 0.244083, + "avg": 0.05798605970149251, + "min": 0.028625 + } + }, + "iterations": { + "type": "counter", + "contains": "default", + 
"values": { + "count": 201, + "rate": 20.09073656920989 + } + }, + "http_reqs": { + "type": "counter", + "contains": "default", + "values": { + "count": 201, + "rate": 20.09073656920989 + } + }, + "http_req_duration{expected_response:true}": { + "values": { + "max": 15.146291, + "avg": 3.3262591094527356, + "min": 2.286625, + "med": 3.041333, + "p(90)": 3.7785, + "p(95)": 4.419041, + "p(99)": 12.743875 + }, + "type": "trend", + "contains": "time" + }, + "data_sent": { + "values": { + "count": 1230710, + "rate": 123014.28061239953 + }, + "type": "counter", + "contains": "data" + }, + "data_received": { + "type": "counter", + "contains": "data", + "values": { + "count": 67134, + "rate": 6710.306014116103 + } + }, + "http_req_waiting": { + "contains": "time", + "values": { + "med": 2.901375, + "p(90)": 3.520666, + "p(95)": 4.245084, + "p(99)": 12.296875, + "max": 14.990791, + "avg": 3.177394741293531, + "min": 2.205625 + }, + "type": "trend" + }, + "checks": { + "type": "rate", + "contains": "default", + "values": { + "rate": 1, + "passes": 201, + "fails": 0 + } + }, + "http_req_tls_handshaking": { + "type": "trend", + "contains": "time", + "values": { + "med": 0, + "p(90)": 0, + "p(95)": 0, + "p(99)": 0, + "max": 0, + "avg": 0, + "min": 0 + } + }, + "vus": { + "type": "gauge", + "contains": "default", + "values": { + "value": 0, + "min": 0, + "max": 0 + } + }, + "vus_max": { + "values": { + "value": 30, + "min": 30, + "max": 30 + }, + "type": "gauge", + "contains": "default" + }, + "http_req_duration": { + "values": { + "p(99)": 12.743875, + "max": 15.146291, + "avg": 3.3262591094527356, + "min": 2.286625, + "med": 3.041333, + "p(90)": 3.7785, + "p(95)": 4.419041 + }, + "type": "trend", + "contains": "time" + }, + "http_req_blocked": { + "type": "trend", + "contains": "time", + "values": { + "p(95)": 1.232, + "p(99)": 3.667042, + "max": 3.86075, + "avg": 0.22695249253731342, + "min": 0.002583, + "med": 0.003666, + "p(90)": 1.014458 + } + }, + "http_req_connecting": { 
+ "type": "trend", + "contains": "time", + "values": { + "avg": 0.20657066666666668, + "min": 0, + "med": 0, + "p(90)": 0.956584, + "p(95)": 1.12125, + "p(99)": 2.68725, + "max": 3.777541 + } + }, + "iteration_duration": { + "type": "trend", + "contains": "time", + "values": { + "p(99)": 16.225083, + "max": 23.906417, + "avg": 4.0907821393034824, + "min": 2.697833, + "med": 3.490167, + "p(90)": 4.8045, + "p(95)": 6.157333 + } + } + } +} \ No newline at end of file diff --git a/docs/benchmarks/evidence/ingest-throughput-2026-06-16-final-benchmark-smoke.json b/docs/benchmarks/evidence/ingest-throughput-2026-06-16-final-benchmark-smoke.json new file mode 100644 index 0000000..540b2bf --- /dev/null +++ b/docs/benchmarks/evidence/ingest-throughput-2026-06-16-final-benchmark-smoke.json @@ -0,0 +1,227 @@ +{ + "metrics": { + "iteration_duration": { + "type": "trend", + "contains": "time", + "values": { + "p(99)": 130.33080199999998, + "max": 167.344292, + "avg": 105.8638254380531, + "min": 102.930792, + "med": 104.9597495, + "p(90)": 106.9594995, + "p(95)": 109.39685449999999 + } + }, + "http_req_duration": { + "contains": "time", + "values": { + "avg": 4.272072637168142, + "min": 2.231583, + "med": 3.4361249999999997, + "p(90)": 4.750083500000001, + "p(95)": 7.169844249999999, + "p(99)": 22.09146849999999, + "max": 60.264041 + }, + "type": "trend" + }, + "http_req_receiving": { + "type": "trend", + "contains": "time", + "values": { + "med": 0.06468750000000001, + "p(90)": 0.10312500000000001, + "p(95)": 0.12697925, + "p(99)": 0.2078855, + "max": 0.380083, + "avg": 0.07272346902654869, + "min": 0.03 + } + }, + "vus": { + "type": "gauge", + "contains": "default", + "values": { + "value": 1, + "min": 1, + "max": 2 + } + }, + "http_reqs": { + "values": { + "count": 226, + "rate": 14.961054126984388 + }, + "type": "counter", + "contains": "default" + }, + "http_req_tls_handshaking": { + "values": { + "avg": 0, + "min": 0, + "med": 0, + "p(90)": 0, + "p(95)": 0, + "p(99)": 0, + 
"max": 0 + }, + "type": "trend", + "contains": "time" + }, + "http_req_waiting": { + "type": "trend", + "contains": "time", + "values": { + "p(90)": 4.640416500000001, + "p(95)": 7.042405749999999, + "p(99)": 21.69667774999999, + "max": 59.96675, + "avg": 4.164273765486724, + "min": 2.1605, + "med": 3.3567295 + } + }, + "data_sent": { + "contains": "data", + "values": { + "count": 848055, + "rate": 56140.69361796348 + }, + "type": "counter" + }, + "data_received": { + "type": "counter", + "contains": "data", + "values": { + "count": 75484, + "rate": 4996.992078412786 + } + }, + "http_req_connecting": { + "type": "trend", + "contains": "time", + "values": { + "p(90)": 0, + "p(95)": 1.1056664999999994, + "p(99)": 2.965260749999997, + "max": 7.660917, + "avg": 0.1509601814159292, + "min": 0, + "med": 0 + } + }, + "http_req_sending": { + "type": "trend", + "contains": "time", + "values": { + "max": 0.181917, + "avg": 0.03507540265486725, + "min": 0.014291, + "med": 0.028125, + "p(90)": 0.0606875, + "p(95)": 0.06909375, + "p(99)": 0.11341649999999998 + } + }, + "checks": { + "contains": "default", + "values": { + "rate": 1, + "passes": 226, + "fails": 0 + }, + "type": "rate" + }, + "http_req_duration{expected_response:true}": { + "type": "trend", + "contains": "time", + "values": { + "avg": 4.272072637168142, + "min": 2.231583, + "med": 3.4361249999999997, + "p(90)": 4.750083500000001, + "p(95)": 7.169844249999999, + "p(99)": 22.09146849999999, + "max": 60.264041 + } + }, + "iterations": { + "type": "counter", + "contains": "default", + "values": { + "count": 226, + "rate": 14.961054126984388 + } + }, + "http_req_failed": { + "type": "rate", + "contains": "default", + "values": { + "passes": 0, + "fails": 226, + "rate": 0 + }, + "thresholds": { + "rate<0.05": { + "ok": true + } + } + }, + "http_req_blocked": { + "type": "trend", + "contains": "time", + "values": { + "p(95)": 1.1724062499999997, + "p(99)": 3.251250249999998, + "max": 7.81325, + "avg": 
0.16520997787610625, + "min": 0.003041, + "med": 0.0042705, + "p(90)": 0.019771500000000008 + } + }, + "vus_max": { + "type": "gauge", + "contains": "default", + "values": { + "value": 20, + "min": 20, + "max": 20 + } + } + }, + "root_group": { + "groups": [], + "checks": [ + { + "name": "accepted or rate limited", + "path": "::accepted or rate limited", + "id": "f582c15494cc8beb1c5d5e90e17b3cef", + "passes": 226, + "fails": 0 + } + ], + "name": "", + "path": "", + "id": "d41d8cd98f00b204e9800998ecf8427e" + }, + "options": { + "noColor": false, + "summaryTrendStats": [ + "avg", + "min", + "med", + "p(90)", + "p(95)", + "p(99)", + "max" + ], + "summaryTimeUnit": "" + }, + "state": { + "isStdOutTTY": false, + "isStdErrTTY": false, + "testRunDurationMs": 15105.887465 + } +} \ No newline at end of file diff --git a/docs/benchmarks/evidence/ingest-throughput-2026-06-16-final-test-load.json b/docs/benchmarks/evidence/ingest-throughput-2026-06-16-final-test-load.json new file mode 100644 index 0000000..24a2b04 --- /dev/null +++ b/docs/benchmarks/evidence/ingest-throughput-2026-06-16-final-test-load.json @@ -0,0 +1,227 @@ +{ + "root_group": { + "path": "", + "id": "d41d8cd98f00b204e9800998ecf8427e", + "groups": [], + "checks": [ + { + "id": "f582c15494cc8beb1c5d5e90e17b3cef", + "passes": 401, + "fails": 0, + "name": "accepted or rate limited", + "path": "::accepted or rate limited" + } + ], + "name": "" + }, + "options": { + "summaryTrendStats": [ + "avg", + "min", + "med", + "p(90)", + "p(95)", + "p(99)", + "max" + ], + "summaryTimeUnit": "", + "noColor": false + }, + "state": { + "isStdOutTTY": false, + "isStdErrTTY": false, + "testRunDurationMs": 20005.359177 + }, + "metrics": { + "http_req_duration{expected_response:true}": { + "type": "trend", + "contains": "time", + "values": { + "avg": 3.855344952618452, + "min": 2.154167, + "med": 3.292708, + "p(90)": 5.403875, + "p(95)": 6.048667, + "p(99)": 12.015042, + "max": 27.714208 + } + }, + "http_req_blocked": { + "type": 
"trend", + "contains": "time", + "values": { + "avg": 0.057022369077306546, + "min": 0.00275, + "med": 0.003833, + "p(90)": 0.011209, + "p(95)": 0.172917, + "p(99)": 1.080709, + "max": 1.57525 + } + }, + "http_req_tls_handshaking": { + "contains": "time", + "values": { + "max": 0, + "avg": 0, + "min": 0, + "med": 0, + "p(90)": 0, + "p(95)": 0, + "p(99)": 0 + }, + "type": "trend" + }, + "data_received": { + "type": "counter", + "contains": "data", + "values": { + "count": 133934, + "rate": 6694.906040676483 + } + }, + "vus_max": { + "type": "gauge", + "contains": "default", + "values": { + "value": 20, + "min": 20, + "max": 20 + } + }, + "http_req_connecting": { + "type": "trend", + "contains": "time", + "values": { + "avg": 0.047777950124688294, + "min": 0, + "med": 0, + "p(90)": 0, + "p(95)": 0, + "p(99)": 1.031083, + "max": 1.117459 + } + }, + "http_req_sending": { + "type": "trend", + "contains": "time", + "values": { + "avg": 0.029985122194513693, + "min": 0.014792, + "med": 0.024625, + "p(90)": 0.041833, + "p(95)": 0.054667, + "p(99)": 0.102458, + "max": 0.268375 + } + }, + "http_req_receiving": { + "type": "trend", + "contains": "time", + "values": { + "p(99)": 0.215125, + "max": 0.483083, + "avg": 0.057932433915212043, + "min": 0.023084, + "med": 0.050583, + "p(90)": 0.08, + "p(95)": 0.099667 + } + }, + "http_req_duration": { + "values": { + "med": 3.292708, + "p(90)": 5.403875, + "p(95)": 6.048667, + "p(99)": 12.015042, + "max": 27.714208, + "avg": 3.855344952618452, + "min": 2.154167 + }, + "type": "trend", + "contains": "time" + }, + "iterations": { + "type": "counter", + "contains": "default", + "values": { + "count": 401, + "rate": 20.044628864300847 + } + }, + "iteration_duration": { + "type": "trend", + "contains": "time", + "values": { + "min": 2.453417, + "med": 3.655375, + "p(90)": 6.290959, + "p(95)": 6.870084, + "p(99)": 12.935959, + "max": 41.133208, + "avg": 4.367285947630919 + } + }, + "data_sent": { + "values": { + "count": 1481414, + "rate": 
74050.85741740493 + }, + "type": "counter", + "contains": "data" + }, + "http_req_failed": { + "thresholds": { + "rate<0.05": { + "ok": true + } + }, + "type": "rate", + "contains": "default", + "values": { + "rate": 0, + "passes": 0, + "fails": 401 + } + }, + "http_reqs": { + "type": "counter", + "contains": "default", + "values": { + "count": 401, + "rate": 20.044628864300847 + } + }, + "http_req_waiting": { + "type": "trend", + "contains": "time", + "values": { + "med": 3.212833, + "p(90)": 5.308459, + "p(95)": 5.921125, + "p(99)": 11.852875, + "max": 27.325291, + "avg": 3.76742739650873, + "min": 2.100375 + } + }, + "checks": { + "contains": "default", + "values": { + "rate": 1, + "passes": 401, + "fails": 0 + }, + "type": "rate" + }, + "vus": { + "contains": "default", + "values": { + "value": 0, + "min": 0, + "max": 0 + }, + "type": "gauge" + } + } +} \ No newline at end of file diff --git a/docs/benchmarks/evidence/ingest-throughput-2026-06-16-final-validation-benchmark.json b/docs/benchmarks/evidence/ingest-throughput-2026-06-16-final-validation-benchmark.json new file mode 100644 index 0000000..e15a93a --- /dev/null +++ b/docs/benchmarks/evidence/ingest-throughput-2026-06-16-final-validation-benchmark.json @@ -0,0 +1,227 @@ +{ + "options": { + "summaryTrendStats": [ + "avg", + "min", + "med", + "p(90)", + "p(95)", + "p(99)", + "max" + ], + "summaryTimeUnit": "", + "noColor": false + }, + "state": { + "isStdOutTTY": false, + "isStdErrTTY": false, + "testRunDurationMs": 15005.712674 + }, + "metrics": { + "http_req_sending": { + "type": "trend", + "contains": "time", + "values": { + "med": 0.04525, + "p(90)": 0.064708, + "p(95)": 0.07775, + "p(99)": 0.152875, + "max": 0.665584, + "avg": 0.05097230179880085, + "min": 0.028 + } + }, + "http_req_waiting": { + "type": "trend", + "contains": "time", + "values": { + "avg": 2.9517391992005324, + "min": 2.027292, + "med": 2.67125, + "p(90)": 3.651, + "p(95)": 4.336708, + "p(99)": 6.643792, + "max": 32.235333 + } + }, 
+ "http_req_receiving": { + "values": { + "avg": 0.04322584210526314, + "min": 0.011375, + "med": 0.0385, + "p(90)": 0.062708, + "p(95)": 0.071542, + "p(99)": 0.108916, + "max": 0.955833 + }, + "type": "trend", + "contains": "time" + }, + "checks": { + "values": { + "rate": 1, + "passes": 1501, + "fails": 0 + }, + "type": "rate", + "contains": "default" + }, + "data_sent": { + "contains": "data", + "values": { + "count": 23365582, + "rate": 1557112.4482801089 + }, + "type": "counter" + }, + "http_req_tls_handshaking": { + "type": "trend", + "contains": "time", + "values": { + "p(99)": 0, + "max": 0, + "avg": 0, + "min": 0, + "med": 0, + "p(90)": 0, + "p(95)": 0 + } + }, + "http_req_blocked": { + "type": "trend", + "contains": "time", + "values": { + "med": 0.002875, + "p(90)": 0.004458, + "p(95)": 0.00575, + "p(99)": 0.830375, + "max": 1.753958, + "avg": 0.01639382211858747, + "min": 0.000917 + } + }, + "http_req_connecting": { + "contains": "time", + "values": { + "med": 0, + "p(90)": 0, + "p(95)": 0, + "p(99)": 0.785708, + "max": 1.181708, + "avg": 0.011684959360426381, + "min": 0 + }, + "type": "trend" + }, + "http_req_duration{expected_response:true}": { + "contains": "time", + "values": { + "avg": 3.045937343104597, + "min": 2.0885, + "med": 2.757584, + "p(90)": 3.782292, + "p(95)": 4.502375, + "p(99)": 6.747833, + "max": 32.383208 + }, + "type": "trend" + }, + "iterations": { + "type": "counter", + "contains": "default", + "values": { + "count": 1501, + "rate": 100.0285712921015 + } + }, + "vus": { + "type": "gauge", + "contains": "default", + "values": { + "min": 0, + "max": 1, + "value": 1 + } + }, + "vus_max": { + "type": "gauge", + "contains": "default", + "values": { + "max": 20, + "value": 20, + "min": 20 + } + }, + "http_req_failed": { + "type": "rate", + "contains": "default", + "values": { + "rate": 0, + "passes": 0, + "fails": 1501 + }, + "thresholds": { + "rate<0.05": { + "ok": true + } + } + }, + "http_reqs": { + "type": "counter", + "contains": 
"default", + "values": { + "count": 1501, + "rate": 100.0285712921015 + } + }, + "data_received": { + "type": "counter", + "contains": "data", + "values": { + "count": 500434, + "rate": 33349.56565355864 + } + }, + "iteration_duration": { + "contains": "time", + "values": { + "p(99)": 7.86925, + "max": 32.947791, + "avg": 3.6476826115922734, + "min": 2.532875, + "med": 3.28575, + "p(90)": 4.617667, + "p(95)": 5.434167 + }, + "type": "trend" + }, + "http_req_duration": { + "type": "trend", + "contains": "time", + "values": { + "p(90)": 3.782292, + "p(95)": 4.502375, + "p(99)": 6.747833, + "max": 32.383208, + "avg": 3.045937343104597, + "min": 2.0885, + "med": 2.757584 + } + } + }, + "root_group": { + "name": "", + "path": "", + "id": "d41d8cd98f00b204e9800998ecf8427e", + "groups": [], + "checks": [ + { + "name": "accepted or rate limited", + "path": "::accepted or rate limited", + "id": "f582c15494cc8beb1c5d5e90e17b3cef", + "passes": 1501, + "fails": 0 + } + ] + } +} \ No newline at end of file diff --git a/docs/benchmarks/evidence/ingest-throughput-2026-06-16-final-validation-test-load.json b/docs/benchmarks/evidence/ingest-throughput-2026-06-16-final-validation-test-load.json new file mode 100644 index 0000000..f48f57b --- /dev/null +++ b/docs/benchmarks/evidence/ingest-throughput-2026-06-16-final-validation-test-load.json @@ -0,0 +1,227 @@ +{ + "metrics": { + "http_reqs": { + "type": "counter", + "contains": "default", + "values": { + "count": 12001, + "rate": 100.0027410967156 + } + }, + "http_req_duration{expected_response:true}": { + "type": "trend", + "contains": "time", + "values": { + "med": 3.34975, + "p(90)": 6.990041, + "p(95)": 11.636583, + "p(99)": 67.326625, + "max": 311.567208, + "avg": 5.972092606449447, + "min": 2.01375 + } + }, + "http_req_tls_handshaking": { + "type": "trend", + "contains": "time", + "values": { + "p(99)": 0, + "max": 0, + "avg": 0, + "min": 0, + "med": 0, + "p(90)": 0, + "p(95)": 0 + } + }, + "http_req_waiting": { + "type": 
"trend", + "contains": "time", + "values": { + "p(95)": 11.336292, + "p(99)": 66.104459, + "max": 311.407042, + "avg": 5.809683607616036, + "min": 1.960833, + "med": 3.233458, + "p(90)": 6.781542 + } + }, + "vus_max": { + "type": "gauge", + "contains": "default", + "values": { + "value": 50, + "min": 50, + "max": 50 + } + }, + "http_req_blocked": { + "values": { + "p(95)": 0.008542, + "p(99)": 0.032666, + "max": 7.156875, + "avg": 0.009473298891758871, + "min": 0.001042, + "med": 0.003625, + "p(90)": 0.005916 + }, + "type": "trend", + "contains": "time" + }, + "data_received": { + "type": "counter", + "contains": "data", + "values": { + "rate": 33385.9163650686, + "count": 4006534 + } + }, + "http_req_connecting": { + "type": "trend", + "contains": "time", + "values": { + "p(95)": 0, + "p(99)": 0, + "max": 4.973041, + "avg": 0.004373437380218314, + "min": 0, + "med": 0, + "p(90)": 0 + } + }, + "http_req_receiving": { + "type": "trend", + "contains": "time", + "values": { + "avg": 0.07715083551370704, + "min": 0.0065, + "med": 0.053583, + "p(90)": 0.097292, + "p(95)": 0.129667, + "p(99)": 0.235083, + "max": 42.882709 + } + }, + "checks": { + "contains": "default", + "values": { + "fails": 0, + "rate": 1, + "passes": 12001 + }, + "type": "rate" + }, + "http_req_failed": { + "type": "rate", + "contains": "default", + "values": { + "passes": 0, + "fails": 12001, + "rate": 0 + }, + "thresholds": { + "rate<0.05": { + "ok": true + } + } + }, + "http_req_duration": { + "values": { + "avg": 5.972092606449447, + "min": 2.01375, + "med": 3.34975, + "p(90)": 6.990041, + "p(95)": 11.636583, + "p(99)": 67.326625, + "max": 311.567208 + }, + "type": "trend", + "contains": "time" + }, + "http_req_sending": { + "type": "trend", + "contains": "time", + "values": { + "p(95)": 0.140666, + "p(99)": 0.381792, + "max": 108.914167, + "avg": 0.0852581633197228, + "min": 0.025334, + "med": 0.055042, + "p(90)": 0.090584 + } + }, + "data_sent": { + "type": "counter", + "contains": "data", + 
"values": { + "count": 186820752, + "rate": 1556752.5451003842 + } + }, + "iteration_duration": { + "values": { + "min": 2.423125, + "med": 3.977333, + "p(90)": 8.176625, + "p(95)": 13.16225, + "p(99)": 73.528625, + "max": 316.095208, + "avg": 6.80467923631362 + }, + "type": "trend", + "contains": "time" + }, + "iterations": { + "type": "counter", + "contains": "default", + "values": { + "count": 12001, + "rate": 100.0027410967156 + } + }, + "vus": { + "type": "gauge", + "contains": "default", + "values": { + "value": 0, + "min": 0, + "max": 13 + } + } + }, + "root_group": { + "id": "d41d8cd98f00b204e9800998ecf8427e", + "groups": [], + "checks": [ + { + "passes": 12001, + "fails": 0, + "name": "accepted or rate limited", + "path": "::accepted or rate limited", + "id": "f582c15494cc8beb1c5d5e90e17b3cef" + } + ], + "name": "", + "path": "" + }, + "options": { + "noColor": false, + "summaryTrendStats": [ + "avg", + "min", + "med", + "p(90)", + "p(95)", + "p(99)", + "max" + ], + "summaryTimeUnit": "" + }, + "state": { + "isStdOutTTY": false, + "isStdErrTTY": false, + "testRunDurationMs": 120006.7105 + } +} \ No newline at end of file diff --git a/docs/benchmarks/evidence/ingest-throughput-2026-06-16-harness-smoke.json b/docs/benchmarks/evidence/ingest-throughput-2026-06-16-harness-smoke.json new file mode 100644 index 0000000..4804860 --- /dev/null +++ b/docs/benchmarks/evidence/ingest-throughput-2026-06-16-harness-smoke.json @@ -0,0 +1,227 @@ +{ + "root_group": { + "groups": [], + "checks": [ + { + "name": "accepted or rate limited", + "path": "::accepted or rate limited", + "id": "f582c15494cc8beb1c5d5e90e17b3cef", + "passes": 3, + "fails": 0 + } + ], + "name": "", + "path": "", + "id": "d41d8cd98f00b204e9800998ecf8427e" + }, + "options": { + "summaryTrendStats": [ + "avg", + "min", + "med", + "p(90)", + "p(95)", + "p(99)", + "max" + ], + "summaryTimeUnit": "", + "noColor": false + }, + "state": { + "testRunDurationMs": 2007.65471, + "isStdOutTTY": false, + 
"isStdErrTTY": false + }, + "metrics": { + "vus_max": { + "type": "gauge", + "contains": "default", + "values": { + "value": 1, + "min": 1, + "max": 1 + } + }, + "http_req_blocked": { + "type": "trend", + "contains": "time", + "values": { + "max": 2.028667, + "avg": 0.6826803333333333, + "min": 0.009166, + "med": 0.010208, + "p(90)": 1.6249752000000002, + "p(95)": 1.8268210999999999, + "p(99)": 1.9882978199999999 + } + }, + "checks": { + "type": "rate", + "contains": "default", + "values": { + "fails": 0, + "rate": 1, + "passes": 3 + } + }, + "iterations": { + "type": "counter", + "contains": "default", + "values": { + "count": 3, + "rate": 1.4942808566917365 + } + }, + "data_received": { + "values": { + "rate": 499.08980613503996, + "count": 1002 + }, + "type": "counter", + "contains": "data" + }, + "http_req_failed": { + "thresholds": { + "rate<0.05": { + "ok": true + } + }, + "type": "rate", + "contains": "default", + "values": { + "rate": 0, + "passes": 0, + "fails": 3 + } + }, + "vus": { + "type": "gauge", + "contains": "default", + "values": { + "value": 0, + "min": 0, + "max": 0 + } + }, + "http_req_duration": { + "type": "trend", + "contains": "time", + "values": { + "min": 5.793917, + "med": 9.947417, + "p(90)": 69.4068162, + "p(95)": 76.8392411, + "p(99)": 82.78518102, + "max": 84.271666, + "avg": 33.33766666666667 + } + }, + "http_req_duration{expected_response:true}": { + "type": "trend", + "contains": "time", + "values": { + "p(99)": 82.78518102, + "max": 84.271666, + "avg": 33.33766666666667, + "min": 5.793917, + "med": 9.947417, + "p(90)": 69.4068162, + "p(95)": 76.8392411 + } + }, + "http_req_tls_handshaking": { + "type": "trend", + "contains": "time", + "values": { + "p(90)": 0, + "p(95)": 0, + "p(99)": 0, + "max": 0, + "avg": 0, + "min": 0, + "med": 0 + } + }, + "data_sent": { + "type": "counter", + "contains": "data", + "values": { + "count": 9340, + "rate": 4652.194400500272 + } + }, + "http_req_sending": { + "type": "trend", + "contains": 
"time", + "values": { + "p(99)": 0.12473352000000001, + "max": 0.125541, + "avg": 0.096264, + "min": 0.078084, + "med": 0.085167, + "p(90)": 0.11746620000000002, + "p(95)": 0.1215036 + } + }, + "iteration_duration": { + "values": { + "p(90)": 72.00262500000001, + "p(95)": 79.6245, + "p(99)": 85.722, + "max": 87.246375, + "avg": 34.9765, + "min": 6.6555, + "med": 11.027625 + }, + "type": "trend", + "contains": "time" + }, + "http_reqs": { + "type": "counter", + "contains": "default", + "values": { + "count": 3, + "rate": 1.4942808566917365 + } + }, + "http_req_receiving": { + "values": { + "p(90)": 0.17294980000000001, + "p(95)": 0.1770789, + "p(99)": 0.18038218, + "max": 0.181208, + "avg": 0.14818066666666665, + "min": 0.123417, + "med": 0.139917 + }, + "type": "trend", + "contains": "time" + }, + "http_req_connecting": { + "type": "trend", + "contains": "time", + "values": { + "avg": 0.35398633333333335, + "min": 0, + "med": 0, + "p(90)": 0.8495672000000001, + "p(95)": 0.9557631, + "p(99)": 1.04071982, + "max": 1.061959 + } + }, + "http_req_waiting": { + "type": "trend", + "contains": "time", + "values": { + "avg": 33.093222000000004, + "min": 5.585333, + "med": 9.729416, + "p(90)": 69.1178168, + "p(95)": 76.5413669, + "p(99)": 82.48020698, + "max": 83.964917 + } + } + } +} \ No newline at end of file diff --git a/docs/benchmarks/evidence/ingest-throughput-2026-06-16-harness-smoke2.json b/docs/benchmarks/evidence/ingest-throughput-2026-06-16-harness-smoke2.json new file mode 100644 index 0000000..1ceb7e6 --- /dev/null +++ b/docs/benchmarks/evidence/ingest-throughput-2026-06-16-harness-smoke2.json @@ -0,0 +1,227 @@ +{ + "state": { + "isStdOutTTY": false, + "isStdErrTTY": false, + "testRunDurationMs": 2000.558709 + }, + "metrics": { + "checks": { + "type": "rate", + "contains": "default", + "values": { + "rate": 1, + "passes": 2, + "fails": 0 + } + }, + "vus": { + "type": "gauge", + "contains": "default", + "values": { + "max": 0, + "value": 0, + "min": 0 + } + }, + 
"http_req_failed": { + "type": "rate", + "contains": "default", + "values": { + "fails": 2, + "rate": 0, + "passes": 0 + }, + "thresholds": { + "rate<0.05": { + "ok": true + } + } + }, + "http_req_duration": { + "contains": "time", + "values": { + "min": 6.122458, + "med": 6.2684584999999995, + "p(90)": 6.3852589, + "p(95)": 6.39985895, + "p(99)": 6.4115389899999995, + "max": 6.414459, + "avg": 6.2684584999999995 + }, + "type": "trend" + }, + "http_req_blocked": { + "values": { + "avg": 0.7033750000000001, + "min": 0.009292, + "med": 0.703375, + "p(90)": 1.2586414000000001, + "p(95)": 1.3280497, + "p(99)": 1.38357634, + "max": 1.397458 + }, + "type": "trend", + "contains": "time" + }, + "iteration_duration": { + "type": "trend", + "contains": "time", + "values": { + "med": 8.070083499999999, + "p(90)": 8.8718167, + "p(95)": 8.97203335, + "p(99)": 9.05220667, + "max": 9.07225, + "avg": 8.070083499999999, + "min": 7.067917 + } + }, + "http_req_waiting": { + "type": "trend", + "contains": "time", + "values": { + "avg": 6.073125, + "min": 5.9455, + "med": 6.073125, + "p(90)": 6.175225, + "p(95)": 6.1879875, + "p(99)": 6.1981975, + "max": 6.20075 + } + }, + "http_req_receiving": { + "type": "trend", + "contains": "time", + "values": { + "med": 0.11591699999999999, + "p(90)": 0.124117, + "p(95)": 0.125142, + "p(99)": 0.125962, + "max": 0.126167, + "avg": 0.11591699999999999, + "min": 0.105667 + } + }, + "data_sent": { + "type": "counter", + "contains": "data", + "values": { + "count": 6231, + "rate": 3114.629914117657 + } + }, + "http_reqs": { + "type": "counter", + "contains": "default", + "values": { + "count": 2, + "rate": 0.9997207235171423 + } + }, + "vus_max": { + "type": "gauge", + "contains": "default", + "values": { + "value": 1, + "min": 1, + "max": 1 + } + }, + "http_req_sending": { + "type": "trend", + "contains": "time", + "values": { + "avg": 0.0794165, + "min": 0.050791, + "med": 0.0794165, + "p(90)": 0.1023169, + "p(95)": 0.10517945, + "p(99)": 
0.10746949, + "max": 0.108042 + } + }, + "data_received": { + "type": "counter", + "contains": "data", + "values": { + "count": 668, + "rate": 333.9067216547255 + } + }, + "iterations": { + "type": "counter", + "contains": "default", + "values": { + "count": 2, + "rate": 0.9997207235171423 + } + }, + "http_req_duration{expected_response:true}": { + "type": "trend", + "contains": "time", + "values": { + "med": 6.2684584999999995, + "p(90)": 6.3852589, + "p(95)": 6.39985895, + "p(99)": 6.4115389899999995, + "max": 6.414459, + "avg": 6.2684584999999995, + "min": 6.122458 + } + }, + "http_req_tls_handshaking": { + "contains": "time", + "values": { + "p(99)": 0, + "max": 0, + "avg": 0, + "min": 0, + "med": 0, + "p(90)": 0, + "p(95)": 0 + }, + "type": "trend" + }, + "http_req_connecting": { + "values": { + "min": 0, + "med": 0.4515205, + "p(90)": 0.8127369, + "p(95)": 0.85788895, + "p(99)": 0.8940105899999999, + "max": 0.903041, + "avg": 0.4515205 + }, + "type": "trend", + "contains": "time" + } + }, + "root_group": { + "path": "", + "id": "d41d8cd98f00b204e9800998ecf8427e", + "groups": [], + "checks": [ + { + "fails": 0, + "name": "accepted or rate limited", + "path": "::accepted or rate limited", + "id": "f582c15494cc8beb1c5d5e90e17b3cef", + "passes": 2 + } + ], + "name": "" + }, + "options": { + "summaryTrendStats": [ + "avg", + "min", + "med", + "p(90)", + "p(95)", + "p(99)", + "max" + ], + "summaryTimeUnit": "", + "noColor": false + } +} \ No newline at end of file diff --git a/docs/benchmarks/evidence/ingest-throughput-2026-06-16-harness-smoke3.json b/docs/benchmarks/evidence/ingest-throughput-2026-06-16-harness-smoke3.json new file mode 100644 index 0000000..0c19272 --- /dev/null +++ b/docs/benchmarks/evidence/ingest-throughput-2026-06-16-harness-smoke3.json @@ -0,0 +1,227 @@ +{ + "root_group": { + "name": "", + "path": "", + "id": "d41d8cd98f00b204e9800998ecf8427e", + "groups": [], + "checks": [ + { + "name": "accepted or rate limited", + "path": "::accepted or 
rate limited", + "id": "f582c15494cc8beb1c5d5e90e17b3cef", + "passes": 2, + "fails": 0 + } + ] + }, + "options": { + "summaryTrendStats": [ + "avg", + "min", + "med", + "p(90)", + "p(95)", + "p(99)", + "max" + ], + "summaryTimeUnit": "", + "noColor": false + }, + "state": { + "isStdOutTTY": false, + "isStdErrTTY": false, + "testRunDurationMs": 2003.098543 + }, + "metrics": { + "iterations": { + "type": "counter", + "contains": "default", + "values": { + "count": 2, + "rate": 0.998453125029306 + } + }, + "checks": { + "type": "rate", + "contains": "default", + "values": { + "rate": 1, + "passes": 2, + "fails": 0 + } + }, + "http_req_connecting": { + "type": "trend", + "contains": "time", + "values": { + "p(95)": 0.96927645, + "p(99)": 1.01008809, + "max": 1.020291, + "avg": 0.5101455, + "min": 0, + "med": 0.5101455, + "p(90)": 0.9182619000000001 + } + }, + "data_received": { + "values": { + "count": 666, + "rate": 332.4848906347589 + }, + "type": "counter", + "contains": "data" + }, + "http_req_failed": { + "type": "rate", + "contains": "default", + "values": { + "rate": 0, + "passes": 0, + "fails": 2 + }, + "thresholds": { + "rate<0.05": { + "ok": true + } + } + }, + "vus_max": { + "contains": "default", + "values": { + "max": 1, + "value": 1, + "min": 1 + }, + "type": "gauge" + }, + "http_req_blocked": { + "type": "trend", + "contains": "time", + "values": { + "p(95)": 2.8106354000000002, + "p(99)": 2.92822708, + "max": 2.957625, + "avg": 1.487729, + "min": 0.017833, + "med": 1.487729, + "p(90)": 2.6636458000000003 + } + }, + "http_req_tls_handshaking": { + "type": "trend", + "contains": "time", + "values": { + "avg": 0, + "min": 0, + "med": 0, + "p(90)": 0, + "p(95)": 0, + "p(99)": 0, + "max": 0 + } + }, + "http_req_duration{expected_response:true}": { + "type": "trend", + "contains": "time", + "values": { + "max": 9.337375, + "avg": 7.7653125, + "min": 6.19325, + "med": 7.7653125, + "p(90)": 9.0229625, + "p(95)": 9.18016875, + "p(99)": 9.30593375 + } + }, + 
"http_req_sending": { + "type": "trend", + "contains": "time", + "values": { + "p(95)": 0.1355941, + "p(99)": 0.13875242, + "max": 0.139542, + "avg": 0.100063, + "min": 0.060584, + "med": 0.100063, + "p(90)": 0.1316462 + } + }, + "http_req_receiving": { + "contains": "time", + "values": { + "avg": 0.108708, + "min": 0.103041, + "med": 0.108708, + "p(90)": 0.1132416, + "p(95)": 0.1138083, + "p(99)": 0.11426166, + "max": 0.114375 + }, + "type": "trend" + }, + "vus": { + "type": "gauge", + "contains": "default", + "values": { + "value": 0, + "min": 0, + "max": 0 + } + }, + "http_req_duration": { + "contains": "time", + "values": { + "max": 9.337375, + "avg": 7.7653125, + "min": 6.19325, + "med": 7.7653125, + "p(90)": 9.0229625, + "p(95)": 9.18016875, + "p(99)": 9.30593375 + }, + "type": "trend" + }, + "http_reqs": { + "values": { + "count": 2, + "rate": 0.998453125029306 + }, + "type": "counter", + "contains": "default" + }, + "data_sent": { + "type": "counter", + "contains": "data", + "values": { + "count": 6231, + "rate": 3110.680711028803 + } + }, + "iteration_duration": { + "type": "trend", + "contains": "time", + "values": { + "min": 10.027458, + "med": 10.080521000000001, + "p(90)": 10.1229714, + "p(95)": 10.1282777, + "p(99)": 10.13252274, + "max": 10.133584, + "avg": 10.080521000000001 + } + }, + "http_req_waiting": { + "values": { + "avg": 7.5565415, + "min": 5.950667, + "med": 7.5565415, + "p(90)": 8.8412411, + "p(95)": 9.00182855, + "p(99)": 9.130298510000001, + "max": 9.162416 + }, + "type": "trend", + "contains": "time" + } + } +} \ No newline at end of file diff --git a/docs/benchmarks/evidence/ingest-throughput-2026-06-16-ingest-1000rps.json b/docs/benchmarks/evidence/ingest-throughput-2026-06-16-ingest-1000rps.json new file mode 100644 index 0000000..48f1c92 --- /dev/null +++ b/docs/benchmarks/evidence/ingest-throughput-2026-06-16-ingest-1000rps.json @@ -0,0 +1,235 @@ +{ + "root_group": { + "groups": [], + "checks": [ + { + "name": "accepted or rate 
limited", + "path": "::accepted or rate limited", + "id": "f582c15494cc8beb1c5d5e90e17b3cef", + "passes": 140059, + "fails": 1043 + } + ], + "name": "", + "path": "", + "id": "d41d8cd98f00b204e9800998ecf8427e" + }, + "options": { + "summaryTrendStats": [ + "avg", + "min", + "med", + "p(90)", + "p(95)", + "p(99)", + "max" + ], + "summaryTimeUnit": "", + "noColor": false + }, + "state": { + "isStdOutTTY": false, + "isStdErrTTY": false, + "testRunDurationMs": 300622.627834 + }, + "metrics": { + "http_req_waiting": { + "type": "trend", + "contains": "time", + "values": { + "p(90)": 3953.1696978000004, + "p(95)": 4510.138712449998, + "p(99)": 15000.25416775, + "max": 60003.968542, + "avg": 2918.9001312622645, + "min": 0, + "med": 2684.517064 + } + }, + "http_req_tls_handshaking": { + "values": { + "p(95)": 0, + "p(99)": 0, + "max": 0, + "avg": 0, + "min": 0, + "med": 0, + "p(90)": 0 + }, + "type": "trend", + "contains": "time" + }, + "iterations": { + "type": "counter", + "contains": "default", + "values": { + "rate": 469.3658658253587, + "count": 141102 + } + }, + "http_req_duration": { + "values": { + "med": 2684.787085, + "p(90)": 3953.4257401, + "p(95)": 4511.5618967, + "p(99)": 15000.34296126, + "max": 60004.044667, + "avg": 2919.1258463167496, + "min": 0 + }, + "type": "trend", + "contains": "time" + }, + "checks": { + "type": "rate", + "contains": "default", + "values": { + "fails": 1043, + "rate": 0.9926081841504727, + "passes": 140059 + } + }, + "iteration_duration": { + "type": "trend", + "contains": "time", + "values": { + "avg": 2935.1728156463305, + "min": 23.086667, + "med": 2687.866814, + "p(90)": 3958.1979065, + "p(95)": 4526.686376699996, + "p(99)": 15002.54985992, + "max": 60026.784027 + } + }, + "http_req_blocked": { + "type": "trend", + "contains": "time", + "values": { + "p(90)": 0.007083, + "p(95)": 0.011416949999999993, + "p(99)": 0.8495347599999992, + "max": 58.261708, + "avg": 0.04477873327096605, + "min": 0, + "med": 0.002625 + } + }, + 
"http_req_connecting": { + "type": "trend", + "contains": "time", + "values": { + "min": 0, + "med": 0, + "p(90)": 0, + "p(95)": 0, + "p(99)": 0.7519153299999998, + "max": 58.236458, + "avg": 0.036854774418505785 + } + }, + "http_reqs": { + "type": "counter", + "contains": "default", + "values": { + "count": 141102, + "rate": 469.3658658253587 + } + }, + "http_req_duration{expected_response:true}": { + "contains": "time", + "values": { + "p(90)": 3927.5016066000003, + "p(95)": 4391.8283273, + "p(99)": 5949.410883500001, + "max": 41832.964006, + "avg": 2828.3018548125956, + "min": 20.935459, + "med": 2677.082126 + }, + "type": "trend" + }, + "http_req_receiving": { + "type": "trend", + "contains": "time", + "values": { + "avg": 0.08671321513514904, + "min": -0.936139, + "med": 0.033, + "p(90)": 0.120375, + "p(95)": 0.20949794999999974, + "p(99)": 0.78883357, + "max": 238.908709 + } + }, + "http_req_sending": { + "type": "trend", + "contains": "time", + "values": { + "avg": 0.13900183932190774, + "min": 0, + "med": 0.06275, + "p(90)": 0.1821125000000004, + "p(95)": 0.3453687499999992, + "p(99)": 1.358735409999998, + "max": 117.718625 + } + }, + "vus_max": { + "contains": "default", + "values": { + "value": 1400, + "min": 533, + "max": 1400 + }, + "type": "gauge" + }, + "vus": { + "type": "gauge", + "contains": "default", + "values": { + "min": 532, + "max": 1400, + "value": 1381 + } + }, + "data_received": { + "type": "counter", + "contains": "data", + "values": { + "count": 46760991, + "rate": 155547.14339674 + } + }, + "http_req_failed": { + "type": "rate", + "contains": "default", + "values": { + "rate": 0.007391815849527293, + "passes": 1043, + "fails": 140059 + }, + "thresholds": { + "rate<0.05": { + "ok": true + } + } + }, + "data_sent": { + "type": "counter", + "contains": "data", + "values": { + "count": 2479269792, + "rate": 8247116.35934811 + } + }, + "dropped_iterations": { + "type": "counter", + "contains": "default", + "values": { + "count": 158898, + 
"rate": 528.563006533698 + } + } + } +} \ No newline at end of file diff --git a/docs/benchmarks/evidence/ingest-throughput-2026-06-16-ingest-100rps.json b/docs/benchmarks/evidence/ingest-throughput-2026-06-16-ingest-100rps.json new file mode 100644 index 0000000..dab0417 --- /dev/null +++ b/docs/benchmarks/evidence/ingest-throughput-2026-06-16-ingest-100rps.json @@ -0,0 +1,227 @@ +{ + "state": { + "isStdErrTTY": false, + "testRunDurationMs": 120001.95725, + "isStdOutTTY": false + }, + "metrics": { + "http_req_receiving": { + "type": "trend", + "contains": "time", + "values": { + "p(99)": 0.17892282999999992, + "max": 2.901459, + "avg": 0.05441364616666656, + "min": 0.00625, + "med": 0.048167, + "p(90)": 0.07775, + "p(95)": 0.09734019999999993 + } + }, + "http_req_connecting": { + "values": { + "max": 19.189541, + "avg": 0.010293732749999996, + "min": 0, + "med": 0, + "p(90)": 0, + "p(95)": 0, + "p(99)": 0 + }, + "type": "trend", + "contains": "time" + }, + "http_req_duration{expected_response:true}": { + "values": { + "p(99)": 86.58365791999998, + "max": 275.292708, + "avg": 6.013189061916644, + "min": 2.031, + "med": 3.1737705, + "p(90)": 6.360058400000001, + "p(95)": 10.950743749999999 + }, + "type": "trend", + "contains": "time" + }, + "http_reqs": { + "values": { + "count": 12000, + "rate": 99.99836898493587 + }, + "type": "counter", + "contains": "default" + }, + "http_req_sending": { + "type": "trend", + "contains": "time", + "values": { + "p(90)": 0.078875, + "p(95)": 0.09950209999999998, + "p(99)": 0.26509508999999987, + "max": 13.543542, + "avg": 0.06409193291666697, + "min": 0.025, + "med": 0.051667 + } + }, + "http_req_failed": { + "values": { + "rate": 0, + "passes": 0, + "fails": 12000 + }, + "thresholds": { + "rate<0.05": { + "ok": true + } + }, + "type": "rate", + "contains": "default" + }, + "http_req_tls_handshaking": { + "type": "trend", + "contains": "time", + "values": { + "med": 0, + "p(90)": 0, + "p(95)": 0, + "p(99)": 0, + "max": 0, + "avg": 
0, + "min": 0 + } + }, + "http_req_blocked": { + "type": "trend", + "contains": "time", + "values": { + "max": 19.373583, + "avg": 0.014838529916666416, + "min": 0.001125, + "med": 0.003334, + "p(90)": 0.005542, + "p(95)": 0.008, + "p(99)": 0.05387999999999995 + } + }, + "data_received": { + "type": "counter", + "contains": "data", + "values": { + "rate": 33385.87212918146, + "count": 4006370 + } + }, + "http_req_duration": { + "type": "trend", + "contains": "time", + "values": { + "p(90)": 6.360058400000001, + "p(95)": 10.950743749999999, + "p(99)": 86.58365791999998, + "max": 275.292708, + "avg": 6.013189061916644, + "min": 2.031, + "med": 3.1737705 + } + }, + "data_sent": { + "type": "counter", + "contains": "data", + "values": { + "count": 210349400, + "rate": 1752883.0764133225 + } + }, + "iteration_duration": { + "type": "trend", + "contains": "time", + "values": { + "min": 2.513125, + "med": 3.7941665, + "p(90)": 7.4406625, + "p(95)": 12.20207355, + "p(99)": 87.80837258999995, + "max": 276.071375, + "avg": 6.748980011333332 + } + }, + "vus": { + "type": "gauge", + "contains": "default", + "values": { + "value": 0, + "min": 0, + "max": 22 + } + }, + "http_req_waiting": { + "type": "trend", + "contains": "time", + "values": { + "max": 275.181333, + "avg": 5.8946834828333445, + "min": 1.96875, + "med": 3.0682915, + "p(90)": 6.1930417, + "p(95)": 10.816880949999984, + "p(99)": 86.41467532999997 + } + }, + "checks": { + "type": "rate", + "contains": "default", + "values": { + "rate": 1, + "passes": 12000, + "fails": 0 + } + }, + "iterations": { + "values": { + "count": 12000, + "rate": 99.99836898493587 + }, + "type": "counter", + "contains": "default" + }, + "vus_max": { + "type": "gauge", + "contains": "default", + "values": { + "value": 100, + "min": 100, + "max": 100 + } + } + }, + "root_group": { + "checks": [ + { + "name": "accepted or rate limited", + "path": "::accepted or rate limited", + "id": "f582c15494cc8beb1c5d5e90e17b3cef", + "passes": 12000, + 
"fails": 0 + } + ], + "name": "", + "path": "", + "id": "d41d8cd98f00b204e9800998ecf8427e", + "groups": [] + }, + "options": { + "summaryTrendStats": [ + "avg", + "min", + "med", + "p(90)", + "p(95)", + "p(99)", + "max" + ], + "summaryTimeUnit": "", + "noColor": false + } +} \ No newline at end of file diff --git a/docs/benchmarks/evidence/ingest-throughput-2026-06-16-ingest-500rps.json b/docs/benchmarks/evidence/ingest-throughput-2026-06-16-ingest-500rps.json new file mode 100644 index 0000000..e9cc34b --- /dev/null +++ b/docs/benchmarks/evidence/ingest-throughput-2026-06-16-ingest-500rps.json @@ -0,0 +1,235 @@ +{ + "metrics": { + "http_reqs": { + "type": "counter", + "contains": "default", + "values": { + "count": 58882, + "rate": 490.36287472599633 + } + }, + "http_req_tls_handshaking": { + "values": { + "med": 0, + "p(90)": 0, + "p(95)": 0, + "p(99)": 0, + "max": 0, + "avg": 0, + "min": 0 + }, + "type": "trend", + "contains": "time" + }, + "http_req_blocked": { + "type": "trend", + "contains": "time", + "values": { + "min": 0.000875, + "med": 0.002333, + "p(90)": 0.00525, + "p(95)": 0.007791, + "p(99)": 0.5615395199999998, + "max": 75.217833, + "avg": 0.030981894348019016 + } + }, + "http_req_duration{expected_response:true}": { + "type": "trend", + "contains": "time", + "values": { + "avg": 127.61581967918873, + "min": 1.679084, + "med": 7.7443124999999995, + "p(90)": 332.27897480000007, + "p(95)": 716.9442645999999, + "p(99)": 1491.4699039999998, + "max": 5748.733878 + } + }, + "http_req_connecting": { + "type": "trend", + "contains": "time", + "values": { + "avg": 0.02527826272884754, + "min": 0, + "med": 0, + "p(90)": 0, + "p(95)": 0, + "p(99)": 0.46666147999999935, + "max": 74.93925 + } + }, + "http_req_receiving": { + "contains": "time", + "values": { + "min": 0.004958, + "med": 0.023334, + "p(90)": 0.051917, + "p(95)": 0.0717479499999999, + "p(99)": 0.19128951999999988, + "max": 8.831708, + "avg": 0.03527665441051562 + }, + "type": "trend" + }, + 
"iterations": { + "type": "counter", + "contains": "default", + "values": { + "count": 58882, + "rate": 490.36287472599633 + } + }, + "http_req_waiting": { + "type": "trend", + "contains": "time", + "values": { + "avg": 127.48641867737203, + "min": 1.639583, + "med": 7.647603999999999, + "p(90)": 332.16342470000006, + "p(95)": 716.7073443999997, + "p(99)": 1490.80971031, + "max": 5748.464252 + } + }, + "checks": { + "contains": "default", + "values": { + "rate": 1, + "passes": 58882, + "fails": 0 + }, + "type": "rate" + }, + "vus": { + "type": "gauge", + "contains": "default", + "values": { + "value": 71, + "min": 1, + "max": 600 + } + }, + "http_req_failed": { + "contains": "default", + "values": { + "rate": 0, + "passes": 0, + "fails": 58882 + }, + "thresholds": { + "rate<0.05": { + "ok": true + } + }, + "type": "rate" + }, + "iteration_duration": { + "type": "trend", + "contains": "time", + "values": { + "avg": 128.90433086070428, + "min": 2.065625, + "med": 8.445958000000001, + "p(90)": 333.4823251, + "p(95)": 721.3219604499998, + "p(99)": 1508.1773222699999, + "max": 5758.543461 + } + }, + "dropped_iterations": { + "type": "counter", + "contains": "default", + "values": { + "count": 1119, + "rate": 9.318909969403041 + } + }, + "data_received": { + "type": "counter", + "contains": "data", + "values": { + "count": 19657744, + "rate": 163707.5482909498 + } + }, + "http_req_duration": { + "type": "trend", + "contains": "time", + "values": { + "p(99)": 1491.4699039999998, + "max": 5748.733878, + "avg": 127.61581967918873, + "min": 1.679084, + "med": 7.7443124999999995, + "p(90)": 332.27897480000007, + "p(95)": 716.9442645999999 + } + }, + "http_req_sending": { + "type": "trend", + "contains": "time", + "values": { + "p(99)": 0.6997658299999995, + "max": 264.931084, + "avg": 0.09412434740667762, + "min": 0.016167, + "med": 0.038334, + "p(90)": 0.084083, + "p(95)": 0.15208089999999988 + } + }, + "data_sent": { + "values": { + "count": 1032148859, + "rate": 
8595623.138555026 + }, + "type": "counter", + "contains": "data" + }, + "vus_max": { + "contains": "default", + "values": { + "value": 600, + "min": 200, + "max": 600 + }, + "type": "gauge" + } + }, + "root_group": { + "name": "", + "path": "", + "id": "d41d8cd98f00b204e9800998ecf8427e", + "groups": [], + "checks": [ + { + "path": "::accepted or rate limited", + "id": "f582c15494cc8beb1c5d5e90e17b3cef", + "passes": 58882, + "fails": 0, + "name": "accepted or rate limited" + } + ] + }, + "options": { + "noColor": false, + "summaryTrendStats": [ + "avg", + "min", + "med", + "p(90)", + "p(95)", + "p(99)", + "max" + ], + "summaryTimeUnit": "" + }, + "state": { + "isStdOutTTY": false, + "isStdErrTTY": false, + "testRunDurationMs": 120078.421583 + } +} \ No newline at end of file diff --git a/docs/benchmarks/evidence/ingest-throughput-2026-06-16-post-docs-benchmark.json b/docs/benchmarks/evidence/ingest-throughput-2026-06-16-post-docs-benchmark.json new file mode 100644 index 0000000..5d98642 --- /dev/null +++ b/docs/benchmarks/evidence/ingest-throughput-2026-06-16-post-docs-benchmark.json @@ -0,0 +1,227 @@ +{ + "metrics": { + "iterations": { + "values": { + "count": 26, + "rate": 5.086583885680652 + }, + "type": "counter", + "contains": "default" + }, + "http_req_duration{expected_response:true}": { + "type": "trend", + "contains": "time", + "values": { + "avg": 6.122129846153845, + "min": 3.572125, + "med": 6.1576664999999995, + "p(90)": 7.563479500000001, + "p(95)": 7.997791250000001, + "p(99)": 10.842103999999999, + "max": 11.758 + } + }, + "http_req_receiving": { + "type": "trend", + "contains": "time", + "values": { + "med": 0.0870625, + "p(90)": 0.16254200000000002, + "p(95)": 0.2227497499999999, + "p(99)": 0.427677, + "max": 0.489875, + "avg": 0.11679169230769232, + "min": 0.062708 + } + }, + "http_req_duration": { + "type": "trend", + "contains": "time", + "values": { + "avg": 6.122129846153845, + "min": 3.572125, + "med": 6.1576664999999995, + "p(90)": 
7.563479500000001, + "p(95)": 7.997791250000001, + "p(99)": 10.842103999999999, + "max": 11.758 + } + }, + "http_req_connecting": { + "type": "trend", + "contains": "time", + "values": { + "p(90)": 1.6028750000000003, + "p(95)": 2.0645525, + "p(99)": 2.16817675, + "max": 2.182666, + "avg": 0.33748880769230766, + "min": 0, + "med": 0 + } + }, + "iteration_duration": { + "values": { + "p(95)": 113.026542, + "p(99)": 113.09201, + "max": 113.104791, + "avg": 108.90760584615383, + "min": 104.775709, + "med": 108.4995835, + "p(90)": 112.8746045 + }, + "type": "trend", + "contains": "time" + }, + "http_req_waiting": { + "values": { + "p(90)": 7.3032710000000005, + "p(95)": 7.84676075, + "p(99)": 10.627166749999999, + "max": 11.523125, + "avg": 5.907679538461537, + "min": 3.482334, + "med": 5.9527505000000005 + }, + "type": "trend", + "contains": "time" + }, + "http_req_failed": { + "type": "rate", + "contains": "default", + "values": { + "rate": 0, + "passes": 0, + "fails": 26 + }, + "thresholds": { + "rate<0.05": { + "ok": true + } + } + }, + "http_req_sending": { + "contains": "time", + "values": { + "p(95)": 0.266459, + "p(99)": 0.40352099999999996, + "max": 0.44875, + "avg": 0.09765861538461536, + "min": 0.027083, + "med": 0.0556455, + "p(90)": 0.25485450000000004 + }, + "type": "trend" + }, + "data_sent": { + "type": "counter", + "contains": "data", + "values": { + "count": 51135, + "rate": 10003.941038241544 + } + }, + "vus_max": { + "type": "gauge", + "contains": "default", + "values": { + "value": 5, + "min": 5, + "max": 5 + } + }, + "http_req_blocked": { + "type": "trend", + "contains": "time", + "values": { + "max": 2.395458, + "avg": 0.3933060769230771, + "min": 0.003583, + "med": 0.0104375, + "p(90)": 1.9455835000000001, + "p(95)": 2.20234375, + "p(99)": 2.3569685 + } + }, + "vus": { + "type": "gauge", + "contains": "default", + "values": { + "value": 0, + "min": 0, + "max": 0 + } + }, + "http_reqs": { + "type": "counter", + "contains": "default", + "values": 
{ + "count": 26, + "rate": 5.086583885680652 + } + }, + "http_req_tls_handshaking": { + "type": "trend", + "contains": "time", + "values": { + "max": 0, + "avg": 0, + "min": 0, + "med": 0, + "p(90)": 0, + "p(95)": 0, + "p(99)": 0 + } + }, + "checks": { + "type": "rate", + "contains": "default", + "values": { + "passes": 26, + "fails": 0, + "rate": 1 + } + }, + "data_received": { + "type": "counter", + "contains": "data", + "values": { + "count": 8658, + "rate": 1693.832433931657 + } + } + }, + "root_group": { + "name": "", + "path": "", + "id": "d41d8cd98f00b204e9800998ecf8427e", + "groups": [], + "checks": [ + { + "name": "accepted or rate limited", + "path": "::accepted or rate limited", + "id": "f582c15494cc8beb1c5d5e90e17b3cef", + "passes": 26, + "fails": 0 + } + ] + }, + "options": { + "summaryTimeUnit": "", + "noColor": false, + "summaryTrendStats": [ + "avg", + "min", + "med", + "p(90)", + "p(95)", + "p(99)", + "max" + ] + }, + "state": { + "isStdOutTTY": false, + "isStdErrTTY": false, + "testRunDurationMs": 5111.485544 + } +} \ No newline at end of file diff --git a/docs/benchmarks/evidence/ingest-throughput-2026-06-16-post-docs-test-load.json b/docs/benchmarks/evidence/ingest-throughput-2026-06-16-post-docs-test-load.json new file mode 100644 index 0000000..8ec8144 --- /dev/null +++ b/docs/benchmarks/evidence/ingest-throughput-2026-06-16-post-docs-test-load.json @@ -0,0 +1,227 @@ +{ + "state": { + "isStdOutTTY": false, + "isStdErrTTY": false, + "testRunDurationMs": 5011.866961 + }, + "metrics": { + "http_req_connecting": { + "contains": "time", + "values": { + "p(99)": 35.32613574999999, + "max": 44.834292, + "avg": 2.202153846153846, + "min": 0, + "med": 0, + "p(90)": 2.1553125, + "p(95)": 5.653656499999995 + }, + "type": "trend" + }, + "http_req_failed": { + "thresholds": { + "rate<0.05": { + "ok": true + } + }, + "type": "rate", + "contains": "default", + "values": { + "rate": 0, + "passes": 0, + "fails": 26 + } + }, + "http_req_receiving": { + "type": 
"trend", + "contains": "time", + "values": { + "avg": 0.20220038461538456, + "min": 0.059042, + "med": 0.116146, + "p(90)": 0.3056460000000001, + "p(95)": 0.5319167499999997, + "p(99)": 1.3687192499999998, + "max": 1.631084 + } + }, + "http_reqs": { + "type": "counter", + "contains": "default", + "values": { + "count": 26, + "rate": 5.187687582755053 + } + }, + "http_req_sending": { + "type": "trend", + "contains": "time", + "values": { + "p(99)": 0.80897875, + "max": 0.817458, + "avg": 0.12453038461538465, + "min": 0.012958, + "med": 0.051416500000000004, + "p(90)": 0.171292, + "p(95)": 0.6311974999999994 + } + }, + "iterations": { + "type": "counter", + "contains": "default", + "values": { + "count": 26, + "rate": 5.187687582755053 + } + }, + "checks": { + "type": "rate", + "contains": "default", + "values": { + "passes": 26, + "fails": 0, + "rate": 1 + } + }, + "vus": { + "type": "gauge", + "contains": "default", + "values": { + "min": 0, + "max": 0, + "value": 0 + } + }, + "vus_max": { + "type": "gauge", + "contains": "default", + "values": { + "value": 5, + "min": 5, + "max": 5 + } + }, + "http_req_waiting": { + "type": "trend", + "contains": "time", + "values": { + "min": 6.723917, + "med": 11.547604, + "p(90)": 41.111042000000005, + "p(95)": 76.30976074999995, + "p(99)": 182.22751024999997, + "max": 214.147083, + "avg": 24.905038576923076 + } + }, + "data_received": { + "contains": "data", + "values": { + "rate": 1727.4999650574325, + "count": 8658 + }, + "type": "counter" + }, + "http_req_duration": { + "type": "trend", + "contains": "time", + "values": { + "avg": 25.231769346153843, + "min": 6.89175, + "med": 11.721584, + "p(90)": 41.46833350000001, + "p(95)": 77.27474999999995, + "p(99)": 184.35549999999998, + "max": 216.595625 + } + }, + "http_req_blocked": { + "type": "trend", + "contains": "time", + "values": { + "p(95)": 5.850677249999995, + "p(99)": 40.80979174999999, + "max": 52.0695, + "avg": 2.511048115384616, + "min": 0.003459, + "med": 0.008854, 
+ "p(90)": 2.269854 + } + }, + "data_sent": { + "type": "counter", + "contains": "data", + "values": { + "count": 51135, + "rate": 10202.784790160755 + } + }, + "iteration_duration": { + "values": { + "max": 296.155875, + "avg": 29.766737076923075, + "min": 7.472, + "med": 13.8296455, + "p(90)": 45.26102050000001, + "p(95)": 84.30818749999995, + "p(99)": 246.14434374999993 + }, + "type": "trend", + "contains": "time" + }, + "http_req_duration{expected_response:true}": { + "type": "trend", + "contains": "time", + "values": { + "p(95)": 77.27474999999995, + "p(99)": 184.35549999999998, + "max": 216.595625, + "avg": 25.231769346153843, + "min": 6.89175, + "med": 11.721584, + "p(90)": 41.46833350000001 + } + }, + "http_req_tls_handshaking": { + "type": "trend", + "contains": "time", + "values": { + "max": 0, + "avg": 0, + "min": 0, + "med": 0, + "p(90)": 0, + "p(95)": 0, + "p(99)": 0 + } + } + }, + "root_group": { + "name": "", + "path": "", + "id": "d41d8cd98f00b204e9800998ecf8427e", + "groups": [], + "checks": [ + { + "name": "accepted or rate limited", + "path": "::accepted or rate limited", + "id": "f582c15494cc8beb1c5d5e90e17b3cef", + "passes": 26, + "fails": 0 + } + ] + }, + "options": { + "summaryTrendStats": [ + "avg", + "min", + "med", + "p(90)", + "p(95)", + "p(99)", + "max" + ], + "summaryTimeUnit": "", + "noColor": false + } +} \ No newline at end of file diff --git a/docs/benchmarks/evidence/ingest-throughput-2026-06-16-smoke2.json b/docs/benchmarks/evidence/ingest-throughput-2026-06-16-smoke2.json new file mode 100644 index 0000000..780db87 --- /dev/null +++ b/docs/benchmarks/evidence/ingest-throughput-2026-06-16-smoke2.json @@ -0,0 +1,227 @@ +{ + "root_group": { + "path": "", + "id": "d41d8cd98f00b204e9800998ecf8427e", + "groups": [], + "checks": [ + { + "name": "accepted or rate limited", + "path": "::accepted or rate limited", + "id": "f582c15494cc8beb1c5d5e90e17b3cef", + "passes": 101, + "fails": 0 + } + ], + "name": "" + }, + "options": { + 
"summaryTrendStats": [ + "avg", + "min", + "med", + "p(90)", + "p(95)", + "p(99)", + "max" + ], + "summaryTimeUnit": "", + "noColor": false + }, + "state": { + "testRunDurationMs": 10002.842379, + "isStdOutTTY": false, + "isStdErrTTY": false + }, + "metrics": { + "http_req_connecting": { + "contains": "time", + "values": { + "avg": 0.05252227722772276, + "min": 0, + "med": 0, + "p(90)": 0, + "p(95)": 0, + "p(99)": 1.006084, + "max": 1.495958 + }, + "type": "trend" + }, + "vus_max": { + "type": "gauge", + "contains": "default", + "values": { + "value": 5, + "min": 5, + "max": 5 + } + }, + "http_req_waiting": { + "type": "trend", + "contains": "time", + "values": { + "avg": 2.409200871287129, + "min": 1.45975, + "med": 2.061916, + "p(90)": 2.598209, + "p(95)": 3.524167, + "p(99)": 8.150458, + "max": 20.483 + } + }, + "http_req_receiving": { + "type": "trend", + "contains": "time", + "values": { + "avg": 0.057437712871287114, + "min": 0.02775, + "med": 0.051, + "p(90)": 0.074375, + "p(95)": 0.084042, + "p(99)": 0.178458, + "max": 0.29875 + } + }, + "checks": { + "type": "rate", + "contains": "default", + "values": { + "fails": 0, + "rate": 1, + "passes": 101 + } + }, + "http_req_failed": { + "type": "rate", + "contains": "default", + "values": { + "rate": 0, + "passes": 0, + "fails": 101 + }, + "thresholds": { + "rate<0.05": { + "ok": true + } + } + }, + "http_req_duration": { + "contains": "time", + "values": { + "avg": 2.495215742574257, + "min": 1.517625, + "med": 2.154041, + "p(90)": 2.672459, + "p(95)": 3.610917, + "p(99)": 8.429875, + "max": 20.827042 + }, + "type": "trend" + }, + "http_req_tls_handshaking": { + "type": "trend", + "contains": "time", + "values": { + "p(90)": 0, + "p(95)": 0, + "p(99)": 0, + "max": 0, + "avg": 0, + "min": 0, + "med": 0 + } + }, + "http_reqs": { + "contains": "default", + "values": { + "count": 101, + "rate": 10.097130012969085 + }, + "type": "counter" + }, + "http_req_sending": { + "type": "trend", + "contains": "time", + 
"values": { + "avg": 0.028577158415841592, + "min": 0.014917, + "med": 0.022333, + "p(90)": 0.040333, + "p(95)": 0.071833, + "p(99)": 0.086708, + "max": 0.122042 + } + }, + "data_sent": { + "contains": "data", + "values": { + "count": 48910, + "rate": 4889.6101874684955 + }, + "type": "counter" + }, + "iteration_duration": { + "type": "trend", + "contains": "time", + "values": { + "med": 2.369667, + "p(90)": 3.087666, + "p(95)": 4.102459, + "p(99)": 12.852583, + "max": 22.613, + "avg": 2.81800202970297, + "min": 1.681625 + } + }, + "http_req_duration{expected_response:true}": { + "type": "trend", + "contains": "time", + "values": { + "p(90)": 2.672459, + "p(95)": 3.610917, + "p(99)": 8.429875, + "max": 20.827042, + "avg": 2.495215742574257, + "min": 1.517625, + "med": 2.154041 + } + }, + "http_req_blocked": { + "contains": "time", + "values": { + "p(90)": 0.0075, + "p(95)": 0.015, + "p(99)": 1.077708, + "max": 3.563625, + "avg": 0.07919023762376234, + "min": 0.002709, + "med": 0.003542 + }, + "type": "trend" + }, + "data_received": { + "values": { + "count": 33633, + "rate": 3362.3442943187056 + }, + "type": "counter", + "contains": "data" + }, + "vus": { + "type": "gauge", + "contains": "default", + "values": { + "max": 0, + "value": 0, + "min": 0 + } + }, + "iterations": { + "type": "counter", + "contains": "default", + "values": { + "count": 101, + "rate": 10.097130012969085 + } + } + } +} \ No newline at end of file diff --git a/docs/benchmarks/evidence/ingest-throughput-2026-06-16T18-43-11-706Z.json b/docs/benchmarks/evidence/ingest-throughput-2026-06-16T18-43-11-706Z.json new file mode 100644 index 0000000..4d71293 --- /dev/null +++ b/docs/benchmarks/evidence/ingest-throughput-2026-06-16T18-43-11-706Z.json @@ -0,0 +1,217 @@ +{ + "options": { + "noColor": false, + "summaryTrendStats": [ + "avg", + "min", + "med", + "max", + "p(90)", + "p(95)" + ], + "summaryTimeUnit": "" + }, + "state": { + "isStdOutTTY": false, + "isStdErrTTY": false, + "testRunDurationMs": 
120001.653625 + }, + "metrics": { + "http_req_waiting": { + "type": "trend", + "contains": "time", + "values": { + "avg": 1.4541744638780107, + "min": 0.280944, + "med": 1.140833, + "max": 89.058167, + "p(90)": 1.89225, + "p(95)": 2.625583 + } + }, + "vus_max": { + "contains": "default", + "values": { + "value": 50, + "min": 50, + "max": 50 + }, + "type": "gauge" + }, + "checks": { + "type": "rate", + "contains": "default", + "values": { + "rate": 1, + "passes": 12001, + "fails": 0 + } + }, + "http_req_duration{expected_response:true}": { + "type": "trend", + "contains": "time", + "values": { + "p(90)": 6.287437500000001, + "p(95)": 8.404708249999997, + "avg": 4.516907655405404, + "min": 2.435042, + "med": 3.486771, + "max": 32.294416 + } + }, + "data_sent": { + "type": "counter", + "contains": "data", + "values": { + "count": 179020207, + "rate": 1491814.5008187173 + } + }, + "http_req_sending": { + "contains": "time", + "values": { + "avg": 0.08111168652612263, + "min": 0.023709, + "med": 0.063167, + "max": 6.799375, + "p(90)": 0.125333, + "p(95)": 0.168833 + }, + "type": "trend" + }, + "iteration_duration": { + "type": "trend", + "contains": "time", + "values": { + "med": 1.979458, + "max": 93.942583, + "p(90)": 3.381583, + "p(95)": 4.240625, + "avg": 2.412555220148325, + "min": 0.879875 + } + }, + "vus": { + "contains": "default", + "values": { + "value": 0, + "min": 0, + "max": 1 + }, + "type": "gauge" + }, + "http_reqs": { + "type": "counter", + "contains": "default", + "values": { + "rate": 100.00695521665567, + "count": 12001 + } + }, + "iterations": { + "type": "counter", + "contains": "default", + "values": { + "count": 12001, + "rate": 100.00695521665567 + } + }, + "http_req_tls_handshaking": { + "type": "trend", + "contains": "time", + "values": { + "min": 0, + "med": 0, + "max": 0, + "p(90)": 0, + "p(95)": 0, + "avg": 0 + } + }, + "data_received": { + "type": "counter", + "contains": "data", + "values": { + "rate": 41651.45103432736, + "count": 4998243 
+ } + }, + "http_req_failed": { + "type": "rate", + "contains": "default", + "values": { + "rate": 0.9753353887176068, + "passes": 11705, + "fails": 296 + }, + "thresholds": { + "rate<0.05": { + "ok": false + } + } + }, + "http_req_duration": { + "type": "trend", + "contains": "time", + "values": { + "max": 89.88225, + "p(90)": 2.087625, + "p(95)": 2.799375, + "avg": 1.577413750520787, + "min": 0.412153, + "med": 1.252 + } + }, + "http_req_connecting": { + "type": "trend", + "contains": "time", + "values": { + "med": 0, + "max": 2.023125, + "p(90)": 0, + "p(95)": 0, + "avg": 0.003944737271894009, + "min": 0 + } + }, + "http_req_receiving": { + "type": "trend", + "contains": "time", + "values": { + "med": 0.034625, + "max": 4.54525, + "p(90)": 0.062833, + "p(95)": 0.079375, + "avg": 0.0421276001166572, + "min": 0.01 + } + }, + "http_req_blocked": { + "values": { + "avg": 0.008443449295891817, + "min": 0.001, + "med": 0.003166, + "max": 2.065541, + "p(90)": 0.006834, + "p(95)": 0.009167 + }, + "type": "trend", + "contains": "time" + } + }, + "root_group": { + "name": "", + "path": "", + "id": "d41d8cd98f00b204e9800998ecf8427e", + "groups": [], + "checks": [ + { + "name": "accepted or rate limited", + "path": "::accepted or rate limited", + "id": "f582c15494cc8beb1c5d5e90e17b3cef", + "passes": 12001, + "fails": 0 + } + ] + } +} \ No newline at end of file diff --git a/docs/benchmarks/evidence/ingest-throughput-2026-06-16T18-45-48-067Z.json b/docs/benchmarks/evidence/ingest-throughput-2026-06-16T18-45-48-067Z.json new file mode 100644 index 0000000..3dce454 --- /dev/null +++ b/docs/benchmarks/evidence/ingest-throughput-2026-06-16T18-45-48-067Z.json @@ -0,0 +1,225 @@ +{ + "root_group": { + "name": "", + "path": "", + "id": "d41d8cd98f00b204e9800998ecf8427e", + "groups": [], + "checks": [ + { + "name": "accepted or rate limited", + "path": "::accepted or rate limited", + "id": "f582c15494cc8beb1c5d5e90e17b3cef", + "passes": 11909, + "fails": 0 + } + ] + }, + "options": { 
+ "summaryTrendStats": [ + "avg", + "min", + "med", + "max", + "p(90)", + "p(95)" + ], + "summaryTimeUnit": "", + "noColor": false + }, + "state": { + "isStdOutTTY": false, + "isStdErrTTY": false, + "testRunDurationMs": 120004.909292 + }, + "metrics": { + "http_req_sending": { + "type": "trend", + "contains": "time", + "values": { + "avg": 0.08070749533965924, + "min": 0.023458, + "med": 0.047792, + "max": 82.206292, + "p(90)": 0.06775, + "p(95)": 0.08498359999999998 + } + }, + "http_req_blocked": { + "type": "trend", + "contains": "time", + "values": { + "avg": 0.012867810311528945, + "min": 0.001, + "med": 0.003125, + "max": 19.82, + "p(90)": 0.004542, + "p(95)": 0.005792 + } + }, + "http_req_failed": { + "type": "rate", + "contains": "default", + "values": { + "rate": 0, + "passes": 0, + "fails": 11909 + }, + "thresholds": { + "rate<0.05": { + "ok": true + } + } + }, + "data_received": { + "type": "counter", + "contains": "data", + "values": { + "count": 3975892, + "rate": 33131.077915535316 + } + }, + "data_sent": { + "type": "counter", + "contains": "data", + "values": { + "count": 177647431, + "rate": 1480334.696705968 + } + }, + "http_reqs": { + "type": "counter", + "contains": "default", + "values": { + "count": 11909, + "rate": 99.23760678009113 + } + }, + "http_req_connecting": { + "type": "trend", + "contains": "time", + "values": { + "med": 0, + "max": 19.750166, + "p(90)": 0, + "p(95)": 0, + "avg": 0.008609493240406415, + "min": 0 + } + }, + "http_req_duration": { + "contains": "time", + "values": { + "avg": 8.640312583172394, + "min": 1.81525, + "med": 2.940084, + "max": 1204.423625, + "p(90)": 4.430333200000001, + "p(95)": 7.643032799999988 + }, + "type": "trend" + }, + "http_req_duration{expected_response:true}": { + "type": "trend", + "contains": "time", + "values": { + "max": 1204.423625, + "p(90)": 4.430333200000001, + "p(95)": 7.643032799999988, + "avg": 8.640312583172394, + "min": 1.81525, + "med": 2.940084 + } + }, + 
"http_req_tls_handshaking": { + "type": "trend", + "contains": "time", + "values": { + "max": 0, + "p(90)": 0, + "p(95)": 0, + "avg": 0, + "min": 0, + "med": 0 + } + }, + "vus": { + "type": "gauge", + "contains": "default", + "values": { + "max": 50, + "value": 0, + "min": 0 + } + }, + "checks": { + "type": "rate", + "contains": "default", + "values": { + "rate": 1, + "passes": 11909, + "fails": 0 + } + }, + "iterations": { + "type": "counter", + "contains": "default", + "values": { + "count": 11909, + "rate": 99.23760678009113 + } + }, + "iteration_duration": { + "type": "trend", + "contains": "time", + "values": { + "med": 3.485458, + "max": 1400.422959, + "p(90)": 5.258917, + "p(95)": 8.73231619999998, + "avg": 11.063941509110787, + "min": 2.223958 + } + }, + "http_req_receiving": { + "values": { + "p(90)": 0.06867520000000002, + "p(95)": 0.08435859999999998, + "avg": 0.04846311386346516, + "min": 0.00425, + "med": 0.043542, + "max": 2.739916 + }, + "type": "trend", + "contains": "time" + }, + "vus_max": { + "type": "gauge", + "contains": "default", + "values": { + "min": 50, + "max": 60, + "value": 60 + } + }, + "dropped_iterations": { + "contains": "default", + "values": { + "count": 92, + "rate": 0.7666353030286661 + }, + "type": "counter" + }, + "http_req_waiting": { + "contains": "time", + "values": { + "p(90)": 4.288283600000001, + "p(95)": 7.426033399999994, + "avg": 8.511141973969238, + "min": 1.7675, + "med": 2.842708, + "max": 1198.954334 + }, + "type": "trend" + } + } +} \ No newline at end of file diff --git a/docs/benchmarks/evidence/kafka-lag-2026-06-16-smoke2.txt b/docs/benchmarks/evidence/kafka-lag-2026-06-16-smoke2.txt new file mode 100644 index 0000000..c7597a1 --- /dev/null +++ b/docs/benchmarks/evidence/kafka-lag-2026-06-16-smoke2.txt @@ -0,0 +1,9 @@ +Command: +docker exec pulseops-kafka /opt/kafka/bin/kafka-consumer-groups.sh --bootstrap-server localhost:9092 --describe --group pulseops-aggregators + +Captured after the reduced 
2026-06-16-smoke2 benchmark run. + +GROUP TOPIC PARTITION CURRENT-OFFSET LOG-END-OFFSET LAG CONSUMER-ID HOST CLIENT-ID +pulseops-aggregators events-raw 0 170949 203273 32324 pulseops-worker-b6cce9c6-fd6f-47e2-afcd-fde86002447c /172.23.0.7 pulseops-worker +pulseops-aggregators events-raw 1 171238 204119 32881 pulseops-worker-b6cce9c6-fd6f-47e2-afcd-fde86002447c /172.23.0.7 pulseops-worker +pulseops-aggregators events-raw 2 170921 203231 32310 pulseops-worker-b6cce9c6-fd6f-47e2-afcd-fde86002447c /172.23.0.7 pulseops-worker diff --git a/docs/benchmarks/evidence/latest-run-id.txt b/docs/benchmarks/evidence/latest-run-id.txt new file mode 100644 index 0000000..78b90fa --- /dev/null +++ b/docs/benchmarks/evidence/latest-run-id.txt @@ -0,0 +1 @@ +2026-06-16-clean-full-benchmark diff --git a/docs/benchmarks/evidence/pre-reset-state-2026-06-16-final-validation.json b/docs/benchmarks/evidence/pre-reset-state-2026-06-16-final-validation.json new file mode 100644 index 0000000..d1d0a1a --- /dev/null +++ b/docs/benchmarks/evidence/pre-reset-state-2026-06-16-final-validation.json @@ -0,0 +1,217 @@ +{ + "captured_at_utc": "2026-06-16T20:43:59.846Z", + "purpose": "Pre-reset snapshot before docker compose down -v. 
Repo files remain on disk; Docker volumes will be removed after this snapshot.", + "git": { + "commit": "112ba0414fa55bb431e814441e85ebd925107347", + "status_short": "M .env.example\n M README.md\n M docker-compose.yml\n M migrations/006_performance_optimizations.sql\n M package.json\n M pnpm-lock.yaml\n M scripts/generate-api-key-hash.js\n M scripts/health-check.sh\n M scripts/init-db.sql\n M scripts/seed-demo-data.sql\n M scripts/test-hash.js\n M services/graphql-api/package.json\n M services/graphql-api/src/index.ts\n M services/graphql-api/src/resolvers.ts\n M services/graphql-api/tsconfig.json\n M services/ingest-api/package.json\n M services/ingest-api/src/index.ts\n M services/ingest-api/src/metrics.ts\n M services/ingest-api/src/middleware/auth.ts\n M services/ingest-api/tests/integration/ingest.test.ts\n M services/ingest-api/tsconfig.json\n M services/worker/Dockerfile\n M services/worker/package.json\n M services/worker/src/aggregators/daily.ts\n M services/worker/src/index.ts\n M services/worker/tests/integration/aggregates.test.ts\n M services/worker/tsconfig.json\n M web/src/lib/graphql.ts\n?? docs/\n?? migrations/007_article_evidence_schema.sql\n?? scripts/bench/\n?? scripts/benchmark-report.js\n?? scripts/capture-query-plans.sh\n?? scripts/generate-skewed-events.ts\n?? scripts/measure-dashboard-cache.ts\n?? scripts/measure-hot-tenant-db.ts\n?? scripts/measure-worker-catchup.ts\n?? scripts/prove-worker-retry-offsets.ts\n?? scripts/run-benchmark.js\n?? scripts/run-k6.js\n?? scripts/seed-benchmark-tenants.ts\n?? scripts/verify-fresh-migration.sh\n?? services/graphql-api/src/auth.ts\n?? services/graphql-api/src/metrics.ts\n?? services/graphql-api/tests/\n?? services/ingest-api/scripts/\n?? services/ingest-api/src/events.ts\n?? services/ingest-api/tests/unit/auth.test.ts\n?? services/ingest-api/tests/unit/events.test.ts\n?? services/worker/src/metrics.ts\n?? services/worker/src/processing.ts\n?? services/worker/tests/unit/\n?? tests/integration/\n?? 
tests/load/\n?? web/src/vite-env.d.ts" + }, + "services": { + "compose_ps": "NAME IMAGE COMMAND SERVICE CREATED STATUS PORTS\npulseops-graphql-api pulseops-graphql-api \"docker-entrypoint.s…\" graphql-api About an hour ago Up About an hour (healthy) 0.0.0.0:3002->3002/tcp, [::]:3002->3002/tcp\npulseops-ingest-api pulseops-ingest-api \"docker-entrypoint.s…\" ingest-api About an hour ago Up About an hour (healthy) 0.0.0.0:3001->3001/tcp, [::]:3001->3001/tcp\npulseops-kafka apache/kafka:latest \"/__cacert_entrypoin…\" kafka 2 hours ago Up 2 hours (healthy) 0.0.0.0:9092->9092/tcp, [::]:9092->9092/tcp\npulseops-postgres postgres:16-alpine \"docker-entrypoint.s…\" postgres 2 hours ago Up 2 hours (healthy) 0.0.0.0:5432->5432/tcp, [::]:5432->5432/tcp\npulseops-redis redis:7-alpine \"docker-entrypoint.s…\" redis 2 hours ago Up 2 hours (healthy) 0.0.0.0:6379->6379/tcp, [::]:6379->6379/tcp\npulseops-worker pulseops-worker \"docker-entrypoint.s…\" worker About an hour ago Up 47 minutes (healthy) 0.0.0.0:3003->3003/tcp, [::]:3003->3003/tcp", + "health": "> pulseops@1.0.0 health /Users/dev/Documents/github_improver/repos/pulseops\n> ./scripts/health-check.sh\n\nStarting PulseOps health check...\nPostgreSQL: ✓ Connected\nRedis: ✓ Connected\nKafka: ✓ Running\nIngest API: ✓ Healthy (http://localhost:3001)\nGraphQL API: ✓ Healthy (http://localhost:3002)\nWorker: ✓ Running\n\nHealth check complete!", + "docker_stats": "CONTAINER ID NAME CPU % MEM USAGE / LIMIT MEM % NET I/O BLOCK I/O PIDS\n877172b9f832 pulseops-worker 42.82% 161.9MiB / 7.653GiB 2.07% 1.26GB / 2.44GB 1.37MB / 0B 23\n7c17621fe4dc pulseops-postgres 50.58% 1.245GiB / 7.653GiB 16.27% 3.9GB / 1.19GB 2.55GB / 64.5GB 7\n6cad8cc35f62 pulseops-kafka 9.46% 1.035GiB / 7.653GiB 13.52% 8.33GB / 1.13GB 411MB / 8.32GB 123\n19815e532e02 pulseops-ingest-api 0.79% 107.4MiB / 7.653GiB 1.37% 4.12GB / 7.86GB 13.6MB / 0B 23\ne034973aa1bb pulseops-graphql-api 0.17% 132MiB / 7.653GiB 1.68% 145MB / 124MB 8.43MB / 0B 24\nb84b6535139b 
pulseops-redis 2.34% 8.219MiB / 7.653GiB 0.10% 413MB / 199MB 12.3MB / 2.3MB 6" + }, + "kafka": { + "lag": { + "raw": "GROUP TOPIC PARTITION CURRENT-OFFSET LOG-END-OFFSET LAG CONSUMER-ID HOST CLIENT-ID\npulseops-aggregators events-raw 0 457707 3947241 3489534 pulseops-worker-fd3c0057-b4a2-4014-b29f-7f6a8d0085c5 /172.23.0.7 pulseops-worker\npulseops-aggregators events-raw 1 458730 3939757 3481027 pulseops-worker-fd3c0057-b4a2-4014-b29f-7f6a8d0085c5 /172.23.0.7 pulseops-worker\npulseops-aggregators events-raw 2 458536 3961071 3502535 pulseops-worker-fd3c0057-b4a2-4014-b29f-7f6a8d0085c5 /172.23.0.7 pulseops-worker", + "partitions": [ + { + "group": "pulseops-aggregators", + "topic": "events-raw", + "partition": 0, + "current_offset": 457707, + "log_end_offset": 3947241, + "lag": 3489534 + }, + { + "group": "pulseops-aggregators", + "topic": "events-raw", + "partition": 1, + "current_offset": 458730, + "log_end_offset": 3939757, + "lag": 3481027 + }, + { + "group": "pulseops-aggregators", + "topic": "events-raw", + "partition": 2, + "current_offset": 458536, + "log_end_offset": 3961071, + "lag": 3502535 + } + ], + "total_lag": 10473096 + } + }, + "postgres": { + "now": "2026-06-16 20:44:05.061099+00", + "total_events": "1375306", + "daily_aggregates": "1214", + "daily_metrics": "1214", + "event_partitions": "7", + "run_id_counts": 
"2026-06-16-ingest-100rps|600000\n2026-06-16-ingest-500rps|150998\n2026-06-16T18-43-48-051Z|106050\n2026-06-16T18-43-48-047Z|94400\n2026-06-16T18-43-48-050Z|82650\n2026-06-16T18-43-48-052Z|82600\n2026-06-16T18-43-48-048Z|70850\n2026-06-16T18-43-48-053Z|70800\n2026-06-16T18-43-48-056Z|35300\n2026-06-16T18-43-48-054Z|23550\n2026-06-16T18-43-48-055Z|11750\n2026-06-16T18-43-48-057Z|11750\n2026-06-16-final-benchmark-smoke|7449\n2026-06-16-final-test-load|4010\n2026-06-16T18-41-11-690Z|3900\n2026-06-16T18-41-11-687Z|2700\n2026-06-16T18-41-11-692Z|2400\n2026-06-16T18-41-11-688Z|1500\n2026-06-16T18-41-11-693Z|1450\n2026-06-16-worker-catchup-smoke|1000\n2026-06-16T18-41-11-691Z|900\n2026-06-16T18-41-11-695Z|750\n2026-06-16T18-41-11-689Z|600\n2026-06-16T18-45-41-209Z|600\n2026-06-16T18-45-41-171Z|600\n2026-06-16T18-45-41-119Z|600\n2026-06-16T18-45-41-191Z|600\n2026-06-16T18-45-41-229Z|600\n2026-06-16T18-45-41-261Z|550\n2026-06-16T18-45-41-287Z|550\n2026-06-16T18-45-41-310Z|550\n2026-06-16T18-45-41-438Z|550\n2026-06-16T18-45-41-458Z|550\n2026-06-16-smoke2|350\n2026-06-16-post-docs-benchmark|345\n2026-06-16-real-tenants-smoke|325\n2026-06-16-hot-db-smoke|324\n2026-06-16T18-41-11-694Z|300\n2026-06-16T18-41-11-683Z|300\n2026-06-16-post-docs-test-load|130\n2026-06-16-harness-smoke3|63\n2026-06-16-harness-smoke|53\n2026-06-16-harness-smoke2|52\n2026-06-16T19-16-43-023Z|10\n2026-06-16-worker-metrics-smoke|5" + }, + "validation_results": { + "passed_commands": [ + "pnpm test", + "pnpm test:integration", + "pnpm test:load (RUN_ID=2026-06-16-final-validation-test-load)", + "pnpm db:verify:fresh", + "pnpm typecheck", + "pnpm lint (warnings only)", + "pnpm health", + "pnpm benchmark:dashboard (RUN_ID=2026-06-16-final-validation-benchmark)", + "pnpm benchmark:cache (RUN_ID=2026-06-16-final-validation-benchmark)" + ], + "partial_or_failed_commands": [ + { + "command": "RUN_ID=2026-06-16-final-validation-benchmark pnpm benchmark", + "result": "partial/fail", + "reason": "benchmark:hot-db 
timed out waiting for Kafka lag to reach zero" + } + ] + }, + "k6_summaries": { + "final_validation_benchmark": { + "ingest": { + "file": "docs/benchmarks/evidence/ingest-throughput-2026-06-16-final-validation-benchmark.json", + "requests": 1501, + "request_rate": 100.0285712921015, + "failed_rate": 0, + "checks_rate": 1, + "dropped_iterations": 0, + "p50_ms": 2.757584, + "p95_ms": 4.502375, + "p99_ms": 6.747833 + }, + "hot_tenant": { + "file": "docs/benchmarks/evidence/hot-tenant-2026-06-16-final-validation-benchmark.json", + "requests": 175, + "request_rate": 8.748286668614424, + "failed_rate": 0, + "checks_rate": 1, + "dropped_iterations": 0, + "p50_ms": 1.818792, + "p95_ms": 3.6478167999999993, + "p99_ms": 7.89493624 + }, + "dashboard": { + "file": "docs/benchmarks/evidence/dashboard-query-2026-06-16-final-validation-benchmark.json", + "requests": 27464, + "request_rate": 1830.6128430612644, + "failed_rate": 0, + "checks_rate": 0.9993628022138071, + "dropped_iterations": 0, + "p50_ms": 4.5048335, + "p95_ms": 6.668620549999999, + "p99_ms": 12.172302079999996 + } + }, + "final_validation_test_load": { + "ingest": { + "file": "docs/benchmarks/evidence/ingest-throughput-2026-06-16-final-validation-test-load.json", + "requests": 12001, + "request_rate": 100.0027410967156, + "failed_rate": 0, + "checks_rate": 1, + "dropped_iterations": 0, + "p50_ms": 3.34975, + "p95_ms": 11.636583, + "p99_ms": 67.326625 + } + }, + "ingest_scale": { + "ingest_100rps": { + "file": "docs/benchmarks/evidence/ingest-throughput-2026-06-16-ingest-100rps.json", + "requests": 12000, + "request_rate": 99.99836898493587, + "failed_rate": 0, + "checks_rate": 1, + "dropped_iterations": 0, + "p50_ms": 3.1737705, + "p95_ms": 10.950743749999999, + "p99_ms": 86.58365791999998 + }, + "ingest_500rps": { + "file": "docs/benchmarks/evidence/ingest-throughput-2026-06-16-ingest-500rps.json", + "requests": 58882, + "request_rate": 490.36287472599633, + "failed_rate": 0, + "checks_rate": 1, + 
"dropped_iterations": 1119, + "p50_ms": 7.7443124999999995, + "p95_ms": 716.9442645999999, + "p99_ms": 1491.4699039999998 + }, + "ingest_1000rps": { + "file": "docs/benchmarks/evidence/ingest-throughput-2026-06-16-ingest-1000rps.json", + "requests": 141102, + "request_rate": 469.3658658253587, + "failed_rate": 0.007391815849527293, + "checks_rate": 0.9926081841504727, + "dropped_iterations": 158898, + "p50_ms": 2684.787085, + "p95_ms": 4511.5618967, + "p99_ms": 15000.34296126 + } + } + }, + "benchmark_reports": [ + "backpressure-2026-06-16-final-benchmark-smoke.json", + "backpressure-2026-06-16-harness-smoke.json", + "backpressure-2026-06-16-harness-smoke2.json", + "backpressure-2026-06-16-harness-smoke3.json", + "backpressure-2026-06-16-post-docs-benchmark.json", + "backpressure-2026-06-16-smoke2.json", + "dashboard-cache-2026-06-16-cache-smoke.json", + "dashboard-cache-2026-06-16-final-benchmark-smoke.json", + "dashboard-cache-2026-06-16-final-validation-benchmark.json", + "dashboard-cache-2026-06-16-harness-smoke2.json", + "dashboard-cache-2026-06-16-harness-smoke3.json", + "dashboard-cache-2026-06-16-post-docs-benchmark.json", + "dashboard-cache-2026-06-16T19-16-42-486Z.json", + "dashboard-query-2026-06-16-final-benchmark-smoke.json", + "dashboard-query-2026-06-16-final-validation-benchmark.json", + "dashboard-query-2026-06-16-harness-smoke.json", + "dashboard-query-2026-06-16-harness-smoke2.json", + "dashboard-query-2026-06-16-harness-smoke3.json", + "dashboard-query-2026-06-16-post-docs-benchmark.json", + "dashboard-query-2026-06-16-smoke2.json", + "hot-tenant-2026-06-16-final-benchmark-smoke.json", + "hot-tenant-2026-06-16-final-validation-benchmark.json", + "hot-tenant-2026-06-16-harness-smoke.json", + "hot-tenant-2026-06-16-harness-smoke2.json", + "hot-tenant-2026-06-16-harness-smoke3.json", + "hot-tenant-2026-06-16-hot-db-smoke.json", + "hot-tenant-2026-06-16-post-docs-benchmark.json", + "hot-tenant-2026-06-16-real-tenants-smoke.json", + 
"hot-tenant-2026-06-16-smoke2.json", + "hot-tenant-db-2026-06-16-final-benchmark-smoke.json", + "hot-tenant-db-2026-06-16-hot-db-smoke.json", + "hot-tenant-db-2026-06-16-post-docs-benchmark.json", + "ingest-scale-snapshot-2026-06-16.json", + "ingest-throughput-2026-06-16-final-benchmark-smoke.json", + "ingest-throughput-2026-06-16-final-test-load.json", + "ingest-throughput-2026-06-16-final-validation-benchmark.json", + "ingest-throughput-2026-06-16-final-validation-test-load.json", + "ingest-throughput-2026-06-16-harness-smoke.json", + "ingest-throughput-2026-06-16-harness-smoke2.json", + "ingest-throughput-2026-06-16-harness-smoke3.json", + "ingest-throughput-2026-06-16-ingest-1000rps.json", + "ingest-throughput-2026-06-16-ingest-100rps.json", + "ingest-throughput-2026-06-16-ingest-500rps.json", + "ingest-throughput-2026-06-16-post-docs-benchmark.json", + "ingest-throughput-2026-06-16-post-docs-test-load.json", + "ingest-throughput-2026-06-16-smoke2.json", + "ingest-throughput-2026-06-16T18-43-11-706Z.json", + "ingest-throughput-2026-06-16T18-45-48-067Z.json", + "worker-catchup-2026-06-16-final-benchmark-smoke.json", + "worker-catchup-2026-06-16-harness-smoke2.json", + "worker-catchup-2026-06-16-harness-smoke3.json", + "worker-catchup-2026-06-16-post-docs-benchmark.json", + "worker-catchup-2026-06-16-worker-catchup-smoke.json", + "worker-catchup-2026-06-16-worker-metrics-smoke.json", + "worker-catchup-2026-06-16T19-16-43-023Z.json", + "worker-retry-offsets-2026-06-16-worker-retry-proof-2.json", + "worker-retry-offsets-2026-06-16-worker-retry-proof-3.json" + ] +} diff --git a/docs/benchmarks/evidence/tenant-skew-2026-06-16-real-tenants-smoke.txt b/docs/benchmarks/evidence/tenant-skew-2026-06-16-real-tenants-smoke.txt new file mode 100644 index 0000000..08507bf --- /dev/null +++ b/docs/benchmarks/evidence/tenant-skew-2026-06-16-real-tenants-smoke.txt @@ -0,0 +1,31 @@ +Commands: + +pnpm benchmark:seed-tenants -- --tenants 100 --hot-tenants 1 --medium-tenants 10 
--manifest tmp/benchmark-tenants.json +RUN_ID=2026-06-16-real-tenants-smoke TENANT_KEYS_FILE=tmp/benchmark-tenants.json HOT_TENANT_RATIO=0.75 START_RATE=5 PEAK_RATE=20 RAMP_DURATION=5s HOLD_DURATION=10s RAMP_DOWN_DURATION=5s PREALLOCATED_VUS=10 MAX_VUS=50 API_URL=http://localhost:3001 pnpm benchmark:hot-tenant + +k6 summary: + +docs/benchmarks/evidence/hot-tenant-2026-06-16-real-tenants-smoke.json + +Authenticated benchmark API keys used in the last 10 minutes: + +hot:1 +medium:10 +quiet:89 +distinct_used_projects:100 + +Persisted event distribution for run_id = 2026-06-16-real-tenants-smoke: + +hot:247 +medium:24 +quiet:54 +distinct_orgs:54 +events_total:325 + +Kafka consumer lag after catch-up: + +GROUP TOPIC PARTITION CURRENT-OFFSET LOG-END-OFFSET LAG CONSUMER-ID HOST CLIENT-ID +pulseops-aggregators events-raw 0 203377 203377 0 pulseops-worker-b6cce9c6-fd6f-47e2-afcd-fde86002447c /172.23.0.7 pulseops-worker +pulseops-aggregators events-raw 1 204226 204226 0 pulseops-worker-b6cce9c6-fd6f-47e2-afcd-fde86002447c /172.23.0.7 pulseops-worker +pulseops-aggregators events-raw 2 203345 203345 0 pulseops-worker-b6cce9c6-fd6f-47e2-afcd-fde86002447c /172.23.0.7 pulseops-worker +TOTAL_LAG 0 diff --git a/docs/benchmarks/evidence/worker-catchup-2026-06-16-clean-full-benchmark.json b/docs/benchmarks/evidence/worker-catchup-2026-06-16-clean-full-benchmark.json new file mode 100644 index 0000000..1bdb2e3 --- /dev/null +++ b/docs/benchmarks/evidence/worker-catchup-2026-06-16-clean-full-benchmark.json @@ -0,0 +1,136 @@ +{ + "run_id": "2026-06-16-clean-full-benchmark", + "captured_at": "2026-06-16T20:46:29.857Z", + "requested_events": 200, + "accepted_events": 200, + "persisted_events": 200, + "batch_size": 20, + "batch_statuses": { + "202": 10 + }, + "acceptance_duration_ms": 106.692, + "catchup_duration_ms": 1860.784, + "accepted_events_per_second": 1874.56, + "persisted_events_per_second_until_caught_up": 107.48, + "kafka_lag_before": { + "captured_at": 
"2026-06-16T20:46:26.841Z", + "partitions": [ + { + "topic": "events-raw", + "partition": 0, + "current_offset": 2816, + "log_end_offset": 2816, + "lag": 0 + }, + { + "topic": "events-raw", + "partition": 1, + "current_offset": 2814, + "log_end_offset": 2814, + "lag": 0 + }, + { + "topic": "events-raw", + "partition": 2, + "current_offset": 2794, + "log_end_offset": 2794, + "lag": 0 + } + ], + "total_lag": 0 + }, + "kafka_lag_after": { + "captured_at": "2026-06-16T20:46:29.855Z", + "partitions": [ + { + "topic": "events-raw", + "partition": 0, + "current_offset": 2880, + "log_end_offset": 2880, + "lag": 0 + }, + { + "topic": "events-raw", + "partition": 1, + "current_offset": 2879, + "log_end_offset": 2879, + "lag": 0 + }, + { + "topic": "events-raw", + "partition": 2, + "current_offset": 2865, + "log_end_offset": 2865, + "lag": 0 + } + ], + "total_lag": 0 + }, + "batches": [ + { + "status": 202, + "accepted": 20, + "duration_ms": 63.057 + }, + { + "status": 202, + "accepted": 20, + "duration_ms": 7.933 + }, + { + "status": 202, + "accepted": 20, + "duration_ms": 6.459 + }, + { + "status": 202, + "accepted": 20, + "duration_ms": 4.249 + }, + { + "status": 202, + "accepted": 20, + "duration_ms": 4.533 + }, + { + "status": 202, + "accepted": 20, + "duration_ms": 4.396 + }, + { + "status": 202, + "accepted": 20, + "duration_ms": 4.872 + }, + { + "status": 202, + "accepted": 20, + "duration_ms": 3.316 + }, + { + "status": 202, + "accepted": 20, + "duration_ms": 3.688 + }, + { + "status": 202, + "accepted": 20, + "duration_ms": 3.239 + } + ], + "samples": [ + { + "elapsed_ms": 1350.02, + "persisted_events": 14, + "kafka_lag": 0, + "active_db_connections": 1 + }, + { + "elapsed_ms": 1860.784, + "persisted_events": 200, + "kafka_lag": 0, + "active_db_connections": 1 + } + ], + "safe_claim_note": "Local worker catch-up smoke measurement from HTTP acceptance through Kafka to persisted PostgreSQL rows." 
+} diff --git a/docs/benchmarks/evidence/worker-catchup-2026-06-16-final-benchmark-smoke.json b/docs/benchmarks/evidence/worker-catchup-2026-06-16-final-benchmark-smoke.json new file mode 100644 index 0000000..e8ec605 --- /dev/null +++ b/docs/benchmarks/evidence/worker-catchup-2026-06-16-final-benchmark-smoke.json @@ -0,0 +1,186 @@ +{ + "run_id": "2026-06-16-final-benchmark-smoke", + "captured_at": "2026-06-16T19:58:39.163Z", + "requested_events": 200, + "accepted_events": 200, + "persisted_events": 200, + "batch_size": 10, + "batch_statuses": { + "202": 20 + }, + "acceptance_duration_ms": 133.827, + "catchup_duration_ms": 2157.11, + "accepted_events_per_second": 1494.46, + "persisted_events_per_second_until_caught_up": 92.72, + "kafka_lag_before": { + "captured_at": "2026-06-16T19:58:35.742Z", + "partitions": [ + { + "topic": "events-raw", + "partition": 0, + "current_offset": 206025, + "log_end_offset": 206025, + "lag": 0 + }, + { + "topic": "events-raw", + "partition": 1, + "current_offset": 206955, + "log_end_offset": 206955, + "lag": 0 + }, + { + "topic": "events-raw", + "partition": 2, + "current_offset": 206077, + "log_end_offset": 206077, + "lag": 0 + } + ], + "total_lag": 0 + }, + "kafka_lag_after": { + "captured_at": "2026-06-16T19:58:39.161Z", + "partitions": [ + { + "topic": "events-raw", + "partition": 0, + "current_offset": 206089, + "log_end_offset": 206089, + "lag": 0 + }, + { + "topic": "events-raw", + "partition": 1, + "current_offset": 207020, + "log_end_offset": 207020, + "lag": 0 + }, + { + "topic": "events-raw", + "partition": 2, + "current_offset": 206148, + "log_end_offset": 206148, + "lag": 0 + } + ], + "total_lag": 0 + }, + "batches": [ + { + "status": 202, + "accepted": 10, + "duration_ms": 67.852 + }, + { + "status": 202, + "accepted": 10, + "duration_ms": 6.125 + }, + { + "status": 202, + "accepted": 10, + "duration_ms": 4.532 + }, + { + "status": 202, + "accepted": 10, + "duration_ms": 6.914 + }, + { + "status": 202, + "accepted": 10, 
+ "duration_ms": 3.041 + }, + { + "status": 202, + "accepted": 10, + "duration_ms": 2.792 + }, + { + "status": 202, + "accepted": 10, + "duration_ms": 2.511 + }, + { + "status": 202, + "accepted": 10, + "duration_ms": 3.014 + }, + { + "status": 202, + "accepted": 10, + "duration_ms": 4.165 + }, + { + "status": 202, + "accepted": 10, + "duration_ms": 4.164 + }, + { + "status": 202, + "accepted": 10, + "duration_ms": 2.532 + }, + { + "status": 202, + "accepted": 10, + "duration_ms": 2.554 + }, + { + "status": 202, + "accepted": 10, + "duration_ms": 3.308 + }, + { + "status": 202, + "accepted": 10, + "duration_ms": 3.06 + }, + { + "status": 202, + "accepted": 10, + "duration_ms": 3.473 + }, + { + "status": 202, + "accepted": 10, + "duration_ms": 1.955 + }, + { + "status": 202, + "accepted": 10, + "duration_ms": 2.892 + }, + { + "status": 202, + "accepted": 10, + "duration_ms": 2.514 + }, + { + "status": 202, + "accepted": 10, + "duration_ms": 2.455 + }, + { + "status": 202, + "accepted": 10, + "duration_ms": 2.798 + } + ], + "samples": [ + { + "elapsed_ms": 1589.308, + "persisted_events": 13, + "kafka_lag": 0, + "active_db_connections": 1 + }, + { + "elapsed_ms": 2157.11, + "persisted_events": 200, + "kafka_lag": 0, + "active_db_connections": 1 + } + ], + "safe_claim_note": "Local worker catch-up smoke measurement from HTTP acceptance through Kafka to persisted PostgreSQL rows." 
+} diff --git a/docs/benchmarks/evidence/worker-catchup-2026-06-16-harness-smoke2.json b/docs/benchmarks/evidence/worker-catchup-2026-06-16-harness-smoke2.json new file mode 100644 index 0000000..36cce45 --- /dev/null +++ b/docs/benchmarks/evidence/worker-catchup-2026-06-16-harness-smoke2.json @@ -0,0 +1,85 @@ +{ + "run_id": "2026-06-16-harness-smoke2", + "captured_at": "2026-06-16T19:17:15.897Z", + "requested_events": 10, + "accepted_events": 10, + "persisted_events": 29, + "batch_size": 10, + "batch_statuses": { + "202": 1 + }, + "acceptance_duration_ms": 61.357, + "catchup_duration_ms": 211.322, + "accepted_events_per_second": 162.98, + "persisted_events_per_second_until_caught_up": 137.23, + "kafka_lag_before": { + "captured_at": "2026-06-16T19:17:14.455Z", + "partitions": [ + { + "topic": "events-raw", + "partition": 0, + "current_offset": 203729, + "log_end_offset": 203729, + "lag": 0 + }, + { + "topic": "events-raw", + "partition": 1, + "current_offset": 204589, + "log_end_offset": 204589, + "lag": 0 + }, + { + "topic": "events-raw", + "partition": 2, + "current_offset": 203726, + "log_end_offset": 203726, + "lag": 0 + } + ], + "total_lag": 0 + }, + "kafka_lag_after": { + "captured_at": "2026-06-16T19:17:15.892Z", + "partitions": [ + { + "topic": "events-raw", + "partition": 0, + "current_offset": 203734, + "log_end_offset": 203734, + "lag": 0 + }, + { + "topic": "events-raw", + "partition": 1, + "current_offset": 204591, + "log_end_offset": 204591, + "lag": 0 + }, + { + "topic": "events-raw", + "partition": 2, + "current_offset": 203729, + "log_end_offset": 203729, + "lag": 0 + } + ], + "total_lag": 0 + }, + "batches": [ + { + "status": 202, + "accepted": 10, + "duration_ms": 60.82 + } + ], + "samples": [ + { + "elapsed_ms": 211.322, + "persisted_events": 29, + "kafka_lag": 0, + "active_db_connections": 1 + } + ], + "safe_claim_note": "Local worker catch-up smoke measurement from HTTP acceptance through Kafka to persisted PostgreSQL rows." 
+} diff --git a/docs/benchmarks/evidence/worker-catchup-2026-06-16-harness-smoke3.json b/docs/benchmarks/evidence/worker-catchup-2026-06-16-harness-smoke3.json new file mode 100644 index 0000000..0ec2139 --- /dev/null +++ b/docs/benchmarks/evidence/worker-catchup-2026-06-16-harness-smoke3.json @@ -0,0 +1,91 @@ +{ + "run_id": "2026-06-16-harness-smoke3", + "captured_at": "2026-06-16T19:17:44.567Z", + "requested_events": 10, + "accepted_events": 10, + "persisted_events": 10, + "batch_size": 10, + "batch_statuses": { + "202": 1 + }, + "acceptance_duration_ms": 53.877, + "catchup_duration_ms": 1896.242, + "accepted_events_per_second": 185.61, + "persisted_events_per_second_until_caught_up": 5.27, + "kafka_lag_before": { + "captured_at": "2026-06-16T19:17:41.558Z", + "partitions": [ + { + "topic": "events-raw", + "partition": 0, + "current_offset": 203747, + "log_end_offset": 203747, + "lag": 0 + }, + { + "topic": "events-raw", + "partition": 1, + "current_offset": 204608, + "log_end_offset": 204608, + "lag": 0 + }, + { + "topic": "events-raw", + "partition": 2, + "current_offset": 203742, + "log_end_offset": 203742, + "lag": 0 + } + ], + "total_lag": 0 + }, + "kafka_lag_after": { + "captured_at": "2026-06-16T19:17:44.566Z", + "partitions": [ + { + "topic": "events-raw", + "partition": 0, + "current_offset": 203752, + "log_end_offset": 203752, + "lag": 0 + }, + { + "topic": "events-raw", + "partition": 1, + "current_offset": 204610, + "log_end_offset": 204610, + "lag": 0 + }, + { + "topic": "events-raw", + "partition": 2, + "current_offset": 203745, + "log_end_offset": 203745, + "lag": 0 + } + ], + "total_lag": 0 + }, + "batches": [ + { + "status": 202, + "accepted": 10, + "duration_ms": 53.429 + } + ], + "samples": [ + { + "elapsed_ms": 1289.286, + "persisted_events": 7, + "kafka_lag": 0, + "active_db_connections": 1 + }, + { + "elapsed_ms": 1896.242, + "persisted_events": 10, + "kafka_lag": 0, + "active_db_connections": 1 + } + ], + "safe_claim_note": "Local worker 
catch-up smoke measurement from HTTP acceptance through Kafka to persisted PostgreSQL rows." +} diff --git a/docs/benchmarks/evidence/worker-catchup-2026-06-16-post-docs-benchmark.json b/docs/benchmarks/evidence/worker-catchup-2026-06-16-post-docs-benchmark.json new file mode 100644 index 0000000..f9306d4 --- /dev/null +++ b/docs/benchmarks/evidence/worker-catchup-2026-06-16-post-docs-benchmark.json @@ -0,0 +1,106 @@ +{ + "run_id": "2026-06-16-post-docs-benchmark", + "captured_at": "2026-06-16T20:14:53.974Z", + "requested_events": 20, + "accepted_events": 20, + "persisted_events": 20, + "batch_size": 5, + "batch_statuses": { + "202": 4 + }, + "acceptance_duration_ms": 72.512, + "catchup_duration_ms": 1862.986, + "accepted_events_per_second": 275.82, + "persisted_events_per_second_until_caught_up": 10.74, + "kafka_lag_before": { + "captured_at": "2026-06-16T20:14:50.975Z", + "partitions": [ + { + "topic": "events-raw", + "partition": 0, + "current_offset": 207752, + "log_end_offset": 207752, + "lag": 0 + }, + { + "topic": "events-raw", + "partition": 1, + "current_offset": 208729, + "log_end_offset": 208729, + "lag": 0 + }, + { + "topic": "events-raw", + "partition": 2, + "current_offset": 207822, + "log_end_offset": 207822, + "lag": 0 + } + ], + "total_lag": 0 + }, + "kafka_lag_after": { + "captured_at": "2026-06-16T20:14:53.972Z", + "partitions": [ + { + "topic": "events-raw", + "partition": 0, + "current_offset": 207764, + "log_end_offset": 207764, + "lag": 0 + }, + { + "topic": "events-raw", + "partition": 1, + "current_offset": 208732, + "log_end_offset": 208732, + "lag": 0 + }, + { + "topic": "events-raw", + "partition": 2, + "current_offset": 207827, + "log_end_offset": 207827, + "lag": 0 + } + ], + "total_lag": 0 + }, + "batches": [ + { + "status": 202, + "accepted": 5, + "duration_ms": 56.388 + }, + { + "status": 202, + "accepted": 5, + "duration_ms": 6.262 + }, + { + "status": 202, + "accepted": 5, + "duration_ms": 4.925 + }, + { + "status": 202, + 
"accepted": 5, + "duration_ms": 4.308 + } + ], + "samples": [ + { + "elapsed_ms": 1291.916, + "persisted_events": 2, + "kafka_lag": 0, + "active_db_connections": 1 + }, + { + "elapsed_ms": 1862.986, + "persisted_events": 20, + "kafka_lag": 0, + "active_db_connections": 1 + } + ], + "safe_claim_note": "Local worker catch-up smoke measurement from HTTP acceptance through Kafka to persisted PostgreSQL rows." +} diff --git a/docs/benchmarks/evidence/worker-catchup-2026-06-16-worker-catchup-smoke.json b/docs/benchmarks/evidence/worker-catchup-2026-06-16-worker-catchup-smoke.json new file mode 100644 index 0000000..91ac316 --- /dev/null +++ b/docs/benchmarks/evidence/worker-catchup-2026-06-16-worker-catchup-smoke.json @@ -0,0 +1,136 @@ +{ + "run_id": "2026-06-16-worker-catchup-smoke", + "captured_at": "2026-06-16T19:10:30.063Z", + "requested_events": 1000, + "accepted_events": 1000, + "persisted_events": 1000, + "batch_size": 100, + "batch_statuses": { + "202": 10 + }, + "acceptance_duration_ms": 149.551, + "catchup_duration_ms": 2297.252, + "accepted_events_per_second": 6686.69, + "persisted_events_per_second_until_caught_up": 435.3, + "kafka_lag_before": { + "captured_at": "2026-06-16T19:10:26.590Z", + "partitions": [ + { + "topic": "events-raw", + "partition": 0, + "current_offset": 203380, + "log_end_offset": 203380, + "lag": 0 + }, + { + "topic": "events-raw", + "partition": 1, + "current_offset": 204230, + "log_end_offset": 204230, + "lag": 0 + }, + { + "topic": "events-raw", + "partition": 2, + "current_offset": 203349, + "log_end_offset": 203349, + "lag": 0 + } + ], + "total_lag": 0 + }, + "kafka_lag_after": { + "captured_at": "2026-06-16T19:10:30.061Z", + "partitions": [ + { + "topic": "events-raw", + "partition": 0, + "current_offset": 203700, + "log_end_offset": 203700, + "lag": 0 + }, + { + "topic": "events-raw", + "partition": 1, + "current_offset": 204558, + "log_end_offset": 204558, + "lag": 0 + }, + { + "topic": "events-raw", + "partition": 2, + 
"current_offset": 203701, + "log_end_offset": 203701, + "lag": 0 + } + ], + "total_lag": 0 + }, + "batches": [ + { + "status": 202, + "accepted": 100, + "duration_ms": 51.672 + }, + { + "status": 202, + "accepted": 100, + "duration_ms": 12.436 + }, + { + "status": 202, + "accepted": 100, + "duration_ms": 11.68 + }, + { + "status": 202, + "accepted": 100, + "duration_ms": 11.872 + }, + { + "status": 202, + "accepted": 100, + "duration_ms": 10.805 + }, + { + "status": 202, + "accepted": 100, + "duration_ms": 9.01 + }, + { + "status": 202, + "accepted": 100, + "duration_ms": 12.815 + }, + { + "status": 202, + "accepted": 100, + "duration_ms": 7.025 + }, + { + "status": 202, + "accepted": 100, + "duration_ms": 9.722 + }, + { + "status": 202, + "accepted": 100, + "duration_ms": 10.748 + } + ], + "samples": [ + { + "elapsed_ms": 1725.634, + "persisted_events": 33, + "kafka_lag": 456, + "active_db_connections": 1 + }, + { + "elapsed_ms": 2297.252, + "persisted_events": 1000, + "kafka_lag": 0, + "active_db_connections": 1 + } + ], + "safe_claim_note": "Local worker catch-up smoke measurement from HTTP acceptance through Kafka to persisted PostgreSQL rows." 
+} diff --git a/docs/benchmarks/evidence/worker-catchup-2026-06-16-worker-metrics-smoke.json b/docs/benchmarks/evidence/worker-catchup-2026-06-16-worker-metrics-smoke.json new file mode 100644 index 0000000..774a620 --- /dev/null +++ b/docs/benchmarks/evidence/worker-catchup-2026-06-16-worker-metrics-smoke.json @@ -0,0 +1,109 @@ +{ + "run_id": "2026-06-16-worker-metrics-smoke", + "captured_at": "2026-06-16T19:35:15.538Z", + "requested_events": 5, + "accepted_events": 5, + "persisted_events": 5, + "batch_size": 5, + "batch_statuses": { + "202": 1 + }, + "acceptance_duration_ms": 53.724, + "catchup_duration_ms": 6953.055, + "accepted_events_per_second": 93.07, + "persisted_events_per_second_until_caught_up": 0.72, + "kafka_lag_before": { + "captured_at": "2026-06-16T19:35:07.343Z", + "partitions": [ + { + "topic": "events-raw", + "partition": 0, + "current_offset": 203773, + "log_end_offset": 203773, + "lag": 0 + }, + { + "topic": "events-raw", + "partition": 1, + "current_offset": 204638, + "log_end_offset": 204638, + "lag": 0 + }, + { + "topic": "events-raw", + "partition": 2, + "current_offset": 203770, + "log_end_offset": 203770, + "lag": 0 + } + ], + "total_lag": 0 + }, + "kafka_lag_after": { + "captured_at": "2026-06-16T19:35:15.535Z", + "partitions": [ + { + "topic": "events-raw", + "partition": 0, + "current_offset": 203776, + "log_end_offset": 203776, + "lag": 0 + }, + { + "topic": "events-raw", + "partition": 1, + "current_offset": 204640, + "log_end_offset": 204640, + "lag": 0 + }, + { + "topic": "events-raw", + "partition": 2, + "current_offset": 203770, + "log_end_offset": 203770, + "lag": 0 + } + ], + "total_lag": 0 + }, + "batches": [ + { + "status": 202, + "accepted": 5, + "duration_ms": 53.26 + } + ], + "samples": [ + { + "elapsed_ms": 2052.517, + "persisted_events": 0, + "kafka_lag": 5, + "active_db_connections": 1 + }, + { + "elapsed_ms": 3627.924, + "persisted_events": 0, + "kafka_lag": 5, + "active_db_connections": 1 + }, + { + "elapsed_ms": 
5096.917, + "persisted_events": 0, + "kafka_lag": 5, + "active_db_connections": 1 + }, + { + "elapsed_ms": 6641.697, + "persisted_events": 0, + "kafka_lag": 5, + "active_db_connections": 1 + }, + { + "elapsed_ms": 6953.055, + "persisted_events": 5, + "kafka_lag": 0, + "active_db_connections": 1 + } + ], + "safe_claim_note": "Local worker catch-up smoke measurement from HTTP acceptance through Kafka to persisted PostgreSQL rows." +} diff --git a/docs/benchmarks/evidence/worker-catchup-2026-06-16T19-16-43-023Z.json b/docs/benchmarks/evidence/worker-catchup-2026-06-16T19-16-43-023Z.json new file mode 100644 index 0000000..46d341d --- /dev/null +++ b/docs/benchmarks/evidence/worker-catchup-2026-06-16T19-16-43-023Z.json @@ -0,0 +1,91 @@ +{ + "run_id": "2026-06-16T19-16-43-023Z", + "captured_at": "2026-06-16T19:16:47.188Z", + "requested_events": 10, + "accepted_events": 10, + "persisted_events": 10, + "batch_size": 10, + "batch_statuses": { + "202": 1 + }, + "acceptance_duration_ms": 63.98, + "catchup_duration_ms": 1835.212, + "accepted_events_per_second": 156.3, + "persisted_events_per_second_until_caught_up": 5.45, + "kafka_lag_before": { + "captured_at": "2026-06-16T19:16:44.184Z", + "partitions": [ + { + "topic": "events-raw", + "partition": 0, + "current_offset": 203711, + "log_end_offset": 203711, + "lag": 0 + }, + { + "topic": "events-raw", + "partition": 1, + "current_offset": 204571, + "log_end_offset": 204571, + "lag": 0 + }, + { + "topic": "events-raw", + "partition": 2, + "current_offset": 203710, + "log_end_offset": 203710, + "lag": 0 + } + ], + "total_lag": 0 + }, + "kafka_lag_after": { + "captured_at": "2026-06-16T19:16:47.187Z", + "partitions": [ + { + "topic": "events-raw", + "partition": 0, + "current_offset": 203716, + "log_end_offset": 203716, + "lag": 0 + }, + { + "topic": "events-raw", + "partition": 1, + "current_offset": 204573, + "log_end_offset": 204573, + "lag": 0 + }, + { + "topic": "events-raw", + "partition": 2, + "current_offset": 203713, + 
"log_end_offset": 203713, + "lag": 0 + } + ], + "total_lag": 0 + }, + "batches": [ + { + "status": 202, + "accepted": 10, + "duration_ms": 63.377 + } + ], + "samples": [ + { + "elapsed_ms": 1274.469, + "persisted_events": 4, + "kafka_lag": 0, + "active_db_connections": 1 + }, + { + "elapsed_ms": 1835.212, + "persisted_events": 10, + "kafka_lag": 0, + "active_db_connections": 1 + } + ], + "safe_claim_note": "Local worker catch-up smoke measurement from HTTP acceptance through Kafka to persisted PostgreSQL rows." +} diff --git a/docs/benchmarks/evidence/worker-metrics-2026-06-16.txt b/docs/benchmarks/evidence/worker-metrics-2026-06-16.txt new file mode 100644 index 0000000..b0dbe81 --- /dev/null +++ b/docs/benchmarks/evidence/worker-metrics-2026-06-16.txt @@ -0,0 +1,7 @@ +worker_events_processed_total{topic="events-raw",partition="0",result="inserted"} 3 +worker_events_processed_total{topic="events-raw",partition="1",result="inserted"} 2 +worker_event_processing_duration_seconds_count{topic="events-raw",partition="0",result="inserted"} 3 +worker_event_processing_duration_seconds_count{topic="events-raw",partition="1",result="inserted"} 2 +worker_cache_invalidations_total 5 +worker_db_connections_active 0 +worker_db_connections_idle 1 diff --git a/docs/benchmarks/evidence/worker-retry-offsets-2026-06-16-worker-retry-proof-2.json b/docs/benchmarks/evidence/worker-retry-offsets-2026-06-16-worker-retry-proof-2.json new file mode 100644 index 0000000..71aee95 --- /dev/null +++ b/docs/benchmarks/evidence/worker-retry-offsets-2026-06-16-worker-retry-proof-2.json @@ -0,0 +1,27 @@ +{ + "run_id": "2026-06-16-worker-retry-proof-2", + "kafka_group_id": "pulseops-retry-proof-2026-06-16-worker-retry-proof-2", + "warmup_event_id": "8b83ffb4-34a6-4269-af2c-09bab8e98fd3", + "probe_event_id": "78e94b33-861b-4924-9c74-c17ff57dee35", + "crash_exit_code": 86, + "warmup_state": { + "events": 1, + "dedupe_keys": 1, + "event_count_aggregate": 1 + }, + "after_crash_state": { + "events": 1, + 
"dedupe_keys": 1, + "event_count_aggregate": 1 + }, + "after_replay_state": { + "events": 1, + "dedupe_keys": 1, + "event_count_aggregate": 1 + }, + "kafka_lag_after_warmup": 0, + "kafka_lag_after_crash": 1, + "kafka_lag_after_replay": 0, + "output": "docs/benchmarks/evidence/worker-retry-offsets-2026-06-16-worker-retry-proof-2.json", + "proof_note": "HTTP ingest reached Kafka; controlled worker exited after processEvent and before commit; restart replayed the message without double-counting and committed lag to zero." +} diff --git a/docs/benchmarks/evidence/worker-retry-offsets-2026-06-16-worker-retry-proof-3.json b/docs/benchmarks/evidence/worker-retry-offsets-2026-06-16-worker-retry-proof-3.json new file mode 100644 index 0000000..2d79991 --- /dev/null +++ b/docs/benchmarks/evidence/worker-retry-offsets-2026-06-16-worker-retry-proof-3.json @@ -0,0 +1,27 @@ +{ + "run_id": "2026-06-16-worker-retry-proof-3", + "kafka_group_id": "pulseops-retry-proof-2026-06-16-worker-retry-proof-3", + "warmup_event_id": "761e1044-2e47-4b95-987c-ad6ac2486f94", + "probe_event_id": "942f6d4b-4e83-45a1-9f93-18406e40b91b", + "crash_exit_code": 86, + "warmup_state": { + "events": 1, + "dedupe_keys": 1, + "event_count_aggregate": 1 + }, + "after_crash_state": { + "events": 1, + "dedupe_keys": 1, + "event_count_aggregate": 1 + }, + "after_replay_state": { + "events": 1, + "dedupe_keys": 1, + "event_count_aggregate": 1 + }, + "kafka_lag_after_warmup": 0, + "kafka_lag_after_crash": 1, + "kafka_lag_after_replay": 0, + "output": "docs/benchmarks/evidence/worker-retry-offsets-2026-06-16-worker-retry-proof-3.json", + "proof_note": "HTTP ingest reached Kafka; controlled worker exited after processEvent and before commit; restart replayed the message without double-counting and committed lag to zero." 
+} diff --git a/docs/migrations/evidence/fresh-migration-2026-06-16-final-fresh-migration.txt b/docs/migrations/evidence/fresh-migration-2026-06-16-final-fresh-migration.txt new file mode 100644 index 0000000..662c112 --- /dev/null +++ b/docs/migrations/evidence/fresh-migration-2026-06-16-final-fresh-migration.txt @@ -0,0 +1,25 @@ +# Fresh Migration Verification + +run_id: 2026-06-16-final-fresh-migration +postgres_image: postgres:16-alpine +container: pulseops-migration-proof-2026-06-16-final-fresh-migration +host_port_requested: auto + +host_port_actual: 53061 +## Migration Output + +> @pulseops/ingest-api@1.0.0 db:migrate /Users/dev/Documents/github_improver/repos/pulseops/services/ingest-api +> node -r ts-node/register scripts/migrate.ts + +Applying scripts/init-db.sql +Applying migrations/006_performance_optimizations.sql +Applying migrations/007_article_evidence_schema.sql + +## Schema Checks +schema_migrations: 006_performance_optimizations.sql, 007_article_evidence_schema.sql +event_partitions: 7 +mv_dashboard_metrics_exists: t +expected_indexes: idx_events_event_id_time, idx_events_org_project_time, idx_events_tenant_metric_time, idx_mv_dashboard_unique +unwanted_duplicate_objects: none + +PASS: fresh migration produced the expected schema without known duplicate index/constraint drift. 
diff --git a/docs/migrations/evidence/fresh-migration-2026-06-16-fresh-migration.txt b/docs/migrations/evidence/fresh-migration-2026-06-16-fresh-migration.txt new file mode 100644 index 0000000..a81cbc6 --- /dev/null +++ b/docs/migrations/evidence/fresh-migration-2026-06-16-fresh-migration.txt @@ -0,0 +1,24 @@ +# Fresh Migration Verification + +run_id: 2026-06-16-fresh-migration +postgres_image: postgres:16-alpine +container: pulseops-migration-proof-2026-06-16-fresh-migration +host_port: 55955 + +## Migration Output + +> @pulseops/ingest-api@1.0.0 db:migrate /Users/dev/Documents/github_improver/repos/pulseops/services/ingest-api +> node -r ts-node/register scripts/migrate.ts + +Applying scripts/init-db.sql +Applying migrations/006_performance_optimizations.sql +Applying migrations/007_article_evidence_schema.sql + +## Schema Checks +schema_migrations: 006_performance_optimizations.sql, 007_article_evidence_schema.sql +event_partitions: 7 +mv_dashboard_metrics_exists: t +expected_indexes: idx_events_event_id_time, idx_events_org_project_time, idx_events_tenant_metric_time, idx_mv_dashboard_unique +unwanted_duplicate_objects: none + +PASS: fresh migration produced the expected schema without known duplicate index/constraint drift. 
diff --git a/docs/migrations/evidence/fresh-migration-2026-06-16T20-40-59Z.txt b/docs/migrations/evidence/fresh-migration-2026-06-16T20-40-59Z.txt new file mode 100644 index 0000000..7fc96fc --- /dev/null +++ b/docs/migrations/evidence/fresh-migration-2026-06-16T20-40-59Z.txt @@ -0,0 +1,25 @@ +# Fresh Migration Verification + +run_id: 2026-06-16T20-40-59Z +postgres_image: postgres:16-alpine +container: pulseops-migration-proof-2026-06-16T20-40-59Z +host_port_requested: auto + +host_port_actual: 58636 +## Migration Output + +> @pulseops/ingest-api@1.0.0 db:migrate /Users/dev/Documents/github_improver/repos/pulseops/services/ingest-api +> node -r ts-node/register scripts/migrate.ts + +Applying scripts/init-db.sql +Applying migrations/006_performance_optimizations.sql +Applying migrations/007_article_evidence_schema.sql + +## Schema Checks +schema_migrations: 006_performance_optimizations.sql, 007_article_evidence_schema.sql +event_partitions: 7 +mv_dashboard_metrics_exists: t +expected_indexes: idx_events_event_id_time, idx_events_org_project_time, idx_events_tenant_metric_time, idx_mv_dashboard_unique +unwanted_duplicate_objects: none + +PASS: fresh migration produced the expected schema without known duplicate index/constraint drift. diff --git a/docs/migrations/safe-migration-example.md b/docs/migrations/safe-migration-example.md new file mode 100644 index 0000000..7e338b7 --- /dev/null +++ b/docs/migrations/safe-migration-example.md @@ -0,0 +1,113 @@ +# Safe Migration Example + +This document shows how PulseOps documents performance-oriented PostgreSQL migrations without overstating safety. It includes both a reusable concurrent-index example and the current fresh-schema verification command used for article evidence. + +## Current Migration Proof + +PulseOps applies `scripts/init-db.sql` first, then SQL files under `migrations/` through `services/ingest-api/scripts/migrate.ts`. 
The current migration proof command starts a throwaway PostgreSQL 16 container, runs the same migrator against a blank database, and verifies that: + +- `schema_migrations` records `006_performance_optimizations.sql` and `007_article_evidence_schema.sql`. +- event partitions are created for the local benchmark window. +- `mv_dashboard_metrics` exists for query-plan evidence. +- expected dashboard/query-plan indexes exist. +- known duplicate legacy objects do not reappear on a fresh install. + +Run: + +```bash +pnpm db:verify:fresh +``` + +The command writes raw output to: + +```text +docs/migrations/evidence/fresh-migration-<run_id>.txt +``` + +The checked local evidence file is: + +```text +docs/migrations/evidence/fresh-migration-2026-06-16-fresh-migration.txt +``` + +This proves the local fresh-schema path is reproducible for the checked command and PostgreSQL image. It does not prove online production migration safety for an existing large table. + +## Scenario + +Add an index for a common dashboard query pattern: + +```sql +CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_events_org_project_timestamp +ON events(org_id, project_id, timestamp DESC); +``` + +## Why This Is Safer + +- `CREATE INDEX CONCURRENTLY` avoids taking the same blocking write lock as a normal `CREATE INDEX`. +- `IF NOT EXISTS` makes repeated local/dev runs idempotent. +- The index matches the resolver shape that filters by `org_id`, `project_id`, and date range. + +## Remaining Risks + +- Concurrent index builds still consume CPU, memory, disk, and I/O. +- The command cannot run inside a transaction block. +- Failed concurrent index builds can leave invalid indexes that need manual cleanup. +- Large tables may need a maintenance window even when writes are not blocked. +- Partitioned tables may require indexes on partitions depending on the PostgreSQL version and table definition. 
+ +## Preflight Checks + +```sql +SELECT schemaname, relname, n_live_tup +FROM pg_stat_user_tables +WHERE relname = 'events'; + +SELECT pid, state, wait_event_type, wait_event, query +FROM pg_stat_activity +WHERE datname = current_database(); +``` + +## Execution + +Run the index creation as a standalone statement: + +```sql +CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_events_org_project_timestamp +ON events(org_id, project_id, timestamp DESC); +``` + +## Verification + +Check that PostgreSQL can use the index for tenant/project date-range queries: + +```sql +EXPLAIN (ANALYZE, BUFFERS) +SELECT COUNT(*) +FROM events +WHERE org_id = '00000000-0000-0000-0000-000000000001' + AND project_id = '00000000-0000-0000-0000-000000000002' + AND timestamp >= '2026-06-01' + AND timestamp < '2026-07-01'; +``` + +Store the plan output under `docs/query-plans/` with the date, schema version, and dataset size. + +## Rollback + +Rollback is also a standalone concurrent statement: + +```sql +DROP INDEX CONCURRENTLY IF EXISTS idx_events_org_project_timestamp; +``` + +## Public Wording + +Safe: + +> The migration uses PostgreSQL's concurrent index build to reduce write blocking risk, and the benchmark report includes query plans before and after the change. + +Unsafe without evidence: + +> The migration is zero-downtime. + +> The migration has no production impact. diff --git a/docs/observability.md b/docs/observability.md new file mode 100644 index 0000000..f5d141f --- /dev/null +++ b/docs/observability.md @@ -0,0 +1,73 @@ +# PulseOps Observability Map + +This document maps operational questions to metrics, logs, and traces that should be collected while running PulseOps benchmarks or production-like tests. + +The ingest API and GraphQL API expose Prometheus text metrics at `GET /metrics`. A checked GraphQL scrape is saved at `docs/benchmarks/evidence/graphql-metrics-2026-06-16.txt`. 
The worker exposes Prometheus text metrics at `GET /metrics` on `WORKER_METRICS_PORT` or `METRICS_PORT`, defaulting to port `3003`; a checked scrape is saved at `docs/benchmarks/evidence/worker-metrics-2026-06-16.txt`. + +## Ingest API + +| Question | Evidence | Current names or source | +| --- | --- | --- | +| Are clients getting accepted, rejected, or rate-limited? | HTTP status counts by route and API key/org. | `http_requests_total`, Fastify access logs. | +| How long does request acceptance take? | HTTP duration histogram by route/status. | `http_request_duration_seconds`, k6 `http_req_duration`. | +| Are single and batch paths behaving differently? | Ingestion duration by type and batch size. | `event_ingestion_duration_seconds`, `events_ingested_batch_total`. | +| Is Kafka publish failing? | Produce error counts and logs containing topic/error. | `kafka_produce_errors_total`, ingest error logs. | +| Which tenant is driving load? | Events accepted by org/project. | `events_ingested_total{org_id,project_id}` and benchmark labels. | +| Are clients hitting limits before backend saturation? | Rate-limit hit rate. | HTTP `429` responses. `rate_limit_hits_total` is defined but not yet wired to the Fastify rate-limit callback. | + +## Kafka And Worker + +| Question | Evidence | Current names or source | +| --- | --- | --- | +| Is the queue absorbing bursts? | Produced messages, consumed messages, consumer lag. | Kafka broker metrics, `kafka_messages_produced_total`, `worker_events_processed_total`. | +| Is one tenant creating partition skew? | Messages per partition and lag per partition. | Kafka consumer group lag by partition, `worker_events_processed_total{partition=...}`, benchmark hot tenant labels. | +| Is the worker keeping up? | Processed count, error count, processing rate, processing duration. 
| `worker_events_processed_total`, `worker_processing_errors_total`, `worker_event_processing_duration_seconds`, worker logs with `processed`, `errors`, `error_rate`. | +| Are retries occurring? | Failed processing logs, error counters, and uncommitted offsets. | `worker_processing_errors_total`, worker `Event processing failed, will retry` logs. | +| Are aggregate cache versions moving? | Cache invalidation count after successful aggregate updates. | `worker_cache_invalidations_total`, worker `Aggregates updated and cache invalidated` logs. | +| Is data freshness acceptable? | Time between event timestamp/ingest and aggregate visibility. | Benchmark dashboard probes. `data_lag_seconds` is defined but not yet wired. | + +## PostgreSQL + +| Question | Evidence | Current names or source | +| --- | --- | --- | +| Are raw event writes slow? | Insert duration and error logs. | Worker structured logs include `db_write_duration_ms`; Prometheus currently covers full worker processing duration via `worker_event_processing_duration_seconds`. | +| Are aggregate updates contended? | `ON CONFLICT DO UPDATE` latency, lock waits, deadlocks. | PostgreSQL `pg_stat_activity`, `pg_locks`, `pg_stat_database`, app query timing. | +| Are dashboard queries using the intended indexes? | `EXPLAIN (ANALYZE, BUFFERS)` plans. | Store plans under `docs/query-plans/`. | +| Are partitions being pruned? | Query plan scans only relevant monthly partitions. | `EXPLAIN` partition scan nodes. | +| Is connection pool pressure visible? | Active/idle connections and wait time. | `db_connections_active`, `db_connections_idle`, PostgreSQL connection stats. | + +## Redis And GraphQL + +| Question | Evidence | Current names or source | +| --- | --- | --- | +| Are dashboard queries cache hits or misses? | Cache hit/miss counters by query family. | `graphql_redis_cache_hits_total`, `graphql_redis_cache_misses_total`, GraphQL structured logs with `cache_result`. | +| Does cache improve latency? 
| Compare cold run after cache clear with warm repeated run. | `graphql_query_duration_seconds`, k6 `dashboard-query.js`, `scripts/measure-dashboard-cache.ts`. | +| Are filtered queries bypassing aggregates? | Resolver path and SQL query timing. | GraphQL resolver logs and database timing. | +| Are hot tenant dashboards slower than quiet tenant dashboards? | Latency grouped by org/project labels. | k6 tags, benchmark report table. | + +## Benchmark Labels + +Use consistent labels so results can be compared across runs: + +| Label | Purpose | +| --- | --- | +| `benchmark` | Script name, such as `ingest-throughput` or `hot-tenant`. | +| `scenario` | k6 scenario name. | +| `tenant_class` | `hot`, `medium`, or `quiet`. | +| `event_name` | Synthetic event type. | +| `endpoint` | `single`, `batch`, or `graphql`. | +| `run_id` | Timestamp or manually supplied run identifier. | + +## Minimum Evidence For Public Claims + +Before publishing a performance claim, capture: + +- Git commit SHA and dirty tree status. +- Hardware or container limits. +- Service versions for Node.js, PostgreSQL, Redis, Kafka, and k6. +- Dataset shape: tenants, events, days, hot tenant ratio, late arrival ratio, duplicate ratio. +- k6 command and environment variables. +- Raw k6 summary JSON. +- PostgreSQL query plans for dashboard queries. +- Kafka consumer lag during the run. +- Notes about failures, retries, rate limits, or backpressure. 
diff --git a/docs/query-plans/2026-06-16-aggregate-daily-dashboard.md b/docs/query-plans/2026-06-16-aggregate-daily-dashboard.md new file mode 100644 index 0000000..1af8db4 --- /dev/null +++ b/docs/query-plans/2026-06-16-aggregate-daily-dashboard.md @@ -0,0 +1,104 @@ +# Daily Aggregate Dashboard Query + +Captured: 2026-06-16T18:46:50Z + +## Table Row Counts + +```text +events=130748 +daily_aggregates=14 +hourly_aggregates=0 +mv_dashboard_metrics=0 +``` + +## Relevant Indexes + +```text +daily_aggregates: daily_aggregates_pkey => CREATE UNIQUE INDEX daily_aggregates_pkey ON public.daily_aggregates USING btree (id) +daily_aggregates: idx_aggregates_lookup => CREATE INDEX idx_aggregates_lookup ON public.daily_aggregates USING btree (org_id, project_id, date) +daily_aggregates: idx_aggregates_metric => CREATE INDEX idx_aggregates_metric ON public.daily_aggregates USING btree (metric_name, date) +daily_aggregates: idx_aggregates_tenant_metric_date => CREATE INDEX idx_aggregates_tenant_metric_date ON public.daily_aggregates USING btree (tenant_id, metric_name, date DESC) +daily_aggregates: idx_daily_aggregates_date => CREATE INDEX idx_daily_aggregates_date ON public.daily_aggregates USING btree (date DESC) +daily_aggregates: idx_daily_aggregates_metric_name => CREATE INDEX idx_daily_aggregates_metric_name ON public.daily_aggregates USING btree (metric_name, date DESC) +daily_aggregates: idx_daily_aggregates_org_project_date => CREATE INDEX idx_daily_aggregates_org_project_date ON public.daily_aggregates USING btree (org_id, project_id, date DESC) +daily_aggregates: unique_aggregate => CREATE UNIQUE INDEX unique_aggregate ON public.daily_aggregates USING btree (org_id, project_id, metric_name, date, dimensions) +events: events_event_id_timestamp_key => CREATE UNIQUE INDEX events_event_id_timestamp_key ON ONLY public.events USING btree (event_id, "timestamp") +events: events_org_project_event_id_timestamp_key => CREATE UNIQUE INDEX events_org_project_event_id_timestamp_key 
ON ONLY public.events USING btree (org_id, project_id, event_id, "timestamp") +events: events_pkey => CREATE UNIQUE INDEX events_pkey ON ONLY public.events USING btree (id, "timestamp") +events: idx_events_event_id => CREATE INDEX idx_events_event_id ON ONLY public.events USING btree (org_id, project_id, event_id) +events: idx_events_event_id_time => CREATE INDEX idx_events_event_id_time ON ONLY public.events USING btree (event_id, "timestamp") +events: idx_events_event_name_time => CREATE INDEX idx_events_event_name_time ON ONLY public.events USING btree (event_name, "timestamp" DESC) +events: idx_events_org_project => CREATE INDEX idx_events_org_project ON ONLY public.events USING btree (org_id, project_id) +events: idx_events_org_project_time => CREATE INDEX idx_events_org_project_time ON ONLY public.events USING btree (org_id, project_id, "timestamp" DESC) +events: idx_events_org_time => CREATE INDEX idx_events_org_time ON ONLY public.events USING btree (org_id, "timestamp" DESC) +events: idx_events_project_time => CREATE INDEX idx_events_project_time ON ONLY public.events USING btree (project_id, "timestamp" DESC) +events: idx_events_properties_gin => CREATE INDEX idx_events_properties_gin ON ONLY public.events USING gin (properties) +events: idx_events_session => CREATE INDEX idx_events_session ON ONLY public.events USING btree (session_id) WHERE (session_id IS NOT NULL) +events: idx_events_session_id => CREATE INDEX idx_events_session_id ON ONLY public.events USING btree (session_id) WHERE (session_id IS NOT NULL) +events: idx_events_tenant_metric_time => CREATE INDEX idx_events_tenant_metric_time ON ONLY public.events USING btree (tenant_id, event_name, "timestamp" DESC) +events: idx_events_tenant_time => CREATE INDEX idx_events_tenant_time ON ONLY public.events USING btree (tenant_id, "timestamp" DESC) +events: idx_events_timestamp => CREATE INDEX idx_events_timestamp ON ONLY public.events USING btree ("timestamp" DESC) +events: idx_events_user_id_time => 
CREATE INDEX idx_events_user_id_time ON ONLY public.events USING btree (org_id, project_id, user_id, "timestamp" DESC) WHERE (user_id IS NOT NULL) +events: idx_events_user_time => CREATE INDEX idx_events_user_time ON ONLY public.events USING btree (org_id, project_id, user_id, "timestamp" DESC) WHERE (user_id IS NOT NULL) +hourly_aggregates: hourly_aggregates_pkey => CREATE UNIQUE INDEX hourly_aggregates_pkey ON public.hourly_aggregates USING btree (id) +hourly_aggregates: idx_hourly_aggregates_lookup => CREATE INDEX idx_hourly_aggregates_lookup ON public.hourly_aggregates USING btree (org_id, project_id, hour DESC) +hourly_aggregates: idx_hourly_aggregates_metric => CREATE INDEX idx_hourly_aggregates_metric ON public.hourly_aggregates USING btree (metric_name, hour DESC) +hourly_aggregates: idx_hourly_aggregates_tenant_metric_hour => CREATE INDEX idx_hourly_aggregates_tenant_metric_hour ON public.hourly_aggregates USING btree (tenant_id, metric_name, hour DESC) +hourly_aggregates: unique_hourly_aggregate => CREATE UNIQUE INDEX unique_hourly_aggregate ON public.hourly_aggregates USING btree (org_id, project_id, metric_name, hour, dimensions) +mv_dashboard_metrics: idx_mv_dashboard_org_project_date => CREATE INDEX idx_mv_dashboard_org_project_date ON public.mv_dashboard_metrics USING btree (org_id, project_id, date DESC) +mv_dashboard_metrics: idx_mv_dashboard_unique => CREATE UNIQUE INDEX idx_mv_dashboard_unique ON public.mv_dashboard_metrics USING btree (org_id, project_id, date) +``` + +## Event Partitions + +```text +events_2026_03 +events_2026_04 +events_2026_05 +events_2026_06 +events_2026_07 +events_2026_08 +events_2026_09 +``` + +## Query + +```sql +EXPLAIN (ANALYZE, BUFFERS) +SELECT date, metric_name, SUM(metric_value) AS metric_value +FROM daily_aggregates +WHERE org_id = '00000000-0000-0000-0000-000000000001' + AND project_id = '00000000-0000-0000-0000-000000000002' + AND metric_name IN ('dau', 'event_count', 'total_events') + AND date >= CURRENT_DATE - 
INTERVAL '90 days' +GROUP BY date, metric_name +ORDER BY date ASC, metric_name ASC; +``` + +## EXPLAIN ANALYZE + +```text + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Sort (cost=7.51..7.52 rows=3 width=47) (actual time=0.051..0.051 rows=6 loops=1) + Sort Key: date, metric_name + Sort Method: quicksort Memory: 25kB + Buffers: shared hit=13 + -> HashAggregate (cost=7.45..7.49 rows=3 width=47) (actual time=0.028..0.029 rows=6 loops=1) + Group Key: date, metric_name + Batches: 1 Memory Usage: 24kB + Buffers: shared hit=7 + -> Seq Scan on daily_aggregates (cost=0.00..7.37 rows=11 width=21) (actual time=0.006..0.021 rows=14 loops=1) + Filter: ((org_id = '00000000-0000-0000-0000-000000000001'::uuid) AND (project_id = '00000000-0000-0000-0000-000000000002'::uuid) AND ((metric_name)::text = ANY ('{dau,event_count,total_events}'::text[])) AND (date >= (CURRENT_DATE - '90 days'::interval))) + Buffers: shared hit=7 + Planning: + Buffers: shared hit=332 read=7 + Planning Time: 1.024 ms + Execution Time: 0.097 ms +(15 rows) + +``` + +## Interpretation + +This plan is the dashboard aggregate read path. It should touch aggregate rows rather than raw event rows. 
diff --git a/docs/query-plans/2026-06-16-clean-full-benchmark-aggregate-daily-dashboard.md b/docs/query-plans/2026-06-16-clean-full-benchmark-aggregate-daily-dashboard.md new file mode 100644 index 0000000..3432a0d --- /dev/null +++ b/docs/query-plans/2026-06-16-clean-full-benchmark-aggregate-daily-dashboard.md @@ -0,0 +1,120 @@ +# Daily Aggregate Dashboard Query + +Captured: 2026-06-16T20:47:32Z +Run ID: 2026-06-16-clean-full-benchmark +Git commit: 112ba0414fa55bb431e814441e85ebd925107347 +Command: RUN_ID=2026-06-16-clean-full-benchmark ./scripts/capture-query-plans.sh +Target org_id: 00000000-0000-4000-8000-0000000f4241 +Target project_id: 00000000-0000-4000-8000-0000001e8481 + +## PostgreSQL Version + +```text +16.13 +``` + +## Benchmark Run Counts + +```text +events_for_run_id=18364 +target_tenant_events_for_run_id=5309 +``` + +## Table Row Counts + +```text +events=18364 +daily_aggregates=628 +hourly_aggregates=0 +mv_dashboard_metrics=0 +``` + +## Relevant Indexes + +```text +daily_aggregates: daily_aggregates_pkey => CREATE UNIQUE INDEX daily_aggregates_pkey ON public.daily_aggregates USING btree (id) +daily_aggregates: idx_aggregates_lookup => CREATE INDEX idx_aggregates_lookup ON public.daily_aggregates USING btree (org_id, project_id, date) +daily_aggregates: idx_aggregates_metric => CREATE INDEX idx_aggregates_metric ON public.daily_aggregates USING btree (metric_name, date) +daily_aggregates: idx_aggregates_tenant_metric_date => CREATE INDEX idx_aggregates_tenant_metric_date ON public.daily_aggregates USING btree (tenant_id, metric_name, date DESC) +daily_aggregates: idx_daily_aggregates_date => CREATE INDEX idx_daily_aggregates_date ON public.daily_aggregates USING btree (date DESC) +daily_aggregates: idx_daily_aggregates_metric_name => CREATE INDEX idx_daily_aggregates_metric_name ON public.daily_aggregates USING btree (metric_name, date DESC) +daily_aggregates: idx_daily_aggregates_org_project_date => CREATE INDEX idx_daily_aggregates_org_project_date 
ON public.daily_aggregates USING btree (org_id, project_id, date DESC) +daily_aggregates: unique_aggregate => CREATE UNIQUE INDEX unique_aggregate ON public.daily_aggregates USING btree (org_id, project_id, metric_name, date, dimensions) +events: events_org_project_event_id_timestamp_key => CREATE UNIQUE INDEX events_org_project_event_id_timestamp_key ON ONLY public.events USING btree (org_id, project_id, event_id, "timestamp") +events: events_pkey => CREATE UNIQUE INDEX events_pkey ON ONLY public.events USING btree (id, "timestamp") +events: idx_events_event_id => CREATE INDEX idx_events_event_id ON ONLY public.events USING btree (org_id, project_id, event_id) +events: idx_events_event_id_time => CREATE INDEX idx_events_event_id_time ON ONLY public.events USING btree (event_id, "timestamp") +events: idx_events_event_name_time => CREATE INDEX idx_events_event_name_time ON ONLY public.events USING btree (event_name, "timestamp" DESC) +events: idx_events_org_project => CREATE INDEX idx_events_org_project ON ONLY public.events USING btree (org_id, project_id) +events: idx_events_org_project_time => CREATE INDEX idx_events_org_project_time ON ONLY public.events USING btree (org_id, project_id, "timestamp" DESC) +events: idx_events_org_time => CREATE INDEX idx_events_org_time ON ONLY public.events USING btree (org_id, "timestamp" DESC) +events: idx_events_project_time => CREATE INDEX idx_events_project_time ON ONLY public.events USING btree (project_id, "timestamp" DESC) +events: idx_events_properties_gin => CREATE INDEX idx_events_properties_gin ON ONLY public.events USING gin (properties) +events: idx_events_session => CREATE INDEX idx_events_session ON ONLY public.events USING btree (session_id) WHERE (session_id IS NOT NULL) +events: idx_events_tenant_metric_time => CREATE INDEX idx_events_tenant_metric_time ON ONLY public.events USING btree (tenant_id, event_name, "timestamp" DESC) +events: idx_events_tenant_time => CREATE INDEX idx_events_tenant_time ON ONLY 
public.events USING btree (tenant_id, "timestamp" DESC) +events: idx_events_timestamp => CREATE INDEX idx_events_timestamp ON ONLY public.events USING btree ("timestamp" DESC) +events: idx_events_user_time => CREATE INDEX idx_events_user_time ON ONLY public.events USING btree (org_id, project_id, user_id, "timestamp" DESC) WHERE (user_id IS NOT NULL) +hourly_aggregates: hourly_aggregates_pkey => CREATE UNIQUE INDEX hourly_aggregates_pkey ON public.hourly_aggregates USING btree (id) +hourly_aggregates: idx_hourly_aggregates_lookup => CREATE INDEX idx_hourly_aggregates_lookup ON public.hourly_aggregates USING btree (org_id, project_id, hour DESC) +hourly_aggregates: idx_hourly_aggregates_metric => CREATE INDEX idx_hourly_aggregates_metric ON public.hourly_aggregates USING btree (metric_name, hour DESC) +hourly_aggregates: idx_hourly_aggregates_tenant_metric_hour => CREATE INDEX idx_hourly_aggregates_tenant_metric_hour ON public.hourly_aggregates USING btree (tenant_id, metric_name, hour DESC) +hourly_aggregates: unique_hourly_aggregate => CREATE UNIQUE INDEX unique_hourly_aggregate ON public.hourly_aggregates USING btree (org_id, project_id, metric_name, hour, dimensions) +mv_dashboard_metrics: idx_mv_dashboard_org_project_date => CREATE INDEX idx_mv_dashboard_org_project_date ON public.mv_dashboard_metrics USING btree (org_id, project_id, date DESC) +mv_dashboard_metrics: idx_mv_dashboard_unique => CREATE UNIQUE INDEX idx_mv_dashboard_unique ON public.mv_dashboard_metrics USING btree (org_id, project_id, date) +``` + +## Event Partitions + +```text +events_2026_03 +events_2026_04 +events_2026_05 +events_2026_06 +events_2026_07 +events_2026_08 +events_2026_09 +``` + +## Query + +```sql +EXPLAIN (ANALYZE, BUFFERS) +SELECT date, metric_name, SUM(metric_value) AS metric_value +FROM daily_aggregates +WHERE org_id = '00000000-0000-4000-8000-0000000f4241' + AND project_id = '00000000-0000-4000-8000-0000001e8481' + AND metric_name IN ('dau', 'event_count', 'total_events') + 
AND date >= CURRENT_DATE - INTERVAL '90 days' +GROUP BY date, metric_name +ORDER BY date ASC, metric_name ASC; +``` + +## EXPLAIN ANALYZE + +```text + QUERY PLAN +-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + GroupAggregate (cost=8.32..8.39 rows=1 width=46) (actual time=0.065..0.067 rows=6 loops=1) + Group Key: date, metric_name + Buffers: shared hit=14 + -> Incremental Sort (cost=8.32..8.36 rows=2 width=19) (actual time=0.057..0.057 rows=10 loops=1) + Sort Key: date, metric_name + Presorted Key: date + Full-sort Groups: 1 Sort Method: quicksort Average Memory: 25kB Peak Memory: 25kB + Buffers: shared hit=14 + -> Index Scan Backward using idx_daily_aggregates_org_project_date on daily_aggregates (cost=0.28..8.31 rows=1 width=19) (actual time=0.029..0.032 rows=10 loops=1) + Index Cond: ((org_id = '00000000-0000-4000-8000-0000000f4241'::uuid) AND (project_id = '00000000-0000-4000-8000-0000001e8481'::uuid) AND (date >= (CURRENT_DATE - '90 days'::interval))) + Filter: ((metric_name)::text = ANY ('{dau,event_count,total_events}'::text[])) + Buffers: shared hit=8 + Planning: + Buffers: shared hit=339 + Planning Time: 0.854 ms + Execution Time: 0.114 ms +(16 rows) + +``` + +## Interpretation + +This plan is the dashboard aggregate read path. It should touch aggregate rows rather than raw event rows. 
diff --git a/docs/query-plans/2026-06-16-clean-full-benchmark-graphql-cache-path.md b/docs/query-plans/2026-06-16-clean-full-benchmark-graphql-cache-path.md new file mode 100644 index 0000000..3f23f40 --- /dev/null +++ b/docs/query-plans/2026-06-16-clean-full-benchmark-graphql-cache-path.md @@ -0,0 +1,83 @@ +# Dashboard Cache Evidence Note + +Captured: 2026-06-16T20:47:33Z +Run ID: 2026-06-16-clean-full-benchmark +Git commit: 112ba0414fa55bb431e814441e85ebd925107347 +Target org_id: 00000000-0000-4000-8000-0000000f4241 +Target project_id: 00000000-0000-4000-8000-0000001e8481 + +## Benchmark Run Counts + +```text +events_for_run_id=18364 +target_tenant_events_for_run_id=5309 +``` + +## Table Row Counts + +```text +events=18364 +daily_aggregates=628 +hourly_aggregates=0 +mv_dashboard_metrics=0 +``` + +## Relevant Indexes + +```text +daily_aggregates: daily_aggregates_pkey => CREATE UNIQUE INDEX daily_aggregates_pkey ON public.daily_aggregates USING btree (id) +daily_aggregates: idx_aggregates_lookup => CREATE INDEX idx_aggregates_lookup ON public.daily_aggregates USING btree (org_id, project_id, date) +daily_aggregates: idx_aggregates_metric => CREATE INDEX idx_aggregates_metric ON public.daily_aggregates USING btree (metric_name, date) +daily_aggregates: idx_aggregates_tenant_metric_date => CREATE INDEX idx_aggregates_tenant_metric_date ON public.daily_aggregates USING btree (tenant_id, metric_name, date DESC) +daily_aggregates: idx_daily_aggregates_date => CREATE INDEX idx_daily_aggregates_date ON public.daily_aggregates USING btree (date DESC) +daily_aggregates: idx_daily_aggregates_metric_name => CREATE INDEX idx_daily_aggregates_metric_name ON public.daily_aggregates USING btree (metric_name, date DESC) +daily_aggregates: idx_daily_aggregates_org_project_date => CREATE INDEX idx_daily_aggregates_org_project_date ON public.daily_aggregates USING btree (org_id, project_id, date DESC) +daily_aggregates: unique_aggregate => CREATE UNIQUE INDEX unique_aggregate ON 
public.daily_aggregates USING btree (org_id, project_id, metric_name, date, dimensions) +events: events_org_project_event_id_timestamp_key => CREATE UNIQUE INDEX events_org_project_event_id_timestamp_key ON ONLY public.events USING btree (org_id, project_id, event_id, "timestamp") +events: events_pkey => CREATE UNIQUE INDEX events_pkey ON ONLY public.events USING btree (id, "timestamp") +events: idx_events_event_id => CREATE INDEX idx_events_event_id ON ONLY public.events USING btree (org_id, project_id, event_id) +events: idx_events_event_id_time => CREATE INDEX idx_events_event_id_time ON ONLY public.events USING btree (event_id, "timestamp") +events: idx_events_event_name_time => CREATE INDEX idx_events_event_name_time ON ONLY public.events USING btree (event_name, "timestamp" DESC) +events: idx_events_org_project => CREATE INDEX idx_events_org_project ON ONLY public.events USING btree (org_id, project_id) +events: idx_events_org_project_time => CREATE INDEX idx_events_org_project_time ON ONLY public.events USING btree (org_id, project_id, "timestamp" DESC) +events: idx_events_org_time => CREATE INDEX idx_events_org_time ON ONLY public.events USING btree (org_id, "timestamp" DESC) +events: idx_events_project_time => CREATE INDEX idx_events_project_time ON ONLY public.events USING btree (project_id, "timestamp" DESC) +events: idx_events_properties_gin => CREATE INDEX idx_events_properties_gin ON ONLY public.events USING gin (properties) +events: idx_events_session => CREATE INDEX idx_events_session ON ONLY public.events USING btree (session_id) WHERE (session_id IS NOT NULL) +events: idx_events_tenant_metric_time => CREATE INDEX idx_events_tenant_metric_time ON ONLY public.events USING btree (tenant_id, event_name, "timestamp" DESC) +events: idx_events_tenant_time => CREATE INDEX idx_events_tenant_time ON ONLY public.events USING btree (tenant_id, "timestamp" DESC) +events: idx_events_timestamp => CREATE INDEX idx_events_timestamp ON ONLY public.events USING 
btree ("timestamp" DESC) +events: idx_events_user_time => CREATE INDEX idx_events_user_time ON ONLY public.events USING btree (org_id, project_id, user_id, "timestamp" DESC) WHERE (user_id IS NOT NULL) +hourly_aggregates: hourly_aggregates_pkey => CREATE UNIQUE INDEX hourly_aggregates_pkey ON public.hourly_aggregates USING btree (id) +hourly_aggregates: idx_hourly_aggregates_lookup => CREATE INDEX idx_hourly_aggregates_lookup ON public.hourly_aggregates USING btree (org_id, project_id, hour DESC) +hourly_aggregates: idx_hourly_aggregates_metric => CREATE INDEX idx_hourly_aggregates_metric ON public.hourly_aggregates USING btree (metric_name, hour DESC) +hourly_aggregates: idx_hourly_aggregates_tenant_metric_hour => CREATE INDEX idx_hourly_aggregates_tenant_metric_hour ON public.hourly_aggregates USING btree (tenant_id, metric_name, hour DESC) +hourly_aggregates: unique_hourly_aggregate => CREATE UNIQUE INDEX unique_hourly_aggregate ON public.hourly_aggregates USING btree (org_id, project_id, metric_name, hour, dimensions) +mv_dashboard_metrics: idx_mv_dashboard_org_project_date => CREATE INDEX idx_mv_dashboard_org_project_date ON public.mv_dashboard_metrics USING btree (org_id, project_id, date DESC) +mv_dashboard_metrics: idx_mv_dashboard_unique => CREATE UNIQUE INDEX idx_mv_dashboard_unique ON public.mv_dashboard_metrics USING btree (org_id, project_id, date) +``` + +## GraphQL Query Text + +```graphql +query DashboardCacheBenchmark($orgId: ID!, $projectId: ID!, $startDate: Date!, $endDate: Date!) { + metrics(orgId: $orgId, projectId: $projectId, startDate: $startDate, endDate: $endDate) { + totalEvents + dailyActiveUsers { date value } + topEvents { eventName count } + dateRange { start end } + } +} +``` + +## EXPLAIN ANALYZE + +No PostgreSQL EXPLAIN ANALYZE is recorded for cached vs uncached GraphQL timing. The warm path is served through Redis and resolver-level cache behavior, so a PostgreSQL plan would not represent the cached request. 
+ +## Dashboard Cache Evidence + +Run-scoped cache evidence: `docs/benchmarks/evidence/dashboard-cache-2026-06-16-clean-full-benchmark.json`. + +## Interpretation + +Use the dashboard cache JSON evidence for cold and warm GraphQL timings. Use the PostgreSQL plans in the other files from this run for database access paths; do not invent a cached GraphQL EXPLAIN plan. diff --git a/docs/query-plans/2026-06-16-clean-full-benchmark-materialized-dashboard.md b/docs/query-plans/2026-06-16-clean-full-benchmark-materialized-dashboard.md new file mode 100644 index 0000000..127be51 --- /dev/null +++ b/docs/query-plans/2026-06-16-clean-full-benchmark-materialized-dashboard.md @@ -0,0 +1,109 @@ +# Materialized Dashboard Metrics Query + +Captured: 2026-06-16T20:47:32Z +Run ID: 2026-06-16-clean-full-benchmark +Git commit: 112ba0414fa55bb431e814441e85ebd925107347 +Command: RUN_ID=2026-06-16-clean-full-benchmark ./scripts/capture-query-plans.sh +Target org_id: 00000000-0000-4000-8000-0000000f4241 +Target project_id: 00000000-0000-4000-8000-0000001e8481 + +## PostgreSQL Version + +```text +16.13 +``` + +## Benchmark Run Counts + +```text +events_for_run_id=18364 +target_tenant_events_for_run_id=5309 +``` + +## Table Row Counts + +```text +events=18364 +daily_aggregates=628 +hourly_aggregates=0 +mv_dashboard_metrics=0 +``` + +## Relevant Indexes + +```text +daily_aggregates: daily_aggregates_pkey => CREATE UNIQUE INDEX daily_aggregates_pkey ON public.daily_aggregates USING btree (id) +daily_aggregates: idx_aggregates_lookup => CREATE INDEX idx_aggregates_lookup ON public.daily_aggregates USING btree (org_id, project_id, date) +daily_aggregates: idx_aggregates_metric => CREATE INDEX idx_aggregates_metric ON public.daily_aggregates USING btree (metric_name, date) +daily_aggregates: idx_aggregates_tenant_metric_date => CREATE INDEX idx_aggregates_tenant_metric_date ON public.daily_aggregates USING btree (tenant_id, metric_name, date DESC) +daily_aggregates: idx_daily_aggregates_date => 
CREATE INDEX idx_daily_aggregates_date ON public.daily_aggregates USING btree (date DESC) +daily_aggregates: idx_daily_aggregates_metric_name => CREATE INDEX idx_daily_aggregates_metric_name ON public.daily_aggregates USING btree (metric_name, date DESC) +daily_aggregates: idx_daily_aggregates_org_project_date => CREATE INDEX idx_daily_aggregates_org_project_date ON public.daily_aggregates USING btree (org_id, project_id, date DESC) +daily_aggregates: unique_aggregate => CREATE UNIQUE INDEX unique_aggregate ON public.daily_aggregates USING btree (org_id, project_id, metric_name, date, dimensions) +events: events_org_project_event_id_timestamp_key => CREATE UNIQUE INDEX events_org_project_event_id_timestamp_key ON ONLY public.events USING btree (org_id, project_id, event_id, "timestamp") +events: events_pkey => CREATE UNIQUE INDEX events_pkey ON ONLY public.events USING btree (id, "timestamp") +events: idx_events_event_id => CREATE INDEX idx_events_event_id ON ONLY public.events USING btree (org_id, project_id, event_id) +events: idx_events_event_id_time => CREATE INDEX idx_events_event_id_time ON ONLY public.events USING btree (event_id, "timestamp") +events: idx_events_event_name_time => CREATE INDEX idx_events_event_name_time ON ONLY public.events USING btree (event_name, "timestamp" DESC) +events: idx_events_org_project => CREATE INDEX idx_events_org_project ON ONLY public.events USING btree (org_id, project_id) +events: idx_events_org_project_time => CREATE INDEX idx_events_org_project_time ON ONLY public.events USING btree (org_id, project_id, "timestamp" DESC) +events: idx_events_org_time => CREATE INDEX idx_events_org_time ON ONLY public.events USING btree (org_id, "timestamp" DESC) +events: idx_events_project_time => CREATE INDEX idx_events_project_time ON ONLY public.events USING btree (project_id, "timestamp" DESC) +events: idx_events_properties_gin => CREATE INDEX idx_events_properties_gin ON ONLY public.events USING gin (properties) +events: 
idx_events_session => CREATE INDEX idx_events_session ON ONLY public.events USING btree (session_id) WHERE (session_id IS NOT NULL) +events: idx_events_tenant_metric_time => CREATE INDEX idx_events_tenant_metric_time ON ONLY public.events USING btree (tenant_id, event_name, "timestamp" DESC) +events: idx_events_tenant_time => CREATE INDEX idx_events_tenant_time ON ONLY public.events USING btree (tenant_id, "timestamp" DESC) +events: idx_events_timestamp => CREATE INDEX idx_events_timestamp ON ONLY public.events USING btree ("timestamp" DESC) +events: idx_events_user_time => CREATE INDEX idx_events_user_time ON ONLY public.events USING btree (org_id, project_id, user_id, "timestamp" DESC) WHERE (user_id IS NOT NULL) +hourly_aggregates: hourly_aggregates_pkey => CREATE UNIQUE INDEX hourly_aggregates_pkey ON public.hourly_aggregates USING btree (id) +hourly_aggregates: idx_hourly_aggregates_lookup => CREATE INDEX idx_hourly_aggregates_lookup ON public.hourly_aggregates USING btree (org_id, project_id, hour DESC) +hourly_aggregates: idx_hourly_aggregates_metric => CREATE INDEX idx_hourly_aggregates_metric ON public.hourly_aggregates USING btree (metric_name, hour DESC) +hourly_aggregates: idx_hourly_aggregates_tenant_metric_hour => CREATE INDEX idx_hourly_aggregates_tenant_metric_hour ON public.hourly_aggregates USING btree (tenant_id, metric_name, hour DESC) +hourly_aggregates: unique_hourly_aggregate => CREATE UNIQUE INDEX unique_hourly_aggregate ON public.hourly_aggregates USING btree (org_id, project_id, metric_name, hour, dimensions) +mv_dashboard_metrics: idx_mv_dashboard_org_project_date => CREATE INDEX idx_mv_dashboard_org_project_date ON public.mv_dashboard_metrics USING btree (org_id, project_id, date DESC) +mv_dashboard_metrics: idx_mv_dashboard_unique => CREATE UNIQUE INDEX idx_mv_dashboard_unique ON public.mv_dashboard_metrics USING btree (org_id, project_id, date) +``` + +## Event Partitions + +```text +events_2026_03 +events_2026_04 +events_2026_05 
+events_2026_06 +events_2026_07 +events_2026_08 +events_2026_09 +``` + +## Query + +```sql +EXPLAIN (ANALYZE, BUFFERS) +SELECT date, event_count, unique_users, unique_sessions, events_by_name +FROM mv_dashboard_metrics +WHERE org_id = '00000000-0000-4000-8000-0000000f4241' + AND project_id = '00000000-0000-4000-8000-0000001e8481' + AND date >= CURRENT_DATE - INTERVAL '30 days' +ORDER BY date DESC; +``` + +## EXPLAIN ANALYZE + +```text + QUERY PLAN +-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Index Scan using idx_mv_dashboard_org_project_date on mv_dashboard_metrics (cost=0.15..8.18 rows=1 width=84) (actual time=0.006..0.006 rows=0 loops=1) + Index Cond: ((org_id = '00000000-0000-4000-8000-0000000f4241'::uuid) AND (project_id = '00000000-0000-4000-8000-0000001e8481'::uuid) AND (date >= (CURRENT_DATE - '30 days'::interval))) + Buffers: shared hit=2 + Planning: + Buffers: shared hit=114 + Planning Time: 0.290 ms + Execution Time: 0.019 ms +(7 rows) + +``` + +## Interpretation + +This plan captures the materialized dashboard read path. It is not Redis cache evidence; Redis cache timing is captured through GraphQL/load-test summaries. The index scan returned zero rows in this run (`rows=0` in the actual figures), so the timings above show only the index access path against an empty result and should not be cited as a representative dashboard read latency. 
diff --git a/docs/query-plans/2026-06-16-clean-full-benchmark-partition-pruning-24h.md b/docs/query-plans/2026-06-16-clean-full-benchmark-partition-pruning-24h.md new file mode 100644 index 0000000..3c3f730 --- /dev/null +++ b/docs/query-plans/2026-06-16-clean-full-benchmark-partition-pruning-24h.md @@ -0,0 +1,115 @@ +# Partition Pruning Over Last 24 Hours + +Captured: 2026-06-16T20:47:31Z +Run ID: 2026-06-16-clean-full-benchmark +Git commit: 112ba0414fa55bb431e814441e85ebd925107347 +Command: RUN_ID=2026-06-16-clean-full-benchmark ./scripts/capture-query-plans.sh +Target org_id: 00000000-0000-4000-8000-0000000f4241 +Target project_id: 00000000-0000-4000-8000-0000001e8481 + +## PostgreSQL Version + +```text +16.13 +``` + +## Benchmark Run Counts + +```text +events_for_run_id=18364 +target_tenant_events_for_run_id=5309 +``` + +## Table Row Counts + +```text +events=18364 +daily_aggregates=628 +hourly_aggregates=0 +mv_dashboard_metrics=0 +``` + +## Relevant Indexes + +```text +daily_aggregates: daily_aggregates_pkey => CREATE UNIQUE INDEX daily_aggregates_pkey ON public.daily_aggregates USING btree (id) +daily_aggregates: idx_aggregates_lookup => CREATE INDEX idx_aggregates_lookup ON public.daily_aggregates USING btree (org_id, project_id, date) +daily_aggregates: idx_aggregates_metric => CREATE INDEX idx_aggregates_metric ON public.daily_aggregates USING btree (metric_name, date) +daily_aggregates: idx_aggregates_tenant_metric_date => CREATE INDEX idx_aggregates_tenant_metric_date ON public.daily_aggregates USING btree (tenant_id, metric_name, date DESC) +daily_aggregates: idx_daily_aggregates_date => CREATE INDEX idx_daily_aggregates_date ON public.daily_aggregates USING btree (date DESC) +daily_aggregates: idx_daily_aggregates_metric_name => CREATE INDEX idx_daily_aggregates_metric_name ON public.daily_aggregates USING btree (metric_name, date DESC) +daily_aggregates: idx_daily_aggregates_org_project_date => CREATE INDEX idx_daily_aggregates_org_project_date ON 
public.daily_aggregates USING btree (org_id, project_id, date DESC) +daily_aggregates: unique_aggregate => CREATE UNIQUE INDEX unique_aggregate ON public.daily_aggregates USING btree (org_id, project_id, metric_name, date, dimensions) +events: events_org_project_event_id_timestamp_key => CREATE UNIQUE INDEX events_org_project_event_id_timestamp_key ON ONLY public.events USING btree (org_id, project_id, event_id, "timestamp") +events: events_pkey => CREATE UNIQUE INDEX events_pkey ON ONLY public.events USING btree (id, "timestamp") +events: idx_events_event_id => CREATE INDEX idx_events_event_id ON ONLY public.events USING btree (org_id, project_id, event_id) +events: idx_events_event_id_time => CREATE INDEX idx_events_event_id_time ON ONLY public.events USING btree (event_id, "timestamp") +events: idx_events_event_name_time => CREATE INDEX idx_events_event_name_time ON ONLY public.events USING btree (event_name, "timestamp" DESC) +events: idx_events_org_project => CREATE INDEX idx_events_org_project ON ONLY public.events USING btree (org_id, project_id) +events: idx_events_org_project_time => CREATE INDEX idx_events_org_project_time ON ONLY public.events USING btree (org_id, project_id, "timestamp" DESC) +events: idx_events_org_time => CREATE INDEX idx_events_org_time ON ONLY public.events USING btree (org_id, "timestamp" DESC) +events: idx_events_project_time => CREATE INDEX idx_events_project_time ON ONLY public.events USING btree (project_id, "timestamp" DESC) +events: idx_events_properties_gin => CREATE INDEX idx_events_properties_gin ON ONLY public.events USING gin (properties) +events: idx_events_session => CREATE INDEX idx_events_session ON ONLY public.events USING btree (session_id) WHERE (session_id IS NOT NULL) +events: idx_events_tenant_metric_time => CREATE INDEX idx_events_tenant_metric_time ON ONLY public.events USING btree (tenant_id, event_name, "timestamp" DESC) +events: idx_events_tenant_time => CREATE INDEX idx_events_tenant_time ON ONLY 
public.events USING btree (tenant_id, "timestamp" DESC) +events: idx_events_timestamp => CREATE INDEX idx_events_timestamp ON ONLY public.events USING btree ("timestamp" DESC) +events: idx_events_user_time => CREATE INDEX idx_events_user_time ON ONLY public.events USING btree (org_id, project_id, user_id, "timestamp" DESC) WHERE (user_id IS NOT NULL) +hourly_aggregates: hourly_aggregates_pkey => CREATE UNIQUE INDEX hourly_aggregates_pkey ON public.hourly_aggregates USING btree (id) +hourly_aggregates: idx_hourly_aggregates_lookup => CREATE INDEX idx_hourly_aggregates_lookup ON public.hourly_aggregates USING btree (org_id, project_id, hour DESC) +hourly_aggregates: idx_hourly_aggregates_metric => CREATE INDEX idx_hourly_aggregates_metric ON public.hourly_aggregates USING btree (metric_name, hour DESC) +hourly_aggregates: idx_hourly_aggregates_tenant_metric_hour => CREATE INDEX idx_hourly_aggregates_tenant_metric_hour ON public.hourly_aggregates USING btree (tenant_id, metric_name, hour DESC) +hourly_aggregates: unique_hourly_aggregate => CREATE UNIQUE INDEX unique_hourly_aggregate ON public.hourly_aggregates USING btree (org_id, project_id, metric_name, hour, dimensions) +mv_dashboard_metrics: idx_mv_dashboard_org_project_date => CREATE INDEX idx_mv_dashboard_org_project_date ON public.mv_dashboard_metrics USING btree (org_id, project_id, date DESC) +mv_dashboard_metrics: idx_mv_dashboard_unique => CREATE UNIQUE INDEX idx_mv_dashboard_unique ON public.mv_dashboard_metrics USING btree (org_id, project_id, date) +``` + +## Event Partitions + +```text +events_2026_03 +events_2026_04 +events_2026_05 +events_2026_06 +events_2026_07 +events_2026_08 +events_2026_09 +``` + +## Query + +```sql +EXPLAIN (ANALYZE, BUFFERS) +SELECT COUNT(*) AS events_in_window +FROM events +WHERE org_id = '00000000-0000-4000-8000-0000000f4241' + AND project_id = '00000000-0000-4000-8000-0000001e8481' + AND timestamp >= NOW() - INTERVAL '24 hours' + AND timestamp < NOW(); +``` + +## EXPLAIN 
ANALYZE + +```text + QUERY PLAN +----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Aggregate (cost=88.93..88.94 rows=1 width=8) (actual time=0.717..0.717 rows=1 loops=1) + Buffers: shared hit=18 + -> Append (cost=0.00..85.17 rows=1501 width=0) (actual time=0.045..0.589 rows=5065 loops=1) + Buffers: shared hit=18 + Subplans Removed: 6 + -> Index Only Scan using events_2026_06_org_id_project_id_timestamp_idx on events_2026_06 events_1 (cost=0.29..77.67 rows=1495 width=0) (actual time=0.044..0.399 rows=5065 loops=1) + Index Cond: ((org_id = '00000000-0000-4000-8000-0000000f4241'::uuid) AND (project_id = '00000000-0000-4000-8000-0000001e8481'::uuid) AND ("timestamp" >= (now() - '24:00:00'::interval)) AND ("timestamp" < now())) + Heap Fetches: 0 + Buffers: shared hit=18 + Planning: + Buffers: shared hit=2356 + Planning Time: 4.960 ms + Execution Time: 0.758 ms +(13 rows) + +``` + +## Interpretation + +This query demonstrates the partition set touched for a narrow dashboard window. Compare the child tables and subplans removed here with the 30-day plan from the same run. 
diff --git a/docs/query-plans/2026-06-16-clean-full-benchmark-partition-pruning-30d.md b/docs/query-plans/2026-06-16-clean-full-benchmark-partition-pruning-30d.md new file mode 100644 index 0000000..85f58dd --- /dev/null +++ b/docs/query-plans/2026-06-16-clean-full-benchmark-partition-pruning-30d.md @@ -0,0 +1,117 @@ +# Partition Pruning Over Last 30 Days + +Captured: 2026-06-16T20:47:31Z +Run ID: 2026-06-16-clean-full-benchmark +Git commit: 112ba0414fa55bb431e814441e85ebd925107347 +Command: RUN_ID=2026-06-16-clean-full-benchmark ./scripts/capture-query-plans.sh +Target org_id: 00000000-0000-4000-8000-0000000f4241 +Target project_id: 00000000-0000-4000-8000-0000001e8481 + +## PostgreSQL Version + +```text +16.13 +``` + +## Benchmark Run Counts + +```text +events_for_run_id=18364 +target_tenant_events_for_run_id=5309 +``` + +## Table Row Counts + +```text +events=18364 +daily_aggregates=628 +hourly_aggregates=0 +mv_dashboard_metrics=0 +``` + +## Relevant Indexes + +```text +daily_aggregates: daily_aggregates_pkey => CREATE UNIQUE INDEX daily_aggregates_pkey ON public.daily_aggregates USING btree (id) +daily_aggregates: idx_aggregates_lookup => CREATE INDEX idx_aggregates_lookup ON public.daily_aggregates USING btree (org_id, project_id, date) +daily_aggregates: idx_aggregates_metric => CREATE INDEX idx_aggregates_metric ON public.daily_aggregates USING btree (metric_name, date) +daily_aggregates: idx_aggregates_tenant_metric_date => CREATE INDEX idx_aggregates_tenant_metric_date ON public.daily_aggregates USING btree (tenant_id, metric_name, date DESC) +daily_aggregates: idx_daily_aggregates_date => CREATE INDEX idx_daily_aggregates_date ON public.daily_aggregates USING btree (date DESC) +daily_aggregates: idx_daily_aggregates_metric_name => CREATE INDEX idx_daily_aggregates_metric_name ON public.daily_aggregates USING btree (metric_name, date DESC) +daily_aggregates: idx_daily_aggregates_org_project_date => CREATE INDEX idx_daily_aggregates_org_project_date ON 
public.daily_aggregates USING btree (org_id, project_id, date DESC) +daily_aggregates: unique_aggregate => CREATE UNIQUE INDEX unique_aggregate ON public.daily_aggregates USING btree (org_id, project_id, metric_name, date, dimensions) +events: events_org_project_event_id_timestamp_key => CREATE UNIQUE INDEX events_org_project_event_id_timestamp_key ON ONLY public.events USING btree (org_id, project_id, event_id, "timestamp") +events: events_pkey => CREATE UNIQUE INDEX events_pkey ON ONLY public.events USING btree (id, "timestamp") +events: idx_events_event_id => CREATE INDEX idx_events_event_id ON ONLY public.events USING btree (org_id, project_id, event_id) +events: idx_events_event_id_time => CREATE INDEX idx_events_event_id_time ON ONLY public.events USING btree (event_id, "timestamp") +events: idx_events_event_name_time => CREATE INDEX idx_events_event_name_time ON ONLY public.events USING btree (event_name, "timestamp" DESC) +events: idx_events_org_project => CREATE INDEX idx_events_org_project ON ONLY public.events USING btree (org_id, project_id) +events: idx_events_org_project_time => CREATE INDEX idx_events_org_project_time ON ONLY public.events USING btree (org_id, project_id, "timestamp" DESC) +events: idx_events_org_time => CREATE INDEX idx_events_org_time ON ONLY public.events USING btree (org_id, "timestamp" DESC) +events: idx_events_project_time => CREATE INDEX idx_events_project_time ON ONLY public.events USING btree (project_id, "timestamp" DESC) +events: idx_events_properties_gin => CREATE INDEX idx_events_properties_gin ON ONLY public.events USING gin (properties) +events: idx_events_session => CREATE INDEX idx_events_session ON ONLY public.events USING btree (session_id) WHERE (session_id IS NOT NULL) +events: idx_events_tenant_metric_time => CREATE INDEX idx_events_tenant_metric_time ON ONLY public.events USING btree (tenant_id, event_name, "timestamp" DESC) +events: idx_events_tenant_time => CREATE INDEX idx_events_tenant_time ON ONLY 
public.events USING btree (tenant_id, "timestamp" DESC) +events: idx_events_timestamp => CREATE INDEX idx_events_timestamp ON ONLY public.events USING btree ("timestamp" DESC) +events: idx_events_user_time => CREATE INDEX idx_events_user_time ON ONLY public.events USING btree (org_id, project_id, user_id, "timestamp" DESC) WHERE (user_id IS NOT NULL) +hourly_aggregates: hourly_aggregates_pkey => CREATE UNIQUE INDEX hourly_aggregates_pkey ON public.hourly_aggregates USING btree (id) +hourly_aggregates: idx_hourly_aggregates_lookup => CREATE INDEX idx_hourly_aggregates_lookup ON public.hourly_aggregates USING btree (org_id, project_id, hour DESC) +hourly_aggregates: idx_hourly_aggregates_metric => CREATE INDEX idx_hourly_aggregates_metric ON public.hourly_aggregates USING btree (metric_name, hour DESC) +hourly_aggregates: idx_hourly_aggregates_tenant_metric_hour => CREATE INDEX idx_hourly_aggregates_tenant_metric_hour ON public.hourly_aggregates USING btree (tenant_id, metric_name, hour DESC) +hourly_aggregates: unique_hourly_aggregate => CREATE UNIQUE INDEX unique_hourly_aggregate ON public.hourly_aggregates USING btree (org_id, project_id, metric_name, hour, dimensions) +mv_dashboard_metrics: idx_mv_dashboard_org_project_date => CREATE INDEX idx_mv_dashboard_org_project_date ON public.mv_dashboard_metrics USING btree (org_id, project_id, date DESC) +mv_dashboard_metrics: idx_mv_dashboard_unique => CREATE UNIQUE INDEX idx_mv_dashboard_unique ON public.mv_dashboard_metrics USING btree (org_id, project_id, date) +``` + +## Event Partitions + +```text +events_2026_03 +events_2026_04 +events_2026_05 +events_2026_06 +events_2026_07 +events_2026_08 +events_2026_09 +``` + +## Query + +```sql +EXPLAIN (ANALYZE, BUFFERS) +SELECT COUNT(*) AS events_in_window +FROM events +WHERE org_id = '00000000-0000-4000-8000-0000000f4241' + AND project_id = '00000000-0000-4000-8000-0000001e8481' + AND timestamp >= NOW() - INTERVAL '30 days' + AND timestamp < NOW(); +``` + +## EXPLAIN 
ANALYZE + +```text + QUERY PLAN +---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Aggregate (cost=94.23..94.24 rows=1 width=8) (actual time=0.689..0.689 rows=1 loops=1) + Buffers: shared hit=21 + -> Append (cost=0.00..90.37 rows=1541 width=0) (actual time=0.018..0.545 rows=5309 loops=1) + Buffers: shared hit=21 + Subplans Removed: 5 + -> Seq Scan on events_2026_05 events_1 (cost=0.00..0.00 rows=1 width=0) (actual time=0.002..0.002 rows=0 loops=1) + Filter: ((org_id = '00000000-0000-4000-8000-0000000f4241'::uuid) AND (project_id = '00000000-0000-4000-8000-0000001e8481'::uuid) AND ("timestamp" < now()) AND ("timestamp" >= (now() - '30 days'::interval))) + -> Index Only Scan using events_2026_06_org_id_project_id_timestamp_idx on events_2026_06 events_2 (cost=0.29..82.67 rows=1535 width=0) (actual time=0.016..0.341 rows=5309 loops=1) + Index Cond: ((org_id = '00000000-0000-4000-8000-0000000f4241'::uuid) AND (project_id = '00000000-0000-4000-8000-0000001e8481'::uuid) AND ("timestamp" >= (now() - '30 days'::interval)) AND ("timestamp" < now())) + Heap Fetches: 0 + Buffers: shared hit=21 + Planning: + Buffers: shared hit=2359 + Planning Time: 4.583 ms + Execution Time: 0.717 ms +(15 rows) + +``` + +## Interpretation + +This 30-day query is the broader scan/pruning comparison for the 24-hour plan from the same run. With monthly local partitions, it can touch more than one child partition depending on the date and available benchmark rows. 
diff --git a/docs/query-plans/2026-06-16-clean-full-benchmark-tenant-dashboard-chosen-index.md b/docs/query-plans/2026-06-16-clean-full-benchmark-tenant-dashboard-chosen-index.md new file mode 100644 index 0000000..f7d108e --- /dev/null +++ b/docs/query-plans/2026-06-16-clean-full-benchmark-tenant-dashboard-chosen-index.md @@ -0,0 +1,129 @@ +# Tenant Dashboard Raw Event Query With Chosen Indexes + +Captured: 2026-06-16T20:47:31Z +Run ID: 2026-06-16-clean-full-benchmark +Git commit: 112ba0414fa55bb431e814441e85ebd925107347 +Command: RUN_ID=2026-06-16-clean-full-benchmark ./scripts/capture-query-plans.sh +Target org_id: 00000000-0000-4000-8000-0000000f4241 +Target project_id: 00000000-0000-4000-8000-0000001e8481 + +## PostgreSQL Version + +```text +16.13 +``` + +## Benchmark Run Counts + +```text +events_for_run_id=18364 +target_tenant_events_for_run_id=5309 +``` + +## Table Row Counts + +```text +events=18364 +daily_aggregates=628 +hourly_aggregates=0 +mv_dashboard_metrics=0 +``` + +## Relevant Indexes + +```text +daily_aggregates: daily_aggregates_pkey => CREATE UNIQUE INDEX daily_aggregates_pkey ON public.daily_aggregates USING btree (id) +daily_aggregates: idx_aggregates_lookup => CREATE INDEX idx_aggregates_lookup ON public.daily_aggregates USING btree (org_id, project_id, date) +daily_aggregates: idx_aggregates_metric => CREATE INDEX idx_aggregates_metric ON public.daily_aggregates USING btree (metric_name, date) +daily_aggregates: idx_aggregates_tenant_metric_date => CREATE INDEX idx_aggregates_tenant_metric_date ON public.daily_aggregates USING btree (tenant_id, metric_name, date DESC) +daily_aggregates: idx_daily_aggregates_date => CREATE INDEX idx_daily_aggregates_date ON public.daily_aggregates USING btree (date DESC) +daily_aggregates: idx_daily_aggregates_metric_name => CREATE INDEX idx_daily_aggregates_metric_name ON public.daily_aggregates USING btree (metric_name, date DESC) +daily_aggregates: idx_daily_aggregates_org_project_date => CREATE INDEX 
idx_daily_aggregates_org_project_date ON public.daily_aggregates USING btree (org_id, project_id, date DESC) +daily_aggregates: unique_aggregate => CREATE UNIQUE INDEX unique_aggregate ON public.daily_aggregates USING btree (org_id, project_id, metric_name, date, dimensions) +events: events_org_project_event_id_timestamp_key => CREATE UNIQUE INDEX events_org_project_event_id_timestamp_key ON ONLY public.events USING btree (org_id, project_id, event_id, "timestamp") +events: events_pkey => CREATE UNIQUE INDEX events_pkey ON ONLY public.events USING btree (id, "timestamp") +events: idx_events_event_id => CREATE INDEX idx_events_event_id ON ONLY public.events USING btree (org_id, project_id, event_id) +events: idx_events_event_id_time => CREATE INDEX idx_events_event_id_time ON ONLY public.events USING btree (event_id, "timestamp") +events: idx_events_event_name_time => CREATE INDEX idx_events_event_name_time ON ONLY public.events USING btree (event_name, "timestamp" DESC) +events: idx_events_org_project => CREATE INDEX idx_events_org_project ON ONLY public.events USING btree (org_id, project_id) +events: idx_events_org_project_time => CREATE INDEX idx_events_org_project_time ON ONLY public.events USING btree (org_id, project_id, "timestamp" DESC) +events: idx_events_org_time => CREATE INDEX idx_events_org_time ON ONLY public.events USING btree (org_id, "timestamp" DESC) +events: idx_events_project_time => CREATE INDEX idx_events_project_time ON ONLY public.events USING btree (project_id, "timestamp" DESC) +events: idx_events_properties_gin => CREATE INDEX idx_events_properties_gin ON ONLY public.events USING gin (properties) +events: idx_events_session => CREATE INDEX idx_events_session ON ONLY public.events USING btree (session_id) WHERE (session_id IS NOT NULL) +events: idx_events_tenant_metric_time => CREATE INDEX idx_events_tenant_metric_time ON ONLY public.events USING btree (tenant_id, event_name, "timestamp" DESC) +events: idx_events_tenant_time => CREATE 
INDEX idx_events_tenant_time ON ONLY public.events USING btree (tenant_id, "timestamp" DESC) +events: idx_events_timestamp => CREATE INDEX idx_events_timestamp ON ONLY public.events USING btree ("timestamp" DESC) +events: idx_events_user_time => CREATE INDEX idx_events_user_time ON ONLY public.events USING btree (org_id, project_id, user_id, "timestamp" DESC) WHERE (user_id IS NOT NULL) +hourly_aggregates: hourly_aggregates_pkey => CREATE UNIQUE INDEX hourly_aggregates_pkey ON public.hourly_aggregates USING btree (id) +hourly_aggregates: idx_hourly_aggregates_lookup => CREATE INDEX idx_hourly_aggregates_lookup ON public.hourly_aggregates USING btree (org_id, project_id, hour DESC) +hourly_aggregates: idx_hourly_aggregates_metric => CREATE INDEX idx_hourly_aggregates_metric ON public.hourly_aggregates USING btree (metric_name, hour DESC) +hourly_aggregates: idx_hourly_aggregates_tenant_metric_hour => CREATE INDEX idx_hourly_aggregates_tenant_metric_hour ON public.hourly_aggregates USING btree (tenant_id, metric_name, hour DESC) +hourly_aggregates: unique_hourly_aggregate => CREATE UNIQUE INDEX unique_hourly_aggregate ON public.hourly_aggregates USING btree (org_id, project_id, metric_name, hour, dimensions) +mv_dashboard_metrics: idx_mv_dashboard_org_project_date => CREATE INDEX idx_mv_dashboard_org_project_date ON public.mv_dashboard_metrics USING btree (org_id, project_id, date DESC) +mv_dashboard_metrics: idx_mv_dashboard_unique => CREATE UNIQUE INDEX idx_mv_dashboard_unique ON public.mv_dashboard_metrics USING btree (org_id, project_id, date) +``` + +## Event Partitions + +```text +events_2026_03 +events_2026_04 +events_2026_05 +events_2026_06 +events_2026_07 +events_2026_08 +events_2026_09 +``` + +## Query + +```sql +EXPLAIN (ANALYZE, BUFFERS) +SELECT id, event_id, event_name, user_id, session_id, timestamp, properties +FROM events +WHERE org_id = '00000000-0000-4000-8000-0000000f4241' + AND project_id = '00000000-0000-4000-8000-0000001e8481' + AND timestamp >= 
NOW() - INTERVAL '7 days' +ORDER BY timestamp DESC +LIMIT 100; +``` + +## EXPLAIN ANALYZE + +```text + QUERY PLAN +---------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Limit (cost=1.07..183.45 rows=100 width=302) (actual time=0.039..0.096 rows=100 loops=1) + Buffers: shared hit=35 + -> Append (cost=1.07..2749.50 rows=1507 width=302) (actual time=0.039..0.091 rows=100 loops=1) + Buffers: shared hit=35 + Subplans Removed: 3 + -> Index Scan using events_2026_09_timestamp_idx on events_2026_09 events_4 (cost=0.13..8.15 rows=1 width=1112) (actual time=0.004..0.004 rows=0 loops=1) + Index Cond: ("timestamp" >= (now() - '7 days'::interval)) + Filter: ((org_id = '00000000-0000-4000-8000-0000000f4241'::uuid) AND (project_id = '00000000-0000-4000-8000-0000001e8481'::uuid)) + Buffers: shared hit=2 + -> Index Scan using events_2026_08_timestamp_idx on events_2026_08 events_3 (cost=0.13..8.15 rows=1 width=1112) (actual time=0.004..0.004 rows=0 loops=1) + Index Cond: ("timestamp" >= (now() - '7 days'::interval)) + Filter: ((org_id = '00000000-0000-4000-8000-0000000f4241'::uuid) AND (project_id = '00000000-0000-4000-8000-0000001e8481'::uuid)) + Buffers: shared hit=2 + -> Index Scan using events_2026_07_timestamp_idx on events_2026_07 events_2 (cost=0.13..8.15 rows=1 width=1112) (actual time=0.003..0.003 rows=0 loops=1) + Index Cond: ("timestamp" >= (now() - '7 days'::interval)) + Filter: ((org_id = '00000000-0000-4000-8000-0000000f4241'::uuid) AND (project_id = '00000000-0000-4000-8000-0000001e8481'::uuid)) + Buffers: shared hit=2 + -> Index Scan using events_2026_06_timestamp_idx on events_2026_06 events_1 (cost=0.29..2693.05 rows=1501 width=299) (actual time=0.027..0.075 rows=100 loops=1) + Index Cond: ("timestamp" >= (now() - '7 days'::interval)) + Filter: ((org_id = '00000000-0000-4000-8000-0000000f4241'::uuid) AND (project_id = 
'00000000-0000-4000-8000-0000001e8481'::uuid)) + Rows Removed by Filter: 95 + Buffers: shared hit=29 + Planning: + Buffers: shared hit=2441 + Planning Time: 4.871 ms + Execution Time: 0.146 ms +(26 rows) + +``` + +## Interpretation + +This is the normal tenant dashboard raw-event access path for this run. Use this plan to verify whether PostgreSQL chooses the tenant/project/timestamp index or another timestamp-oriented partition index, and how many partition children are touched. diff --git a/docs/query-plans/2026-06-16-clean-full-benchmark-tenant-dashboard-index-disabled.md b/docs/query-plans/2026-06-16-clean-full-benchmark-tenant-dashboard-index-disabled.md new file mode 100644 index 0000000..6aa50d2 --- /dev/null +++ b/docs/query-plans/2026-06-16-clean-full-benchmark-tenant-dashboard-index-disabled.md @@ -0,0 +1,134 @@ +# Tenant Dashboard Raw Event Query With Index Scans Disabled + +Captured: 2026-06-16T20:47:30Z +Run ID: 2026-06-16-clean-full-benchmark +Git commit: 112ba0414fa55bb431e814441e85ebd925107347 +Command: RUN_ID=2026-06-16-clean-full-benchmark ./scripts/capture-query-plans.sh +Target org_id: 00000000-0000-4000-8000-0000000f4241 +Target project_id: 00000000-0000-4000-8000-0000001e8481 + +## PostgreSQL Version + +```text +16.13 +``` + +## Benchmark Run Counts + +```text +events_for_run_id=18364 +target_tenant_events_for_run_id=5309 +``` + +## Table Row Counts + +```text +events=18364 +daily_aggregates=628 +hourly_aggregates=0 +mv_dashboard_metrics=0 +``` + +## Relevant Indexes + +```text +daily_aggregates: daily_aggregates_pkey => CREATE UNIQUE INDEX daily_aggregates_pkey ON public.daily_aggregates USING btree (id) +daily_aggregates: idx_aggregates_lookup => CREATE INDEX idx_aggregates_lookup ON public.daily_aggregates USING btree (org_id, project_id, date) +daily_aggregates: idx_aggregates_metric => CREATE INDEX idx_aggregates_metric ON public.daily_aggregates USING btree (metric_name, date) +daily_aggregates: idx_aggregates_tenant_metric_date => 
CREATE INDEX idx_aggregates_tenant_metric_date ON public.daily_aggregates USING btree (tenant_id, metric_name, date DESC) +daily_aggregates: idx_daily_aggregates_date => CREATE INDEX idx_daily_aggregates_date ON public.daily_aggregates USING btree (date DESC) +daily_aggregates: idx_daily_aggregates_metric_name => CREATE INDEX idx_daily_aggregates_metric_name ON public.daily_aggregates USING btree (metric_name, date DESC) +daily_aggregates: idx_daily_aggregates_org_project_date => CREATE INDEX idx_daily_aggregates_org_project_date ON public.daily_aggregates USING btree (org_id, project_id, date DESC) +daily_aggregates: unique_aggregate => CREATE UNIQUE INDEX unique_aggregate ON public.daily_aggregates USING btree (org_id, project_id, metric_name, date, dimensions) +events: events_org_project_event_id_timestamp_key => CREATE UNIQUE INDEX events_org_project_event_id_timestamp_key ON ONLY public.events USING btree (org_id, project_id, event_id, "timestamp") +events: events_pkey => CREATE UNIQUE INDEX events_pkey ON ONLY public.events USING btree (id, "timestamp") +events: idx_events_event_id => CREATE INDEX idx_events_event_id ON ONLY public.events USING btree (org_id, project_id, event_id) +events: idx_events_event_id_time => CREATE INDEX idx_events_event_id_time ON ONLY public.events USING btree (event_id, "timestamp") +events: idx_events_event_name_time => CREATE INDEX idx_events_event_name_time ON ONLY public.events USING btree (event_name, "timestamp" DESC) +events: idx_events_org_project => CREATE INDEX idx_events_org_project ON ONLY public.events USING btree (org_id, project_id) +events: idx_events_org_project_time => CREATE INDEX idx_events_org_project_time ON ONLY public.events USING btree (org_id, project_id, "timestamp" DESC) +events: idx_events_org_time => CREATE INDEX idx_events_org_time ON ONLY public.events USING btree (org_id, "timestamp" DESC) +events: idx_events_project_time => CREATE INDEX idx_events_project_time ON ONLY public.events USING btree 
(project_id, "timestamp" DESC) +events: idx_events_properties_gin => CREATE INDEX idx_events_properties_gin ON ONLY public.events USING gin (properties) +events: idx_events_session => CREATE INDEX idx_events_session ON ONLY public.events USING btree (session_id) WHERE (session_id IS NOT NULL) +events: idx_events_tenant_metric_time => CREATE INDEX idx_events_tenant_metric_time ON ONLY public.events USING btree (tenant_id, event_name, "timestamp" DESC) +events: idx_events_tenant_time => CREATE INDEX idx_events_tenant_time ON ONLY public.events USING btree (tenant_id, "timestamp" DESC) +events: idx_events_timestamp => CREATE INDEX idx_events_timestamp ON ONLY public.events USING btree ("timestamp" DESC) +events: idx_events_user_time => CREATE INDEX idx_events_user_time ON ONLY public.events USING btree (org_id, project_id, user_id, "timestamp" DESC) WHERE (user_id IS NOT NULL) +hourly_aggregates: hourly_aggregates_pkey => CREATE UNIQUE INDEX hourly_aggregates_pkey ON public.hourly_aggregates USING btree (id) +hourly_aggregates: idx_hourly_aggregates_lookup => CREATE INDEX idx_hourly_aggregates_lookup ON public.hourly_aggregates USING btree (org_id, project_id, hour DESC) +hourly_aggregates: idx_hourly_aggregates_metric => CREATE INDEX idx_hourly_aggregates_metric ON public.hourly_aggregates USING btree (metric_name, hour DESC) +hourly_aggregates: idx_hourly_aggregates_tenant_metric_hour => CREATE INDEX idx_hourly_aggregates_tenant_metric_hour ON public.hourly_aggregates USING btree (tenant_id, metric_name, hour DESC) +hourly_aggregates: unique_hourly_aggregate => CREATE UNIQUE INDEX unique_hourly_aggregate ON public.hourly_aggregates USING btree (org_id, project_id, metric_name, hour, dimensions) +mv_dashboard_metrics: idx_mv_dashboard_org_project_date => CREATE INDEX idx_mv_dashboard_org_project_date ON public.mv_dashboard_metrics USING btree (org_id, project_id, date DESC) +mv_dashboard_metrics: idx_mv_dashboard_unique => CREATE UNIQUE INDEX idx_mv_dashboard_unique 
ON public.mv_dashboard_metrics USING btree (org_id, project_id, date) +``` + +## Event Partitions + +```text +events_2026_03 +events_2026_04 +events_2026_05 +events_2026_06 +events_2026_07 +events_2026_08 +events_2026_09 +``` + +## Query + +```sql +BEGIN; +SET LOCAL enable_indexscan = off; +SET LOCAL enable_bitmapscan = off; +EXPLAIN (ANALYZE, BUFFERS) +SELECT id, event_id, event_name, user_id, session_id, timestamp, properties +FROM events +WHERE org_id = '00000000-0000-4000-8000-0000000f4241' + AND project_id = '00000000-0000-4000-8000-0000001e8481' + AND timestamp >= NOW() - INTERVAL '7 days' +ORDER BY timestamp DESC +LIMIT 100; +ROLLBACK; +``` + +## EXPLAIN ANALYZE + +```text +BEGIN +SET +SET + QUERY PLAN +--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Limit (cost=1387.32..1387.57 rows=100 width=302) (actual time=4.454..4.461 rows=100 loops=1) + Buffers: shared hit=912 + -> Sort (cost=1387.32..1391.09 rows=1507 width=302) (actual time=4.453..4.456 rows=100 loops=1) + Sort Key: events."timestamp" DESC + Sort Method: top-N heapsort Memory: 127kB + Buffers: shared hit=912 + -> Append (cost=0.00..1329.73 rows=1507 width=302) (actual time=0.008..3.650 rows=5065 loops=1) + Buffers: shared hit=909 + Subplans Removed: 3 + -> Seq Scan on events_2026_06 events_1 (cost=0.00..1322.19 rows=1501 width=299) (actual time=0.007..3.448 rows=5065 loops=1) + Filter: ((org_id = '00000000-0000-4000-8000-0000000f4241'::uuid) AND (project_id = '00000000-0000-4000-8000-0000001e8481'::uuid) AND ("timestamp" >= (now() - '7 days'::interval))) + Rows Removed by Filter: 13299 + Buffers: shared hit=909 + -> Seq Scan on events_2026_07 events_2 (cost=0.00..0.00 rows=1 width=1112) (actual time=0.007..0.007 rows=0 loops=1) + Filter: ((org_id = '00000000-0000-4000-8000-0000000f4241'::uuid) AND (project_id = 
'00000000-0000-4000-8000-0000001e8481'::uuid) AND ("timestamp" >= (now() - '7 days'::interval))) + -> Seq Scan on events_2026_08 events_3 (cost=0.00..0.00 rows=1 width=1112) (actual time=0.002..0.002 rows=0 loops=1) + Filter: ((org_id = '00000000-0000-4000-8000-0000000f4241'::uuid) AND (project_id = '00000000-0000-4000-8000-0000001e8481'::uuid) AND ("timestamp" >= (now() - '7 days'::interval))) + -> Seq Scan on events_2026_09 events_4 (cost=0.00..0.00 rows=1 width=1112) (actual time=0.002..0.002 rows=0 loops=1) + Filter: ((org_id = '00000000-0000-4000-8000-0000000f4241'::uuid) AND (project_id = '00000000-0000-4000-8000-0000001e8481'::uuid) AND ("timestamp" >= (now() - '7 days'::interval))) + Planning: + Buffers: shared hit=2441 + Planning Time: 5.027 ms + Execution Time: 4.544 ms +(23 rows) + +ROLLBACK +``` + +## Interpretation + +Index and bitmap scans are disabled in this session to show the cost shape when PostgreSQL cannot use the tenant/time access path. This is not a dropped-index benchmark; it is planner-controlled evidence for comparison. 
diff --git a/docs/query-plans/2026-06-16-final-benchmark-smoke-aggregate-daily-dashboard.md b/docs/query-plans/2026-06-16-final-benchmark-smoke-aggregate-daily-dashboard.md new file mode 100644 index 0000000..c4b1423 --- /dev/null +++ b/docs/query-plans/2026-06-16-final-benchmark-smoke-aggregate-daily-dashboard.md @@ -0,0 +1,123 @@ +# Daily Aggregate Dashboard Query + +Captured: 2026-06-16T20:05:00Z +Run ID: 2026-06-16-final-benchmark-smoke +Git commit: 112ba0414fa55bb431e814441e85ebd925107347 +Command: RUN_ID=2026-06-16-final-benchmark-smoke ./scripts/capture-query-plans.sh +Target org_id: 00000000-0000-4000-8000-0000000f4241 +Target project_id: 00000000-0000-4000-8000-0000001e8481 + +## PostgreSQL Version + +```text +16.13 +``` + +## Benchmark Run Counts + +```text +events_for_run_id=7449 +target_tenant_events_for_run_id=2601 +``` + +## Table Row Counts + +```text +events=623997 +daily_aggregates=620 +hourly_aggregates=0 +mv_dashboard_metrics=0 +``` + +## Relevant Indexes + +```text +daily_aggregates: daily_aggregates_pkey => CREATE UNIQUE INDEX daily_aggregates_pkey ON public.daily_aggregates USING btree (id) +daily_aggregates: idx_aggregates_lookup => CREATE INDEX idx_aggregates_lookup ON public.daily_aggregates USING btree (org_id, project_id, date) +daily_aggregates: idx_aggregates_metric => CREATE INDEX idx_aggregates_metric ON public.daily_aggregates USING btree (metric_name, date) +daily_aggregates: idx_aggregates_tenant_metric_date => CREATE INDEX idx_aggregates_tenant_metric_date ON public.daily_aggregates USING btree (tenant_id, metric_name, date DESC) +daily_aggregates: idx_daily_aggregates_date => CREATE INDEX idx_daily_aggregates_date ON public.daily_aggregates USING btree (date DESC) +daily_aggregates: idx_daily_aggregates_metric_name => CREATE INDEX idx_daily_aggregates_metric_name ON public.daily_aggregates USING btree (metric_name, date DESC) +daily_aggregates: idx_daily_aggregates_org_project_date => CREATE INDEX 
idx_daily_aggregates_org_project_date ON public.daily_aggregates USING btree (org_id, project_id, date DESC) +daily_aggregates: unique_aggregate => CREATE UNIQUE INDEX unique_aggregate ON public.daily_aggregates USING btree (org_id, project_id, metric_name, date, dimensions) +events: events_event_id_timestamp_key => CREATE UNIQUE INDEX events_event_id_timestamp_key ON ONLY public.events USING btree (event_id, "timestamp") +events: events_org_project_event_id_timestamp_key => CREATE UNIQUE INDEX events_org_project_event_id_timestamp_key ON ONLY public.events USING btree (org_id, project_id, event_id, "timestamp") +events: events_pkey => CREATE UNIQUE INDEX events_pkey ON ONLY public.events USING btree (id, "timestamp") +events: idx_events_event_id => CREATE INDEX idx_events_event_id ON ONLY public.events USING btree (org_id, project_id, event_id) +events: idx_events_event_id_time => CREATE INDEX idx_events_event_id_time ON ONLY public.events USING btree (event_id, "timestamp") +events: idx_events_event_name_time => CREATE INDEX idx_events_event_name_time ON ONLY public.events USING btree (event_name, "timestamp" DESC) +events: idx_events_org_project => CREATE INDEX idx_events_org_project ON ONLY public.events USING btree (org_id, project_id) +events: idx_events_org_project_time => CREATE INDEX idx_events_org_project_time ON ONLY public.events USING btree (org_id, project_id, "timestamp" DESC) +events: idx_events_org_time => CREATE INDEX idx_events_org_time ON ONLY public.events USING btree (org_id, "timestamp" DESC) +events: idx_events_project_time => CREATE INDEX idx_events_project_time ON ONLY public.events USING btree (project_id, "timestamp" DESC) +events: idx_events_properties_gin => CREATE INDEX idx_events_properties_gin ON ONLY public.events USING gin (properties) +events: idx_events_session => CREATE INDEX idx_events_session ON ONLY public.events USING btree (session_id) WHERE (session_id IS NOT NULL) +events: idx_events_session_id => CREATE INDEX 
idx_events_session_id ON ONLY public.events USING btree (session_id) WHERE (session_id IS NOT NULL) +events: idx_events_tenant_metric_time => CREATE INDEX idx_events_tenant_metric_time ON ONLY public.events USING btree (tenant_id, event_name, "timestamp" DESC) +events: idx_events_tenant_time => CREATE INDEX idx_events_tenant_time ON ONLY public.events USING btree (tenant_id, "timestamp" DESC) +events: idx_events_timestamp => CREATE INDEX idx_events_timestamp ON ONLY public.events USING btree ("timestamp" DESC) +events: idx_events_user_id_time => CREATE INDEX idx_events_user_id_time ON ONLY public.events USING btree (org_id, project_id, user_id, "timestamp" DESC) WHERE (user_id IS NOT NULL) +events: idx_events_user_time => CREATE INDEX idx_events_user_time ON ONLY public.events USING btree (org_id, project_id, user_id, "timestamp" DESC) WHERE (user_id IS NOT NULL) +hourly_aggregates: hourly_aggregates_pkey => CREATE UNIQUE INDEX hourly_aggregates_pkey ON public.hourly_aggregates USING btree (id) +hourly_aggregates: idx_hourly_aggregates_lookup => CREATE INDEX idx_hourly_aggregates_lookup ON public.hourly_aggregates USING btree (org_id, project_id, hour DESC) +hourly_aggregates: idx_hourly_aggregates_metric => CREATE INDEX idx_hourly_aggregates_metric ON public.hourly_aggregates USING btree (metric_name, hour DESC) +hourly_aggregates: idx_hourly_aggregates_tenant_metric_hour => CREATE INDEX idx_hourly_aggregates_tenant_metric_hour ON public.hourly_aggregates USING btree (tenant_id, metric_name, hour DESC) +hourly_aggregates: unique_hourly_aggregate => CREATE UNIQUE INDEX unique_hourly_aggregate ON public.hourly_aggregates USING btree (org_id, project_id, metric_name, hour, dimensions) +mv_dashboard_metrics: idx_mv_dashboard_org_project_date => CREATE INDEX idx_mv_dashboard_org_project_date ON public.mv_dashboard_metrics USING btree (org_id, project_id, date DESC) +mv_dashboard_metrics: idx_mv_dashboard_unique => CREATE UNIQUE INDEX idx_mv_dashboard_unique ON 
public.mv_dashboard_metrics USING btree (org_id, project_id, date) +``` + +## Event Partitions + +```text +events_2026_03 +events_2026_04 +events_2026_05 +events_2026_06 +events_2026_07 +events_2026_08 +events_2026_09 +``` + +## Query + +```sql +EXPLAIN (ANALYZE, BUFFERS) +SELECT date, metric_name, SUM(metric_value) AS metric_value +FROM daily_aggregates +WHERE org_id = '00000000-0000-4000-8000-0000000f4241' + AND project_id = '00000000-0000-4000-8000-0000001e8481' + AND metric_name IN ('dau', 'event_count', 'total_events') + AND date >= CURRENT_DATE - INTERVAL '90 days' +GROUP BY date, metric_name +ORDER BY date ASC, metric_name ASC; +``` + +## EXPLAIN ANALYZE + +```text + QUERY PLAN +-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + GroupAggregate (cost=8.32..8.39 rows=1 width=46) (actual time=0.066..0.068 rows=6 loops=1) + Group Key: date, metric_name + Buffers: shared hit=14 + -> Incremental Sort (cost=8.32..8.36 rows=2 width=19) (actual time=0.055..0.055 rows=10 loops=1) + Sort Key: date, metric_name + Presorted Key: date + Full-sort Groups: 1 Sort Method: quicksort Average Memory: 25kB Peak Memory: 25kB + Buffers: shared hit=14 + -> Index Scan Backward using idx_daily_aggregates_org_project_date on daily_aggregates (cost=0.28..8.31 rows=1 width=19) (actual time=0.031..0.034 rows=10 loops=1) + Index Cond: ((org_id = '00000000-0000-4000-8000-0000000f4241'::uuid) AND (project_id = '00000000-0000-4000-8000-0000001e8481'::uuid) AND (date >= (CURRENT_DATE - '90 days'::interval))) + Filter: ((metric_name)::text = ANY ('{dau,event_count,total_events}'::text[])) + Buffers: shared hit=8 + Planning: + Buffers: shared hit=339 + Planning Time: 0.784 ms + Execution Time: 0.097 ms +(16 rows) + +``` + +## Interpretation + +This plan is the dashboard aggregate read path. 
It should touch aggregate rows rather than raw event rows. diff --git a/docs/query-plans/2026-06-16-final-benchmark-smoke-graphql-cache-path.md b/docs/query-plans/2026-06-16-final-benchmark-smoke-graphql-cache-path.md new file mode 100644 index 0000000..bdc6caf --- /dev/null +++ b/docs/query-plans/2026-06-16-final-benchmark-smoke-graphql-cache-path.md @@ -0,0 +1,86 @@ +# Dashboard Cache Evidence Note + +Captured: 2026-06-16T20:05:01Z +Run ID: 2026-06-16-final-benchmark-smoke +Git commit: 112ba0414fa55bb431e814441e85ebd925107347 +Target org_id: 00000000-0000-4000-8000-0000000f4241 +Target project_id: 00000000-0000-4000-8000-0000001e8481 + +## Benchmark Run Counts + +```text +events_for_run_id=7449 +target_tenant_events_for_run_id=2601 +``` + +## Table Row Counts + +```text +events=623997 +daily_aggregates=620 +hourly_aggregates=0 +mv_dashboard_metrics=0 +``` + +## Relevant Indexes + +```text +daily_aggregates: daily_aggregates_pkey => CREATE UNIQUE INDEX daily_aggregates_pkey ON public.daily_aggregates USING btree (id) +daily_aggregates: idx_aggregates_lookup => CREATE INDEX idx_aggregates_lookup ON public.daily_aggregates USING btree (org_id, project_id, date) +daily_aggregates: idx_aggregates_metric => CREATE INDEX idx_aggregates_metric ON public.daily_aggregates USING btree (metric_name, date) +daily_aggregates: idx_aggregates_tenant_metric_date => CREATE INDEX idx_aggregates_tenant_metric_date ON public.daily_aggregates USING btree (tenant_id, metric_name, date DESC) +daily_aggregates: idx_daily_aggregates_date => CREATE INDEX idx_daily_aggregates_date ON public.daily_aggregates USING btree (date DESC) +daily_aggregates: idx_daily_aggregates_metric_name => CREATE INDEX idx_daily_aggregates_metric_name ON public.daily_aggregates USING btree (metric_name, date DESC) +daily_aggregates: idx_daily_aggregates_org_project_date => CREATE INDEX idx_daily_aggregates_org_project_date ON public.daily_aggregates USING btree (org_id, project_id, date DESC) +daily_aggregates: 
unique_aggregate => CREATE UNIQUE INDEX unique_aggregate ON public.daily_aggregates USING btree (org_id, project_id, metric_name, date, dimensions) +events: events_event_id_timestamp_key => CREATE UNIQUE INDEX events_event_id_timestamp_key ON ONLY public.events USING btree (event_id, "timestamp") +events: events_org_project_event_id_timestamp_key => CREATE UNIQUE INDEX events_org_project_event_id_timestamp_key ON ONLY public.events USING btree (org_id, project_id, event_id, "timestamp") +events: events_pkey => CREATE UNIQUE INDEX events_pkey ON ONLY public.events USING btree (id, "timestamp") +events: idx_events_event_id => CREATE INDEX idx_events_event_id ON ONLY public.events USING btree (org_id, project_id, event_id) +events: idx_events_event_id_time => CREATE INDEX idx_events_event_id_time ON ONLY public.events USING btree (event_id, "timestamp") +events: idx_events_event_name_time => CREATE INDEX idx_events_event_name_time ON ONLY public.events USING btree (event_name, "timestamp" DESC) +events: idx_events_org_project => CREATE INDEX idx_events_org_project ON ONLY public.events USING btree (org_id, project_id) +events: idx_events_org_project_time => CREATE INDEX idx_events_org_project_time ON ONLY public.events USING btree (org_id, project_id, "timestamp" DESC) +events: idx_events_org_time => CREATE INDEX idx_events_org_time ON ONLY public.events USING btree (org_id, "timestamp" DESC) +events: idx_events_project_time => CREATE INDEX idx_events_project_time ON ONLY public.events USING btree (project_id, "timestamp" DESC) +events: idx_events_properties_gin => CREATE INDEX idx_events_properties_gin ON ONLY public.events USING gin (properties) +events: idx_events_session => CREATE INDEX idx_events_session ON ONLY public.events USING btree (session_id) WHERE (session_id IS NOT NULL) +events: idx_events_session_id => CREATE INDEX idx_events_session_id ON ONLY public.events USING btree (session_id) WHERE (session_id IS NOT NULL) +events: idx_events_tenant_metric_time 
=> CREATE INDEX idx_events_tenant_metric_time ON ONLY public.events USING btree (tenant_id, event_name, "timestamp" DESC) +events: idx_events_tenant_time => CREATE INDEX idx_events_tenant_time ON ONLY public.events USING btree (tenant_id, "timestamp" DESC) +events: idx_events_timestamp => CREATE INDEX idx_events_timestamp ON ONLY public.events USING btree ("timestamp" DESC) +events: idx_events_user_id_time => CREATE INDEX idx_events_user_id_time ON ONLY public.events USING btree (org_id, project_id, user_id, "timestamp" DESC) WHERE (user_id IS NOT NULL) +events: idx_events_user_time => CREATE INDEX idx_events_user_time ON ONLY public.events USING btree (org_id, project_id, user_id, "timestamp" DESC) WHERE (user_id IS NOT NULL) +hourly_aggregates: hourly_aggregates_pkey => CREATE UNIQUE INDEX hourly_aggregates_pkey ON public.hourly_aggregates USING btree (id) +hourly_aggregates: idx_hourly_aggregates_lookup => CREATE INDEX idx_hourly_aggregates_lookup ON public.hourly_aggregates USING btree (org_id, project_id, hour DESC) +hourly_aggregates: idx_hourly_aggregates_metric => CREATE INDEX idx_hourly_aggregates_metric ON public.hourly_aggregates USING btree (metric_name, hour DESC) +hourly_aggregates: idx_hourly_aggregates_tenant_metric_hour => CREATE INDEX idx_hourly_aggregates_tenant_metric_hour ON public.hourly_aggregates USING btree (tenant_id, metric_name, hour DESC) +hourly_aggregates: unique_hourly_aggregate => CREATE UNIQUE INDEX unique_hourly_aggregate ON public.hourly_aggregates USING btree (org_id, project_id, metric_name, hour, dimensions) +mv_dashboard_metrics: idx_mv_dashboard_org_project_date => CREATE INDEX idx_mv_dashboard_org_project_date ON public.mv_dashboard_metrics USING btree (org_id, project_id, date DESC) +mv_dashboard_metrics: idx_mv_dashboard_unique => CREATE UNIQUE INDEX idx_mv_dashboard_unique ON public.mv_dashboard_metrics USING btree (org_id, project_id, date) +``` + +## GraphQL Query Text + +```graphql +query 
DashboardCacheBenchmark($orgId: ID!, $projectId: ID!, $startDate: Date!, $endDate: Date!) { + metrics(orgId: $orgId, projectId: $projectId, startDate: $startDate, endDate: $endDate) { + totalEvents + dailyActiveUsers { date value } + topEvents { eventName count } + dateRange { start end } + } +} +``` + +## EXPLAIN ANALYZE + +No PostgreSQL EXPLAIN ANALYZE is recorded for cached vs uncached GraphQL timing. The warm path is served through Redis and resolver-level cache behavior, so a PostgreSQL plan would not represent the cached request. + +## Dashboard Cache Evidence + +Run-scoped cache evidence: `docs/benchmarks/evidence/dashboard-cache-2026-06-16-final-benchmark-smoke.json`. + +## Interpretation + +Use the dashboard cache JSON evidence for cold and warm GraphQL timings. Use the PostgreSQL plans in the other files from this run for database access paths; do not invent a cached GraphQL EXPLAIN plan. diff --git a/docs/query-plans/2026-06-16-final-benchmark-smoke-materialized-dashboard.md b/docs/query-plans/2026-06-16-final-benchmark-smoke-materialized-dashboard.md new file mode 100644 index 0000000..f3ac1c9 --- /dev/null +++ b/docs/query-plans/2026-06-16-final-benchmark-smoke-materialized-dashboard.md @@ -0,0 +1,112 @@ +# Materialized Dashboard Metrics Query + +Captured: 2026-06-16T20:05:01Z +Run ID: 2026-06-16-final-benchmark-smoke +Git commit: 112ba0414fa55bb431e814441e85ebd925107347 +Command: RUN_ID=2026-06-16-final-benchmark-smoke ./scripts/capture-query-plans.sh +Target org_id: 00000000-0000-4000-8000-0000000f4241 +Target project_id: 00000000-0000-4000-8000-0000001e8481 + +## PostgreSQL Version + +```text +16.13 +``` + +## Benchmark Run Counts + +```text +events_for_run_id=7449 +target_tenant_events_for_run_id=2601 +``` + +## Table Row Counts + +```text +events=623997 +daily_aggregates=620 +hourly_aggregates=0 +mv_dashboard_metrics=0 +``` + +## Relevant Indexes + +```text +daily_aggregates: daily_aggregates_pkey => CREATE UNIQUE INDEX daily_aggregates_pkey ON 
public.daily_aggregates USING btree (id) +daily_aggregates: idx_aggregates_lookup => CREATE INDEX idx_aggregates_lookup ON public.daily_aggregates USING btree (org_id, project_id, date) +daily_aggregates: idx_aggregates_metric => CREATE INDEX idx_aggregates_metric ON public.daily_aggregates USING btree (metric_name, date) +daily_aggregates: idx_aggregates_tenant_metric_date => CREATE INDEX idx_aggregates_tenant_metric_date ON public.daily_aggregates USING btree (tenant_id, metric_name, date DESC) +daily_aggregates: idx_daily_aggregates_date => CREATE INDEX idx_daily_aggregates_date ON public.daily_aggregates USING btree (date DESC) +daily_aggregates: idx_daily_aggregates_metric_name => CREATE INDEX idx_daily_aggregates_metric_name ON public.daily_aggregates USING btree (metric_name, date DESC) +daily_aggregates: idx_daily_aggregates_org_project_date => CREATE INDEX idx_daily_aggregates_org_project_date ON public.daily_aggregates USING btree (org_id, project_id, date DESC) +daily_aggregates: unique_aggregate => CREATE UNIQUE INDEX unique_aggregate ON public.daily_aggregates USING btree (org_id, project_id, metric_name, date, dimensions) +events: events_event_id_timestamp_key => CREATE UNIQUE INDEX events_event_id_timestamp_key ON ONLY public.events USING btree (event_id, "timestamp") +events: events_org_project_event_id_timestamp_key => CREATE UNIQUE INDEX events_org_project_event_id_timestamp_key ON ONLY public.events USING btree (org_id, project_id, event_id, "timestamp") +events: events_pkey => CREATE UNIQUE INDEX events_pkey ON ONLY public.events USING btree (id, "timestamp") +events: idx_events_event_id => CREATE INDEX idx_events_event_id ON ONLY public.events USING btree (org_id, project_id, event_id) +events: idx_events_event_id_time => CREATE INDEX idx_events_event_id_time ON ONLY public.events USING btree (event_id, "timestamp") +events: idx_events_event_name_time => CREATE INDEX idx_events_event_name_time ON ONLY public.events USING btree (event_name, 
"timestamp" DESC) +events: idx_events_org_project => CREATE INDEX idx_events_org_project ON ONLY public.events USING btree (org_id, project_id) +events: idx_events_org_project_time => CREATE INDEX idx_events_org_project_time ON ONLY public.events USING btree (org_id, project_id, "timestamp" DESC) +events: idx_events_org_time => CREATE INDEX idx_events_org_time ON ONLY public.events USING btree (org_id, "timestamp" DESC) +events: idx_events_project_time => CREATE INDEX idx_events_project_time ON ONLY public.events USING btree (project_id, "timestamp" DESC) +events: idx_events_properties_gin => CREATE INDEX idx_events_properties_gin ON ONLY public.events USING gin (properties) +events: idx_events_session => CREATE INDEX idx_events_session ON ONLY public.events USING btree (session_id) WHERE (session_id IS NOT NULL) +events: idx_events_session_id => CREATE INDEX idx_events_session_id ON ONLY public.events USING btree (session_id) WHERE (session_id IS NOT NULL) +events: idx_events_tenant_metric_time => CREATE INDEX idx_events_tenant_metric_time ON ONLY public.events USING btree (tenant_id, event_name, "timestamp" DESC) +events: idx_events_tenant_time => CREATE INDEX idx_events_tenant_time ON ONLY public.events USING btree (tenant_id, "timestamp" DESC) +events: idx_events_timestamp => CREATE INDEX idx_events_timestamp ON ONLY public.events USING btree ("timestamp" DESC) +events: idx_events_user_id_time => CREATE INDEX idx_events_user_id_time ON ONLY public.events USING btree (org_id, project_id, user_id, "timestamp" DESC) WHERE (user_id IS NOT NULL) +events: idx_events_user_time => CREATE INDEX idx_events_user_time ON ONLY public.events USING btree (org_id, project_id, user_id, "timestamp" DESC) WHERE (user_id IS NOT NULL) +hourly_aggregates: hourly_aggregates_pkey => CREATE UNIQUE INDEX hourly_aggregates_pkey ON public.hourly_aggregates USING btree (id) +hourly_aggregates: idx_hourly_aggregates_lookup => CREATE INDEX idx_hourly_aggregates_lookup ON 
public.hourly_aggregates USING btree (org_id, project_id, hour DESC) +hourly_aggregates: idx_hourly_aggregates_metric => CREATE INDEX idx_hourly_aggregates_metric ON public.hourly_aggregates USING btree (metric_name, hour DESC) +hourly_aggregates: idx_hourly_aggregates_tenant_metric_hour => CREATE INDEX idx_hourly_aggregates_tenant_metric_hour ON public.hourly_aggregates USING btree (tenant_id, metric_name, hour DESC) +hourly_aggregates: unique_hourly_aggregate => CREATE UNIQUE INDEX unique_hourly_aggregate ON public.hourly_aggregates USING btree (org_id, project_id, metric_name, hour, dimensions) +mv_dashboard_metrics: idx_mv_dashboard_org_project_date => CREATE INDEX idx_mv_dashboard_org_project_date ON public.mv_dashboard_metrics USING btree (org_id, project_id, date DESC) +mv_dashboard_metrics: idx_mv_dashboard_unique => CREATE UNIQUE INDEX idx_mv_dashboard_unique ON public.mv_dashboard_metrics USING btree (org_id, project_id, date) +``` + +## Event Partitions + +```text +events_2026_03 +events_2026_04 +events_2026_05 +events_2026_06 +events_2026_07 +events_2026_08 +events_2026_09 +``` + +## Query + +```sql +EXPLAIN (ANALYZE, BUFFERS) +SELECT date, event_count, unique_users, unique_sessions, events_by_name +FROM mv_dashboard_metrics +WHERE org_id = '00000000-0000-4000-8000-0000000f4241' + AND project_id = '00000000-0000-4000-8000-0000001e8481' + AND date >= CURRENT_DATE - INTERVAL '30 days' +ORDER BY date DESC; +``` + +## EXPLAIN ANALYZE + +```text + QUERY PLAN +-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Index Scan using idx_mv_dashboard_org_project_date on mv_dashboard_metrics (cost=0.15..8.18 rows=1 width=84) (actual time=0.006..0.006 rows=0 loops=1) + Index Cond: ((org_id = '00000000-0000-4000-8000-0000000f4241'::uuid) AND (project_id = '00000000-0000-4000-8000-0000001e8481'::uuid) AND (date >= (CURRENT_DATE - 
'30 days'::interval))) + Buffers: shared hit=2 + Planning: + Buffers: shared hit=114 + Planning Time: 0.302 ms + Execution Time: 0.017 ms +(7 rows) + +``` + +## Interpretation + +This plan captures the materialized dashboard read path. Note that `mv_dashboard_metrics` held 0 rows when this plan was captured (see Table Row Counts), so the scan returned no rows and the timing reflects an empty-relation index lookup rather than a populated materialized view. It is not Redis cache evidence; Redis cache timing is captured through GraphQL/load-test summaries. diff --git a/docs/query-plans/2026-06-16-final-benchmark-smoke-partition-pruning-24h.md b/docs/query-plans/2026-06-16-final-benchmark-smoke-partition-pruning-24h.md new file mode 100644 index 0000000..bdcab50 --- /dev/null +++ b/docs/query-plans/2026-06-16-final-benchmark-smoke-partition-pruning-24h.md @@ -0,0 +1,119 @@ +# Partition Pruning Over Last 24 Hours + +Captured: 2026-06-16T20:04:59Z +Run ID: 2026-06-16-final-benchmark-smoke +Git commit: 112ba0414fa55bb431e814441e85ebd925107347 +Command: RUN_ID=2026-06-16-final-benchmark-smoke ./scripts/capture-query-plans.sh +Target org_id: 00000000-0000-4000-8000-0000000f4241 +Target project_id: 00000000-0000-4000-8000-0000001e8481 + +## PostgreSQL Version + +```text +16.13 +``` + +## Benchmark Run Counts + +```text +events_for_run_id=7449 +target_tenant_events_for_run_id=2601 +``` + +## Table Row Counts + +```text +events=623997 +daily_aggregates=620 +hourly_aggregates=0 +mv_dashboard_metrics=0 +``` + +## Relevant Indexes + +```text +daily_aggregates: daily_aggregates_pkey => CREATE UNIQUE INDEX daily_aggregates_pkey ON public.daily_aggregates USING btree (id) +daily_aggregates: idx_aggregates_lookup => CREATE INDEX idx_aggregates_lookup ON public.daily_aggregates USING btree (org_id, project_id, date) +daily_aggregates: idx_aggregates_metric => CREATE INDEX idx_aggregates_metric ON public.daily_aggregates USING btree (metric_name, date) +daily_aggregates: idx_aggregates_tenant_metric_date => CREATE INDEX idx_aggregates_tenant_metric_date ON public.daily_aggregates USING btree (tenant_id, metric_name, date DESC) +daily_aggregates: idx_daily_aggregates_date => CREATE INDEX
idx_daily_aggregates_date ON public.daily_aggregates USING btree (date DESC) +daily_aggregates: idx_daily_aggregates_metric_name => CREATE INDEX idx_daily_aggregates_metric_name ON public.daily_aggregates USING btree (metric_name, date DESC) +daily_aggregates: idx_daily_aggregates_org_project_date => CREATE INDEX idx_daily_aggregates_org_project_date ON public.daily_aggregates USING btree (org_id, project_id, date DESC) +daily_aggregates: unique_aggregate => CREATE UNIQUE INDEX unique_aggregate ON public.daily_aggregates USING btree (org_id, project_id, metric_name, date, dimensions) +events: events_event_id_timestamp_key => CREATE UNIQUE INDEX events_event_id_timestamp_key ON ONLY public.events USING btree (event_id, "timestamp") +events: events_org_project_event_id_timestamp_key => CREATE UNIQUE INDEX events_org_project_event_id_timestamp_key ON ONLY public.events USING btree (org_id, project_id, event_id, "timestamp") +events: events_pkey => CREATE UNIQUE INDEX events_pkey ON ONLY public.events USING btree (id, "timestamp") +events: idx_events_event_id => CREATE INDEX idx_events_event_id ON ONLY public.events USING btree (org_id, project_id, event_id) +events: idx_events_event_id_time => CREATE INDEX idx_events_event_id_time ON ONLY public.events USING btree (event_id, "timestamp") +events: idx_events_event_name_time => CREATE INDEX idx_events_event_name_time ON ONLY public.events USING btree (event_name, "timestamp" DESC) +events: idx_events_org_project => CREATE INDEX idx_events_org_project ON ONLY public.events USING btree (org_id, project_id) +events: idx_events_org_project_time => CREATE INDEX idx_events_org_project_time ON ONLY public.events USING btree (org_id, project_id, "timestamp" DESC) +events: idx_events_org_time => CREATE INDEX idx_events_org_time ON ONLY public.events USING btree (org_id, "timestamp" DESC) +events: idx_events_project_time => CREATE INDEX idx_events_project_time ON ONLY public.events USING btree (project_id, "timestamp" DESC) 
+events: idx_events_properties_gin => CREATE INDEX idx_events_properties_gin ON ONLY public.events USING gin (properties) +events: idx_events_session => CREATE INDEX idx_events_session ON ONLY public.events USING btree (session_id) WHERE (session_id IS NOT NULL) +events: idx_events_session_id => CREATE INDEX idx_events_session_id ON ONLY public.events USING btree (session_id) WHERE (session_id IS NOT NULL) +events: idx_events_tenant_metric_time => CREATE INDEX idx_events_tenant_metric_time ON ONLY public.events USING btree (tenant_id, event_name, "timestamp" DESC) +events: idx_events_tenant_time => CREATE INDEX idx_events_tenant_time ON ONLY public.events USING btree (tenant_id, "timestamp" DESC) +events: idx_events_timestamp => CREATE INDEX idx_events_timestamp ON ONLY public.events USING btree ("timestamp" DESC) +events: idx_events_user_id_time => CREATE INDEX idx_events_user_id_time ON ONLY public.events USING btree (org_id, project_id, user_id, "timestamp" DESC) WHERE (user_id IS NOT NULL) +events: idx_events_user_time => CREATE INDEX idx_events_user_time ON ONLY public.events USING btree (org_id, project_id, user_id, "timestamp" DESC) WHERE (user_id IS NOT NULL) +hourly_aggregates: hourly_aggregates_pkey => CREATE UNIQUE INDEX hourly_aggregates_pkey ON public.hourly_aggregates USING btree (id) +hourly_aggregates: idx_hourly_aggregates_lookup => CREATE INDEX idx_hourly_aggregates_lookup ON public.hourly_aggregates USING btree (org_id, project_id, hour DESC) +hourly_aggregates: idx_hourly_aggregates_metric => CREATE INDEX idx_hourly_aggregates_metric ON public.hourly_aggregates USING btree (metric_name, hour DESC) +hourly_aggregates: idx_hourly_aggregates_tenant_metric_hour => CREATE INDEX idx_hourly_aggregates_tenant_metric_hour ON public.hourly_aggregates USING btree (tenant_id, metric_name, hour DESC) +hourly_aggregates: unique_hourly_aggregate => CREATE UNIQUE INDEX unique_hourly_aggregate ON public.hourly_aggregates USING btree (org_id, project_id, 
metric_name, hour, dimensions) +mv_dashboard_metrics: idx_mv_dashboard_org_project_date => CREATE INDEX idx_mv_dashboard_org_project_date ON public.mv_dashboard_metrics USING btree (org_id, project_id, date DESC) +mv_dashboard_metrics: idx_mv_dashboard_unique => CREATE UNIQUE INDEX idx_mv_dashboard_unique ON public.mv_dashboard_metrics USING btree (org_id, project_id, date) +``` + +## Event Partitions + +```text +events_2026_03 +events_2026_04 +events_2026_05 +events_2026_06 +events_2026_07 +events_2026_08 +events_2026_09 +``` + +## Query + +```sql +EXPLAIN (ANALYZE, BUFFERS) +SELECT COUNT(*) AS events_in_window +FROM events +WHERE org_id = '00000000-0000-4000-8000-0000000f4241' + AND project_id = '00000000-0000-4000-8000-0000001e8481' + AND timestamp >= NOW() - INTERVAL '24 hours' + AND timestamp < NOW(); +``` + +## EXPLAIN ANALYZE + +```text + QUERY PLAN +----------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Aggregate (cost=8.51..8.52 rows=1 width=8) (actual time=1.468..1.468 rows=1 loops=1) + Buffers: shared hit=303 + -> Append (cost=0.00..8.49 rows=7 width=0) (actual time=0.021..1.396 rows=2904 loops=1) + Buffers: shared hit=303 + Subplans Removed: 6 + -> Index Scan using events_2026_06_org_id_project_id_idx on events_2026_06 events_1 (cost=0.42..8.46 rows=1 width=0) (actual time=0.021..1.290 rows=2904 loops=1) + Index Cond: ((org_id = '00000000-0000-4000-8000-0000000f4241'::uuid) AND (project_id = '00000000-0000-4000-8000-0000001e8481'::uuid)) + Filter: (("timestamp" < now()) AND ("timestamp" >= (now() - '24:00:00'::interval))) + Rows Removed by Filter: 237 + Buffers: shared hit=303 + Planning: + Buffers: shared hit=2806 + Planning Time: 5.387 ms + Execution Time: 1.494 ms +(14 rows) + +``` + +## Interpretation + +This query demonstrates the partition set touched for a narrow dashboard window. 
Compare the child tables and subplans removed here with the 30-day plan from the same run. diff --git a/docs/query-plans/2026-06-16-final-benchmark-smoke-partition-pruning-30d.md b/docs/query-plans/2026-06-16-final-benchmark-smoke-partition-pruning-30d.md new file mode 100644 index 0000000..5a267ad --- /dev/null +++ b/docs/query-plans/2026-06-16-final-benchmark-smoke-partition-pruning-30d.md @@ -0,0 +1,120 @@ +# Partition Pruning Over Last 30 Days + +Captured: 2026-06-16T20:05:00Z +Run ID: 2026-06-16-final-benchmark-smoke +Git commit: 112ba0414fa55bb431e814441e85ebd925107347 +Command: RUN_ID=2026-06-16-final-benchmark-smoke ./scripts/capture-query-plans.sh +Target org_id: 00000000-0000-4000-8000-0000000f4241 +Target project_id: 00000000-0000-4000-8000-0000001e8481 + +## PostgreSQL Version + +```text +16.13 +``` + +## Benchmark Run Counts + +```text +events_for_run_id=7449 +target_tenant_events_for_run_id=2601 +``` + +## Table Row Counts + +```text +events=623997 +daily_aggregates=620 +hourly_aggregates=0 +mv_dashboard_metrics=0 +``` + +## Relevant Indexes + +```text +daily_aggregates: daily_aggregates_pkey => CREATE UNIQUE INDEX daily_aggregates_pkey ON public.daily_aggregates USING btree (id) +daily_aggregates: idx_aggregates_lookup => CREATE INDEX idx_aggregates_lookup ON public.daily_aggregates USING btree (org_id, project_id, date) +daily_aggregates: idx_aggregates_metric => CREATE INDEX idx_aggregates_metric ON public.daily_aggregates USING btree (metric_name, date) +daily_aggregates: idx_aggregates_tenant_metric_date => CREATE INDEX idx_aggregates_tenant_metric_date ON public.daily_aggregates USING btree (tenant_id, metric_name, date DESC) +daily_aggregates: idx_daily_aggregates_date => CREATE INDEX idx_daily_aggregates_date ON public.daily_aggregates USING btree (date DESC) +daily_aggregates: idx_daily_aggregates_metric_name => CREATE INDEX idx_daily_aggregates_metric_name ON public.daily_aggregates USING btree (metric_name, date DESC) +daily_aggregates: 
idx_daily_aggregates_org_project_date => CREATE INDEX idx_daily_aggregates_org_project_date ON public.daily_aggregates USING btree (org_id, project_id, date DESC) +daily_aggregates: unique_aggregate => CREATE UNIQUE INDEX unique_aggregate ON public.daily_aggregates USING btree (org_id, project_id, metric_name, date, dimensions) +events: events_event_id_timestamp_key => CREATE UNIQUE INDEX events_event_id_timestamp_key ON ONLY public.events USING btree (event_id, "timestamp") +events: events_org_project_event_id_timestamp_key => CREATE UNIQUE INDEX events_org_project_event_id_timestamp_key ON ONLY public.events USING btree (org_id, project_id, event_id, "timestamp") +events: events_pkey => CREATE UNIQUE INDEX events_pkey ON ONLY public.events USING btree (id, "timestamp") +events: idx_events_event_id => CREATE INDEX idx_events_event_id ON ONLY public.events USING btree (org_id, project_id, event_id) +events: idx_events_event_id_time => CREATE INDEX idx_events_event_id_time ON ONLY public.events USING btree (event_id, "timestamp") +events: idx_events_event_name_time => CREATE INDEX idx_events_event_name_time ON ONLY public.events USING btree (event_name, "timestamp" DESC) +events: idx_events_org_project => CREATE INDEX idx_events_org_project ON ONLY public.events USING btree (org_id, project_id) +events: idx_events_org_project_time => CREATE INDEX idx_events_org_project_time ON ONLY public.events USING btree (org_id, project_id, "timestamp" DESC) +events: idx_events_org_time => CREATE INDEX idx_events_org_time ON ONLY public.events USING btree (org_id, "timestamp" DESC) +events: idx_events_project_time => CREATE INDEX idx_events_project_time ON ONLY public.events USING btree (project_id, "timestamp" DESC) +events: idx_events_properties_gin => CREATE INDEX idx_events_properties_gin ON ONLY public.events USING gin (properties) +events: idx_events_session => CREATE INDEX idx_events_session ON ONLY public.events USING btree (session_id) WHERE (session_id IS NOT NULL) 
+events: idx_events_session_id => CREATE INDEX idx_events_session_id ON ONLY public.events USING btree (session_id) WHERE (session_id IS NOT NULL) +events: idx_events_tenant_metric_time => CREATE INDEX idx_events_tenant_metric_time ON ONLY public.events USING btree (tenant_id, event_name, "timestamp" DESC) +events: idx_events_tenant_time => CREATE INDEX idx_events_tenant_time ON ONLY public.events USING btree (tenant_id, "timestamp" DESC) +events: idx_events_timestamp => CREATE INDEX idx_events_timestamp ON ONLY public.events USING btree ("timestamp" DESC) +events: idx_events_user_id_time => CREATE INDEX idx_events_user_id_time ON ONLY public.events USING btree (org_id, project_id, user_id, "timestamp" DESC) WHERE (user_id IS NOT NULL) +events: idx_events_user_time => CREATE INDEX idx_events_user_time ON ONLY public.events USING btree (org_id, project_id, user_id, "timestamp" DESC) WHERE (user_id IS NOT NULL) +hourly_aggregates: hourly_aggregates_pkey => CREATE UNIQUE INDEX hourly_aggregates_pkey ON public.hourly_aggregates USING btree (id) +hourly_aggregates: idx_hourly_aggregates_lookup => CREATE INDEX idx_hourly_aggregates_lookup ON public.hourly_aggregates USING btree (org_id, project_id, hour DESC) +hourly_aggregates: idx_hourly_aggregates_metric => CREATE INDEX idx_hourly_aggregates_metric ON public.hourly_aggregates USING btree (metric_name, hour DESC) +hourly_aggregates: idx_hourly_aggregates_tenant_metric_hour => CREATE INDEX idx_hourly_aggregates_tenant_metric_hour ON public.hourly_aggregates USING btree (tenant_id, metric_name, hour DESC) +hourly_aggregates: unique_hourly_aggregate => CREATE UNIQUE INDEX unique_hourly_aggregate ON public.hourly_aggregates USING btree (org_id, project_id, metric_name, hour, dimensions) +mv_dashboard_metrics: idx_mv_dashboard_org_project_date => CREATE INDEX idx_mv_dashboard_org_project_date ON public.mv_dashboard_metrics USING btree (org_id, project_id, date DESC) +mv_dashboard_metrics: idx_mv_dashboard_unique => CREATE 
UNIQUE INDEX idx_mv_dashboard_unique ON public.mv_dashboard_metrics USING btree (org_id, project_id, date) +``` + +## Event Partitions + +```text +events_2026_03 +events_2026_04 +events_2026_05 +events_2026_06 +events_2026_07 +events_2026_08 +events_2026_09 +``` + +## Query + +```sql +EXPLAIN (ANALYZE, BUFFERS) +SELECT COUNT(*) AS events_in_window +FROM events +WHERE org_id = '00000000-0000-4000-8000-0000000f4241' + AND project_id = '00000000-0000-4000-8000-0000001e8481' + AND timestamp >= NOW() - INTERVAL '30 days' + AND timestamp < NOW(); +``` + +## EXPLAIN ANALYZE + +```text + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ + Aggregate (cost=8.51..8.52 rows=1 width=8) (actual time=1.723..1.724 rows=1 loops=1) + Buffers: shared hit=303 + -> Append (cost=0.00..8.49 rows=7 width=0) (actual time=0.025..1.646 rows=3141 loops=1) + Buffers: shared hit=303 + Subplans Removed: 5 + -> Seq Scan on events_2026_05 events_1 (cost=0.00..0.00 rows=1 width=0) (actual time=0.002..0.002 rows=0 loops=1) + Filter: ((org_id = '00000000-0000-4000-8000-0000000f4241'::uuid) AND (project_id = '00000000-0000-4000-8000-0000001e8481'::uuid) AND ("timestamp" < now()) AND ("timestamp" >= (now() - '30 days'::interval))) + -> Index Scan using events_2026_06_org_id_project_id_idx on events_2026_06 events_2 (cost=0.42..8.46 rows=1 width=0) (actual time=0.023..1.520 rows=3141 loops=1) + Index Cond: ((org_id = '00000000-0000-4000-8000-0000000f4241'::uuid) AND (project_id = '00000000-0000-4000-8000-0000001e8481'::uuid)) + Filter: (("timestamp" < now()) AND ("timestamp" >= (now() - '30 days'::interval))) + Buffers: shared hit=303 + Planning: + Buffers: shared hit=2811 + Planning Time: 7.135 ms + Execution Time: 1.757 ms +(15 rows) + +``` + +## Interpretation + +This 30-day query is the broader scan/pruning comparison 
for the 24-hour plan from the same run. With monthly local partitions, it can touch more than one child partition depending on the date and available benchmark rows. diff --git a/docs/query-plans/2026-06-16-final-benchmark-smoke-tenant-dashboard-chosen-index.md b/docs/query-plans/2026-06-16-final-benchmark-smoke-tenant-dashboard-chosen-index.md new file mode 100644 index 0000000..eafbca3 --- /dev/null +++ b/docs/query-plans/2026-06-16-final-benchmark-smoke-tenant-dashboard-chosen-index.md @@ -0,0 +1,128 @@ +# Tenant Dashboard Raw Event Query With Chosen Indexes + +Captured: 2026-06-16T20:04:59Z +Run ID: 2026-06-16-final-benchmark-smoke +Git commit: 112ba0414fa55bb431e814441e85ebd925107347 +Command: RUN_ID=2026-06-16-final-benchmark-smoke ./scripts/capture-query-plans.sh +Target org_id: 00000000-0000-4000-8000-0000000f4241 +Target project_id: 00000000-0000-4000-8000-0000001e8481 + +## PostgreSQL Version + +```text +16.13 +``` + +## Benchmark Run Counts + +```text +events_for_run_id=7449 +target_tenant_events_for_run_id=2601 +``` + +## Table Row Counts + +```text +events=623997 +daily_aggregates=620 +hourly_aggregates=0 +mv_dashboard_metrics=0 +``` + +## Relevant Indexes + +```text +daily_aggregates: daily_aggregates_pkey => CREATE UNIQUE INDEX daily_aggregates_pkey ON public.daily_aggregates USING btree (id) +daily_aggregates: idx_aggregates_lookup => CREATE INDEX idx_aggregates_lookup ON public.daily_aggregates USING btree (org_id, project_id, date) +daily_aggregates: idx_aggregates_metric => CREATE INDEX idx_aggregates_metric ON public.daily_aggregates USING btree (metric_name, date) +daily_aggregates: idx_aggregates_tenant_metric_date => CREATE INDEX idx_aggregates_tenant_metric_date ON public.daily_aggregates USING btree (tenant_id, metric_name, date DESC) +daily_aggregates: idx_daily_aggregates_date => CREATE INDEX idx_daily_aggregates_date ON public.daily_aggregates USING btree (date DESC) +daily_aggregates: idx_daily_aggregates_metric_name => CREATE INDEX 
idx_daily_aggregates_metric_name ON public.daily_aggregates USING btree (metric_name, date DESC) +daily_aggregates: idx_daily_aggregates_org_project_date => CREATE INDEX idx_daily_aggregates_org_project_date ON public.daily_aggregates USING btree (org_id, project_id, date DESC) +daily_aggregates: unique_aggregate => CREATE UNIQUE INDEX unique_aggregate ON public.daily_aggregates USING btree (org_id, project_id, metric_name, date, dimensions) +events: events_event_id_timestamp_key => CREATE UNIQUE INDEX events_event_id_timestamp_key ON ONLY public.events USING btree (event_id, "timestamp") +events: events_org_project_event_id_timestamp_key => CREATE UNIQUE INDEX events_org_project_event_id_timestamp_key ON ONLY public.events USING btree (org_id, project_id, event_id, "timestamp") +events: events_pkey => CREATE UNIQUE INDEX events_pkey ON ONLY public.events USING btree (id, "timestamp") +events: idx_events_event_id => CREATE INDEX idx_events_event_id ON ONLY public.events USING btree (org_id, project_id, event_id) +events: idx_events_event_id_time => CREATE INDEX idx_events_event_id_time ON ONLY public.events USING btree (event_id, "timestamp") +events: idx_events_event_name_time => CREATE INDEX idx_events_event_name_time ON ONLY public.events USING btree (event_name, "timestamp" DESC) +events: idx_events_org_project => CREATE INDEX idx_events_org_project ON ONLY public.events USING btree (org_id, project_id) +events: idx_events_org_project_time => CREATE INDEX idx_events_org_project_time ON ONLY public.events USING btree (org_id, project_id, "timestamp" DESC) +events: idx_events_org_time => CREATE INDEX idx_events_org_time ON ONLY public.events USING btree (org_id, "timestamp" DESC) +events: idx_events_project_time => CREATE INDEX idx_events_project_time ON ONLY public.events USING btree (project_id, "timestamp" DESC) +events: idx_events_properties_gin => CREATE INDEX idx_events_properties_gin ON ONLY public.events USING gin (properties) +events: idx_events_session 
=> CREATE INDEX idx_events_session ON ONLY public.events USING btree (session_id) WHERE (session_id IS NOT NULL) +events: idx_events_session_id => CREATE INDEX idx_events_session_id ON ONLY public.events USING btree (session_id) WHERE (session_id IS NOT NULL) +events: idx_events_tenant_metric_time => CREATE INDEX idx_events_tenant_metric_time ON ONLY public.events USING btree (tenant_id, event_name, "timestamp" DESC) +events: idx_events_tenant_time => CREATE INDEX idx_events_tenant_time ON ONLY public.events USING btree (tenant_id, "timestamp" DESC) +events: idx_events_timestamp => CREATE INDEX idx_events_timestamp ON ONLY public.events USING btree ("timestamp" DESC) +events: idx_events_user_id_time => CREATE INDEX idx_events_user_id_time ON ONLY public.events USING btree (org_id, project_id, user_id, "timestamp" DESC) WHERE (user_id IS NOT NULL) +events: idx_events_user_time => CREATE INDEX idx_events_user_time ON ONLY public.events USING btree (org_id, project_id, user_id, "timestamp" DESC) WHERE (user_id IS NOT NULL) +hourly_aggregates: hourly_aggregates_pkey => CREATE UNIQUE INDEX hourly_aggregates_pkey ON public.hourly_aggregates USING btree (id) +hourly_aggregates: idx_hourly_aggregates_lookup => CREATE INDEX idx_hourly_aggregates_lookup ON public.hourly_aggregates USING btree (org_id, project_id, hour DESC) +hourly_aggregates: idx_hourly_aggregates_metric => CREATE INDEX idx_hourly_aggregates_metric ON public.hourly_aggregates USING btree (metric_name, hour DESC) +hourly_aggregates: idx_hourly_aggregates_tenant_metric_hour => CREATE INDEX idx_hourly_aggregates_tenant_metric_hour ON public.hourly_aggregates USING btree (tenant_id, metric_name, hour DESC) +hourly_aggregates: unique_hourly_aggregate => CREATE UNIQUE INDEX unique_hourly_aggregate ON public.hourly_aggregates USING btree (org_id, project_id, metric_name, hour, dimensions) +mv_dashboard_metrics: idx_mv_dashboard_org_project_date => CREATE INDEX idx_mv_dashboard_org_project_date ON 
public.mv_dashboard_metrics USING btree (org_id, project_id, date DESC) +mv_dashboard_metrics: idx_mv_dashboard_unique => CREATE UNIQUE INDEX idx_mv_dashboard_unique ON public.mv_dashboard_metrics USING btree (org_id, project_id, date) +``` + +## Event Partitions + +```text +events_2026_03 +events_2026_04 +events_2026_05 +events_2026_06 +events_2026_07 +events_2026_08 +events_2026_09 +``` + +## Query + +```sql +EXPLAIN (ANALYZE, BUFFERS) +SELECT id, event_id, event_name, user_id, session_id, timestamp, properties +FROM events +WHERE org_id = '00000000-0000-4000-8000-0000000f4241' + AND project_id = '00000000-0000-4000-8000-0000001e8481' + AND timestamp >= NOW() - INTERVAL '7 days' +ORDER BY timestamp DESC +LIMIT 100; +``` + +## EXPLAIN ANALYZE + +```text + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Limit (cost=8.59..8.60 rows=7 width=993) (actual time=2.238..2.245 rows=100 loops=1) + Buffers: shared hit=618 + -> Sort (cost=8.59..8.60 rows=7 width=993) (actual time=2.237..2.240 rows=100 loops=1) + Sort Key: events."timestamp" DESC + Sort Method: top-N heapsort Memory: 75kB + Buffers: shared hit=618 + -> Append (cost=0.00..8.49 rows=7 width=993) (actual time=0.044..1.910 rows=2904 loops=1) + Buffers: shared hit=615 + Subplans Removed: 3 + -> Index Scan using events_2026_06_org_id_project_id_timestamp_idx on events_2026_06 events_1 (cost=0.43..8.45 rows=1 width=281) (actual time=0.043..1.789 rows=2904 loops=1) + Index Cond: ((org_id = '00000000-0000-4000-8000-0000000f4241'::uuid) AND (project_id = '00000000-0000-4000-8000-0000001e8481'::uuid) AND ("timestamp" >= (now() - '7 days'::interval))) + Buffers: shared hit=615 + -> Seq Scan on events_2026_07 events_2 (cost=0.00..0.00 rows=1 width=1112) (actual time=0.002..0.002 rows=0 loops=1) + Filter: ((org_id = 
'00000000-0000-4000-8000-0000000f4241'::uuid) AND (project_id = '00000000-0000-4000-8000-0000001e8481'::uuid) AND ("timestamp" >= (now() - '7 days'::interval))) + -> Seq Scan on events_2026_08 events_3 (cost=0.00..0.00 rows=1 width=1112) (actual time=0.001..0.001 rows=0 loops=1) + Filter: ((org_id = '00000000-0000-4000-8000-0000000f4241'::uuid) AND (project_id = '00000000-0000-4000-8000-0000001e8481'::uuid) AND ("timestamp" >= (now() - '7 days'::interval))) + -> Seq Scan on events_2026_09 events_4 (cost=0.00..0.00 rows=1 width=1112) (actual time=0.002..0.002 rows=0 loops=1) + Filter: ((org_id = '00000000-0000-4000-8000-0000000f4241'::uuid) AND (project_id = '00000000-0000-4000-8000-0000001e8481'::uuid) AND ("timestamp" >= (now() - '7 days'::interval))) + Planning: + Buffers: shared hit=2894 + Planning Time: 6.140 ms + Execution Time: 2.307 ms +(22 rows) + +``` + +## Interpretation + +This is the normal tenant dashboard raw-event access path for this run. Use this plan to verify whether PostgreSQL chooses the tenant/project/timestamp index or another timestamp-oriented partition index, and how many partition children are touched. 
diff --git a/docs/query-plans/2026-06-16-final-benchmark-smoke-tenant-dashboard-index-disabled.md b/docs/query-plans/2026-06-16-final-benchmark-smoke-tenant-dashboard-index-disabled.md new file mode 100644 index 0000000..617f32b --- /dev/null +++ b/docs/query-plans/2026-06-16-final-benchmark-smoke-tenant-dashboard-index-disabled.md @@ -0,0 +1,141 @@ +# Tenant Dashboard Raw Event Query With Index Scans Disabled + +Captured: 2026-06-16T20:04:58Z +Run ID: 2026-06-16-final-benchmark-smoke +Git commit: 112ba0414fa55bb431e814441e85ebd925107347 +Command: RUN_ID=2026-06-16-final-benchmark-smoke ./scripts/capture-query-plans.sh +Target org_id: 00000000-0000-4000-8000-0000000f4241 +Target project_id: 00000000-0000-4000-8000-0000001e8481 + +## PostgreSQL Version + +```text +16.13 +``` + +## Benchmark Run Counts + +```text +events_for_run_id=7449 +target_tenant_events_for_run_id=2601 +``` + +## Table Row Counts + +```text +events=623997 +daily_aggregates=620 +hourly_aggregates=0 +mv_dashboard_metrics=0 +``` + +## Relevant Indexes + +```text +daily_aggregates: daily_aggregates_pkey => CREATE UNIQUE INDEX daily_aggregates_pkey ON public.daily_aggregates USING btree (id) +daily_aggregates: idx_aggregates_lookup => CREATE INDEX idx_aggregates_lookup ON public.daily_aggregates USING btree (org_id, project_id, date) +daily_aggregates: idx_aggregates_metric => CREATE INDEX idx_aggregates_metric ON public.daily_aggregates USING btree (metric_name, date) +daily_aggregates: idx_aggregates_tenant_metric_date => CREATE INDEX idx_aggregates_tenant_metric_date ON public.daily_aggregates USING btree (tenant_id, metric_name, date DESC) +daily_aggregates: idx_daily_aggregates_date => CREATE INDEX idx_daily_aggregates_date ON public.daily_aggregates USING btree (date DESC) +daily_aggregates: idx_daily_aggregates_metric_name => CREATE INDEX idx_daily_aggregates_metric_name ON public.daily_aggregates USING btree (metric_name, date DESC) +daily_aggregates: idx_daily_aggregates_org_project_date => 
CREATE INDEX idx_daily_aggregates_org_project_date ON public.daily_aggregates USING btree (org_id, project_id, date DESC) +daily_aggregates: unique_aggregate => CREATE UNIQUE INDEX unique_aggregate ON public.daily_aggregates USING btree (org_id, project_id, metric_name, date, dimensions) +events: events_event_id_timestamp_key => CREATE UNIQUE INDEX events_event_id_timestamp_key ON ONLY public.events USING btree (event_id, "timestamp") +events: events_org_project_event_id_timestamp_key => CREATE UNIQUE INDEX events_org_project_event_id_timestamp_key ON ONLY public.events USING btree (org_id, project_id, event_id, "timestamp") +events: events_pkey => CREATE UNIQUE INDEX events_pkey ON ONLY public.events USING btree (id, "timestamp") +events: idx_events_event_id => CREATE INDEX idx_events_event_id ON ONLY public.events USING btree (org_id, project_id, event_id) +events: idx_events_event_id_time => CREATE INDEX idx_events_event_id_time ON ONLY public.events USING btree (event_id, "timestamp") +events: idx_events_event_name_time => CREATE INDEX idx_events_event_name_time ON ONLY public.events USING btree (event_name, "timestamp" DESC) +events: idx_events_org_project => CREATE INDEX idx_events_org_project ON ONLY public.events USING btree (org_id, project_id) +events: idx_events_org_project_time => CREATE INDEX idx_events_org_project_time ON ONLY public.events USING btree (org_id, project_id, "timestamp" DESC) +events: idx_events_org_time => CREATE INDEX idx_events_org_time ON ONLY public.events USING btree (org_id, "timestamp" DESC) +events: idx_events_project_time => CREATE INDEX idx_events_project_time ON ONLY public.events USING btree (project_id, "timestamp" DESC) +events: idx_events_properties_gin => CREATE INDEX idx_events_properties_gin ON ONLY public.events USING gin (properties) +events: idx_events_session => CREATE INDEX idx_events_session ON ONLY public.events USING btree (session_id) WHERE (session_id IS NOT NULL) +events: idx_events_session_id => CREATE 
INDEX idx_events_session_id ON ONLY public.events USING btree (session_id) WHERE (session_id IS NOT NULL) +events: idx_events_tenant_metric_time => CREATE INDEX idx_events_tenant_metric_time ON ONLY public.events USING btree (tenant_id, event_name, "timestamp" DESC) +events: idx_events_tenant_time => CREATE INDEX idx_events_tenant_time ON ONLY public.events USING btree (tenant_id, "timestamp" DESC) +events: idx_events_timestamp => CREATE INDEX idx_events_timestamp ON ONLY public.events USING btree ("timestamp" DESC) +events: idx_events_user_id_time => CREATE INDEX idx_events_user_id_time ON ONLY public.events USING btree (org_id, project_id, user_id, "timestamp" DESC) WHERE (user_id IS NOT NULL) +events: idx_events_user_time => CREATE INDEX idx_events_user_time ON ONLY public.events USING btree (org_id, project_id, user_id, "timestamp" DESC) WHERE (user_id IS NOT NULL) +hourly_aggregates: hourly_aggregates_pkey => CREATE UNIQUE INDEX hourly_aggregates_pkey ON public.hourly_aggregates USING btree (id) +hourly_aggregates: idx_hourly_aggregates_lookup => CREATE INDEX idx_hourly_aggregates_lookup ON public.hourly_aggregates USING btree (org_id, project_id, hour DESC) +hourly_aggregates: idx_hourly_aggregates_metric => CREATE INDEX idx_hourly_aggregates_metric ON public.hourly_aggregates USING btree (metric_name, hour DESC) +hourly_aggregates: idx_hourly_aggregates_tenant_metric_hour => CREATE INDEX idx_hourly_aggregates_tenant_metric_hour ON public.hourly_aggregates USING btree (tenant_id, metric_name, hour DESC) +hourly_aggregates: unique_hourly_aggregate => CREATE UNIQUE INDEX unique_hourly_aggregate ON public.hourly_aggregates USING btree (org_id, project_id, metric_name, hour, dimensions) +mv_dashboard_metrics: idx_mv_dashboard_org_project_date => CREATE INDEX idx_mv_dashboard_org_project_date ON public.mv_dashboard_metrics USING btree (org_id, project_id, date DESC) +mv_dashboard_metrics: idx_mv_dashboard_unique => CREATE UNIQUE INDEX idx_mv_dashboard_unique ON 
public.mv_dashboard_metrics USING btree (org_id, project_id, date) +``` + +## Event Partitions + +```text +events_2026_03 +events_2026_04 +events_2026_05 +events_2026_06 +events_2026_07 +events_2026_08 +events_2026_09 +``` + +## Query + +```sql +BEGIN; +SET LOCAL enable_indexscan = off; +SET LOCAL enable_bitmapscan = off; +EXPLAIN (ANALYZE, BUFFERS) +SELECT id, event_id, event_name, user_id, session_id, timestamp, properties +FROM events +WHERE org_id = '00000000-0000-4000-8000-0000000f4241' + AND project_id = '00000000-0000-4000-8000-0000001e8481' + AND timestamp >= NOW() - INTERVAL '7 days' +ORDER BY timestamp DESC +LIMIT 100; +ROLLBACK; +``` + +## EXPLAIN ANALYZE + +```text +BEGIN +SET +SET + QUERY PLAN +--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Limit (cost=36581.10..36581.12 rows=7 width=993) (actual time=29.345..30.698 rows=100 loops=1) + Buffers: shared hit=3588 read=26142 + -> Sort (cost=36581.10..36581.12 rows=7 width=993) (actual time=29.343..30.693 rows=100 loops=1) + Sort Key: events."timestamp" DESC + Sort Method: top-N heapsort Memory: 119kB + Buffers: shared hit=3588 read=26142 + -> Gather (cost=1000.00..36581.00 rows=7 width=993) (actual time=28.152..30.240 rows=2904 loops=1) + Workers Planned: 2 + Workers Launched: 2 + Buffers: shared hit=3585 read=26142 + -> Parallel Append (cost=0.00..35580.30 rows=7 width=993) (actual time=26.309..27.108 rows=968 loops=3) + Buffers: shared hit=3585 read=26142 + Subplans Removed: 3 + -> Seq Scan on events_2026_07 events_2 (cost=0.00..0.00 rows=1 width=1112) (actual time=0.003..0.003 rows=0 loops=1) + Filter: ((org_id = '00000000-0000-4000-8000-0000000f4241'::uuid) AND (project_id = '00000000-0000-4000-8000-0000001e8481'::uuid) AND ("timestamp" >= (now() - '7 days'::interval))) + -> Seq Scan on events_2026_08 events_3 (cost=0.00..0.00 rows=1 
width=1112) (actual time=0.003..0.003 rows=0 loops=1) + Filter: ((org_id = '00000000-0000-4000-8000-0000000f4241'::uuid) AND (project_id = '00000000-0000-4000-8000-0000001e8481'::uuid) AND ("timestamp" >= (now() - '7 days'::interval))) + -> Seq Scan on events_2026_09 events_4 (cost=0.00..0.00 rows=1 width=1112) (actual time=0.002..0.002 rows=0 loops=1) + Filter: ((org_id = '00000000-0000-4000-8000-0000000f4241'::uuid) AND (project_id = '00000000-0000-4000-8000-0000001e8481'::uuid) AND ("timestamp" >= (now() - '7 days'::interval))) + -> Parallel Seq Scan on events_2026_06 events_1 (cost=0.00..35580.29 rows=1 width=281) (actual time=26.305..27.065 rows=968 loops=3) + Filter: ((org_id = '00000000-0000-4000-8000-0000000f4241'::uuid) AND (project_id = '00000000-0000-4000-8000-0000001e8481'::uuid) AND ("timestamp" >= (now() - '7 days'::interval))) + Rows Removed by Filter: 207031 + Buffers: shared hit=3585 read=26142 + Planning: + Buffers: shared hit=2894 + Planning Time: 6.457 ms + Execution Time: 30.810 ms +(27 rows) + +ROLLBACK +``` + +## Interpretation + +Index and bitmap scans are disabled in this session to show the cost shape when PostgreSQL cannot use the tenant/time access path. This is not a dropped-index benchmark; it is planner-controlled evidence for comparison. 
diff --git a/docs/query-plans/2026-06-16-materialized-dashboard.md b/docs/query-plans/2026-06-16-materialized-dashboard.md new file mode 100644 index 0000000..b864e61 --- /dev/null +++ b/docs/query-plans/2026-06-16-materialized-dashboard.md @@ -0,0 +1,94 @@ +# Materialized Dashboard Metrics Query + +Captured: 2026-06-16T18:46:50Z + +## Table Row Counts + +```text +events=130861 +daily_aggregates=14 +hourly_aggregates=0 +mv_dashboard_metrics=0 +``` + +## Relevant Indexes + +```text +daily_aggregates: daily_aggregates_pkey => CREATE UNIQUE INDEX daily_aggregates_pkey ON public.daily_aggregates USING btree (id) +daily_aggregates: idx_aggregates_lookup => CREATE INDEX idx_aggregates_lookup ON public.daily_aggregates USING btree (org_id, project_id, date) +daily_aggregates: idx_aggregates_metric => CREATE INDEX idx_aggregates_metric ON public.daily_aggregates USING btree (metric_name, date) +daily_aggregates: idx_aggregates_tenant_metric_date => CREATE INDEX idx_aggregates_tenant_metric_date ON public.daily_aggregates USING btree (tenant_id, metric_name, date DESC) +daily_aggregates: idx_daily_aggregates_date => CREATE INDEX idx_daily_aggregates_date ON public.daily_aggregates USING btree (date DESC) +daily_aggregates: idx_daily_aggregates_metric_name => CREATE INDEX idx_daily_aggregates_metric_name ON public.daily_aggregates USING btree (metric_name, date DESC) +daily_aggregates: idx_daily_aggregates_org_project_date => CREATE INDEX idx_daily_aggregates_org_project_date ON public.daily_aggregates USING btree (org_id, project_id, date DESC) +daily_aggregates: unique_aggregate => CREATE UNIQUE INDEX unique_aggregate ON public.daily_aggregates USING btree (org_id, project_id, metric_name, date, dimensions) +events: events_event_id_timestamp_key => CREATE UNIQUE INDEX events_event_id_timestamp_key ON ONLY public.events USING btree (event_id, "timestamp") +events: events_org_project_event_id_timestamp_key => CREATE UNIQUE INDEX events_org_project_event_id_timestamp_key ON 
ONLY public.events USING btree (org_id, project_id, event_id, "timestamp") +events: events_pkey => CREATE UNIQUE INDEX events_pkey ON ONLY public.events USING btree (id, "timestamp") +events: idx_events_event_id => CREATE INDEX idx_events_event_id ON ONLY public.events USING btree (org_id, project_id, event_id) +events: idx_events_event_id_time => CREATE INDEX idx_events_event_id_time ON ONLY public.events USING btree (event_id, "timestamp") +events: idx_events_event_name_time => CREATE INDEX idx_events_event_name_time ON ONLY public.events USING btree (event_name, "timestamp" DESC) +events: idx_events_org_project => CREATE INDEX idx_events_org_project ON ONLY public.events USING btree (org_id, project_id) +events: idx_events_org_project_time => CREATE INDEX idx_events_org_project_time ON ONLY public.events USING btree (org_id, project_id, "timestamp" DESC) +events: idx_events_org_time => CREATE INDEX idx_events_org_time ON ONLY public.events USING btree (org_id, "timestamp" DESC) +events: idx_events_project_time => CREATE INDEX idx_events_project_time ON ONLY public.events USING btree (project_id, "timestamp" DESC) +events: idx_events_properties_gin => CREATE INDEX idx_events_properties_gin ON ONLY public.events USING gin (properties) +events: idx_events_session => CREATE INDEX idx_events_session ON ONLY public.events USING btree (session_id) WHERE (session_id IS NOT NULL) +events: idx_events_session_id => CREATE INDEX idx_events_session_id ON ONLY public.events USING btree (session_id) WHERE (session_id IS NOT NULL) +events: idx_events_tenant_metric_time => CREATE INDEX idx_events_tenant_metric_time ON ONLY public.events USING btree (tenant_id, event_name, "timestamp" DESC) +events: idx_events_tenant_time => CREATE INDEX idx_events_tenant_time ON ONLY public.events USING btree (tenant_id, "timestamp" DESC) +events: idx_events_timestamp => CREATE INDEX idx_events_timestamp ON ONLY public.events USING btree ("timestamp" DESC) +events: idx_events_user_id_time => 
CREATE INDEX idx_events_user_id_time ON ONLY public.events USING btree (org_id, project_id, user_id, "timestamp" DESC) WHERE (user_id IS NOT NULL) +events: idx_events_user_time => CREATE INDEX idx_events_user_time ON ONLY public.events USING btree (org_id, project_id, user_id, "timestamp" DESC) WHERE (user_id IS NOT NULL) +hourly_aggregates: hourly_aggregates_pkey => CREATE UNIQUE INDEX hourly_aggregates_pkey ON public.hourly_aggregates USING btree (id) +hourly_aggregates: idx_hourly_aggregates_lookup => CREATE INDEX idx_hourly_aggregates_lookup ON public.hourly_aggregates USING btree (org_id, project_id, hour DESC) +hourly_aggregates: idx_hourly_aggregates_metric => CREATE INDEX idx_hourly_aggregates_metric ON public.hourly_aggregates USING btree (metric_name, hour DESC) +hourly_aggregates: idx_hourly_aggregates_tenant_metric_hour => CREATE INDEX idx_hourly_aggregates_tenant_metric_hour ON public.hourly_aggregates USING btree (tenant_id, metric_name, hour DESC) +hourly_aggregates: unique_hourly_aggregate => CREATE UNIQUE INDEX unique_hourly_aggregate ON public.hourly_aggregates USING btree (org_id, project_id, metric_name, hour, dimensions) +mv_dashboard_metrics: idx_mv_dashboard_org_project_date => CREATE INDEX idx_mv_dashboard_org_project_date ON public.mv_dashboard_metrics USING btree (org_id, project_id, date DESC) +mv_dashboard_metrics: idx_mv_dashboard_unique => CREATE UNIQUE INDEX idx_mv_dashboard_unique ON public.mv_dashboard_metrics USING btree (org_id, project_id, date) +``` + +## Event Partitions + +```text +events_2026_03 +events_2026_04 +events_2026_05 +events_2026_06 +events_2026_07 +events_2026_08 +events_2026_09 +``` + +## Query + +```sql +EXPLAIN (ANALYZE, BUFFERS) +SELECT date, event_count, unique_users, unique_sessions, events_by_name +FROM mv_dashboard_metrics +WHERE org_id = '00000000-0000-0000-0000-000000000001' + AND project_id = '00000000-0000-0000-0000-000000000002' + AND date >= CURRENT_DATE - INTERVAL '30 days' +ORDER BY date DESC; +``` 
+ +## EXPLAIN ANALYZE + +```text + QUERY PLAN +-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Index Scan using idx_mv_dashboard_org_project_date on mv_dashboard_metrics (cost=0.15..8.18 rows=1 width=84) (actual time=0.006..0.006 rows=0 loops=1) + Index Cond: ((org_id = '00000000-0000-0000-0000-000000000001'::uuid) AND (project_id = '00000000-0000-0000-0000-000000000002'::uuid) AND (date >= (CURRENT_DATE - '30 days'::interval))) + Buffers: shared hit=2 + Planning: + Buffers: shared hit=114 + Planning Time: 0.534 ms + Execution Time: 0.020 ms +(7 rows) + +``` + +## Interpretation + +This plan captures the materialized dashboard read path. Note that `mv_dashboard_metrics` held 0 rows at capture time and the scan returned 0 rows, so this plan shows only which index PostgreSQL selects on an empty relation; it is not evidence of read cost or latency on populated data. It is not Redis cache evidence; Redis cache timing is captured through GraphQL/load-test summaries. diff --git a/docs/query-plans/2026-06-16-partition-pruning-24h.md b/docs/query-plans/2026-06-16-partition-pruning-24h.md new file mode 100644 index 0000000..9aa6f79 --- /dev/null +++ b/docs/query-plans/2026-06-16-partition-pruning-24h.md @@ -0,0 +1,106 @@ +# Partition Pruning Over Last 24 Hours + +Captured: 2026-06-16T18:46:49Z + +## Table Row Counts + +```text +events=130488 +daily_aggregates=14 +hourly_aggregates=0 +mv_dashboard_metrics=0 +``` + +## Relevant Indexes + +```text +daily_aggregates: daily_aggregates_pkey => CREATE UNIQUE INDEX daily_aggregates_pkey ON public.daily_aggregates USING btree (id) +daily_aggregates: idx_aggregates_lookup => CREATE INDEX idx_aggregates_lookup ON public.daily_aggregates USING btree (org_id, project_id, date) +daily_aggregates: idx_aggregates_metric => CREATE INDEX idx_aggregates_metric ON public.daily_aggregates USING btree (metric_name, date) +daily_aggregates: idx_aggregates_tenant_metric_date => CREATE INDEX idx_aggregates_tenant_metric_date ON public.daily_aggregates USING btree (tenant_id, metric_name, date DESC) +daily_aggregates: 
idx_daily_aggregates_date => CREATE INDEX idx_daily_aggregates_date ON public.daily_aggregates USING btree (date DESC) +daily_aggregates: idx_daily_aggregates_metric_name => CREATE INDEX idx_daily_aggregates_metric_name ON public.daily_aggregates USING btree (metric_name, date DESC) +daily_aggregates: idx_daily_aggregates_org_project_date => CREATE INDEX idx_daily_aggregates_org_project_date ON public.daily_aggregates USING btree (org_id, project_id, date DESC) +daily_aggregates: unique_aggregate => CREATE UNIQUE INDEX unique_aggregate ON public.daily_aggregates USING btree (org_id, project_id, metric_name, date, dimensions) +events: events_event_id_timestamp_key => CREATE UNIQUE INDEX events_event_id_timestamp_key ON ONLY public.events USING btree (event_id, "timestamp") +events: events_org_project_event_id_timestamp_key => CREATE UNIQUE INDEX events_org_project_event_id_timestamp_key ON ONLY public.events USING btree (org_id, project_id, event_id, "timestamp") +events: events_pkey => CREATE UNIQUE INDEX events_pkey ON ONLY public.events USING btree (id, "timestamp") +events: idx_events_event_id => CREATE INDEX idx_events_event_id ON ONLY public.events USING btree (org_id, project_id, event_id) +events: idx_events_event_id_time => CREATE INDEX idx_events_event_id_time ON ONLY public.events USING btree (event_id, "timestamp") +events: idx_events_event_name_time => CREATE INDEX idx_events_event_name_time ON ONLY public.events USING btree (event_name, "timestamp" DESC) +events: idx_events_org_project => CREATE INDEX idx_events_org_project ON ONLY public.events USING btree (org_id, project_id) +events: idx_events_org_project_time => CREATE INDEX idx_events_org_project_time ON ONLY public.events USING btree (org_id, project_id, "timestamp" DESC) +events: idx_events_org_time => CREATE INDEX idx_events_org_time ON ONLY public.events USING btree (org_id, "timestamp" DESC) +events: idx_events_project_time => CREATE INDEX idx_events_project_time ON ONLY public.events USING 
btree (project_id, "timestamp" DESC) +events: idx_events_properties_gin => CREATE INDEX idx_events_properties_gin ON ONLY public.events USING gin (properties) +events: idx_events_session => CREATE INDEX idx_events_session ON ONLY public.events USING btree (session_id) WHERE (session_id IS NOT NULL) +events: idx_events_session_id => CREATE INDEX idx_events_session_id ON ONLY public.events USING btree (session_id) WHERE (session_id IS NOT NULL) +events: idx_events_tenant_metric_time => CREATE INDEX idx_events_tenant_metric_time ON ONLY public.events USING btree (tenant_id, event_name, "timestamp" DESC) +events: idx_events_tenant_time => CREATE INDEX idx_events_tenant_time ON ONLY public.events USING btree (tenant_id, "timestamp" DESC) +events: idx_events_timestamp => CREATE INDEX idx_events_timestamp ON ONLY public.events USING btree ("timestamp" DESC) +events: idx_events_user_id_time => CREATE INDEX idx_events_user_id_time ON ONLY public.events USING btree (org_id, project_id, user_id, "timestamp" DESC) WHERE (user_id IS NOT NULL) +events: idx_events_user_time => CREATE INDEX idx_events_user_time ON ONLY public.events USING btree (org_id, project_id, user_id, "timestamp" DESC) WHERE (user_id IS NOT NULL) +hourly_aggregates: hourly_aggregates_pkey => CREATE UNIQUE INDEX hourly_aggregates_pkey ON public.hourly_aggregates USING btree (id) +hourly_aggregates: idx_hourly_aggregates_lookup => CREATE INDEX idx_hourly_aggregates_lookup ON public.hourly_aggregates USING btree (org_id, project_id, hour DESC) +hourly_aggregates: idx_hourly_aggregates_metric => CREATE INDEX idx_hourly_aggregates_metric ON public.hourly_aggregates USING btree (metric_name, hour DESC) +hourly_aggregates: idx_hourly_aggregates_tenant_metric_hour => CREATE INDEX idx_hourly_aggregates_tenant_metric_hour ON public.hourly_aggregates USING btree (tenant_id, metric_name, hour DESC) +hourly_aggregates: unique_hourly_aggregate => CREATE UNIQUE INDEX unique_hourly_aggregate ON public.hourly_aggregates 
USING btree (org_id, project_id, metric_name, hour, dimensions) +mv_dashboard_metrics: idx_mv_dashboard_org_project_date => CREATE INDEX idx_mv_dashboard_org_project_date ON public.mv_dashboard_metrics USING btree (org_id, project_id, date DESC) +mv_dashboard_metrics: idx_mv_dashboard_unique => CREATE UNIQUE INDEX idx_mv_dashboard_unique ON public.mv_dashboard_metrics USING btree (org_id, project_id, date) +``` + +## Event Partitions + +```text +events_2026_03 +events_2026_04 +events_2026_05 +events_2026_06 +events_2026_07 +events_2026_08 +events_2026_09 +``` + +## Query + +```sql +EXPLAIN (ANALYZE, BUFFERS) +SELECT COUNT(*) AS events_in_window +FROM events +WHERE org_id = '00000000-0000-0000-0000-000000000001' + AND project_id = '00000000-0000-0000-0000-000000000002' + AND timestamp >= NOW() - INTERVAL '24 hours' + AND timestamp < NOW(); +``` + +## EXPLAIN ANALYZE + +```text + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Finalize Aggregate (cost=9122.28..9122.29 rows=1 width=8) (actual time=14.437..15.730 rows=1 loops=1) + Buffers: shared hit=1502 read=4716 + -> Gather (cost=9122.06..9122.27 rows=2 width=8) (actual time=14.380..15.727 rows=3 loops=1) + Workers Planned: 2 + Workers Launched: 2 + Buffers: shared hit=1502 read=4716 + -> Partial Aggregate (cost=8122.06..8122.07 rows=1 width=8) (actual time=12.514..12.514 rows=1 loops=3) + Buffers: shared hit=1502 read=4716 + -> Parallel Append (cost=0.00..7986.13 rows=54372 width=0) (actual time=0.023..11.345 rows=43467 loops=3) + Buffers: shared hit=1502 read=4716 + Subplans Removed: 6 + -> Parallel Seq Scan on events_2026_06 events_1 (cost=0.00..7714.30 rows=54366 width=0) (actual time=0.022..9.592 rows=43467 loops=3) + Filter: ((org_id = '00000000-0000-0000-0000-000000000001'::uuid) AND (project_id = 
'00000000-0000-0000-0000-000000000002'::uuid) AND ("timestamp" < now()) AND ("timestamp" >= (now() - '24:00:00'::interval))) + Rows Removed by Filter: 55 + Buffers: shared hit=1502 read=4716 + Planning: + Buffers: shared hit=2810 + Planning Time: 7.363 ms + Execution Time: 15.766 ms +(19 rows) + +``` + +## Interpretation + +This query demonstrates the partition set touched for a narrow dashboard window. Compare the child tables in the plan with the 30-day plan. diff --git a/docs/query-plans/2026-06-16-partition-pruning-30d.md b/docs/query-plans/2026-06-16-partition-pruning-30d.md new file mode 100644 index 0000000..3d6c318 --- /dev/null +++ b/docs/query-plans/2026-06-16-partition-pruning-30d.md @@ -0,0 +1,107 @@ +# Partition Pruning Over Last 30 Days + +Captured: 2026-06-16T18:46:50Z + +## Table Row Counts + +```text +events=130608 +daily_aggregates=14 +hourly_aggregates=0 +mv_dashboard_metrics=0 +``` + +## Relevant Indexes + +```text +daily_aggregates: daily_aggregates_pkey => CREATE UNIQUE INDEX daily_aggregates_pkey ON public.daily_aggregates USING btree (id) +daily_aggregates: idx_aggregates_lookup => CREATE INDEX idx_aggregates_lookup ON public.daily_aggregates USING btree (org_id, project_id, date) +daily_aggregates: idx_aggregates_metric => CREATE INDEX idx_aggregates_metric ON public.daily_aggregates USING btree (metric_name, date) +daily_aggregates: idx_aggregates_tenant_metric_date => CREATE INDEX idx_aggregates_tenant_metric_date ON public.daily_aggregates USING btree (tenant_id, metric_name, date DESC) +daily_aggregates: idx_daily_aggregates_date => CREATE INDEX idx_daily_aggregates_date ON public.daily_aggregates USING btree (date DESC) +daily_aggregates: idx_daily_aggregates_metric_name => CREATE INDEX idx_daily_aggregates_metric_name ON public.daily_aggregates USING btree (metric_name, date DESC) +daily_aggregates: idx_daily_aggregates_org_project_date => CREATE INDEX idx_daily_aggregates_org_project_date ON public.daily_aggregates USING btree 
(org_id, project_id, date DESC) +daily_aggregates: unique_aggregate => CREATE UNIQUE INDEX unique_aggregate ON public.daily_aggregates USING btree (org_id, project_id, metric_name, date, dimensions) +events: events_event_id_timestamp_key => CREATE UNIQUE INDEX events_event_id_timestamp_key ON ONLY public.events USING btree (event_id, "timestamp") +events: events_org_project_event_id_timestamp_key => CREATE UNIQUE INDEX events_org_project_event_id_timestamp_key ON ONLY public.events USING btree (org_id, project_id, event_id, "timestamp") +events: events_pkey => CREATE UNIQUE INDEX events_pkey ON ONLY public.events USING btree (id, "timestamp") +events: idx_events_event_id => CREATE INDEX idx_events_event_id ON ONLY public.events USING btree (org_id, project_id, event_id) +events: idx_events_event_id_time => CREATE INDEX idx_events_event_id_time ON ONLY public.events USING btree (event_id, "timestamp") +events: idx_events_event_name_time => CREATE INDEX idx_events_event_name_time ON ONLY public.events USING btree (event_name, "timestamp" DESC) +events: idx_events_org_project => CREATE INDEX idx_events_org_project ON ONLY public.events USING btree (org_id, project_id) +events: idx_events_org_project_time => CREATE INDEX idx_events_org_project_time ON ONLY public.events USING btree (org_id, project_id, "timestamp" DESC) +events: idx_events_org_time => CREATE INDEX idx_events_org_time ON ONLY public.events USING btree (org_id, "timestamp" DESC) +events: idx_events_project_time => CREATE INDEX idx_events_project_time ON ONLY public.events USING btree (project_id, "timestamp" DESC) +events: idx_events_properties_gin => CREATE INDEX idx_events_properties_gin ON ONLY public.events USING gin (properties) +events: idx_events_session => CREATE INDEX idx_events_session ON ONLY public.events USING btree (session_id) WHERE (session_id IS NOT NULL) +events: idx_events_session_id => CREATE INDEX idx_events_session_id ON ONLY public.events USING btree (session_id) WHERE (session_id 
IS NOT NULL) +events: idx_events_tenant_metric_time => CREATE INDEX idx_events_tenant_metric_time ON ONLY public.events USING btree (tenant_id, event_name, "timestamp" DESC) +events: idx_events_tenant_time => CREATE INDEX idx_events_tenant_time ON ONLY public.events USING btree (tenant_id, "timestamp" DESC) +events: idx_events_timestamp => CREATE INDEX idx_events_timestamp ON ONLY public.events USING btree ("timestamp" DESC) +events: idx_events_user_id_time => CREATE INDEX idx_events_user_id_time ON ONLY public.events USING btree (org_id, project_id, user_id, "timestamp" DESC) WHERE (user_id IS NOT NULL) +events: idx_events_user_time => CREATE INDEX idx_events_user_time ON ONLY public.events USING btree (org_id, project_id, user_id, "timestamp" DESC) WHERE (user_id IS NOT NULL) +hourly_aggregates: hourly_aggregates_pkey => CREATE UNIQUE INDEX hourly_aggregates_pkey ON public.hourly_aggregates USING btree (id) +hourly_aggregates: idx_hourly_aggregates_lookup => CREATE INDEX idx_hourly_aggregates_lookup ON public.hourly_aggregates USING btree (org_id, project_id, hour DESC) +hourly_aggregates: idx_hourly_aggregates_metric => CREATE INDEX idx_hourly_aggregates_metric ON public.hourly_aggregates USING btree (metric_name, hour DESC) +hourly_aggregates: idx_hourly_aggregates_tenant_metric_hour => CREATE INDEX idx_hourly_aggregates_tenant_metric_hour ON public.hourly_aggregates USING btree (tenant_id, metric_name, hour DESC) +hourly_aggregates: unique_hourly_aggregate => CREATE UNIQUE INDEX unique_hourly_aggregate ON public.hourly_aggregates USING btree (org_id, project_id, metric_name, hour, dimensions) +mv_dashboard_metrics: idx_mv_dashboard_org_project_date => CREATE INDEX idx_mv_dashboard_org_project_date ON public.mv_dashboard_metrics USING btree (org_id, project_id, date DESC) +mv_dashboard_metrics: idx_mv_dashboard_unique => CREATE UNIQUE INDEX idx_mv_dashboard_unique ON public.mv_dashboard_metrics USING btree (org_id, project_id, date) +``` + +## Event Partitions 
+ +```text +events_2026_03 +events_2026_04 +events_2026_05 +events_2026_06 +events_2026_07 +events_2026_08 +events_2026_09 +``` + +## Query + +```sql +EXPLAIN (ANALYZE, BUFFERS) +SELECT COUNT(*) AS events_in_window +FROM events +WHERE org_id = '00000000-0000-0000-0000-000000000001' + AND project_id = '00000000-0000-0000-0000-000000000002' + AND timestamp >= NOW() - INTERVAL '30 days' + AND timestamp < NOW(); +``` + +## EXPLAIN ANALYZE + +```text + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ + Finalize Aggregate (cost=9130.45..9130.46 rows=1 width=8) (actual time=15.965..17.586 rows=1 loops=1) + Buffers: shared hit=1604 read=4620 + -> Gather (cost=9130.23..9130.44 rows=2 width=8) (actual time=15.870..17.579 rows=3 loops=1) + Workers Planned: 2 + Workers Launched: 2 + Buffers: shared hit=1604 read=4620 + -> Partial Aggregate (cost=8130.23..8130.24 rows=1 width=8) (actual time=13.582..13.583 rows=1 loops=3) + Buffers: shared hit=1604 read=4620 + -> Parallel Append (cost=0.00..7994.06 rows=54469 width=0) (actual time=0.021..12.409 rows=43567 loops=3) + Buffers: shared hit=1604 read=4620 + Subplans Removed: 5 + -> Seq Scan on events_2026_05 events_1 (cost=0.00..0.00 rows=1 width=0) (actual time=0.003..0.003 rows=0 loops=1) + Filter: ((org_id = '00000000-0000-0000-0000-000000000001'::uuid) AND (project_id = '00000000-0000-0000-0000-000000000002'::uuid) AND ("timestamp" < now()) AND ("timestamp" >= (now() - '30 days'::interval))) + -> Parallel Seq Scan on events_2026_06 events_2 (cost=0.00..7721.74 rows=54463 width=0) (actual time=0.019..10.696 rows=43567 loops=3) + Filter: ((org_id = '00000000-0000-0000-0000-000000000001'::uuid) AND (project_id = '00000000-0000-0000-0000-000000000002'::uuid) AND ("timestamp" < now()) AND ("timestamp" >= (now() - '30 days'::interval))) + 
Buffers: shared hit=1604 read=4620 + Planning: + Buffers: shared hit=2810 + Planning Time: 7.586 ms + Execution Time: 17.690 ms +(20 rows) + +``` + +## Interpretation + +This query scans a broader partition set than the 24-hour plan: the plan above touches `events_2026_05` and `events_2026_06` and prunes the remaining partitions (`Subplans Removed: 5`). With monthly local partitions, a 30-day window can touch more than one child partition depending on the date. diff --git a/docs/query-plans/2026-06-16-tenant-dashboard-chosen-index.md b/docs/query-plans/2026-06-16-tenant-dashboard-chosen-index.md new file mode 100644 index 0000000..5979118 --- /dev/null +++ b/docs/query-plans/2026-06-16-tenant-dashboard-chosen-index.md @@ -0,0 +1,113 @@ +# Tenant Dashboard Raw Event Query With Chosen Indexes + +Captured: 2026-06-16T18:46:49Z + +## Table Row Counts + +```text +events=130431 +daily_aggregates=14 +hourly_aggregates=0 +mv_dashboard_metrics=0 +``` + +## Relevant Indexes + +```text +daily_aggregates: daily_aggregates_pkey => CREATE UNIQUE INDEX daily_aggregates_pkey ON public.daily_aggregates USING btree (id) +daily_aggregates: idx_aggregates_lookup => CREATE INDEX idx_aggregates_lookup ON public.daily_aggregates USING btree (org_id, project_id, date) +daily_aggregates: idx_aggregates_metric => CREATE INDEX idx_aggregates_metric ON public.daily_aggregates USING btree (metric_name, date) +daily_aggregates: idx_aggregates_tenant_metric_date => CREATE INDEX idx_aggregates_tenant_metric_date ON public.daily_aggregates USING btree (tenant_id, metric_name, date DESC) +daily_aggregates: idx_daily_aggregates_date => CREATE INDEX idx_daily_aggregates_date ON public.daily_aggregates USING btree (date DESC) +daily_aggregates: idx_daily_aggregates_metric_name => CREATE INDEX idx_daily_aggregates_metric_name ON public.daily_aggregates USING btree (metric_name, date DESC) +daily_aggregates: idx_daily_aggregates_org_project_date => CREATE INDEX idx_daily_aggregates_org_project_date ON public.daily_aggregates USING btree (org_id, project_id, date DESC) +daily_aggregates: unique_aggregate => CREATE 
UNIQUE INDEX unique_aggregate ON public.daily_aggregates USING btree (org_id, project_id, metric_name, date, dimensions) +events: events_event_id_timestamp_key => CREATE UNIQUE INDEX events_event_id_timestamp_key ON ONLY public.events USING btree (event_id, "timestamp") +events: events_org_project_event_id_timestamp_key => CREATE UNIQUE INDEX events_org_project_event_id_timestamp_key ON ONLY public.events USING btree (org_id, project_id, event_id, "timestamp") +events: events_pkey => CREATE UNIQUE INDEX events_pkey ON ONLY public.events USING btree (id, "timestamp") +events: idx_events_event_id => CREATE INDEX idx_events_event_id ON ONLY public.events USING btree (org_id, project_id, event_id) +events: idx_events_event_id_time => CREATE INDEX idx_events_event_id_time ON ONLY public.events USING btree (event_id, "timestamp") +events: idx_events_event_name_time => CREATE INDEX idx_events_event_name_time ON ONLY public.events USING btree (event_name, "timestamp" DESC) +events: idx_events_org_project => CREATE INDEX idx_events_org_project ON ONLY public.events USING btree (org_id, project_id) +events: idx_events_org_project_time => CREATE INDEX idx_events_org_project_time ON ONLY public.events USING btree (org_id, project_id, "timestamp" DESC) +events: idx_events_org_time => CREATE INDEX idx_events_org_time ON ONLY public.events USING btree (org_id, "timestamp" DESC) +events: idx_events_project_time => CREATE INDEX idx_events_project_time ON ONLY public.events USING btree (project_id, "timestamp" DESC) +events: idx_events_properties_gin => CREATE INDEX idx_events_properties_gin ON ONLY public.events USING gin (properties) +events: idx_events_session => CREATE INDEX idx_events_session ON ONLY public.events USING btree (session_id) WHERE (session_id IS NOT NULL) +events: idx_events_session_id => CREATE INDEX idx_events_session_id ON ONLY public.events USING btree (session_id) WHERE (session_id IS NOT NULL) +events: idx_events_tenant_metric_time => CREATE INDEX 
idx_events_tenant_metric_time ON ONLY public.events USING btree (tenant_id, event_name, "timestamp" DESC) +events: idx_events_tenant_time => CREATE INDEX idx_events_tenant_time ON ONLY public.events USING btree (tenant_id, "timestamp" DESC) +events: idx_events_timestamp => CREATE INDEX idx_events_timestamp ON ONLY public.events USING btree ("timestamp" DESC) +events: idx_events_user_id_time => CREATE INDEX idx_events_user_id_time ON ONLY public.events USING btree (org_id, project_id, user_id, "timestamp" DESC) WHERE (user_id IS NOT NULL) +events: idx_events_user_time => CREATE INDEX idx_events_user_time ON ONLY public.events USING btree (org_id, project_id, user_id, "timestamp" DESC) WHERE (user_id IS NOT NULL) +hourly_aggregates: hourly_aggregates_pkey => CREATE UNIQUE INDEX hourly_aggregates_pkey ON public.hourly_aggregates USING btree (id) +hourly_aggregates: idx_hourly_aggregates_lookup => CREATE INDEX idx_hourly_aggregates_lookup ON public.hourly_aggregates USING btree (org_id, project_id, hour DESC) +hourly_aggregates: idx_hourly_aggregates_metric => CREATE INDEX idx_hourly_aggregates_metric ON public.hourly_aggregates USING btree (metric_name, hour DESC) +hourly_aggregates: idx_hourly_aggregates_tenant_metric_hour => CREATE INDEX idx_hourly_aggregates_tenant_metric_hour ON public.hourly_aggregates USING btree (tenant_id, metric_name, hour DESC) +hourly_aggregates: unique_hourly_aggregate => CREATE UNIQUE INDEX unique_hourly_aggregate ON public.hourly_aggregates USING btree (org_id, project_id, metric_name, hour, dimensions) +mv_dashboard_metrics: idx_mv_dashboard_org_project_date => CREATE INDEX idx_mv_dashboard_org_project_date ON public.mv_dashboard_metrics USING btree (org_id, project_id, date DESC) +mv_dashboard_metrics: idx_mv_dashboard_unique => CREATE UNIQUE INDEX idx_mv_dashboard_unique ON public.mv_dashboard_metrics USING btree (org_id, project_id, date) +``` + +## Event Partitions + +```text +events_2026_03 +events_2026_04 +events_2026_05 
+events_2026_06 +events_2026_07 +events_2026_08 +events_2026_09 +``` + +## Query + +```sql +EXPLAIN (ANALYZE, BUFFERS) +SELECT id, event_id, event_name, user_id, session_id, timestamp, properties +FROM events +WHERE org_id = '00000000-0000-0000-0000-000000000001' + AND project_id = '00000000-0000-0000-0000-000000000002' + AND timestamp >= NOW() - INTERVAL '7 days' +ORDER BY timestamp DESC +LIMIT 100; +``` + +## EXPLAIN ANALYZE + +```text + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Limit (cost=1.20..16.47 rows=100 width=281) (actual time=0.608..1.019 rows=100 loops=1) + Buffers: shared hit=98 read=5 written=5 + -> Append (cost=1.20..19931.14 rows=130508 width=281) (actual time=0.607..1.013 rows=100 loops=1) + Buffers: shared hit=98 read=5 written=5 + Subplans Removed: 3 + -> Index Scan using events_2026_09_timestamp_idx on events_2026_09 events_4 (cost=0.13..8.15 rows=1 width=1112) (actual time=0.005..0.005 rows=0 loops=1) + Index Cond: ("timestamp" >= (now() - '7 days'::interval)) + Filter: ((org_id = '00000000-0000-0000-0000-000000000001'::uuid) AND (project_id = '00000000-0000-0000-0000-000000000002'::uuid)) + Buffers: shared hit=2 + -> Index Scan using events_2026_08_timestamp_idx on events_2026_08 events_3 (cost=0.13..8.15 rows=1 width=1112) (actual time=0.003..0.004 rows=0 loops=1) + Index Cond: ("timestamp" >= (now() - '7 days'::interval)) + Filter: ((org_id = '00000000-0000-0000-0000-000000000001'::uuid) AND (project_id = '00000000-0000-0000-0000-000000000002'::uuid)) + Buffers: shared hit=2 + -> Index Scan using events_2026_07_timestamp_idx on events_2026_07 events_2 (cost=0.13..8.15 rows=1 width=1112) (actual time=0.003..0.003 rows=0 loops=1) + Index Cond: ("timestamp" >= (now() - '7 days'::interval)) + Filter: ((org_id = '00000000-0000-0000-0000-000000000001'::uuid) AND (project_id = 
'00000000-0000-0000-0000-000000000002'::uuid)) + Buffers: shared hit=2 + -> Index Scan using events_2026_06_timestamp_idx on events_2026_06 events_1 (cost=0.42..19229.69 rows=130502 width=281) (actual time=0.595..0.996 rows=100 loops=1) + Index Cond: ("timestamp" >= (now() - '7 days'::interval)) + Filter: ((org_id = '00000000-0000-0000-0000-000000000001'::uuid) AND (project_id = '00000000-0000-0000-0000-000000000002'::uuid)) + Buffers: shared hit=92 read=5 written=5 + Planning: + Buffers: shared hit=2894 + Planning Time: 6.733 ms + Execution Time: 1.077 ms +(25 rows) + +``` + +## Interpretation + +This normal raw-event dashboard path used timestamp indexes on the relevant event partitions and finished in 1.077 ms on this dataset. PostgreSQL did not choose the tenant/project/timestamp index here, likely because the local benchmark data is dominated by one authenticated org/project, so tenant filters are not selective enough to drive that index choice. Do not generalize index-order claims from this dataset without a true multi-tenant skew run. 
diff --git a/docs/query-plans/2026-06-16-tenant-dashboard-index-disabled.md b/docs/query-plans/2026-06-16-tenant-dashboard-index-disabled.md new file mode 100644 index 0000000..25c3d12 --- /dev/null +++ b/docs/query-plans/2026-06-16-tenant-dashboard-index-disabled.md @@ -0,0 +1,124 @@ +# Tenant Dashboard Raw Event Query With Index Scans Disabled + +Captured: 2026-06-16T18:46:49Z + +## Table Row Counts + +```text +events=130300 +daily_aggregates=14 +hourly_aggregates=0 +mv_dashboard_metrics=0 +``` + +## Relevant Indexes + +```text +daily_aggregates: daily_aggregates_pkey => CREATE UNIQUE INDEX daily_aggregates_pkey ON public.daily_aggregates USING btree (id) +daily_aggregates: idx_aggregates_lookup => CREATE INDEX idx_aggregates_lookup ON public.daily_aggregates USING btree (org_id, project_id, date) +daily_aggregates: idx_aggregates_metric => CREATE INDEX idx_aggregates_metric ON public.daily_aggregates USING btree (metric_name, date) +daily_aggregates: idx_aggregates_tenant_metric_date => CREATE INDEX idx_aggregates_tenant_metric_date ON public.daily_aggregates USING btree (tenant_id, metric_name, date DESC) +daily_aggregates: idx_daily_aggregates_date => CREATE INDEX idx_daily_aggregates_date ON public.daily_aggregates USING btree (date DESC) +daily_aggregates: idx_daily_aggregates_metric_name => CREATE INDEX idx_daily_aggregates_metric_name ON public.daily_aggregates USING btree (metric_name, date DESC) +daily_aggregates: idx_daily_aggregates_org_project_date => CREATE INDEX idx_daily_aggregates_org_project_date ON public.daily_aggregates USING btree (org_id, project_id, date DESC) +daily_aggregates: unique_aggregate => CREATE UNIQUE INDEX unique_aggregate ON public.daily_aggregates USING btree (org_id, project_id, metric_name, date, dimensions) +events: events_event_id_timestamp_key => CREATE UNIQUE INDEX events_event_id_timestamp_key ON ONLY public.events USING btree (event_id, "timestamp") +events: events_org_project_event_id_timestamp_key => CREATE UNIQUE 
INDEX events_org_project_event_id_timestamp_key ON ONLY public.events USING btree (org_id, project_id, event_id, "timestamp") +events: events_pkey => CREATE UNIQUE INDEX events_pkey ON ONLY public.events USING btree (id, "timestamp") +events: idx_events_event_id => CREATE INDEX idx_events_event_id ON ONLY public.events USING btree (org_id, project_id, event_id) +events: idx_events_event_id_time => CREATE INDEX idx_events_event_id_time ON ONLY public.events USING btree (event_id, "timestamp") +events: idx_events_event_name_time => CREATE INDEX idx_events_event_name_time ON ONLY public.events USING btree (event_name, "timestamp" DESC) +events: idx_events_org_project => CREATE INDEX idx_events_org_project ON ONLY public.events USING btree (org_id, project_id) +events: idx_events_org_project_time => CREATE INDEX idx_events_org_project_time ON ONLY public.events USING btree (org_id, project_id, "timestamp" DESC) +events: idx_events_org_time => CREATE INDEX idx_events_org_time ON ONLY public.events USING btree (org_id, "timestamp" DESC) +events: idx_events_project_time => CREATE INDEX idx_events_project_time ON ONLY public.events USING btree (project_id, "timestamp" DESC) +events: idx_events_properties_gin => CREATE INDEX idx_events_properties_gin ON ONLY public.events USING gin (properties) +events: idx_events_session => CREATE INDEX idx_events_session ON ONLY public.events USING btree (session_id) WHERE (session_id IS NOT NULL) +events: idx_events_session_id => CREATE INDEX idx_events_session_id ON ONLY public.events USING btree (session_id) WHERE (session_id IS NOT NULL) +events: idx_events_tenant_metric_time => CREATE INDEX idx_events_tenant_metric_time ON ONLY public.events USING btree (tenant_id, event_name, "timestamp" DESC) +events: idx_events_tenant_time => CREATE INDEX idx_events_tenant_time ON ONLY public.events USING btree (tenant_id, "timestamp" DESC) +events: idx_events_timestamp => CREATE INDEX idx_events_timestamp ON ONLY public.events USING btree 
("timestamp" DESC) +events: idx_events_user_id_time => CREATE INDEX idx_events_user_id_time ON ONLY public.events USING btree (org_id, project_id, user_id, "timestamp" DESC) WHERE (user_id IS NOT NULL) +events: idx_events_user_time => CREATE INDEX idx_events_user_time ON ONLY public.events USING btree (org_id, project_id, user_id, "timestamp" DESC) WHERE (user_id IS NOT NULL) +hourly_aggregates: hourly_aggregates_pkey => CREATE UNIQUE INDEX hourly_aggregates_pkey ON public.hourly_aggregates USING btree (id) +hourly_aggregates: idx_hourly_aggregates_lookup => CREATE INDEX idx_hourly_aggregates_lookup ON public.hourly_aggregates USING btree (org_id, project_id, hour DESC) +hourly_aggregates: idx_hourly_aggregates_metric => CREATE INDEX idx_hourly_aggregates_metric ON public.hourly_aggregates USING btree (metric_name, hour DESC) +hourly_aggregates: idx_hourly_aggregates_tenant_metric_hour => CREATE INDEX idx_hourly_aggregates_tenant_metric_hour ON public.hourly_aggregates USING btree (tenant_id, metric_name, hour DESC) +hourly_aggregates: unique_hourly_aggregate => CREATE UNIQUE INDEX unique_hourly_aggregate ON public.hourly_aggregates USING btree (org_id, project_id, metric_name, hour, dimensions) +mv_dashboard_metrics: idx_mv_dashboard_org_project_date => CREATE INDEX idx_mv_dashboard_org_project_date ON public.mv_dashboard_metrics USING btree (org_id, project_id, date DESC) +mv_dashboard_metrics: idx_mv_dashboard_unique => CREATE UNIQUE INDEX idx_mv_dashboard_unique ON public.mv_dashboard_metrics USING btree (org_id, project_id, date) +``` + +## Event Partitions + +```text +events_2026_03 +events_2026_04 +events_2026_05 +events_2026_06 +events_2026_07 +events_2026_08 +events_2026_09 +``` + +## Query + +```sql +BEGIN; +SET LOCAL enable_indexscan = off; +SET LOCAL enable_bitmapscan = off; +EXPLAIN (ANALYZE, BUFFERS) +SELECT id, event_id, event_name, user_id, session_id, timestamp, properties +FROM events +WHERE org_id = '00000000-0000-0000-0000-000000000001' + AND 
project_id = '00000000-0000-0000-0000-000000000002' + AND timestamp >= NOW() - INTERVAL '7 days' +ORDER BY timestamp DESC +LIMIT 100; +ROLLBACK; +``` + +## EXPLAIN ANALYZE + +```text +BEGIN +SET +SET + QUERY PLAN +--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Limit (cost=10779.92..10791.59 rows=100 width=281) (actual time=58.400..59.811 rows=100 loops=1) + Buffers: shared hit=1467 read=4816 written=4 + -> Gather Merge (cost=10779.92..23459.67 rows=108676 width=281) (actual time=58.399..59.805 rows=100 loops=1) + Workers Planned: 2 + Workers Launched: 2 + Buffers: shared hit=1467 read=4816 written=4 + -> Sort (cost=9779.90..9915.74 rows=54338 width=281) (actual time=56.105..56.114 rows=76 loops=3) + Sort Key: events."timestamp" DESC + Sort Method: top-N heapsort Memory: 75kB + Buffers: shared hit=1467 read=4816 written=4 + Worker 0: Sort Method: top-N heapsort Memory: 92kB + Worker 1: Sort Method: top-N heapsort Memory: 92kB + -> Parallel Append (cost=0.00..7703.14 rows=54338 width=281) (actual time=0.046..51.028 rows=43458 loops=3) + Buffers: shared hit=1393 read=4816 written=4 + Subplans Removed: 3 + -> Seq Scan on events_2026_07 events_2 (cost=0.00..0.00 rows=1 width=1112) (actual time=0.005..0.005 rows=0 loops=1) + Filter: ((org_id = '00000000-0000-0000-0000-000000000001'::uuid) AND (project_id = '00000000-0000-0000-0000-000000000002'::uuid) AND ("timestamp" >= (now() - '7 days'::interval))) + -> Seq Scan on events_2026_08 events_3 (cost=0.00..0.00 rows=1 width=1112) (actual time=0.002..0.002 rows=0 loops=1) + Filter: ((org_id = '00000000-0000-0000-0000-000000000001'::uuid) AND (project_id = '00000000-0000-0000-0000-000000000002'::uuid) AND ("timestamp" >= (now() - '7 days'::interval))) + -> Seq Scan on events_2026_09 events_4 (cost=0.00..0.00 rows=1 width=1112) (actual time=0.002..0.002 rows=0 
loops=1) + Filter: ((org_id = '00000000-0000-0000-0000-000000000001'::uuid) AND (project_id = '00000000-0000-0000-0000-000000000002'::uuid) AND ("timestamp" >= (now() - '7 days'::interval))) + -> Parallel Seq Scan on events_2026_06 events_1 (cost=0.00..7431.47 rows=54332 width=281) (actual time=0.042..49.157 rows=43458 loops=3) + Filter: ((org_id = '00000000-0000-0000-0000-000000000001'::uuid) AND (project_id = '00000000-0000-0000-0000-000000000002'::uuid) AND ("timestamp" >= (now() - '7 days'::interval))) + Buffers: shared hit=1393 read=4816 written=4 + Planning: + Buffers: shared hit=2892 read=6 + Planning Time: 6.316 ms + Execution Time: 59.879 ms +(28 rows) + +ROLLBACK +``` + +## Interpretation + +Index and bitmap scans are disabled in this session to show the cost shape when PostgreSQL cannot use an index access path and falls back to sequential scans with a top-N sort. This is not a dropped-index benchmark; it is planner-controlled evidence for comparison. diff --git a/docs/query-plans/README.md b/docs/query-plans/README.md new file mode 100644 index 0000000..f375678 --- /dev/null +++ b/docs/query-plans/README.md @@ -0,0 +1,44 @@ +# Query Plan Evidence + +Store PostgreSQL query plans here when preparing a benchmark report. + +Recommended filename format: + +```text +YYYY-MM-DD-query-name-dataset-size.md +``` + +Capture plans with: + +```sql +EXPLAIN (ANALYZE, BUFFERS) +SELECT ... +``` + +Or capture the standard PulseOps evidence set from the live Docker database: + +```bash +./scripts/capture-query-plans.sh +``` + +For a benchmark run, pass the run ID and the tenant/project being analyzed: + +```bash +RUN_ID=2026-06-16-final-benchmark-smoke \ +ORG_ID=00000000-0000-4000-8000-0000000f4241 \ +PROJECT_ID=00000000-0000-4000-8000-0000001e8481 \ +CACHE_EVIDENCE=docs/benchmarks/evidence/dashboard-cache-2026-06-16-final-benchmark-smoke.json \ +pnpm query-plans:capture +``` + +Each plan file should include: + +- Git commit SHA. +- Dataset size and date range. +- PostgreSQL version. 
+- Relevant indexes and partitions. +- The exact SQL query. +- Full `EXPLAIN (ANALYZE, BUFFERS)` output. +- Short observation about whether partition pruning and expected indexes were used. + +Redis cache hits bypass PostgreSQL, so cached GraphQL paths should be documented as a cache evidence note that links to the dashboard cache JSON measurement, not as a PostgreSQL EXPLAIN plan. diff --git a/docs/rfcs/0001-auth-aggregation-load-testing.md b/docs/rfcs/0001-auth-aggregation-load-testing.md new file mode 100644 index 0000000..b35cd2c --- /dev/null +++ b/docs/rfcs/0001-auth-aggregation-load-testing.md @@ -0,0 +1,30 @@ +# RFC 0001: Authenticated Ingest, Unique DAU, and Load Testing + +## What + +Make the core PulseOps ingest path credible end to end: + +- Restore API-key authentication instead of accepting every request as the demo tenant. +- Bind accepted events to the authenticated organization and project. +- Count daily active users as unique users per day, not total events with a user_id. +- Add a real k6 load test for single-event and batch ingest. +- Update tests and documentation so the repo evidence matches the implementation. + +## Why + +PulseOps is meant to demonstrate event ingestion, tenant-aware metrics, aggregation, and dashboard load. The current repo has the right shape, but important pieces are incomplete: auth is bypassed, demo API-key hashes are placeholders, DAU can overcount repeat events from the same user, and `pnpm test:load` points at a missing script. + +## Alternatives Rejected + +- Keep demo-only auth bypass: rejected because it makes multi-tenant behavior impossible to verify. +- Count DAU from raw events only at query time: rejected because it avoids the aggregation path PulseOps is meant to demonstrate. +- Add a full key-prefix lookup migration: rejected for now because the current schema only stores bcrypt hashes, and a safe active-key scan is enough for a project-scale demo. + +## Test Cases + +- Missing API key returns 401. 
+- Invalid API key returns 403. +- Valid API key sets org/project context and caches by a non-secret fingerprint. +- Project-scoped keys reject events for a different project. +- Repeated events from the same user on the same day increment event counts but not DAU. +- `pnpm test:load` has a concrete k6 script to exercise concurrent users and batch ingest. diff --git a/migrations/006_performance_optimizations.sql b/migrations/006_performance_optimizations.sql index ec02ac5..3076fbc 100644 --- a/migrations/006_performance_optimizations.sql +++ b/migrations/006_performance_optimizations.sql @@ -1,100 +1,355 @@ --- Database Optimization Migration --- This migration adds indexes and optimizations for production performance - --- Events Table Indexes --- Index for filtering by organization and project -CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_events_org_project -ON events(org_id, project_id); - --- Index for time-based queries -CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_events_created_at -ON events(created_at DESC); - --- Composite index for common query pattern (org, project, time) -CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_events_org_project_time -ON events(org_id, project_id, created_at DESC); - --- Index for event name filtering -CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_events_event_name -ON events(event_name); - --- Index for user-based queries -CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_events_user_id -ON events(user_id) WHERE user_id IS NOT NULL; - --- Index for session-based queries -CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_events_session_id -ON events(session_id) WHERE session_id IS NOT NULL; - --- Daily Aggregates Indexes --- Index for time-series queries -CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_daily_aggregates_date -ON daily_aggregates(date DESC); - --- Composite index for org/project filtering -CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_daily_aggregates_org_project_date -ON daily_aggregates(org_id, project_id, date DESC); - --- Index for 
metric type queries -CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_daily_aggregates_metric_type -ON daily_aggregates(metric_type); - --- API Keys Indexes --- Index for fast API key lookups -CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_api_keys_key_hash -ON api_keys(key_hash); - --- Index for organization lookups -CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_api_keys_org_id -ON api_keys(org_id); - --- Table Partitioning for Events (by month) --- This improves query performance for time-based queries -CREATE TABLE IF NOT EXISTS events_template ( - LIKE events INCLUDING ALL -) PARTITION BY RANGE (created_at); - --- Create partitions for current and future months +-- Database optimization and evidence foundation migration. +-- Keep event-time queries on events.timestamp. Do not partition or index raw +-- event dashboards by created_at. + +CREATE EXTENSION IF NOT EXISTS pgcrypto; + +ALTER TABLE projects + ADD COLUMN IF NOT EXISTS updated_at TIMESTAMPTZ DEFAULT NOW(); + +DO $$ +BEGIN + IF NOT EXISTS ( + SELECT 1 + FROM pg_constraint + WHERE conrelid = 'projects'::regclass + AND conname = 'projects_id_org_id_key' + ) THEN + ALTER TABLE projects ADD CONSTRAINT projects_id_org_id_key UNIQUE (id, org_id); + END IF; +END $$; + +ALTER TABLE api_keys + ADD COLUMN IF NOT EXISTS project_id UUID, + ADD COLUMN IF NOT EXISTS key_fingerprint CHAR(64), + ADD COLUMN IF NOT EXISTS name VARCHAR(255); + +DO $$ +BEGIN + IF NOT EXISTS ( + SELECT 1 + FROM pg_constraint + WHERE conrelid = 'api_keys'::regclass + AND conname = 'api_keys_project_org_fk' + ) THEN + ALTER TABLE api_keys + ADD CONSTRAINT api_keys_project_org_fk + FOREIGN KEY (project_id, org_id) + REFERENCES projects(id, org_id) + ON DELETE CASCADE + NOT VALID; + END IF; + + IF NOT EXISTS ( + SELECT 1 + FROM pg_constraint + WHERE conrelid = 'api_keys'::regclass + AND conname = 'api_keys_fingerprint_hex' + ) THEN + ALTER TABLE api_keys + ADD CONSTRAINT api_keys_fingerprint_hex + CHECK (key_fingerprint IS NULL OR key_fingerprint ~ 
'^[0-9a-f]{64}$') + NOT VALID; + END IF; +END $$; + +CREATE INDEX IF NOT EXISTS idx_api_keys_project ON api_keys(project_id) WHERE project_id IS NOT NULL; + +ALTER TABLE events + ADD COLUMN IF NOT EXISTS event_id UUID, + ADD COLUMN IF NOT EXISTS properties JSONB DEFAULT '{}', + ADD COLUMN IF NOT EXISTS created_at TIMESTAMPTZ DEFAULT NOW(); + +UPDATE events +SET event_id = gen_random_uuid() +WHERE event_id IS NULL; + +ALTER TABLE events + ALTER COLUMN event_id SET DEFAULT gen_random_uuid(), + ALTER COLUMN event_id SET NOT NULL; + DO $$ +BEGIN + IF NOT EXISTS ( + SELECT 1 + FROM pg_constraint + WHERE conrelid = 'events'::regclass + AND conname = 'events_project_org_fk' + ) THEN + ALTER TABLE events + ADD CONSTRAINT events_project_org_fk + FOREIGN KEY (project_id, org_id) + REFERENCES projects(id, org_id) + ON DELETE CASCADE + NOT VALID; + END IF; + + IF NOT EXISTS ( + SELECT 1 + FROM pg_constraint + WHERE conrelid = 'events'::regclass + AND conname = 'events_org_project_event_id_timestamp_key' + ) THEN + ALTER TABLE events + ADD CONSTRAINT events_org_project_event_id_timestamp_key + UNIQUE (org_id, project_id, event_id, timestamp); + END IF; +END $$; + +CREATE OR REPLACE FUNCTION create_events_monthly_partitions(window_start DATE, window_end DATE) +RETURNS void AS $$ DECLARE - start_date DATE; - end_date DATE; + partition_start DATE := DATE_TRUNC('month', window_start)::DATE; + partition_end DATE; partition_name TEXT; BEGIN - -- Create partitions for next 12 months - FOR i IN 0..11 LOOP - start_date := DATE_TRUNC('month', CURRENT_DATE) + (i || ' months')::INTERVAL; - end_date := start_date + '1 month'::INTERVAL; - partition_name := 'events_y' || TO_CHAR(start_date, 'YYYY') || 'm' || TO_CHAR(start_date, 'MM'); - + IF window_end <= window_start THEN + RAISE EXCEPTION 'window_end must be after window_start'; + END IF; + + WHILE partition_start < window_end LOOP + partition_end := (partition_start + INTERVAL '1 month')::DATE; + partition_name := 'events_' || 
TO_CHAR(partition_start, 'YYYY_MM'); + EXECUTE format( - 'CREATE TABLE IF NOT EXISTS %I PARTITION OF events_template FOR VALUES FROM (%L) TO (%L)', + 'CREATE TABLE IF NOT EXISTS %I PARTITION OF events FOR VALUES FROM (%L) TO (%L)', partition_name, - start_date, - end_date + partition_start::TIMESTAMPTZ, + partition_end::TIMESTAMPTZ ); + + partition_start := partition_end; END LOOP; +END; +$$ LANGUAGE plpgsql; + +-- Supports 7/30/90 day benchmark windows on event timestamp. +SELECT create_events_monthly_partitions( + (CURRENT_DATE - INTERVAL '90 days')::DATE, + (CURRENT_DATE + INTERVAL '90 days')::DATE +); + +CREATE INDEX IF NOT EXISTS idx_events_org_project ON events(org_id, project_id); +CREATE INDEX IF NOT EXISTS idx_events_timestamp ON events(timestamp DESC); +CREATE INDEX IF NOT EXISTS idx_events_org_project_time ON events(org_id, project_id, timestamp DESC); +CREATE INDEX IF NOT EXISTS idx_events_event_name_time ON events(event_name, timestamp DESC); +CREATE INDEX IF NOT EXISTS idx_events_event_id ON events(org_id, project_id, event_id); + +ALTER TABLE daily_aggregates + ADD COLUMN IF NOT EXISTS computed_at TIMESTAMPTZ DEFAULT NOW(); + +DO $$ +BEGIN + IF NOT EXISTS ( + SELECT 1 + FROM pg_constraint + WHERE conrelid = 'daily_aggregates'::regclass + AND conname = 'daily_aggregates_project_org_fk' + ) THEN + ALTER TABLE daily_aggregates + ADD CONSTRAINT daily_aggregates_project_org_fk + FOREIGN KEY (project_id, org_id) + REFERENCES projects(id, org_id) + ON DELETE CASCADE + NOT VALID; + END IF; END $$; --- Materialized View for Fast Dashboard Queries -CREATE MATERIALIZED VIEW IF NOT EXISTS mv_dashboard_metrics AS -SELECT - org_id, - project_id, - DATE_TRUNC('day', created_at) AS date, - COUNT(*) AS event_count, - COUNT(DISTINCT user_id) AS unique_users, - COUNT(DISTINCT session_id) AS unique_sessions, - jsonb_object_agg(event_name, event_count) AS events_by_name -FROM events -WHERE created_at >= CURRENT_DATE - INTERVAL '90 days' -GROUP BY org_id, project_id, 
DATE_TRUNC('day', created_at); - --- Index on materialized view -CREATE INDEX IF NOT EXISTS idx_mv_dashboard_org_project_date -ON mv_dashboard_metrics(org_id, project_id, date DESC); - --- Function to refresh materialized view +CREATE INDEX IF NOT EXISTS idx_daily_aggregates_date ON daily_aggregates(date DESC); +CREATE INDEX IF NOT EXISTS idx_daily_aggregates_org_project_date + ON daily_aggregates(org_id, project_id, date DESC); +CREATE INDEX IF NOT EXISTS idx_daily_aggregates_metric_name + ON daily_aggregates(metric_name, date DESC); + +CREATE TABLE IF NOT EXISTS daily_active_users ( + org_id UUID NOT NULL REFERENCES organizations(id) ON DELETE CASCADE, + project_id UUID NOT NULL, + date DATE NOT NULL, + user_id VARCHAR(255) NOT NULL, + first_seen_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + last_seen_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + event_count INTEGER NOT NULL DEFAULT 1, + PRIMARY KEY (org_id, project_id, date, user_id), + CONSTRAINT daily_active_users_project_org_fk + FOREIGN KEY (project_id, org_id) REFERENCES projects(id, org_id) ON DELETE CASCADE +); + +CREATE INDEX IF NOT EXISTS idx_daily_active_users_project_date + ON daily_active_users(org_id, project_id, date); + +CREATE TABLE IF NOT EXISTS hourly_aggregates ( + id BIGSERIAL PRIMARY KEY, + org_id UUID NOT NULL REFERENCES organizations(id) ON DELETE CASCADE, + project_id UUID NOT NULL, + metric_name VARCHAR(100) NOT NULL, + metric_value NUMERIC NOT NULL DEFAULT 0, + dimensions JSONB DEFAULT '{}', + hour TIMESTAMPTZ NOT NULL, + computed_at TIMESTAMPTZ DEFAULT NOW(), + CONSTRAINT hourly_aggregates_project_org_fk + FOREIGN KEY (project_id, org_id) REFERENCES projects(id, org_id) ON DELETE CASCADE, + CONSTRAINT unique_hourly_aggregate UNIQUE(org_id, project_id, metric_name, hour, dimensions) +); + +CREATE INDEX IF NOT EXISTS idx_hourly_aggregates_lookup + ON hourly_aggregates(org_id, project_id, hour DESC); +CREATE INDEX IF NOT EXISTS idx_hourly_aggregates_metric + ON hourly_aggregates(metric_name, hour 
DESC); + +CREATE TABLE IF NOT EXISTS ingest_batches ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + org_id UUID NOT NULL REFERENCES organizations(id) ON DELETE CASCADE, + project_id UUID, + batch_id VARCHAR(255), + status VARCHAR(50) NOT NULL DEFAULT 'accepted', + accepted_count INTEGER NOT NULL DEFAULT 0, + rejected_count INTEGER NOT NULL DEFAULT 0, + started_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + completed_at TIMESTAMPTZ, + metadata JSONB DEFAULT '{}', + CONSTRAINT ingest_batches_project_org_fk + FOREIGN KEY (project_id, org_id) REFERENCES projects(id, org_id) ON DELETE CASCADE +); + +CREATE INDEX IF NOT EXISTS idx_ingest_batches_tenant_time + ON ingest_batches(org_id, project_id, started_at DESC); + +CREATE TABLE IF NOT EXISTS processing_state ( + name VARCHAR(255) PRIMARY KEY, + state JSONB NOT NULL DEFAULT '{}', + updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW() +); + +CREATE TABLE IF NOT EXISTS worker_offsets ( + consumer_group VARCHAR(255) NOT NULL, + topic VARCHAR(255) NOT NULL, + partition_id INTEGER NOT NULL, + offset_value BIGINT NOT NULL, + updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + PRIMARY KEY (consumer_group, topic, partition_id) +); + +CREATE TABLE IF NOT EXISTS benchmark_runs ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + org_id UUID REFERENCES organizations(id) ON DELETE SET NULL, + project_id UUID, + name VARCHAR(255) NOT NULL, + window_days INTEGER NOT NULL CHECK (window_days IN (7, 30, 90)), + started_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + completed_at TIMESTAMPTZ, + parameters JSONB DEFAULT '{}', + results JSONB DEFAULT '{}', + notes TEXT, + CONSTRAINT benchmark_runs_project_org_fk + FOREIGN KEY (project_id, org_id) REFERENCES projects(id, org_id) ON DELETE SET NULL +); + +CREATE TABLE IF NOT EXISTS articles ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + org_id UUID NOT NULL REFERENCES organizations(id) ON DELETE CASCADE, + project_id UUID NOT NULL, + external_id VARCHAR(255), + url TEXT, + title TEXT NOT NULL, + source 
VARCHAR(255), + author VARCHAR(255), + published_at TIMESTAMPTZ, + content_hash CHAR(64), + metadata JSONB DEFAULT '{}', + created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + UNIQUE (id, org_id, project_id), + UNIQUE (org_id, project_id, external_id), + CONSTRAINT articles_project_org_fk + FOREIGN KEY (project_id, org_id) REFERENCES projects(id, org_id) ON DELETE CASCADE, + CONSTRAINT articles_content_hash_hex + CHECK (content_hash IS NULL OR content_hash ~ '^[0-9a-f]{64}$') +); + +CREATE INDEX IF NOT EXISTS idx_articles_tenant_published + ON articles(org_id, project_id, published_at DESC); +CREATE INDEX IF NOT EXISTS idx_articles_content_hash + ON articles(content_hash) WHERE content_hash IS NOT NULL; + +CREATE TABLE IF NOT EXISTS article_evidence ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + org_id UUID NOT NULL REFERENCES organizations(id) ON DELETE CASCADE, + project_id UUID NOT NULL, + article_id UUID NOT NULL, + event_id UUID, + evidence_type VARCHAR(100) NOT NULL, + metric_name VARCHAR(100), + metric_value NUMERIC, + dimensions JSONB DEFAULT '{}', + observed_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + source_event_timestamp TIMESTAMPTZ, + properties JSONB DEFAULT '{}', + created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + CONSTRAINT article_evidence_article_tenant_fk + FOREIGN KEY (article_id, org_id, project_id) REFERENCES articles(id, org_id, project_id) ON DELETE CASCADE +); + +CREATE INDEX IF NOT EXISTS idx_article_evidence_tenant_observed + ON article_evidence(org_id, project_id, observed_at DESC); +CREATE INDEX IF NOT EXISTS idx_article_evidence_article + ON article_evidence(article_id, observed_at DESC); +CREATE INDEX IF NOT EXISTS idx_article_evidence_event + ON article_evidence(org_id, project_id, event_id) WHERE event_id IS NOT NULL; + +DROP MATERIALIZED VIEW IF EXISTS mv_dashboard_metrics; + +CREATE MATERIALIZED VIEW mv_dashboard_metrics AS +WITH event_counts AS ( + SELECT + org_id, + project_id, + 
DATE_TRUNC('day', timestamp)::DATE AS date, + event_name, + COUNT(*) AS event_count + FROM events + WHERE timestamp >= CURRENT_DATE - INTERVAL '90 days' + GROUP BY org_id, project_id, DATE_TRUNC('day', timestamp)::DATE, event_name +), +daily_totals AS ( + SELECT + org_id, + project_id, + date, + SUM(event_count) AS event_count, + jsonb_object_agg(event_name, event_count ORDER BY event_name) AS events_by_name + FROM event_counts + GROUP BY org_id, project_id, date +), +daily_users AS ( + SELECT + org_id, + project_id, + DATE_TRUNC('day', timestamp)::DATE AS date, + COUNT(DISTINCT user_id) FILTER (WHERE user_id IS NOT NULL) AS unique_users, + COUNT(DISTINCT session_id) FILTER (WHERE session_id IS NOT NULL) AS unique_sessions + FROM events + WHERE timestamp >= CURRENT_DATE - INTERVAL '90 days' + GROUP BY org_id, project_id, DATE_TRUNC('day', timestamp)::DATE +) +SELECT + daily_totals.org_id, + daily_totals.project_id, + daily_totals.date, + daily_totals.event_count, + COALESCE(daily_users.unique_users, 0) AS unique_users, + COALESCE(daily_users.unique_sessions, 0) AS unique_sessions, + daily_totals.events_by_name +FROM daily_totals +LEFT JOIN daily_users + ON daily_users.org_id = daily_totals.org_id + AND daily_users.project_id = daily_totals.project_id + AND daily_users.date = daily_totals.date; + +CREATE UNIQUE INDEX IF NOT EXISTS idx_mv_dashboard_unique + ON mv_dashboard_metrics(org_id, project_id, date); +CREATE INDEX IF NOT EXISTS idx_mv_dashboard_org_project_date + ON mv_dashboard_metrics(org_id, project_id, date DESC); + CREATE OR REPLACE FUNCTION refresh_dashboard_metrics() RETURNS void AS $$ BEGIN @@ -102,21 +357,16 @@ BEGIN END; $$ LANGUAGE plpgsql; --- Schedule automatic refresh (requires pg_cron extension) --- SELECT cron.schedule('refresh-dashboard', '*/5 * * * *', 'SELECT refresh_dashboard_metrics()'); - --- Query Performance Optimization --- Analyze tables to update statistics ANALYZE events; ANALYZE daily_aggregates; ANALYZE api_keys; --- Vacuum to 
reclaim space -VACUUM ANALYZE events; -VACUUM ANALYZE daily_aggregates; - --- Add query timeout to prevent long-running queries -ALTER DATABASE pulseops_dev SET statement_timeout = '30s'; - --- Connection pooling settings (adjust based on workload) -COMMENT ON DATABASE pulseops_dev IS 'Recommended settings: max_connections=200, shared_buffers=2GB, effective_cache_size=6GB'; +DO $$ +BEGIN + EXECUTE format('ALTER DATABASE %I SET statement_timeout = %L', current_database(), '30s'); + EXECUTE format( + 'COMMENT ON DATABASE %I IS %L', + current_database(), + 'Recommended settings: max_connections=200, shared_buffers=2GB, effective_cache_size=6GB' + ); +END $$; diff --git a/migrations/007_article_evidence_schema.sql b/migrations/007_article_evidence_schema.sql new file mode 100644 index 0000000..6fed5d5 --- /dev/null +++ b/migrations/007_article_evidence_schema.sql @@ -0,0 +1,170 @@ +CREATE EXTENSION IF NOT EXISTS pgcrypto; + +ALTER TABLE projects + ADD COLUMN IF NOT EXISTS tenant_id UUID GENERATED ALWAYS AS (org_id) STORED; + +ALTER TABLE api_keys + ADD COLUMN IF NOT EXISTS tenant_id UUID GENERATED ALWAYS AS (org_id) STORED, + ADD COLUMN IF NOT EXISTS project_id UUID REFERENCES projects(id) ON DELETE CASCADE, + ADD COLUMN IF NOT EXISTS key_fingerprint CHAR(64); + +UPDATE api_keys +SET project_id = '00000000-0000-0000-0000-000000000002' +WHERE project_id IS NULL + AND org_id = '00000000-0000-0000-0000-000000000001'; + +UPDATE api_keys +SET + key_fingerprint = '3543274f3d56b4b5860dd25cfb944d60e953fbe9c67f3de2d1deaf2befae1b7f', + key_hash = '$2b$10$NQbr7kAR4n9lQ1/JwS0jn.sEhYgR4Pi5VejnFudNV/LNpk9DIHSOy', + name = COALESCE(name, 'Demo API Key') +WHERE org_id = '00000000-0000-0000-0000-000000000001'; + +UPDATE api_keys +SET key_fingerprint = encode(digest(key_hash, 'sha256'), 'hex') +WHERE key_fingerprint IS NULL; + +ALTER TABLE api_keys + ALTER COLUMN key_fingerprint SET NOT NULL; + +CREATE INDEX IF NOT EXISTS idx_api_keys_tenant_project + ON api_keys(tenant_id, project_id); 
+ +ALTER TABLE events + ADD COLUMN IF NOT EXISTS event_id UUID, + ADD COLUMN IF NOT EXISTS tenant_id UUID GENERATED ALWAYS AS (org_id) STORED, + ADD COLUMN IF NOT EXISTS received_at TIMESTAMPTZ DEFAULT NOW(); + +UPDATE events +SET event_id = gen_random_uuid() +WHERE event_id IS NULL; + +ALTER TABLE events + ALTER COLUMN event_id SET NOT NULL; + +CREATE INDEX IF NOT EXISTS idx_events_tenant_time ON events(tenant_id, timestamp DESC); +CREATE INDEX IF NOT EXISTS idx_events_org_project_time ON events(org_id, project_id, timestamp DESC); +CREATE INDEX IF NOT EXISTS idx_events_tenant_metric_time ON events(tenant_id, event_name, timestamp DESC); +CREATE INDEX IF NOT EXISTS idx_events_event_id_time ON events(event_id, timestamp); +CREATE INDEX IF NOT EXISTS idx_events_properties_gin ON events USING GIN (properties); + +CREATE TABLE IF NOT EXISTS event_dedup_keys ( + org_id UUID NOT NULL REFERENCES organizations(id) ON DELETE CASCADE, + tenant_id UUID GENERATED ALWAYS AS (org_id) STORED, + project_id UUID NOT NULL REFERENCES projects(id) ON DELETE CASCADE, + event_id UUID NOT NULL, + first_seen_at TIMESTAMPTZ DEFAULT NOW(), + PRIMARY KEY(org_id, project_id, event_id) +); + +CREATE INDEX IF NOT EXISTS idx_event_dedup_tenant_seen + ON event_dedup_keys(tenant_id, first_seen_at DESC); + +CREATE TABLE IF NOT EXISTS hourly_aggregates ( + id BIGSERIAL PRIMARY KEY, + org_id UUID NOT NULL REFERENCES organizations(id) ON DELETE CASCADE, + tenant_id UUID GENERATED ALWAYS AS (org_id) STORED, + project_id UUID NOT NULL, + metric_name VARCHAR(100) NOT NULL, + metric_value NUMERIC NOT NULL DEFAULT 0, + hour TIMESTAMPTZ NOT NULL, + dimensions JSONB DEFAULT '{}', + computed_at TIMESTAMPTZ DEFAULT NOW(), + CONSTRAINT hourly_aggregates_project_org_fk + FOREIGN KEY (project_id, org_id) REFERENCES projects(id, org_id) ON DELETE CASCADE, + CONSTRAINT unique_hourly_aggregate UNIQUE(org_id, project_id, metric_name, hour, dimensions) +); + +CREATE INDEX IF NOT EXISTS 
idx_hourly_aggregates_tenant_metric_hour + ON hourly_aggregates(tenant_id, metric_name, hour DESC); +CREATE INDEX IF NOT EXISTS idx_hourly_aggregates_lookup + ON hourly_aggregates(org_id, project_id, hour DESC); + +DROP VIEW IF EXISTS hourly_metrics; +CREATE VIEW hourly_metrics AS +SELECT + id, + org_id, + tenant_id, + project_id, + metric_name, + metric_value, + hour AS bucket_start, + dimensions, + computed_at +FROM hourly_aggregates; + +ALTER TABLE daily_aggregates + ADD COLUMN IF NOT EXISTS tenant_id UUID GENERATED ALWAYS AS (org_id) STORED; + +CREATE INDEX IF NOT EXISTS idx_aggregates_tenant_metric_date + ON daily_aggregates(tenant_id, metric_name, date DESC); + +CREATE OR REPLACE VIEW daily_metrics AS +SELECT + id, + org_id, + tenant_id, + project_id, + metric_name, + metric_value, + date::timestamptz AS bucket_start, + dimensions, + computed_at +FROM daily_aggregates; + +CREATE TABLE IF NOT EXISTS daily_active_users ( + org_id UUID NOT NULL REFERENCES organizations(id) ON DELETE CASCADE, + tenant_id UUID GENERATED ALWAYS AS (org_id) STORED, + project_id UUID NOT NULL REFERENCES projects(id) ON DELETE CASCADE, + date DATE NOT NULL, + user_id VARCHAR(255) NOT NULL, + first_seen_at TIMESTAMPTZ DEFAULT NOW(), + PRIMARY KEY(org_id, project_id, date, user_id) +); + +CREATE INDEX IF NOT EXISTS idx_daily_active_users_tenant_date + ON daily_active_users(tenant_id, project_id, date); + +CREATE TABLE IF NOT EXISTS ingest_batches ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + org_id UUID NOT NULL REFERENCES organizations(id) ON DELETE CASCADE, + tenant_id UUID GENERATED ALWAYS AS (org_id) STORED, + project_id UUID REFERENCES projects(id) ON DELETE CASCADE, + event_count INT NOT NULL DEFAULT 0, + accepted_count INT NOT NULL DEFAULT 0, + rejected_count INT NOT NULL DEFAULT 0, + created_at TIMESTAMPTZ DEFAULT NOW() +); + +CREATE TABLE IF NOT EXISTS processing_state ( + consumer_group VARCHAR(255) NOT NULL, + topic VARCHAR(255) NOT NULL, + partition_id INT NOT NULL, + 
last_committed_offset BIGINT NOT NULL DEFAULT 0, + updated_at TIMESTAMPTZ DEFAULT NOW(), + PRIMARY KEY(consumer_group, topic, partition_id) +); + +CREATE TABLE IF NOT EXISTS benchmark_runs ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + name VARCHAR(255) NOT NULL, + commit_sha VARCHAR(64), + dataset JSONB DEFAULT '{}', + results JSONB DEFAULT '{}', + created_at TIMESTAMPTZ DEFAULT NOW() +); + +CREATE TABLE IF NOT EXISTS tenant_usage_snapshots ( + id BIGSERIAL PRIMARY KEY, + org_id UUID NOT NULL REFERENCES organizations(id) ON DELETE CASCADE, + tenant_id UUID GENERATED ALWAYS AS (org_id) STORED, + project_id UUID REFERENCES projects(id) ON DELETE CASCADE, + captured_at TIMESTAMPTZ DEFAULT NOW(), + event_count BIGINT NOT NULL DEFAULT 0, + raw_bytes BIGINT NOT NULL DEFAULT 0, + dashboard_query_count BIGINT NOT NULL DEFAULT 0 +); + +CREATE INDEX IF NOT EXISTS idx_tenant_usage_tenant_time + ON tenant_usage_snapshots(tenant_id, captured_at DESC); diff --git a/package.json b/package.json index 99eddfa..6d3af1f 100644 --- a/package.json +++ b/package.json @@ -15,38 +15,62 @@ "dev:worker": "pnpm --filter @pulseops/worker dev", "dev:web": "pnpm --filter @pulseops/web dev", "build": "pnpm -r build", - "test": "pnpm -r test", - "test:unit": "pnpm -r test:unit", - "test:integration": "pnpm -r test:integration", + "test": "pnpm test:unit", + "test:unit": "pnpm -r --if-present test:unit", + "test:integration": "vitest run --dir tests/integration && pnpm -r --if-present test:integration", "test:e2e": "playwright test", - "test:coverage": "pnpm -r test:coverage", - "test:load": "k6 run scripts/load-test.js", - "lint": "pnpm -r lint", + "test:coverage": "pnpm -r --if-present test:coverage", + "test:load": "node scripts/run-k6.js tests/load/ingest-throughput.js", + "test:load:hot-tenant": "node scripts/run-k6.js tests/load/hot-tenant.js", + "test:load:dashboard": "node scripts/run-k6.js tests/load/dashboard-query.js", + "test:load:backpressure": "node scripts/run-k6.js 
tests/load/backpressure.js", + "lint": "pnpm -r --if-present lint", "typecheck": "pnpm -r typecheck", "db:migrate": "pnpm --filter @pulseops/ingest-api db:migrate", "db:seed": "pnpm --filter @pulseops/ingest-api db:seed", + "db:verify:fresh": "./scripts/verify-fresh-migration.sh", + "events:generate": "pnpm exec tsx scripts/generate-skewed-events.ts", + "benchmark:seed-tenants": "pnpm exec tsx scripts/seed-benchmark-tenants.ts", + "benchmark:generate": "pnpm exec tsx scripts/generate-skewed-events.ts", + "benchmark:ingest": "node scripts/run-k6.js tests/load/ingest-throughput.js", + "benchmark:hot-tenant": "node scripts/run-k6.js tests/load/hot-tenant.js", + "benchmark:hot-db": "pnpm exec tsx scripts/measure-hot-tenant-db.ts", + "benchmark:dashboard": "node scripts/run-k6.js tests/load/dashboard-query.js", + "benchmark:cache": "pnpm exec tsx scripts/measure-dashboard-cache.ts", + "benchmark:worker": "pnpm exec tsx scripts/measure-worker-catchup.ts", + "prove:worker-retry-offsets": "pnpm exec tsx scripts/prove-worker-retry-offsets.ts", + "benchmark:backpressure": "node scripts/run-k6.js tests/load/backpressure.js", + "benchmark": "node scripts/run-benchmark.js", + "benchmark:report": "node scripts/benchmark-report.js", + "query-plans:capture": "./scripts/capture-query-plans.sh", "docker:up": "docker-compose up -d", "docker:down": "docker-compose down", "docker:logs": "docker-compose logs -f", "bootstrap": "pnpm install && pnpm docker:up && sleep 10 && pnpm db:migrate && pnpm db:seed", "startup": "pnpm docker:up && pnpm dev", "shutdown": "docker-compose down", - "secure": "pnpm audit && pnpm -r lint:security", + "secure": "pnpm audit && pnpm -r --if-present lint:security", "validate": "pnpm typecheck && pnpm lint && pnpm test", + "validate:evidence": "pnpm health && pnpm db:verify:fresh && pnpm typecheck && pnpm test && pnpm test:integration && pnpm test:load && pnpm benchmark && pnpm benchmark:report -- --output docs/benchmarks/latest-pulseops-benchmark.md --force", 
"health": "./scripts/health-check.sh" }, "devDependencies": { "@playwright/test": "^1.40.1", "@types/node": "^20.10.5", + "@types/pg": "^8.10.9", "@typescript-eslint/eslint-plugin": "^6.15.0", "@typescript-eslint/parser": "^6.15.0", "@vitest/coverage-v8": "^4.0.18", "@vitest/ui": "^4.0.18", + "bcryptjs": "^3.0.3", "concurrently": "^8.2.2", "eslint": "^8.56.0", "eslint-config-prettier": "^9.1.0", "eslint-plugin-security": "^2.1.0", + "ioredis": "^5.3.2", + "pg": "^8.11.3", "prettier": "^3.1.1", + "tsx": "^4.7.0", "typescript": "^5.3.3", "vitest": "^1.1.1" }, @@ -58,4 +82,4 @@ "dependencies": { "dotenv": "^17.2.3" } -} \ No newline at end of file +} diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 608645a..a81c641 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -18,6 +18,9 @@ importers: '@types/node': specifier: ^20.10.5 version: 20.19.30 + '@types/pg': + specifier: ^8.10.9 + version: 8.16.0 '@typescript-eslint/eslint-plugin': specifier: ^6.15.0 version: 6.21.0(@typescript-eslint/parser@6.21.0)(eslint@8.57.1)(typescript@5.9.3) @@ -30,6 +33,9 @@ importers: '@vitest/ui': specifier: ^4.0.18 version: 4.0.18(vitest@1.6.1) + bcryptjs: + specifier: ^3.0.3 + version: 3.0.3 concurrently: specifier: ^8.2.2 version: 8.2.2 @@ -42,9 +48,18 @@ importers: eslint-plugin-security: specifier: ^2.1.0 version: 2.1.1 + ioredis: + specifier: ^5.3.2 + version: 5.9.2 + pg: + specifier: ^8.11.3 + version: 8.17.1 prettier: specifier: ^3.1.1 version: 3.8.0 + tsx: + specifier: ^4.7.0 + version: 4.21.0 typescript: specifier: ^5.3.3 version: 5.9.3 @@ -75,6 +90,12 @@ importers: '@fastify/cors': specifier: ^8.5.0 version: 8.5.0 + bcryptjs: + specifier: ^3.0.3 + version: 3.0.3 + dotenv: + specifier: ^16.4.5 + version: 16.6.1 fastify: specifier: ^4.25.2 version: 4.29.1 @@ -106,6 +127,9 @@ importers: '@graphql-codegen/typescript-resolvers': specifier: ^4.0.1 version: 4.5.2(graphql@16.12.0) + '@types/node': + specifier: ^20.0.0 + version: 20.19.30 '@types/pg': specifier: ^8.10.9 version: 8.16.0 
@@ -130,9 +154,12 @@ importers: '@fastify/rate-limit': specifier: ^9.1.0 version: 9.1.0 - bcrypt: - specifier: ^5.1.1 - version: 5.1.1 + bcryptjs: + specifier: ^3.0.3 + version: 3.0.3 + dotenv: + specifier: ^16.4.5 + version: 16.6.1 fastify: specifier: ^4.25.2 version: 4.29.1 @@ -164,15 +191,15 @@ importers: '@jest/globals': specifier: ^29.7.0 version: 29.7.0 - '@types/bcrypt': - specifier: ^5.0.2 - version: 5.0.2 '@types/jest': specifier: ^29.5.11 version: 29.5.14 '@types/jsonwebtoken': specifier: ^9.0.5 version: 9.0.10 + '@types/node': + specifier: ^20.0.0 + version: 20.19.30 '@types/pg': specifier: ^8.10.9 version: 8.16.0 @@ -203,6 +230,12 @@ importers: services/worker: dependencies: + dotenv: + specifier: ^16.4.5 + version: 16.6.1 + ioredis: + specifier: ^5.3.2 + version: 5.9.2 kafkajs: specifier: ^2.2.4 version: 2.2.4 @@ -215,6 +248,9 @@ importers: pino-pretty: specifier: ^10.3.1 version: 10.3.1 + prom-client: + specifier: ^15.1.3 + version: 15.1.3 devDependencies: '@jest/globals': specifier: ^29.7.0 @@ -222,6 +258,9 @@ importers: '@types/jest': specifier: ^29.5.11 version: 29.5.14 + '@types/node': + specifier: ^20.0.0 + version: 20.19.30 '@types/pg': specifier: ^8.10.9 version: 8.16.0 @@ -324,7 +363,7 @@ importers: version: 8.5.6 tailwindcss: specifier: ^3.4.1 - version: 3.4.19 + version: 3.4.19(tsx@4.21.0) typescript: specifier: ^5.3.3 version: 5.9.3 @@ -2313,7 +2352,6 @@ packages: /@ioredis/commands@1.5.0: resolution: {integrity: sha512-eUgLqrMf8nJkZxT24JvVRrQya1vZkQh8BBeYNwGDqa5I0VUi8ACx7uFvAaLxintokpTenkK6DASvo/bvNbBGow==} - dev: false /@istanbuljs/load-nyc-config@1.1.0: resolution: {integrity: sha512-VjeHSlIzpv/NyD3N0YuHfXOPDIixcA1q2ZV98wsMqcYlPmv2n3Yb2lYP9XMElnaFVXg5A7YLTeLu6V84uQDjmQ==} @@ -2587,24 +2625,6 @@ packages: engines: {node: '>=8'} dev: false - /@mapbox/node-pre-gyp@1.0.11: - resolution: {integrity: sha512-Yhlar6v9WQgUp/He7BdgzOz8lqMQ8sU+jkCq7Wx8Myc5YFJLbEe7lgui/V7G1qB1DJykHSGwreceSaD60Y0PUQ==} - hasBin: true - dependencies: - detect-libc: 
2.1.2 - https-proxy-agent: 5.0.1 - make-dir: 3.1.0 - node-fetch: 2.7.0 - nopt: 5.0.0 - npmlog: 5.0.1 - rimraf: 3.0.2 - semver: 7.7.3 - tar: 6.2.1 - transitivePeerDependencies: - - encoding - - supports-color - dev: false - /@noble/hashes@1.8.0: resolution: {integrity: sha512-jCs9ldd7NwzpgXDIf6P3+NrHh9/sD6CQdxHyjQI+h/6rDNo88ypBxxz45UDuZHz9r3tNz7N/VInSVoVdtXEI4A==} engines: {node: ^14.21.3 || >=16} @@ -3382,12 +3402,6 @@ packages: '@babel/types': 7.28.6 dev: true - /@types/bcrypt@5.0.2: - resolution: {integrity: sha512-6atioO8Y75fNcbmj0G7UjI9lXN2pQ/IGJ2FWT4a/btd0Lk9lQalHLKhkgKVZ3r+spnmWUKfbMi1GEe9wyHQfNQ==} - dependencies: - '@types/node': 20.19.30 - dev: true - /@types/body-parser@1.19.6: resolution: {integrity: sha512-HLFeCYgz89uk22N5Qg3dvGvsv46B8GLvKKo1zKG4NybA8U2DiEO3w9lqGg29t/tfLRJpJ6iQxnVw4OnB7MoM9g==} dependencies: @@ -3913,10 +3927,6 @@ packages: tslib: 2.8.1 dev: true - /abbrev@1.1.1: - resolution: {integrity: sha512-nne9/IiQ/hzIhY6pdDnbBtz7DjPTKrY00P/zvPSm5pOFkl6xuGrGnXn/VtTNNfNtAfZ9/1RtehkszU9qcTii0Q==} - dev: false - /abort-controller@3.0.0: resolution: {integrity: sha512-h8lQ8tacZYnR3vNQTgibj+tODHI5/+l06Au2Pcriv/Gmet0eaj4TwWH41sO9wnHDiQsEj19q0drzdWdeAHtweg==} engines: {node: '>=6.5'} @@ -3957,15 +3967,6 @@ packages: hasBin: true dev: true - /agent-base@6.0.2: - resolution: {integrity: sha512-RZNwNclF7+MS/8bDg70amg32dyeZGZxiDuQmZxKLAlQjr3jGyLx+4Kkk58UO7D2QdgFIQCovuSuZESne6RG6XQ==} - engines: {node: '>= 6.0.0'} - dependencies: - debug: 4.4.3 - transitivePeerDependencies: - - supports-color - dev: false - /agent-base@7.1.4: resolution: {integrity: sha512-MnA+YT8fwfJPgBx3m60MNqakm30XOkyIoH1y6huTQvC0PwZG7ki8NacLBcrPbNoo8vEZy7Jpuk7+jMO+CUovTQ==} engines: {node: '>= 14'} @@ -4029,6 +4030,7 @@ packages: /ansi-regex@5.0.1: resolution: {integrity: sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==} engines: {node: '>=8'} + dev: true /ansi-styles@4.3.0: resolution: {integrity: 
sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==} @@ -4054,19 +4056,6 @@ packages: picomatch: 2.3.1 dev: true - /aproba@2.1.0: - resolution: {integrity: sha512-tLIEcj5GuR2RSTnxNKdkK0dJ/GrC7P38sUkiDmDuHfsHmbagTFAxDVIBltoklXEVIQ/f14IL8IMJ5pn9Hez1Ew==} - dev: false - - /are-we-there-yet@2.0.0: - resolution: {integrity: sha512-Ci/qENmwHnsYo9xKIcUJN5LeDKdJ6R1Z1j9V/J5wyq8nh/mYPEpIKJbBZXtZjG04HiK7zV/p6Vs9952MrMeUIw==} - engines: {node: '>=10'} - deprecated: This package is no longer supported. - dependencies: - delegates: 1.0.0 - readable-stream: 3.6.2 - dev: false - /arg@4.1.3: resolution: {integrity: sha512-58S9QDqG0Xx27YwPSt9fJxivjYl432YCwfDMfZ+71RAqUrZef7LrKQZ3LHLOwCS4FLNBplP533Zx895SeOCHvA==} dev: true @@ -4271,6 +4260,7 @@ packages: /balanced-match@1.0.2: resolution: {integrity: sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==} + dev: true /base64-js@1.5.1: resolution: {integrity: sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA==} @@ -4280,17 +4270,9 @@ packages: hasBin: true dev: true - /bcrypt@5.1.1: - resolution: {integrity: sha512-AGBHOG5hPYZ5Xl9KXzU5iKq9516yEmvCKDg3ecP5kX2aB6UqTeXZxk2ELnDgDm6BQSMlLt9rDB4LoSMx0rYwww==} - engines: {node: '>= 10.0.0'} - requiresBuild: true - dependencies: - '@mapbox/node-pre-gyp': 1.0.11 - node-addon-api: 5.1.0 - transitivePeerDependencies: - - encoding - - supports-color - dev: false + /bcryptjs@3.0.3: + resolution: {integrity: sha512-GlF5wPWnSa/X5LKM1o0wz0suXIINz1iHRLvTS+sLyi7XPbe5ycmYI3DlZqVGZZtDgl4DmasFg7gOB3JYbphV5g==} + hasBin: true /binary-extensions@2.3.0: resolution: {integrity: sha512-Ceh+7ox5qe7LJuLHoY0feh3pHuUDHAcRUeyL2VYghZwfpkNIy/+8Ocg0a3UuSoYzavmylwuLWQOf3hl0jjMMIw==} @@ -4334,6 +4316,7 @@ packages: dependencies: balanced-match: 1.0.2 concat-map: 0.0.1 + dev: true /brace-expansion@2.0.2: resolution: {integrity: 
sha512-Jt0vHyM+jmUBqojB7E1NIYadt0vI0Qxjxd2TErW94wDz+E2LAm5vKMXXwg6ZZBTHPuUlDgQHKXvjGBdfcF1ZDQ==} @@ -4550,11 +4533,6 @@ packages: fsevents: 2.3.3 dev: true - /chownr@2.0.0: - resolution: {integrity: sha512-bIomtDF5KGpdogkLd9VspvFzk9KfpyyGlS8YFVZl7TGPBHL5snIOnxeshwVgPteQ9b4Eydl+pVbIyE1DcvCWgQ==} - engines: {node: '>=10'} - dev: false - /ci-info@3.9.0: resolution: {integrity: sha512-NIxF55hv4nSqQswkAeiOi1r83xy8JldOFDTWiug55KBu9Jnblncd2U6ViHmYgHf01TPZS77NJBhBMKdWj9HQMQ==} engines: {node: '>=8'} @@ -4622,7 +4600,6 @@ packages: /cluster-key-slot@1.1.2: resolution: {integrity: sha512-RMr0FhtfXemyinomL4hrWcYJxmX6deFdCxpJzhDttxgO1+bcCnkk+9drydLVDmAMG7NE6aN/fl4F7ucU/90gAA==} engines: {node: '>=0.10.0'} - dev: false /cmdk@1.1.1(@types/react-dom@18.3.7)(@types/react@18.3.27)(react-dom@18.3.1)(react@18.3.1): resolution: {integrity: sha512-Vsv7kFaXm+ptHDMZ7izaRsP70GgrW9NBNGswt9OZaVBLlE0SNpDq8eu/VGXyF9r7M0azK3Wy7OlYXsuyYLFzHg==} @@ -4661,11 +4638,6 @@ packages: resolution: {integrity: sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==} dev: true - /color-support@1.1.3: - resolution: {integrity: sha512-qiBjkpbMLO/HL68y+lh4q0/O1MZFj2RX6X/KmMa3+gJD3z+WwI1ZzDHysvqHGS3mP6mznPckpXmw1nI9cJjyRg==} - hasBin: true - dev: false - /colorette@2.0.20: resolution: {integrity: sha512-IfEDxwoWIjkeXL1eXcDiow4UbKjhLdq6/EuSVR9GMN7KVH3r9gQ83e73hsz1Nd1T3ijd5xv1wcWRYO+D6kCI2w==} @@ -4691,6 +4663,7 @@ packages: /concat-map@0.0.1: resolution: {integrity: sha512-/Srv4dswyQNBfohGpz9o6Yb3Gz3SrUDqBH5rTuhGR7ahtlbYKnVxw2bCFMRljaA7EXHaXZ8wsHdodFvbkhKmqg==} + dev: true /concurrently@8.2.2: resolution: {integrity: sha512-1dP4gpXFhei8IOtlXRE/T/4H88ElHgTiUzh71YUmtjTEHMSRS2Z/fgOxHSxxusGHogsRfxNq1vyAwxSC+EVyDg==} @@ -4712,10 +4685,6 @@ packages: resolution: {integrity: sha512-RMtmw0iFkeR4YV+fUOSucriAQNb9g8zFR52MWCtl+cCZOFRNL6zeB395vPzFhEjjn4fMxXudmELnl/KF/WrK6w==} dev: true - /console-control-strings@1.1.0: - resolution: {integrity: 
sha512-ty/fTekppD2fIwRvnZAVdeOiGd1c7YXEixbgJTNzqcxJWKQnjJ/V1bNEEE6hygpM3WjwHFUVK6HTjWSzV4a8sQ==} - dev: false - /constant-case@3.0.4: resolution: {integrity: sha512-I2hSBi7Vvs7BEuJDr5dDHfzb/Ruj3FyvFyh7KLilAjNQw3Be+xgqUBA2W6scVEcL0hL1dwPRtIqEPVUCKkSsyQ==} dependencies: @@ -5037,14 +5006,9 @@ packages: resolution: {integrity: sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==} engines: {node: '>=0.4.0'} - /delegates@1.0.0: - resolution: {integrity: sha512-bd2L678uiWATM6m5Z1VzNCErI3jiGzt6HGY8OVICs40JQq/HALfbyNJmp0UDakEY4pMMaN0Ly5om/B1VI/+xfQ==} - dev: false - /denque@2.1.0: resolution: {integrity: sha512-HVQE3AAb/pxF8fQAoiqpvg9i3evqug3hoiwakOyZAwJm+6vZehbkYXZ0l4JxS+I3QxM97v5aaRNhj8v5oBhekw==} engines: {node: '>=0.10'} - dev: false /depd@2.0.0: resolution: {integrity: sha512-g7nH6P6dyDioJogAAGprGpCtVImJhpPk/roCzdb3fIh61/s/nPsfR6onyMwkCAR/OlC3yBC0lESvUoQEAssIrw==} @@ -5071,11 +5035,6 @@ packages: engines: {node: '>=8'} dev: true - /detect-libc@2.1.2: - resolution: {integrity: sha512-Btj2BOOO83o3WyH59e8MgXsxEQVcarkUOpEYrubB0urwnN10yQ364rsiByU11nZlqWYZm05i/of7io4mzihBtQ==} - engines: {node: '>=8'} - dev: false - /detect-newline@3.1.0: resolution: {integrity: sha512-TLz+x/vEXm/Y7P7wn1EJFNLxYpUD4TgMosxY6fAVJUnJMbupHBOncxyWUG9OpTaH9EBD7uFI5LfEgmMOc54DsA==} engines: {node: '>=8'} @@ -5149,7 +5108,6 @@ packages: /dotenv@16.6.1: resolution: {integrity: sha512-uBq4egWHTcTt33a72vpSG0z3HnPuIl6NqYcTrKEg2azoEyl2hpW0zqlxysq2pK9HlDIHyHyakeYaYnSAwd8bow==} engines: {node: '>=12'} - dev: true /dotenv@17.2.3: resolution: {integrity: sha512-JVUnt+DUIzu87TABbhPmNfVdBDt18BLOWjMUFJMSi/Qqg7NTYtabbvSNJGOJ7afbRuv9D/lngizHtP7QyLQ+9w==} @@ -5190,6 +5148,7 @@ packages: /emoji-regex@8.0.0: resolution: {integrity: sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==} + dev: true /encodeurl@2.0.0: resolution: {integrity: 
sha512-Q0n9HRi4m6JuGIV1eFlmvJB7ZEVxu93IrMyiMsGC0lrMJMWzRgx6WGquyfQgZVb31vhGgXnfmPNNXmxnOkRBrg==} @@ -5851,15 +5810,9 @@ packages: engines: {node: '>= 0.6'} dev: false - /fs-minipass@2.1.0: - resolution: {integrity: sha512-V/JgOLFCS+R6Vcq0slCuaeWEdNC3ouDlJMNIsacH2VtALiu9mV4LPrHc5cDl8k5aw6J8jwgWWpiTo5RYhmIzvg==} - engines: {node: '>= 8'} - dependencies: - minipass: 3.3.6 - dev: false - /fs.realpath@1.0.0: resolution: {integrity: sha512-OO0pH2lK6a0hZnAdau5ItzHPI6pUlvI7jMVnxUQRtw4owF2wk8lOSabtGDCTP4Ggrg2MbGnWO9X8K1t4+fGMDw==} + dev: true /fsevents@2.3.2: resolution: {integrity: sha512-xiqMQR4xAeHTuB9uWm+fFRcIOgKBMiOBP+eXiyT7jsgVCq1bkVygt00oASowB7EdtpOHaaPgKt812P9ab+DDKA==} @@ -5884,22 +5837,6 @@ packages: resolution: {integrity: sha512-xckBUXyTIqT97tq2x2AMb+g163b5JFysYk0x4qxNFwbfQkmNZoiRHb6sPzI9/QV33WeuvVYBUIiD4NzNIyqaRQ==} dev: true - /gauge@3.0.2: - resolution: {integrity: sha512-+5J6MS/5XksCuXq++uFRsnUd7Ovu1XenbeuIuNRJxYWjgQbPuFhT14lAvsWfqfAmnwluf1OwMjz39HjfLPci0Q==} - engines: {node: '>=10'} - deprecated: This package is no longer supported. 
- dependencies: - aproba: 2.1.0 - color-support: 1.1.3 - console-control-strings: 1.1.0 - has-unicode: 2.0.1 - object-assign: 4.1.1 - signal-exit: 3.0.7 - string-width: 4.2.3 - strip-ansi: 6.0.1 - wide-align: 1.1.5 - dev: false - /gensync@1.0.0-beta.2: resolution: {integrity: sha512-3hN7NaskYvMDLQY55gnW3NQ+mesEAepTqlg+VEbj7zzqEMBVNhzcGYYeqFo/TlYz6eQiFcp1HcsCZO+nGgS8zg==} engines: {node: '>=6.9.0'} @@ -5986,6 +5923,7 @@ packages: minimatch: 3.1.2 once: 1.4.0 path-is-absolute: 1.0.1 + dev: true /globals@13.24.0: resolution: {integrity: sha512-AhO5QUcj8llrbG09iWhPU2B204J1xnPeL8kQmVorSsy+Sjj1sk8gIyh6cUocGmH4L0UuhAJy+hJMRA4mgA4mFQ==} @@ -6137,10 +6075,6 @@ packages: dependencies: has-symbols: 1.1.0 - /has-unicode@2.0.1: - resolution: {integrity: sha512-8Rf9Y83NBReMnx0gFzA8JImQACstCYWUplepDa9xprwwtmgEZUF0h/i5xSA625zB/I37EtrswSST6OXxwaaIJQ==} - dev: false - /hasown@2.0.2: resolution: {integrity: sha512-0hJU9SCPvmMzIBdZFqNPXWa6dqh7WdH0cII9y+CyS8rG3nL48Bclra9HmKhVVUHyPWNH5Y7xDwAB7bfgSjkUMQ==} engines: {node: '>= 0.4'} @@ -6183,16 +6117,6 @@ packages: - supports-color dev: true - /https-proxy-agent@5.0.1: - resolution: {integrity: sha512-dFcAjpTQFgoLMzC2VwU+C/CbS7uRL0lWmxDITmqm7C+7F0Odmj6s9l6alZc6AELXhrnggM2CeWSXHGOdX2YtwA==} - engines: {node: '>= 6'} - dependencies: - agent-base: 6.0.2 - debug: 4.4.3 - transitivePeerDependencies: - - supports-color - dev: false - /https-proxy-agent@7.0.6: resolution: {integrity: sha512-vK9P5/iUfdl95AI+JVyUuIcVtd4ofvtrOr3HNtM2yxC9bnMbEdp3x01OhQNnjb8IJYi38VlTE3mBXwcfvywuSw==} engines: {node: '>= 14'} @@ -6278,6 +6202,7 @@ packages: dependencies: once: 1.4.0 wrappy: 1.0.2 + dev: true /inherits@2.0.4: resolution: {integrity: sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==} @@ -6340,7 +6265,6 @@ packages: standard-as-callback: 2.1.0 transitivePeerDependencies: - supports-color - dev: false /ipaddr.js@1.9.1: resolution: {integrity: 
sha512-0KI/607xoxSToH7GjN1FfSbLoU0+btTicjsQSWQlh/hZykN8KpmMf7uYwPW3R+akZ6R/w18ZlXSHBYXiYUPO3g==} @@ -6425,6 +6349,7 @@ packages: /is-fullwidth-code-point@3.0.0: resolution: {integrity: sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg==} engines: {node: '>=8'} + dev: true /is-generator-fn@2.1.0: resolution: {integrity: sha512-cTIB4yPYL/Grw0EaSzASzg6bBy9gqCofvWN8okThAYIxKJZC+udlRAmGbM0XLeniEJSs8uEgHPGuHSe1XsOLSQ==} @@ -7263,7 +7188,6 @@ packages: /lodash.defaults@4.2.0: resolution: {integrity: sha512-qjxPLHd3r5DnsdGacqOMU6pb/avJzdh9tFX2ymgoZE27BmjXrNy/y4LoaiTeAb+O3gL8AfpJGtqfX/ae2leYYQ==} - dev: false /lodash.includes@4.3.0: resolution: {integrity: sha512-W3Bx6mdkRTGtlJISOvVD/lbqjTlPPUDTMnlXZFnVwi9NKJ6tiAk6LVdlhZMm17VZisqhKcgzpO5Wz91PCt5b0w==} @@ -7271,7 +7195,6 @@ packages: /lodash.isarguments@3.1.0: resolution: {integrity: sha512-chi4NHZlZqZD18a0imDHnZPrDeBbTtVN7GXMwuGdRH9qotxAjYs3aVLKc7zNOG9eddR5Ksd8rvFEBc9SsggPpg==} - dev: false /lodash.isboolean@3.0.3: resolution: {integrity: sha512-Bz5mupy2SVbPHURB98VAcw+aHh4vRV5IPNhILUCsOzRmsTmSQ17jIuqopAentWoehktxGd9e/hbIXq980/1QJg==} @@ -7400,13 +7323,6 @@ packages: source-map-js: 1.2.1 dev: true - /make-dir@3.1.0: - resolution: {integrity: sha512-g3FeP20LNwhALb/6Cz6Dd4F2ngze0jz7tbzrD2wAV+o9FeNHe4rL+yK2md0J/fiSf1sa1ADhXqi5+oVwOM/eGw==} - engines: {node: '>=8'} - dependencies: - semver: 6.3.1 - dev: false - /make-dir@4.0.0: resolution: {integrity: sha512-hXdUTZYIVOt1Ex//jAQi+wTZZpUpwBj/0QsOzqegb3rGMMeJiSEu5xLHnYfBrRV4RH2+OCSOO95Is/7x1WJ4bw==} engines: {node: '>=10'} @@ -7516,6 +7432,7 @@ packages: resolution: {integrity: sha512-J7p63hRiAjw1NDEww1W7i37+ByIrOWO5XQQAzZ3VOcL0PNybwpfmV/N05zFAzwQ9USyEcX6t3UO+K5aqBQOIHw==} dependencies: brace-expansion: 1.1.12 + dev: true /minimatch@9.0.3: resolution: {integrity: sha512-RHiac9mvaRw0x3AYRgDC1CxAP7HTcNrrECeA8YYJeWnpo+2Q5CegtZjaotWTWxDG3UeGA1coE05iH1mPjT/2mg==} @@ -7534,32 +7451,6 @@ packages: /minimist@1.2.8: resolution: {integrity: 
sha512-2yyAR8qBkN3YuheJanUpWC5U3bb5osDywNB8RzDVlDwDHbocAJveqqj1u8+SVD7jkWT4yvsHCpWqqWqAxb0zCA==} - /minipass@3.3.6: - resolution: {integrity: sha512-DxiNidxSEK+tHG6zOIklvNOwm3hvCrbUrdtzY74U6HKTJxvIDfOUL5W5P2Ghd3DTkhhKPYGqeNUIh5qcM4YBfw==} - engines: {node: '>=8'} - dependencies: - yallist: 4.0.0 - dev: false - - /minipass@5.0.0: - resolution: {integrity: sha512-3FnjYuehv9k6ovOEbyOswadCDPX1piCfhV8ncmYtHOjuPwylVWsghTLo7rabjC3Rx5xD4HDx8Wm1xnMF7S5qFQ==} - engines: {node: '>=8'} - dev: false - - /minizlib@2.1.2: - resolution: {integrity: sha512-bAxsR8BVfj60DWXHE3u30oHzfl4G7khkSuPW+qvpd7jFRHm7dLxOjUk1EHACJ/hxLY8phGJ0YhYHZo7jil7Qdg==} - engines: {node: '>= 8'} - dependencies: - minipass: 3.3.6 - yallist: 4.0.0 - dev: false - - /mkdirp@1.0.4: - resolution: {integrity: sha512-vVqVZQyf3WLx2Shd0qJ9xuvqgAyKPLAiqITEtqW0oIUjzo3PePDd6fW9iFz30ef7Ysp/oiWqbhszeGWW2T6Gzw==} - engines: {node: '>=10'} - hasBin: true - dev: false - /mlly@1.8.0: resolution: {integrity: sha512-l8D9ODSRWLe2KHJSifWGwBqpTZXIXTeo8mlKjY+E2HAakaTeNpqAyBZ8GSqLzHgw4XmHmC8whvpjJNMbFZN7/g==} dependencies: @@ -7644,10 +7535,6 @@ packages: resolution: {integrity: sha512-AGK2yQKIjRuqnc6VkX2Xj5d+QW8xZ87pa1UK6yA6ouUyuxfHuMP6umE5QK7UmTeOAymo+Zx1Fxiuw9rVx8taHQ==} dev: false - /node-addon-api@5.1.0: - resolution: {integrity: sha512-eh0GgfEkpnoWDq+VY8OyvYhFEzBk6jIYbRKdIlyTiAXIVJ8PyBaKb0rp7oDtoddbdoHWhq8wwr+XZ81F1rpNdA==} - dev: false - /node-domexception@1.0.0: resolution: {integrity: sha512-/jKZoMpw0F8GRwl4/eLROPA3cfcXtLApP0QzLmUT/HuPCZWyB7IY9ZrMeKw2O/nFIqPQB3PVM9aYm0F312AXDQ==} engines: {node: '>=10.5.0'} @@ -7682,14 +7569,6 @@ packages: resolution: {integrity: sha512-nmh3lCkYZ3grZvqcCH+fjmQ7X+H0OeZgP40OierEaAptX4XofMh5kwNbWh7lBduUzCcV/8kZ+NDLCwm2iorIlA==} dev: true - /nopt@5.0.0: - resolution: {integrity: sha512-Tbj67rffqceeLpcRXrT7vKAN8CwfPeIBgM7E6iBkmKLV7bEMwpGgYLGv0jACUsECaa/vuxP0IjEont6umdMgtQ==} - engines: {node: '>=6'} - hasBin: true - dependencies: - abbrev: 1.1.1 - dev: false - /normalize-path@2.1.1: resolution: 
{integrity: sha512-3pKJwH184Xo/lnH6oyP1q2pMd7HcypqqmRs91/6/i2CGtWwIKGCkOOMTm/zXbgTEWHw1uNpNi/igc3ePOYHb6w==} engines: {node: '>=0.10.0'} @@ -7716,16 +7595,6 @@ packages: path-key: 4.0.0 dev: true - /npmlog@5.0.1: - resolution: {integrity: sha512-AqZtDUWOMKs1G/8lwylVjrdYgqA4d9nu8hc+0gzRxlDb1I10+FHBGMXs6aiQHFdCUUlqH99MUMuLfzWDNDtfxw==} - deprecated: This package is no longer supported. - dependencies: - are-we-there-yet: 2.0.0 - console-control-strings: 1.1.0 - gauge: 3.0.2 - set-blocking: 2.0.0 - dev: false - /nullthrows@1.1.1: resolution: {integrity: sha512-2vPPEi+Z7WqML2jZYddDIfy5Dqb0r2fze2zTxNNknZaFpVHU3mFB3R+DWeJWGVx0ecvttSGlJTI+WG+8Z4cDWw==} dev: true @@ -7941,6 +7810,7 @@ packages: /path-is-absolute@1.0.1: resolution: {integrity: sha512-AVbw3UJ2e9bq64vSaS9Am0fje1Pa8pbGqTTsmXfaIiMpnr5DlDhfJOuLj9Sf95ZPVDAUerDfEk88MPmPe7UCQg==} engines: {node: '>=0.10.0'} + dev: true /path-key@3.1.1: resolution: {integrity: sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q==} @@ -7992,12 +7862,10 @@ packages: /pg-cloudflare@1.3.0: resolution: {integrity: sha512-6lswVVSztmHiRtD6I8hw4qP/nDm1EJbKMRhf3HCYaqud7frGysPv7FYJ5noZQdhQtN2xJnimfMtvQq21pdbzyQ==} requiresBuild: true - dev: false optional: true /pg-connection-string@2.10.0: resolution: {integrity: sha512-ur/eoPKzDx2IjPaYyXS6Y8NSblxM7X64deV2ObV57vhjsWiwLvUD6meukAzogiOsu60GO8m/3Cb6FdJsWNjwXg==} - dev: false /pg-int8@1.0.1: resolution: {integrity: sha512-WCtabS6t3c8SkpDBUlb1kjOs7l66xsGdKpIPZsg4wR+B3+u9UAum2odSsF9tnvxg80h4ZxLWMy4pRjOsFIqQpw==} @@ -8009,7 +7877,6 @@ packages: pg: '>=8.0' dependencies: pg: 8.17.1 - dev: false /pg-protocol@1.11.0: resolution: {integrity: sha512-pfsxk2M9M3BuGgDOfuy37VNRRX3jmKgMjcvAcWqNDpZSf4cUmv8HSOl5ViRQFsfARFn0KuUQTgLxVMbNq5NW3g==} @@ -8040,13 +7907,11 @@ packages: pgpass: 1.0.5 optionalDependencies: pg-cloudflare: 1.3.0 - dev: false /pgpass@1.0.5: resolution: {integrity: 
sha512-FdW9r/jQZhSeohs1Z3sI1yxFQNFvMcnmfuj4WBMUTxOrAyLMaTcE1aAMBiTlbMNaXvBCQuVi0R7hd8udDSP7ug==} dependencies: split2: 4.2.0 - dev: false /picocolors@1.1.1: resolution: {integrity: sha512-xceH2snhtb5M9liqDsmEw56le376mTZkEX/jEb/RxNFyegNul7eNslCXP9FDj/Lcu0X8KEyMceP2ntpaHrDEVA==} @@ -8204,7 +8069,7 @@ packages: postcss: 8.5.6 dev: true - /postcss-load-config@6.0.1(jiti@1.21.7)(postcss@8.5.6): + /postcss-load-config@6.0.1(jiti@1.21.7)(postcss@8.5.6)(tsx@4.21.0): resolution: {integrity: sha512-oPtTM4oerL+UXmx+93ytZVN82RrlY/wPUV8IeDxFrzIjXOLF1pN+EmKPLbubvKHT2HC20xXsCAH2Z+CKV6Oz/g==} engines: {node: '>= 18'} peerDependencies: @@ -8225,6 +8090,7 @@ packages: jiti: 1.21.7 lilconfig: 3.1.3 postcss: 8.5.6 + tsx: 4.21.0 dev: true /postcss-nested@6.2.0(postcss@8.5.6): @@ -8533,6 +8399,7 @@ packages: inherits: 2.0.4 string_decoder: 1.3.0 util-deprecate: 1.0.2 + dev: true /readable-stream@4.7.0: resolution: {integrity: sha512-oIGGmcpTLwPga8Bn6/Z75SVaH1z5dUut2ibSyAMVhmUggWpmDn2dapB0n7f8nwaSiRtepAsfJyfXIO5DCVAODg==} @@ -8593,14 +8460,12 @@ packages: /redis-errors@1.2.0: resolution: {integrity: sha512-1qny3OExCf0UvUV/5wpYKf2YwPcOqXzkwKKSmKHiE6ZMQs5heeE/c8eXK+PNllPvmjgAbfnsbpkGZWy8cBpn9w==} engines: {node: '>=4'} - dev: false /redis-parser@3.0.0: resolution: {integrity: sha512-DJnGAeenTdpMEH6uAJRK/uiyEIH9WVsUmoLwzudwGJUwZPp80PDBWPHXSAGNPwNvIXAbe7MSUB1zQFugFml66A==} engines: {node: '>=4'} dependencies: redis-errors: 1.2.0 - dev: false /regexp-tree@0.1.27: resolution: {integrity: sha512-iETxpjK6YoRWJG5o6hXLwvjYAoW+FEZn9os0PD/b6AP6xQwsa/Y7lCVgIixBbUPMfhu+i2LtdeAqVTgGlQarfA==} @@ -8718,6 +8583,7 @@ packages: hasBin: true dependencies: glob: 7.2.3 + dev: true /rollup@4.55.2: resolution: {integrity: sha512-PggGy4dhwx5qaW+CKBilA/98Ql9keyfnb7lh4SR6shQ91QQQi1ORJ1v4UinkdP2i87OBs9AQFooQylcrrRfIcg==} @@ -8819,6 +8685,7 @@ packages: /semver@6.3.1: resolution: {integrity: sha512-BR7VvDCVHO+q2xBEWskxS6DJE1qRnb7DxzUrogb71CWoSficBxYsiAGd+Kl0mmq/MprG9yArRkyrQxTO6XjMzA==} hasBin: true + dev: true 
/semver@7.7.3: resolution: {integrity: sha512-SdsKMrI9TdgjdweUSR9MweHA4EJ8YxHn8DFaDisvhVlUOe4BF1tLD7GAj0lIqWVl+dPb/rExr0Btby5loQm20Q==} @@ -8866,10 +8733,6 @@ packages: - supports-color dev: false - /set-blocking@2.0.0: - resolution: {integrity: sha512-KiKBS8AnWGEyLzofFfmvKwpdPzqiy16LvQfK3yv/fVH7Bj13/wl3JSR1J+rfgRE9q7xUJK4qvgS8raSOeLUehw==} - dev: false - /set-cookie-parser@2.7.2: resolution: {integrity: sha512-oeM1lpU/UvhTxw+g3cIfxXHyJRc/uidd3yK1P242gzHds0udQBYzs3y8j4gCCW+ZJ7ad0yctld8RYO+bdurlvw==} dev: false @@ -8972,6 +8835,7 @@ packages: /signal-exit@3.0.7: resolution: {integrity: sha512-wnD2ZE+l+SPC/uoS0vXeE9L1+0wuaMqKlfz9AMUo38JsyLSBWSFcHR1Rri62LZc12vLr1gb3jl7iwQhgwpAbGQ==} + dev: true /signal-exit@4.1.0: resolution: {integrity: sha512-bzyZ1e88w9O1iNJbKnOlvYTrWPDl46O1bG0D3XInv+9tkPrxrN8jUUTiFlDkkmKWgn1M6CfIA13SuGqOa9Korw==} @@ -9061,7 +8925,6 @@ packages: /split2@4.2.0: resolution: {integrity: sha512-UcjcJOWknrNkF6PLX83qcHM6KHgVKNkV62Y8a5uYDVv9ydGQVwAHMKqHdJje1VTWpljG0WYpCDhrCdAOYH4TWg==} engines: {node: '>= 10.x'} - dev: false /sponge-case@1.0.1: resolution: {integrity: sha512-dblb9Et4DAtiZ5YSUZHLl4XhH4uK80GhAZrVXdN4O2P4gQ40Wa5UIOPUHlA/nFd2PLblBZWUioLMMAVrgpoYcA==} @@ -9086,7 +8949,6 @@ packages: /standard-as-callback@2.1.0: resolution: {integrity: sha512-qoRRSyROncaz1z0mvYqIE4lCd9p2R90i6GxW3uZv5ucSu8tU7B5HXUP1gG8pVZsYNVaXjk8ClXHPttLyxAL48A==} - dev: false /statuses@2.0.2: resolution: {integrity: sha512-DvEy55V3DB7uknRo+4iOGT5fP1slR8wQohVdknigZPMpMstaKJQWhwiYBACJE3Ul2pTnATihhBYnRhZQHGBiRw==} @@ -9124,6 +8986,7 @@ packages: emoji-regex: 8.0.0 is-fullwidth-code-point: 3.0.0 strip-ansi: 6.0.1 + dev: true /string_decoder@1.3.0: resolution: {integrity: sha512-hkRX8U1WjJFd8LsDJ2yQ/wWWxaopEsABU1XfkM8A+j0+85JAGppt16cr1Whg6KIbb4okU6Mql6BOj+uup/wKeA==} @@ -9135,6 +8998,7 @@ packages: engines: {node: '>=8'} dependencies: ansi-regex: 5.0.1 + dev: true /strip-bom@4.0.0: resolution: {integrity: 
sha512-3xurFv5tEgii33Zi8Jtp55wEIILR9eh34FAW00PZf+JnSsTmV/ioewSgQl97JHvgjoRGwPShsWm+IdrxB35d0w==} @@ -9263,7 +9127,7 @@ packages: resolution: {integrity: sha512-uSaO4gnW+b3Y2aWoWfFpX62vn2sR3skfhbjsEnaBI81WD1wBLlHZe5sWf0AqjksNdYTbGBEd0UasQMT3SNV15g==} dev: false - /tailwindcss@3.4.19: + /tailwindcss@3.4.19(tsx@4.21.0): resolution: {integrity: sha512-3ofp+LL8E+pK/JuPLPggVAIaEuhvIz4qNcf3nA1Xn2o/7fb7s/TYpHhwGDv1ZU3PkBluUVaF8PyCHcm48cKLWQ==} engines: {node: '>=14.0.0'} hasBin: true @@ -9285,7 +9149,7 @@ packages: postcss: 8.5.6 postcss-import: 15.1.0(postcss@8.5.6) postcss-js: 4.1.0(postcss@8.5.6) - postcss-load-config: 6.0.1(jiti@1.21.7)(postcss@8.5.6) + postcss-load-config: 6.0.1(jiti@1.21.7)(postcss@8.5.6)(tsx@4.21.0) postcss-nested: 6.2.0(postcss@8.5.6) postcss-selector-parser: 6.1.2 resolve: 1.22.11 @@ -9295,19 +9159,6 @@ packages: - yaml dev: true - /tar@6.2.1: - resolution: {integrity: sha512-DZ4yORTwrbTj/7MZYq2w+/ZFdI6OZ/f9SFHR+71gIVUZhOQPHzVCLpvRnPgyaMpfWxxk/4ONva3GQSyNIKRv6A==} - engines: {node: '>=10'} - deprecated: Old versions of tar are not supported, and contain widely publicized security vulnerabilities, which have been fixed in the current version. Please update. 
Support for old versions may be purchased (at exhorbitant rates) by contacting i@izs.me - dependencies: - chownr: 2.0.0 - fs-minipass: 2.1.0 - minipass: 5.0.0 - minizlib: 2.1.2 - mkdirp: 1.0.4 - yallist: 4.0.0 - dev: false - /tdigest@0.1.2: resolution: {integrity: sha512-+G0LLgjjo9BZX2MfdvPfH+MKLCrxlXSYec5DaPYP1fe6Iyhf0/fSmJ0bFiZ1F8BT6cGXl2LpltQptzjXKWEkKA==} dependencies: @@ -9714,6 +9565,7 @@ packages: /util-deprecate@1.0.2: resolution: {integrity: sha512-EPD5q1uXyFxJpCrLnCc1nHnq3gOa6DZBocAIiI2TaSCA7VCJ1UJDMagCzIkXNsUYfD1daK//LTEQ8xiIbrHtcw==} + dev: true /utils-merge@1.0.1: resolution: {integrity: sha512-pMZTvIkT1d+TFGvDOqodOclx0QWkkgi6Tdoa8gC8ffGAAqz9pzPTZWAybbsHHoED/ztMtkv/VoYTYyShUn81hA==} @@ -9972,12 +9824,6 @@ packages: stackback: 0.0.2 dev: true - /wide-align@1.1.5: - resolution: {integrity: sha512-eDMORYaPNZ4sQIuuYPDHdQvf4gyCF9rEEV/yPxGfwPkRodwEgiMUUXTx/dex+Me0wxx53S+NgUHaP7y3MGlDmg==} - dependencies: - string-width: 4.2.3 - dev: false - /word-wrap@1.2.5: resolution: {integrity: sha512-BN22B5eaMMI9UMtjrGd5g5eCYPpCPDUy0FJXbYsaT5zYxjFOckS53SQDE3pWkVoWpHXVb3BrYcEN4Twa55B5cA==} engines: {node: '>=0.10.0'} @@ -10042,10 +9888,6 @@ packages: resolution: {integrity: sha512-a4UGQaWPH59mOXUYnAG2ewncQS4i4F43Tv3JoAM+s2VDAmS9NsK8GpDMLrCHPksFT7h3K6TOoUNn2pb7RoXx4g==} dev: true - /yallist@4.0.0: - resolution: {integrity: sha512-3wdGidZyq5PB084XLES5TpOSRA3wjXAlIWMhum2kRcv/41Sn2emQ0dycQW4uZXLejwKvg6EsvbdlVL+FYEct7A==} - dev: false - /yaml-ast-parser@0.0.43: resolution: {integrity: sha512-2PTINUwsRqSd+s8XxKaJWQlUuEMHJQyEuh2edBbW8KNJz0SJPwUSD2zRWqezFEdN7IzAgeuYHFUCF7o8zRdZ0A==} dev: true diff --git a/scripts/bench/aggregate-plan.sql b/scripts/bench/aggregate-plan.sql new file mode 100644 index 0000000..cb9abc9 --- /dev/null +++ b/scripts/bench/aggregate-plan.sql @@ -0,0 +1,16 @@ +-- Aggregate query plan. 
+-- Usage: +-- psql "$DATABASE_URL" -f scripts/bench/aggregate-plan.sql + +EXPLAIN (ANALYZE, BUFFERS) +SELECT + date, + metric_name, + SUM(metric_value) AS metric_value +FROM daily_aggregates +WHERE org_id = '00000000-0000-0000-0000-000000000001' + AND project_id = '00000000-0000-0000-0000-000000000002' + AND metric_name IN ('dau', 'event_count', 'total_events') + AND date >= CURRENT_DATE - INTERVAL '90 days' +GROUP BY date, metric_name +ORDER BY date ASC, metric_name ASC; diff --git a/scripts/bench/dashboard-plan.sql b/scripts/bench/dashboard-plan.sql new file mode 100644 index 0000000..10f8aea --- /dev/null +++ b/scripts/bench/dashboard-plan.sql @@ -0,0 +1,16 @@ +-- Dashboard query plan. +-- Usage: +-- psql "$DATABASE_URL" -f scripts/bench/dashboard-plan.sql + +EXPLAIN (ANALYZE, BUFFERS) +SELECT + date, + event_count, + unique_users, + unique_sessions, + events_by_name +FROM mv_dashboard_metrics +WHERE org_id = '00000000-0000-0000-0000-000000000001' + AND project_id = '00000000-0000-0000-0000-000000000002' + AND date >= CURRENT_DATE - INTERVAL '30 days' +ORDER BY date DESC; diff --git a/scripts/bench/partition-pruning-plan.sql b/scripts/bench/partition-pruning-plan.sql new file mode 100644 index 0000000..415a82b --- /dev/null +++ b/scripts/bench/partition-pruning-plan.sql @@ -0,0 +1,11 @@ +-- Partition pruning query plan. +-- Usage: +-- psql "$DATABASE_URL" -f scripts/bench/partition-pruning-plan.sql + +EXPLAIN (ANALYZE, BUFFERS) +SELECT COUNT(*) AS events_in_window +FROM events +WHERE org_id = '00000000-0000-0000-0000-000000000001' + AND project_id = '00000000-0000-0000-0000-000000000002' + AND timestamp >= CURRENT_DATE - INTERVAL '90 days' + AND timestamp < CURRENT_DATE + INTERVAL '1 day'; diff --git a/scripts/bench/raw-events-plan.sql b/scripts/bench/raw-events-plan.sql new file mode 100644 index 0000000..15f9ac7 --- /dev/null +++ b/scripts/bench/raw-events-plan.sql @@ -0,0 +1,19 @@ +-- Raw event query plan. 
+-- Usage: +-- psql "$DATABASE_URL" -f scripts/bench/raw-events-plan.sql + +EXPLAIN (ANALYZE, BUFFERS) +SELECT + id, + event_id, + event_name, + user_id, + session_id, + timestamp, + properties +FROM events +WHERE org_id = '00000000-0000-0000-0000-000000000001' + AND project_id = '00000000-0000-0000-0000-000000000002' + AND timestamp >= NOW() - INTERVAL '7 days' +ORDER BY timestamp DESC +LIMIT 100; diff --git a/scripts/benchmark-report.js b/scripts/benchmark-report.js new file mode 100644 index 0000000..57b0985 --- /dev/null +++ b/scripts/benchmark-report.js @@ -0,0 +1,428 @@ +#!/usr/bin/env node + +const { existsSync, mkdirSync, readdirSync, readFileSync, writeFileSync } = require('node:fs'); +const { dirname, resolve } = require('node:path'); +const { execSync } = require('node:child_process'); +const os = require('node:os'); + +const repoRoot = resolve(__dirname, '..'); +const evidenceDir = resolve(repoRoot, 'docs/benchmarks/evidence'); +const queryPlansDir = resolve(repoRoot, 'docs/query-plans'); + +function argValue(name, fallback) { + const index = process.argv.indexOf(name); + return index === -1 ? 
fallback : process.argv[index + 1]; +} + +function safeCommand(command) { + try { + return execSync(command, { + cwd: repoRoot, + encoding: 'utf8', + stdio: ['ignore', 'pipe', 'ignore'], + }).trim(); + } catch (_error) { + return 'TBD'; + } +} + +function today() { + return new Date().toISOString().slice(0, 10); +} + +function latestRunId() { + const fromArgs = argValue('--run-id', process.env.RUN_ID || ''); + if (fromArgs) return fromArgs; + + const latestPath = resolve(evidenceDir, 'latest-run-id.txt'); + if (existsSync(latestPath)) { + return readFileSync(latestPath, 'utf8').trim(); + } + + return ''; +} + +function readJsonIfExists(path) { + if (!existsSync(path)) return null; + return JSON.parse(readFileSync(path, 'utf8')); +} + +function evidencePath(prefix, runId) { + return resolve(evidenceDir, `${prefix}-${runId}.json`); +} + +function relative(path) { + return path.replace(`${repoRoot}/`, ''); +} + +function formatNumber(value, digits = 2) { + return Number.isFinite(value) ? value.toFixed(digits) : 'not found'; +} + +function formatMs(value) { + return Number.isFinite(value) ? `${value.toFixed(2)} ms` : 'not found'; +} + +function formatPercent(value) { + return Number.isFinite(value) ? 
`${(value * 100).toFixed(2)}%` : 'not found'; +} + +function metricValue(summary, metric, value) { + return summary?.metrics?.[metric]?.values?.[value]; +} + +function k6Row(label, command, summary, evidenceFile) { + if (!summary) { + return `| ${label} | \`${command}\` | not found | not found | not found | not found | not found | not measured by this k6 row | not found | Missing evidence file |`; + } + + return [ + `| ${label}`, + `\`${command}\``, + `${formatNumber(metricValue(summary, 'http_reqs', 'rate'))} req/s`, + formatMs(metricValue(summary, 'http_req_duration', 'med')), + formatMs(metricValue(summary, 'http_req_duration', 'p(95)')), + formatMs(metricValue(summary, 'http_req_duration', 'p(99)')), + formatPercent(metricValue(summary, 'http_req_failed', 'rate')), + 'not measured by this k6 row', + `${formatNumber(metricValue(summary, 'http_reqs', 'count'), 0)} requests`, + `Measured; ${relative(evidenceFile)}`, + ].join(' | ') + ' |'; +} + +function cacheRow(cache, evidenceFile) { + if (!cache) { + return '| Dashboard cache | `pnpm benchmark:cache` | not found | not found | not found | not found | not found | not found | not found | Missing evidence file |'; + } + + return [ + '| Dashboard cache', + '`pnpm benchmark:cache`', + 'n/a', + formatMs(cache.summary?.warm_median_ms), + formatMs(cache.summary?.warm_p95_ms), + 'not captured', + cache.cold?.graphql_errors === 0 ? '0 GraphQL errors' : `${cache.cold?.graphql_errors ?? 'not found'} GraphQL errors`, + 'n/a', + `cold ${formatMs(cache.cold?.duration_ms)}, ${cache.summary?.warm_iterations ?? 
'not found'} warm iterations`, + `Measured; ${relative(evidenceFile)}`, + ].join(' | ') + ' |'; +} + +function workerRow(worker, evidenceFile) { + if (!worker) { + return '| Worker catch-up | `pnpm benchmark:worker` | not found | not found | not found | not found | not found | not found | not found | Missing evidence file |'; + } + + return [ + '| Worker catch-up', + '`pnpm benchmark:worker`', + `${formatNumber(worker.persisted_events_per_second_until_caught_up)} persisted events/s`, + 'n/a', + 'n/a', + 'n/a', + `${worker.accepted_events === worker.persisted_events ? '0 lost in run' : 'mismatch'}`, + `${worker.kafka_lag_after?.total_lag ?? 'not found'}`, + `${worker.accepted_events ?? 'not found'} accepted / ${worker.persisted_events ?? 'not found'} persisted`, + `Measured; ${relative(evidenceFile)}`, + ].join(' | ') + ' |'; +} + +function hotDbRow(hotDb, evidenceFile) { + if (!hotDb) { + return '| Hot tenant DB evidence | `pnpm benchmark:hot-db` | not found | n/a | n/a | n/a | not found | not found | not found | Missing evidence file |'; + } + + const hotClass = hotDb.tenant_distribution?.find((row) => row.tenant_class === 'hot'); + const hotPlan = hotDb.representative_query_plans?.find((plan) => plan.label === 'hot-raw-event-count'); + const quietPlan = hotDb.representative_query_plans?.find((plan) => plan.label === 'quiet-raw-event-count'); + const waitingLocks = hotDb.postgres_snapshot?.locks?.waiting_locks; + const aggregatePressure = hotDb.aggregate_pressure?.find((row) => row.tenant_class === 'hot'); + const reconciliation = hotDb.reconciliation; + + return [ + '| Hot tenant DB evidence', + '`pnpm benchmark:hot-db`', + `${hotDb.total_events ?? 'not found'} persisted hot-test events`, + 'n/a', + 'n/a', + 'n/a', + reconciliation?.status === 'complete' ? '0 unmatched requests' : (reconciliation?.status ?? 'not checked'), + reconciliation?.kafka_lag_after?.total_lag ?? 'not checked', + [ + hotPlan ? 
/**
 * List the saved query-plan Markdown files in `queryPlansDir`.
 *
 * @returns {string[]} Sorted `.md` file names excluding `README.md`; empty
 *   when the directory does not exist.
 */
function queryPlanFiles() {
  if (!existsSync(queryPlansDir)) {
    return [];
  }

  const names = readdirSync(queryPlansDir);
  const plans = names.filter((name) => name !== 'README.md' && name.endsWith('.md'));
  return plans.sort();
}

/**
 * Decide whether a query-plan file belongs to the given benchmark run.
 *
 * A file is run-scoped when its name contains the run ID or, failing that,
 * when its content contains one of the run-ID markers below.
 *
 * @param {string} file - File name inside `queryPlansDir`.
 * @param {string|undefined} runId - Run ID to look for; falsy never matches.
 * @returns {boolean} `true` when the file is tied to `runId`.
 */
function isRunScopedQueryPlan(file, runId) {
  if (!runId) {
    return false;
  }
  // A match on the file name avoids reading the file at all.
  if (file.includes(runId)) {
    return true;
  }

  const content = readFileSync(resolve(queryPlansDir, file), 'utf8');
  const markers = [
    `run ID \`${runId}\``,
    `run_id: ${runId}`,
    `RUN_ID=${runId}`,
    runId,
  ];
  return markers.some((marker) => content.includes(marker));
}

/**
 * Render the rows of a query-plan table.
 *
 * @param {string[]} files - Query-plan file names.
 * @param {string} observation - Text repeated in every "Observation" cell.
 * @returns {string} Newline-joined Markdown rows, or one "not found" row when
 *   `files` is empty.
 */
function queryPlanRows(files, observation) {
  if (files.length === 0) {
    return '| not found | not found | not found |';
  }

  const rows = [];
  for (const file of files) {
    // Display name: drop a leading YYYY-MM-DD- stamp and the .md extension.
    const name = file.replace(/^\d{4}-\d{2}-\d{2}-/, '').replace(/\.md$/, '');
    rows.push(`| ${name} | \`docs/query-plans/${file}\` | ${observation} |`);
  }
  return rows.join('\n');
}
/**
 * Render the rows of the "Evidence Files" table for files that exist on disk.
 *
 * @param {{label: string, path: string}[]} files - Candidate evidence files.
 * @returns {string} Newline-joined Markdown rows, or one "not found" row when
 *   none of the paths exist.
 */
function evidenceRows(files) {
  const rows = [];
  for (const { label, path } of files) {
    if (existsSync(path)) {
      rows.push(`| \`${relative(path)}\` | ${label} |`);
    }
  }

  if (rows.length) {
    return rows.join('\n');
  }
  return '| not found | No evidence files found for this run ID |';
}

/**
 * Describe the CPU count and memory that `docker info` reports.
 *
 * @returns {string} `"<cpus>, <memory>"`. Memory is shown in GiB when the
 *   command output is a positive number, otherwise the raw output is kept.
 */
function dockerResources() {
  const cpus = safeCommand("docker info --format '{{.NCPU}} CPUs' 2>/dev/null");
  const memoryBytes = safeCommand("docker info --format '{{.MemTotal}}' 2>/dev/null");

  const bytes = Number(memoryBytes);
  let memoryLabel = memoryBytes;
  if (Number.isFinite(bytes) && bytes > 0) {
    memoryLabel = `${(bytes / 1024 / 1024 / 1024).toFixed(2)} GiB`;
  }

  return `${cpus}, ${memoryLabel}`;
}

/**
 * Run one SQL statement through `psql` inside the `pulseops-postgres`
 * container and return whatever `safeCommand` yields for it.
 *
 * @param {string} sql - SQL text; it is placed inside double quotes on the
 *   shell command line, so it must not contain double quotes itself.
 * @returns {string} Output of `safeCommand` for the psql invocation.
 */
function dbCount(sql) {
  return safeCommand(`docker exec pulseops-postgres psql -U pulseops -d pulseops_dev -tAc "${sql}"`);
}
`${dockerK6} (Docker fallback image ${k6DockerImage})` : localK6, + }; +} + +const date = argValue('--date', today()); +const output = resolve(repoRoot, argValue('--output', `docs/benchmarks/${date}-pulseops-benchmark.md`)); +const templatePath = resolve(repoRoot, argValue('--template', 'docs/benchmarks/YYYY-MM-DD-pulseops-benchmark.md')); +const dataset = argValue('--dataset', 'local Docker dataset at report generation time'); +const force = process.argv.includes('--force'); +const runId = latestRunId(); + +if (existsSync(output) && !force) { + console.log(`${output} already exists; pass --force to regenerate from evidence or the template.`); + process.exit(0); +} + +if (!runId) { + let report = readFileSync(templatePath, 'utf8'); + const replacements = new Map([ + ['# PulseOps Benchmark Report: YYYY-MM-DD', `# PulseOps Benchmark Report: ${date}`], + ['Status: TBD / not run', 'Status: draft / no run ID supplied; fill with measured results before publishing'], + ['| Git commit | TBD |', `| Git commit | ${safeCommand('git rev-parse --short HEAD')} |`], + ['| Dirty tree | TBD |', `| Dirty tree | ${safeCommand('git status --short').split('\n').filter(Boolean).length ? 
'yes' : 'no'} |`], + ['| Node.js version | TBD |', `| Node.js version | ${process.version} |`], + ['| k6 version | TBD |', `| k6 version | ${serviceVersions().k6} |`], + ['| Dataset | TBD |', `| Dataset | ${dataset} |`], + ]); + + for (const [needle, value] of replacements) { + report = report.replace(needle, value); + } + + mkdirSync(dirname(output), { recursive: true }); + writeFileSync(output, report); + console.log(output); + process.exit(0); +} + +const files = { + ingest: evidencePath('ingest-throughput', runId), + hot: evidencePath('hot-tenant', runId), + hotDb: evidencePath('hot-tenant-db', runId), + dashboard: evidencePath('dashboard-query', runId), + cache: evidencePath('dashboard-cache', runId), + worker: evidencePath('worker-catchup', runId), + backpressure: evidencePath('backpressure', runId), +}; + +const summaries = { + ingest: readJsonIfExists(files.ingest), + hot: readJsonIfExists(files.hot), + hotDb: readJsonIfExists(files.hotDb), + dashboard: readJsonIfExists(files.dashboard), + cache: readJsonIfExists(files.cache), + worker: readJsonIfExists(files.worker), + backpressure: readJsonIfExists(files.backpressure), +}; + +const rawEvents = dbCount('SELECT count(*) FROM events;'); +const dailyAggregates = dbCount('SELECT count(*) FROM daily_aggregates;'); +const partitionCount = dbCount("SELECT count(*) FROM pg_inherits WHERE inhparent = 'events'::regclass;"); +const workerLag = summaries.worker?.kafka_lag_after?.total_lag; +const commands = commandRows(runId, summaries); +const allQueryPlans = queryPlanFiles(); +const runScopedQueryPlans = allQueryPlans.filter((file) => isRunScopedQueryPlan(file, runId)); +const referenceQueryPlans = allQueryPlans.filter((file) => !isRunScopedQueryPlan(file, runId)); +const versions = serviceVersions(); + +const resultRows = [ + k6Row('Ingest throughput', 'pnpm benchmark:ingest', summaries.ingest, files.ingest), + k6Row('Hot tenant', 'pnpm benchmark:hot-tenant', summaries.hot, files.hot), + hotDbRow(summaries.hotDb, 
files.hotDb), + k6Row('Dashboard query', 'pnpm benchmark:dashboard', summaries.dashboard, files.dashboard), + cacheRow(summaries.cache, files.cache), + workerRow(summaries.worker, files.worker), + k6Row('Backpressure', 'pnpm benchmark:backpressure', summaries.backpressure, files.backpressure), +].join('\n'); + +const report = `# PulseOps Benchmark Report: ${date} + +Status: evidence-backed local report for run ID \`${runId}\`; not production-scale + +## Environment + +| Field | Value | +| --- | --- | +| Git commit | \`${safeCommand('git rev-parse HEAD')}\` | +| Dirty tree | ${safeCommand('git status --short').split('\n').filter(Boolean).length ? 'yes' : 'no'} | +| Machine | ${os.cpus()[0]?.model || 'TBD'}, ${os.cpus().length} logical CPUs, ${(os.totalmem() / 1024 / 1024 / 1024).toFixed(2)} GiB host memory | +| Docker resources | ${dockerResources()} | +| OS | ${os.type()} ${os.release()} ${os.arch()} | +| Node.js version | ${process.version} | +| PostgreSQL version | ${versions.postgres} | +| Redis version | ${versions.redis} | +| Kafka version | ${versions.kafka} | +| PostgreSQL row count | ${rawEvents} raw events | +| Daily aggregate row count | ${dailyAggregates} rows | +| Event partitions | ${partitionCount} child partitions | +| k6 version | ${versions.k6} | +| Dataset | ${dataset} | + +## Commands + +\`\`\`bash +${commands.text} +\`\`\` + +Run-specific evidence files found for this report: ${commands.found.length ? commands.found.join(', ') : 'none'}. +If only part of the suite was run, missing evidence stays marked as \`not found\` below. 
+ +## Results + +| Test | Command | Throughput | p50 latency | p95 latency | p99 latency | Error rate | Kafka lag | DB notes | Result | +| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | +${resultRows} + +## Run-Scoped Query Plans + +| Query | Plan file | Observation | +| --- | --- | --- | +${queryPlanRows(runScopedQueryPlans, `Captured for run ID ${runId}; read file for row counts and interpretation`)} + +## Reference Query Plans + +These saved EXPLAIN ANALYZE files are repository evidence, not generated by this benchmark report unless they explicitly mention run ID \`${runId}\`. + +| Query | Plan file | Observation | +| --- | --- | --- | +${queryPlanRows(referenceQueryPlans, 'Reference EXPLAIN ANALYZE evidence; cite separately from this benchmark run')} + +## Evidence Files + +| File | Description | +| --- | --- | +${evidenceRows([ + { label: 'Raw k6 ingest summary JSON', path: files.ingest }, + { label: 'Raw k6 hot-tenant summary JSON', path: files.hot }, + { label: 'Hot-tenant PostgreSQL evidence JSON', path: files.hotDb }, + { label: 'Raw k6 dashboard-query summary JSON', path: files.dashboard }, + { label: 'Cold/warm GraphQL cache JSON measurement', path: files.cache }, + { label: 'Worker catch-up JSON measurement', path: files.worker }, + { label: 'Raw k6 backpressure summary JSON', path: files.backpressure }, +])} + +## Claims Allowed From This Run + +- The numbers in the table are local measurements for run ID \`${runId}\` only. +- Kafka decoupling can be discussed when ingest acceptance and worker catch-up or lag evidence are both present. +- Cache claims are limited to the cold/warm GraphQL measurement if the dashboard cache evidence file exists. +- Worker throughput claims are limited to the bounded worker catch-up workload if the worker evidence file exists. 
+- Hot-tenant database claims are limited to the aggregate-key pressure, representative EXPLAIN timings, reconciliation status, and after-run PostgreSQL snapshot in the hot-tenant DB evidence file if present. +- Query plan claims from this run require run-scoped files above. Otherwise cite the reference query-plan files separately. + +## Claims Not Supported By This Run + +- Do not claim production scale, production readiness, or a fixed capacity limit. +- Do not extrapolate beyond the exact workload, machine, Docker resources, and dataset above. +- Do not claim long-duration or million-event tenant-skew behavior unless that evidence file is present. +- Do not claim realistic cache hit ratio from a cold/warm smoke measurement. +- Do not claim Kafka lag limits beyond the captured lag evidence${Number.isFinite(workerLag) ? `; this run's worker final lag was ${workerLag}` : ''}. +- The fallback k6 runner is pinned to \`${process.env.K6_DOCKER_IMAGE || 'grafana/k6:2.0.0'}\`; record a new exact version if you override it or use a local k6 binary. +`; + +mkdirSync(dirname(output), { recursive: true }); +writeFileSync(output, report); +console.log(output); diff --git a/scripts/capture-query-plans.sh b/scripts/capture-query-plans.sh new file mode 100755 index 0000000..1e4940e --- /dev/null +++ b/scripts/capture-query-plans.sh @@ -0,0 +1,256 @@ +#!/usr/bin/env bash +set -euo pipefail + +OUT_DIR="${OUT_DIR:-docs/query-plans}" +DB_CONTAINER="${DB_CONTAINER:-pulseops-postgres}" +DB_USER="${DB_USER:-pulseops}" +DB_NAME="${DB_NAME:-pulseops_dev}" +RUN_ID="${RUN_ID:-}" +STAMP="${STAMP:-${RUN_ID:-$(date +%F)}}" +ORG_ID="${ORG_ID:-00000000-0000-0000-0000-000000000001}" +PROJECT_ID="${PROJECT_ID:-00000000-0000-0000-0000-000000000002}" +CACHE_EVIDENCE="${CACHE_EVIDENCE:-}" + +if [[ -n "$RUN_ID" && ! "$RUN_ID" =~ ^[A-Za-z0-9._-]+$ ]]; then + echo "RUN_ID may only contain letters, numbers, dots, underscores, and hyphens: $RUN_ID" >&2 + exit 1 +fi + +if [[ ! 
# Capture one query plan, plus the context needed to interpret it, into a
# Markdown file under $OUT_DIR.
#
# Arguments:
#   $1  name            file-name suffix; the file is "$OUT_DIR/${STAMP}-<name>.md"
#   $2  title           Markdown H1 title
#   $3  query           SQL text to run (may contain several statements)
#   $4  interpretation  prose written under "## Interpretation"
#
# Reads OUT_DIR, STAMP, RUN_ID, ORG_ID, PROJECT_ID, DB_CONTAINER, DB_USER and
# DB_NAME. Prints the path of the written file on stdout.
#
# Each SQL statement is sent as its own psql command (repeated -c flags, or
# stdin for the caller-supplied query). psql releases before 15 print only the
# LAST result of a multi-statement -c string, which would silently drop row
# counts and the EXPLAIN output of a "BEGIN; ...; EXPLAIN ...; ROLLBACK;" query.
capture_plan() {
  local name="$1"
  local title="$2"
  local query="$3"
  local interpretation="$4"
  local file="$OUT_DIR/${STAMP}-${name}.md"

  {
    echo "# $title"
    echo
    echo "Captured: $(date -u +"%Y-%m-%dT%H:%M:%SZ")"
    echo "Run ID: ${RUN_ID:-not set}"
    echo "Git commit: $(git rev-parse HEAD 2>/dev/null || echo unknown)"
    echo "Command: RUN_ID=${RUN_ID:-} ./scripts/capture-query-plans.sh"
    echo "Target org_id: $ORG_ID"
    echo "Target project_id: $PROJECT_ID"
    echo
    echo "## PostgreSQL Version"
    echo
    echo '```text'
    psql_exec -Atc "SHOW server_version;"
    echo '```'
    echo
    echo "## Benchmark Run Counts"
    echo
    echo '```text'
    if [[ -n "$RUN_ID" ]]; then
      psql_exec -At \
        -c "SELECT 'events_for_run_id=' || count(*) FROM events WHERE properties->>'run_id' = '$RUN_ID';" \
        -c "SELECT 'target_tenant_events_for_run_id=' || count(*) FROM events WHERE properties->>'run_id' = '$RUN_ID' AND org_id = '$ORG_ID' AND project_id = '$PROJECT_ID';"
    else
      echo "RUN_ID not supplied; run-specific counts not captured."
    fi
    echo '```'
    echo
    echo "## Table Row Counts"
    echo
    echo '```text'
    psql_exec -At \
      -c "SELECT 'events=' || count(*) FROM events;" \
      -c "SELECT 'daily_aggregates=' || count(*) FROM daily_aggregates;" \
      -c "SELECT 'hourly_aggregates=' || count(*) FROM hourly_aggregates;" \
      -c "SELECT 'mv_dashboard_metrics=' || count(*) FROM mv_dashboard_metrics;"
    echo '```'
    echo
    echo "## Relevant Indexes"
    echo
    echo '```text'
    psql_exec -Atc "SELECT tablename || ': ' || indexname || ' => ' || indexdef FROM pg_indexes WHERE schemaname = 'public' AND tablename IN ('events', 'daily_aggregates', 'hourly_aggregates', 'mv_dashboard_metrics') ORDER BY tablename, indexname;"
    echo '```'
    echo
    echo "## Event Partitions"
    echo
    echo '```text'
    psql_exec -Atc "SELECT inhrelid::regclass::text FROM pg_inherits WHERE inhparent = 'events'::regclass ORDER BY 1;"
    echo '```'
    echo
    echo "## Query"
    echo
    echo '```sql'
    echo "$query"
    echo '```'
    echo
    echo "## EXPLAIN ANALYZE"
    echo
    echo '```text'
    # Feed the query on stdin (-f -) so psql runs and prints every statement
    # in turn; "docker exec -i" is required to attach stdin to the container.
    printf '%s\n' "$query" \
      | docker exec -i "$DB_CONTAINER" psql -X -U "$DB_USER" -d "$DB_NAME" -v ON_ERROR_STOP=1 -f -
    echo '```'
    echo
    echo "## Interpretation"
    echo
    echo "$interpretation"
  } > "$file"

  echo "$file"
}
"RUN_ID not supplied; run-specific counts not captured." + fi + echo '```' + echo + echo "## Table Row Counts" + echo + echo '```text' + psql_exec -Atc "SELECT 'events=' || count(*) FROM events; SELECT 'daily_aggregates=' || count(*) FROM daily_aggregates; SELECT 'hourly_aggregates=' || count(*) FROM hourly_aggregates; SELECT 'mv_dashboard_metrics=' || count(*) FROM mv_dashboard_metrics;" + echo '```' + echo + echo "## Relevant Indexes" + echo + echo '```text' + psql_exec -Atc "SELECT tablename || ': ' || indexname || ' => ' || indexdef FROM pg_indexes WHERE schemaname = 'public' AND tablename IN ('events', 'daily_aggregates', 'hourly_aggregates', 'mv_dashboard_metrics') ORDER BY tablename, indexname;" + echo '```' + echo + echo "## GraphQL Query Text" + echo + echo '```graphql' + echo "query DashboardCacheBenchmark(\$orgId: ID!, \$projectId: ID!, \$startDate: Date!, \$endDate: Date!) {" + echo " metrics(orgId: \$orgId, projectId: \$projectId, startDate: \$startDate, endDate: \$endDate) {" + echo " totalEvents" + echo " dailyActiveUsers { date value }" + echo " topEvents { eventName count }" + echo " dateRange { start end }" + echo " }" + echo "}" + echo '```' + echo + echo "## EXPLAIN ANALYZE" + echo + echo "No PostgreSQL EXPLAIN ANALYZE is recorded for cached vs uncached GraphQL timing. The warm path is served through Redis and resolver-level cache behavior, so a PostgreSQL plan would not represent the cached request." + echo + echo "## Dashboard Cache Evidence" + echo + if [[ -n "$RUN_ID" && -f "$evidence_file" ]]; then + echo "Run-scoped cache evidence: \`$evidence_file\`." + elif [[ -n "$RUN_ID" ]]; then + echo "Expected run-scoped cache evidence file was not found: \`$evidence_file\`." + else + echo "Set RUN_ID to link this note to a run-scoped dashboard cache evidence file." + fi + echo + echo "## Interpretation" + echo + echo "Use the dashboard cache JSON evidence for cold and warm GraphQL timings. 
Use the PostgreSQL plans in the other files from this run for database access paths; do not invent a cached GraphQL EXPLAIN plan." + } > "$file" + + echo "$file" +} + +capture_plan \ + "tenant-dashboard-index-disabled" \ + "Tenant Dashboard Raw Event Query With Index Scans Disabled" \ + "BEGIN; +SET LOCAL enable_indexscan = off; +SET LOCAL enable_bitmapscan = off; +EXPLAIN (ANALYZE, BUFFERS) +SELECT id, event_id, event_name, user_id, session_id, timestamp, properties +FROM events +WHERE org_id = '$ORG_ID' + AND project_id = '$PROJECT_ID' + AND timestamp >= NOW() - INTERVAL '7 days' +ORDER BY timestamp DESC +LIMIT 100; +ROLLBACK;" \ + "Index and bitmap scans are disabled in this session to show the cost shape when PostgreSQL cannot use the tenant/time access path. This is not a dropped-index benchmark; it is planner-controlled evidence for comparison." + +capture_plan \ + "tenant-dashboard-chosen-index" \ + "Tenant Dashboard Raw Event Query With Chosen Indexes" \ + "EXPLAIN (ANALYZE, BUFFERS) +SELECT id, event_id, event_name, user_id, session_id, timestamp, properties +FROM events +WHERE org_id = '$ORG_ID' + AND project_id = '$PROJECT_ID' + AND timestamp >= NOW() - INTERVAL '7 days' +ORDER BY timestamp DESC +LIMIT 100;" \ + "This is the normal tenant dashboard raw-event access path for this run. Use this plan to verify whether PostgreSQL chooses the tenant/project/timestamp index or another timestamp-oriented partition index, and how many partition children are touched." + +capture_plan \ + "partition-pruning-24h" \ + "Partition Pruning Over Last 24 Hours" \ + "EXPLAIN (ANALYZE, BUFFERS) +SELECT COUNT(*) AS events_in_window +FROM events +WHERE org_id = '$ORG_ID' + AND project_id = '$PROJECT_ID' + AND timestamp >= NOW() - INTERVAL '24 hours' + AND timestamp < NOW();" \ + "This query demonstrates the partition set touched for a narrow dashboard window. Compare the child tables and subplans removed here with the 30-day plan from the same run." 
+ +capture_plan \ + "partition-pruning-30d" \ + "Partition Pruning Over Last 30 Days" \ + "EXPLAIN (ANALYZE, BUFFERS) +SELECT COUNT(*) AS events_in_window +FROM events +WHERE org_id = '$ORG_ID' + AND project_id = '$PROJECT_ID' + AND timestamp >= NOW() - INTERVAL '30 days' + AND timestamp < NOW();" \ + "This 30-day query is the broader scan/pruning comparison for the 24-hour plan from the same run. With monthly local partitions, it can touch more than one child partition depending on the date and available benchmark rows." + +capture_plan \ + "aggregate-daily-dashboard" \ + "Daily Aggregate Dashboard Query" \ + "EXPLAIN (ANALYZE, BUFFERS) +SELECT date, metric_name, SUM(metric_value) AS metric_value +FROM daily_aggregates +WHERE org_id = '$ORG_ID' + AND project_id = '$PROJECT_ID' + AND metric_name IN ('dau', 'event_count', 'total_events') + AND date >= CURRENT_DATE - INTERVAL '90 days' +GROUP BY date, metric_name +ORDER BY date ASC, metric_name ASC;" \ + "This plan is the dashboard aggregate read path. It should touch aggregate rows rather than raw event rows." + +capture_plan \ + "materialized-dashboard" \ + "Materialized Dashboard Metrics Query" \ + "EXPLAIN (ANALYZE, BUFFERS) +SELECT date, event_count, unique_users, unique_sessions, events_by_name +FROM mv_dashboard_metrics +WHERE org_id = '$ORG_ID' + AND project_id = '$PROJECT_ID' + AND date >= CURRENT_DATE - INTERVAL '30 days' +ORDER BY date DESC;" \ + "This plan captures the materialized dashboard read path. It is not Redis cache evidence; Redis cache timing is captured through GraphQL/load-test summaries." 
/**
 * Print the command-line help text to stderr and terminate the process with
 * exit status 1. Never returns.
 *
 * The `<...>` placeholders name the value each option expects.
 */
function usage(): never {
  console.error(`Usage:
  pnpm benchmark:generate -- --tenants 100 --events 100000 --days 30 --hot-tenant-ratio 0.6 --late-arrival-ratio 0.05 --duplicate-ratio 0.01 --output jsonl

Options:
  --tenants <n>                 Number of synthetic tenants. Default: 100
  --events <n>                  Number of events to generate before duplicates. Default: 10000
  --days <7|30|90|n>            Timestamp spread in days. Default: 30
  --hot-tenant-ratio <0..1>     Share of events assigned to hot tenants. Default: 0.6
  --late-arrival-ratio <0..1>   Share of events shifted earlier to simulate late arrivals. Default: 0.05
  --duplicate-ratio <0..1>      Share of generated events duplicated. Default: 0.01
  --output <jsonl|direct>       Print JSONL to stdout or send to ingest API. Default: jsonl
  --batch-size <n>              Direct ingest batch size. Default: 100
  --burst-ratio <0..1>          Share of events concentrated into burst windows. Default: 0.15
  --evidence-file <path>        Optional metadata JSON output path.

Environment for --output direct:
  API_URL=http://localhost:3001
  API_KEY=demo_key_change_this`);
  process.exit(1);
}
/**
 * Build a deterministic UUID-shaped string from two numbers.
 *
 * The final group is `prefix * 100000 + offset` in hexadecimal, left-padded
 * with zeros and cut to its last 12 characters; the other groups are fixed.
 *
 * @param prefix Number multiplied by 100000 before adding `offset`.
 * @param offset Number added to the scaled prefix.
 * @returns A string of the form `00000000-0000-4000-8000-xxxxxxxxxxxx`.
 */
function stableUuid(prefix: number, offset: number): string {
  const combined = prefix * 100000 + offset;
  const tail = combined.toString(16).padStart(12, '0').slice(-12);
  return ['00000000', '0000', '4000', '8000', tail].join('-');
}
/**
 * Pick one item at random, with probability proportional to its `weight`.
 *
 * @typeParam T Item type; must expose a numeric `weight`.
 * @param items Candidates to choose from.
 * @returns The chosen item. Falls back to the last item if the running total
 *   never reaches zero.
 */
function weightedPick<T extends { weight: number }>(items: T[]): T {
  const total = items.reduce((sum, item) => sum + item.weight, 0);
  // Walk the items, subtracting each weight until the random cursor is used up.
  let cursor = Math.random() * total;
  for (const item of items) {
    cursor -= item.weight;
    if (cursor <= 0) return item;
  }
  return items[items.length - 1];
}

/**
 * Pick one item uniformly at random.
 *
 * @typeParam T Item type.
 * @param items Candidates to choose from.
 * @returns The chosen item.
 */
function pick<T>(items: T[]): T {
  return items[Math.floor(Math.random() * items.length)];
}
Math.round((20 + Math.random() * 500) * 100) / 100 : 1; + + return { + event_name: eventName, + user_id: `${tenant.tenantId}_user_${Math.floor(Math.random() * 5000)}`, + session_id: randomUUID(), + project_id: tenant.projectId, + properties: { + tenant_id: tenant.tenantId, + synthetic_org_id: tenant.orgId, + synthetic_project_id: tenant.projectId, + tenant_class: tenant.class, + sequence, + value, + page: pick(PAGES), + segment: pick(SEGMENTS), + region: pick(REGIONS), + device: pick(DEVICES), + product: pick(PRODUCTS), + late_arrival: timing.lateArrival, + burst: timing.burst, + generated_at: new Date().toISOString(), + }, + timestamp: timing.timestamp, + }; +} + +function generateEvents(options: Options): GeneratedEvent[] { + const tenants = buildTenants(options.tenants); + const hotTenants = tenants.filter((tenant) => tenant.class === 'hot'); + const nonHotTenants = tenants.filter((tenant) => tenant.class !== 'hot'); + const events: GeneratedEvent[] = []; + + for (let i = 0; i < options.events; i += 1) { + const tenantPool = Math.random() < options.hotTenantRatio ? hotTenants : nonHotTenants; + const tenant = weightedPick(tenantPool.length ? 
/**
 * POST the events to the ingest API in batches of `options.batchSize`.
 *
 * Batches are sent one after another to `${options.apiUrl}/api/v1/events/batch`
 * with the API key in the `X-API-Key` header. A non-2xx response counts the
 * whole batch as failed and is logged to stderr; a network error from `fetch`
 * is not caught and rejects the returned promise.
 *
 * @param events Events to send.
 * @param options Generator options; reads `apiUrl`, `apiKey` and `batchSize`.
 * @returns Totals of events in accepted and failed batches.
 */
async function sendDirect(
  events: GeneratedEvent[],
  options: Options,
): Promise<{ accepted: number; failed: number }> {
  const endpoint = `${options.apiUrl}/api/v1/events/batch`;
  let accepted = 0;
  let failed = 0;

  for (let i = 0; i < events.length; i += options.batchSize) {
    const batch = events.slice(i, i + options.batchSize);
    const response = await fetch(endpoint, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'X-API-Key': options.apiKey,
      },
      body: JSON.stringify({ events: batch }),
    });

    // Always read the body so the connection is released instead of waiting
    // for garbage collection. The text is only diagnostic, so a failure to
    // read it is deliberately reduced to "no detail".
    const detail = await response.text().catch(() => '');

    if (response.ok) {
      accepted += batch.length;
    } else {
      failed += batch.length;
      const suffix = detail ? `: ${detail.slice(0, 200)}` : '';
      console.error(`Batch ${i / options.batchSize + 1} failed: ${response.status} ${response.statusText}${suffix}`);
    }
  }

  console.error(`Direct ingest complete: ${accepted} accepted, ${failed} failed`);
  return { accepted, failed };
}
options.tenants, + events_requested: options.events, + days: options.days, + hot_tenant_ratio: options.hotTenantRatio, + late_arrival_ratio: options.lateArrivalRatio, + duplicate_ratio: options.duplicateRatio, + burst_ratio: options.burstRatio, + output: options.output, + }, + events_emitted: events.length, + late_arrivals: lateArrivals, + duplicates, + bursts, + class_counts: Object.fromEntries(classCounts), + hottest_tenant: hottestTenant ? { tenant_id: hottestTenant[0], events: hottestTenant[1] } : null, + }; +} + +async function main() { + const options = parseOptions(process.argv.slice(2)); + const events = generateEvents(options); + const summary = summarize(events, options); + + if (options.evidenceFile) { + writeFileSync(options.evidenceFile, `${JSON.stringify(summary, null, 2)}\n`); + } + + if (options.output === 'jsonl') { + for (const event of events) { + process.stdout.write(`${JSON.stringify(event)}\n`); + } + process.stderr.write(`${JSON.stringify(summary, null, 2)}\n`); + return; + } + + await sendDirect(events, options); + process.stderr.write(`${JSON.stringify(summary, null, 2)}\n`); +} + +main().catch((error) => { + console.error(error instanceof Error ? 
error.message : error); + process.exit(1); +}); diff --git a/scripts/health-check.sh b/scripts/health-check.sh index 8b61456..f9f540f 100755 --- a/scripts/health-check.sh +++ b/scripts/health-check.sh @@ -23,10 +23,10 @@ fi # Check Kafka echo -n "Kafka: " -if docker ps | grep -q "pulseops-kafka"; then +if docker exec pulseops-kafka /opt/kafka/bin/kafka-topics.sh --bootstrap-server localhost:9092 --list > /dev/null 2>&1; then echo "✓ Running" else - echo "✗ Not running" + echo "✗ Not responding" exit 1 fi @@ -36,6 +36,7 @@ if curl -s http://localhost:3001/health > /dev/null 2>&1; then echo "✓ Healthy (http://localhost:3001)" else echo "✗ Not responding" + exit 1 fi # Check GraphQL API @@ -44,6 +45,16 @@ if curl -s http://localhost:3002/health > /dev/null 2>&1; then echo "✓ Healthy (http://localhost:3002)" else echo "✗ Not responding" + exit 1 +fi + +# Check Worker +echo -n "Worker: " +if docker ps --filter "name=pulseops-worker" --filter "status=running" --format "{{.Names}}" | grep -q "pulseops-worker"; then + echo "✓ Running" +else + echo "✗ Not running" + exit 1 fi echo "" diff --git a/scripts/init-db.sql b/scripts/init-db.sql index 09cced7..00c5042 100644 --- a/scripts/init-db.sql +++ b/scripts/init-db.sql @@ -1,6 +1,9 @@ --- Initialize database with time-series partitioning +-- Initialize PulseOps database foundation. +-- Raw events are partitioned by event timestamp because dashboard, retention, +-- and benchmark queries operate on event time, not ingest time. 
+ +CREATE EXTENSION IF NOT EXISTS pgcrypto; --- Organizations table CREATE TABLE IF NOT EXISTS organizations ( id UUID PRIMARY KEY DEFAULT gen_random_uuid(), name VARCHAR(255) NOT NULL, @@ -8,99 +11,343 @@ CREATE TABLE IF NOT EXISTS organizations ( updated_at TIMESTAMPTZ DEFAULT NOW() ); --- Projects table CREATE TABLE IF NOT EXISTS projects ( id UUID PRIMARY KEY DEFAULT gen_random_uuid(), org_id UUID NOT NULL REFERENCES organizations(id) ON DELETE CASCADE, + tenant_id UUID GENERATED ALWAYS AS (org_id) STORED, name VARCHAR(255) NOT NULL, created_at TIMESTAMPTZ DEFAULT NOW(), - updated_at TIMESTAMPTZ DEFAULT NOW() + updated_at TIMESTAMPTZ DEFAULT NOW(), + CONSTRAINT projects_id_org_id_key UNIQUE (id, org_id) ); --- API Keys table +CREATE INDEX IF NOT EXISTS idx_projects_org ON projects(org_id); +CREATE INDEX IF NOT EXISTS idx_projects_tenant ON projects(tenant_id); + CREATE TABLE IF NOT EXISTS api_keys ( id UUID PRIMARY KEY DEFAULT gen_random_uuid(), org_id UUID NOT NULL REFERENCES organizations(id) ON DELETE CASCADE, + tenant_id UUID GENERATED ALWAYS AS (org_id) STORED, + project_id UUID, key_hash VARCHAR(255) NOT NULL UNIQUE, + key_fingerprint CHAR(64) NOT NULL UNIQUE, name VARCHAR(255), created_at TIMESTAMPTZ DEFAULT NOW(), last_used_at TIMESTAMPTZ, - active BOOLEAN DEFAULT TRUE + active BOOLEAN DEFAULT TRUE, + CONSTRAINT api_keys_project_org_fk + FOREIGN KEY (project_id, org_id) REFERENCES projects(id, org_id) ON DELETE CASCADE, + CONSTRAINT api_keys_fingerprint_hex + CHECK (key_fingerprint ~ '^[0-9a-f]{64}$') ); -CREATE INDEX idx_api_keys_org ON api_keys(org_id); +CREATE INDEX IF NOT EXISTS idx_api_keys_org ON api_keys(org_id); +CREATE INDEX IF NOT EXISTS idx_api_keys_project ON api_keys(project_id) WHERE project_id IS NOT NULL; +CREATE INDEX IF NOT EXISTS idx_api_keys_active_fingerprint ON api_keys(key_fingerprint) WHERE active = true; +CREATE INDEX IF NOT EXISTS idx_api_keys_tenant_project ON api_keys(tenant_id, project_id); --- Events table (partitioned by 
timestamp) CREATE TABLE IF NOT EXISTS events ( id BIGSERIAL, - org_id UUID NOT NULL, + event_id UUID NOT NULL DEFAULT gen_random_uuid(), + org_id UUID NOT NULL REFERENCES organizations(id) ON DELETE CASCADE, + tenant_id UUID GENERATED ALWAYS AS (org_id) STORED, project_id UUID NOT NULL, event_name VARCHAR(255) NOT NULL, user_id VARCHAR(255), session_id UUID, - properties JSONB, + properties JSONB DEFAULT '{}', timestamp TIMESTAMPTZ NOT NULL, + received_at TIMESTAMPTZ DEFAULT NOW(), created_at TIMESTAMPTZ DEFAULT NOW(), - PRIMARY KEY (id, timestamp) + PRIMARY KEY (id, timestamp), + CONSTRAINT events_org_project_event_id_timestamp_key UNIQUE (org_id, project_id, event_id, timestamp), + CONSTRAINT events_project_org_fk + FOREIGN KEY (project_id, org_id) REFERENCES projects(id, org_id) ON DELETE CASCADE ) PARTITION BY RANGE (timestamp); --- Create partitions for 2026 -CREATE TABLE IF NOT EXISTS events_2026_01 PARTITION OF events - FOR VALUES FROM ('2026-01-01') TO ('2026-02-01'); -CREATE TABLE IF NOT EXISTS events_2026_02 PARTITION OF events - FOR VALUES FROM ('2026-02-01') TO ('2026-03-01'); -CREATE TABLE IF NOT EXISTS events_2026_03 PARTITION OF events - FOR VALUES FROM ('2026-03-01') TO ('2026-04-01'); -CREATE TABLE IF NOT EXISTS events_2026_04 PARTITION OF events - FOR VALUES FROM ('2026-04-01') TO ('2026-05-01'); -CREATE TABLE IF NOT EXISTS events_2026_05 PARTITION OF events - FOR VALUES FROM ('2026-05-01') TO ('2026-06-01'); -CREATE TABLE IF NOT EXISTS events_2026_06 PARTITION OF events - FOR VALUES FROM ('2026-06-01') TO ('2026-07-01'); -CREATE TABLE IF NOT EXISTS events_2026_07 PARTITION OF events - FOR VALUES FROM ('2026-07-01') TO ('2026-08-01'); -CREATE TABLE IF NOT EXISTS events_2026_08 PARTITION OF events - FOR VALUES FROM ('2026-08-01') TO ('2026-09-01'); -CREATE TABLE IF NOT EXISTS events_2026_09 PARTITION OF events - FOR VALUES FROM ('2026-09-01') TO ('2026-10-01'); -CREATE TABLE IF NOT EXISTS events_2026_10 PARTITION OF events - FOR VALUES FROM 
('2026-10-01') TO ('2026-11-01'); -CREATE TABLE IF NOT EXISTS events_2026_11 PARTITION OF events - FOR VALUES FROM ('2026-11-01') TO ('2026-12-01'); -CREATE TABLE IF NOT EXISTS events_2026_12 PARTITION OF events - FOR VALUES FROM ('2026-12-01') TO ('2027-01-01'); - --- Indexes for events +CREATE OR REPLACE FUNCTION create_events_monthly_partitions(window_start DATE, window_end DATE) +RETURNS void AS $$ +DECLARE + partition_start DATE := DATE_TRUNC('month', window_start)::DATE; + partition_end DATE; + partition_name TEXT; +BEGIN + IF window_end <= window_start THEN + RAISE EXCEPTION 'window_end must be after window_start'; + END IF; + + WHILE partition_start < window_end LOOP + partition_end := (partition_start + INTERVAL '1 month')::DATE; + partition_name := 'events_' || TO_CHAR(partition_start, 'YYYY_MM'); + + EXECUTE format( + 'CREATE TABLE IF NOT EXISTS %I PARTITION OF events FOR VALUES FROM (%L) TO (%L)', + partition_name, + partition_start::TIMESTAMPTZ, + partition_end::TIMESTAMPTZ + ); + + partition_start := partition_end; + END LOOP; +END; +$$ LANGUAGE plpgsql; + +-- Covers the 7/30/90 day benchmark windows plus near-future ingestion. 
+SELECT create_events_monthly_partitions( + (CURRENT_DATE - INTERVAL '90 days')::DATE, + (CURRENT_DATE + INTERVAL '90 days')::DATE +); + CREATE INDEX IF NOT EXISTS idx_events_org_time ON events(org_id, timestamp DESC); CREATE INDEX IF NOT EXISTS idx_events_project_time ON events(project_id, timestamp DESC); -CREATE INDEX IF NOT EXISTS idx_events_session ON events(session_id); +CREATE INDEX IF NOT EXISTS idx_events_org_project_time ON events(org_id, project_id, timestamp DESC); +CREATE INDEX IF NOT EXISTS idx_events_event_name_time ON events(event_name, timestamp DESC); +CREATE INDEX IF NOT EXISTS idx_events_user_time ON events(org_id, project_id, user_id, timestamp DESC) WHERE user_id IS NOT NULL; +CREATE INDEX IF NOT EXISTS idx_events_session ON events(session_id) WHERE session_id IS NOT NULL; +CREATE INDEX IF NOT EXISTS idx_events_event_id ON events(org_id, project_id, event_id); +CREATE INDEX IF NOT EXISTS idx_events_properties_gin ON events USING GIN (properties); + +CREATE TABLE IF NOT EXISTS event_dedup_keys ( + org_id UUID NOT NULL REFERENCES organizations(id) ON DELETE CASCADE, + tenant_id UUID GENERATED ALWAYS AS (org_id) STORED, + project_id UUID NOT NULL, + event_id UUID NOT NULL, + first_seen_at TIMESTAMPTZ DEFAULT NOW(), + PRIMARY KEY (org_id, project_id, event_id), + CONSTRAINT event_dedup_keys_project_org_fk + FOREIGN KEY (project_id, org_id) REFERENCES projects(id, org_id) ON DELETE CASCADE +); + +CREATE INDEX IF NOT EXISTS idx_event_dedup_tenant_seen + ON event_dedup_keys(tenant_id, first_seen_at DESC); --- Daily aggregates table CREATE TABLE IF NOT EXISTS daily_aggregates ( id BIGSERIAL PRIMARY KEY, - org_id UUID NOT NULL, + org_id UUID NOT NULL REFERENCES organizations(id) ON DELETE CASCADE, + tenant_id UUID GENERATED ALWAYS AS (org_id) STORED, project_id UUID NOT NULL, metric_name VARCHAR(100) NOT NULL, - metric_value NUMERIC, + metric_value NUMERIC NOT NULL DEFAULT 0, dimensions JSONB DEFAULT '{}', date DATE NOT NULL, computed_at TIMESTAMPTZ 
DEFAULT NOW(), + CONSTRAINT daily_aggregates_project_org_fk + FOREIGN KEY (project_id, org_id) REFERENCES projects(id, org_id) ON DELETE CASCADE, CONSTRAINT unique_aggregate UNIQUE(org_id, project_id, metric_name, date, dimensions) ); CREATE INDEX IF NOT EXISTS idx_aggregates_lookup ON daily_aggregates(org_id, project_id, date); CREATE INDEX IF NOT EXISTS idx_aggregates_metric ON daily_aggregates(metric_name, date); +CREATE INDEX IF NOT EXISTS idx_aggregates_tenant_metric_date ON daily_aggregates(tenant_id, metric_name, date DESC); + +CREATE OR REPLACE VIEW daily_metrics AS +SELECT + id, + org_id, + tenant_id, + project_id, + metric_name, + metric_value, + date::timestamptz AS bucket_start, + dimensions, + computed_at +FROM daily_aggregates; + +CREATE TABLE IF NOT EXISTS daily_active_users ( + org_id UUID NOT NULL REFERENCES organizations(id) ON DELETE CASCADE, + tenant_id UUID GENERATED ALWAYS AS (org_id) STORED, + project_id UUID NOT NULL, + date DATE NOT NULL, + user_id VARCHAR(255) NOT NULL, + first_seen_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + last_seen_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + event_count INTEGER NOT NULL DEFAULT 1, + PRIMARY KEY (org_id, project_id, date, user_id), + CONSTRAINT daily_active_users_project_org_fk + FOREIGN KEY (project_id, org_id) REFERENCES projects(id, org_id) ON DELETE CASCADE +); + +CREATE INDEX IF NOT EXISTS idx_daily_active_users_project_date + ON daily_active_users(org_id, project_id, date); +CREATE INDEX IF NOT EXISTS idx_daily_active_users_tenant_date + ON daily_active_users(tenant_id, project_id, date); + +CREATE TABLE IF NOT EXISTS hourly_aggregates ( + id BIGSERIAL PRIMARY KEY, + org_id UUID NOT NULL REFERENCES organizations(id) ON DELETE CASCADE, + tenant_id UUID GENERATED ALWAYS AS (org_id) STORED, + project_id UUID NOT NULL, + metric_name VARCHAR(100) NOT NULL, + metric_value NUMERIC NOT NULL DEFAULT 0, + dimensions JSONB DEFAULT '{}', + hour TIMESTAMPTZ NOT NULL, + computed_at TIMESTAMPTZ DEFAULT NOW(), + 
CONSTRAINT hourly_aggregates_project_org_fk + FOREIGN KEY (project_id, org_id) REFERENCES projects(id, org_id) ON DELETE CASCADE, + CONSTRAINT unique_hourly_aggregate UNIQUE(org_id, project_id, metric_name, hour, dimensions) +); + +CREATE INDEX IF NOT EXISTS idx_hourly_aggregates_lookup + ON hourly_aggregates(org_id, project_id, hour DESC); +CREATE INDEX IF NOT EXISTS idx_hourly_aggregates_metric + ON hourly_aggregates(metric_name, hour DESC); + +CREATE OR REPLACE VIEW hourly_metrics AS +SELECT + id, + org_id, + tenant_id, + project_id, + metric_name, + metric_value, + hour AS bucket_start, + dimensions, + computed_at +FROM hourly_aggregates; + +CREATE TABLE IF NOT EXISTS ingest_batches ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + org_id UUID NOT NULL REFERENCES organizations(id) ON DELETE CASCADE, + tenant_id UUID GENERATED ALWAYS AS (org_id) STORED, + project_id UUID, + batch_id VARCHAR(255), + status VARCHAR(50) NOT NULL DEFAULT 'accepted', + event_count INTEGER NOT NULL DEFAULT 0, + accepted_count INTEGER NOT NULL DEFAULT 0, + rejected_count INTEGER NOT NULL DEFAULT 0, + started_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + completed_at TIMESTAMPTZ, + metadata JSONB DEFAULT '{}', + CONSTRAINT ingest_batches_project_org_fk + FOREIGN KEY (project_id, org_id) REFERENCES projects(id, org_id) ON DELETE CASCADE +); + +CREATE INDEX IF NOT EXISTS idx_ingest_batches_tenant_time + ON ingest_batches(org_id, project_id, started_at DESC); + +CREATE TABLE IF NOT EXISTS processing_state ( + name VARCHAR(255) PRIMARY KEY, + state JSONB NOT NULL DEFAULT '{}', + updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW() +); + +CREATE TABLE IF NOT EXISTS worker_offsets ( + consumer_group VARCHAR(255) NOT NULL, + topic VARCHAR(255) NOT NULL, + partition_id INTEGER NOT NULL, + offset_value BIGINT NOT NULL, + updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + PRIMARY KEY (consumer_group, topic, partition_id) +); + +CREATE TABLE IF NOT EXISTS benchmark_runs ( + id UUID PRIMARY KEY DEFAULT 
gen_random_uuid(), + org_id UUID REFERENCES organizations(id) ON DELETE SET NULL, + project_id UUID, + name VARCHAR(255) NOT NULL, + window_days INTEGER NOT NULL CHECK (window_days IN (7, 30, 90)), + started_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + completed_at TIMESTAMPTZ, + parameters JSONB DEFAULT '{}', + results JSONB DEFAULT '{}', + notes TEXT, + CONSTRAINT benchmark_runs_project_org_fk + FOREIGN KEY (project_id, org_id) REFERENCES projects(id, org_id) ON DELETE SET NULL +); + +CREATE TABLE IF NOT EXISTS articles ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + org_id UUID NOT NULL REFERENCES organizations(id) ON DELETE CASCADE, + tenant_id UUID GENERATED ALWAYS AS (org_id) STORED, + project_id UUID NOT NULL, + external_id VARCHAR(255), + url TEXT, + title TEXT NOT NULL, + source VARCHAR(255), + author VARCHAR(255), + published_at TIMESTAMPTZ, + content_hash CHAR(64), + metadata JSONB DEFAULT '{}', + created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + UNIQUE (id, org_id, project_id), + UNIQUE (org_id, project_id, external_id), + CONSTRAINT articles_project_org_fk + FOREIGN KEY (project_id, org_id) REFERENCES projects(id, org_id) ON DELETE CASCADE, + CONSTRAINT articles_content_hash_hex + CHECK (content_hash IS NULL OR content_hash ~ '^[0-9a-f]{64}$') +); + +CREATE INDEX IF NOT EXISTS idx_articles_tenant_published + ON articles(org_id, project_id, published_at DESC); +CREATE INDEX IF NOT EXISTS idx_articles_content_hash + ON articles(content_hash) WHERE content_hash IS NOT NULL; + +CREATE TABLE IF NOT EXISTS article_evidence ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + org_id UUID NOT NULL REFERENCES organizations(id) ON DELETE CASCADE, + tenant_id UUID GENERATED ALWAYS AS (org_id) STORED, + project_id UUID NOT NULL, + article_id UUID NOT NULL, + event_id UUID, + evidence_type VARCHAR(100) NOT NULL, + metric_name VARCHAR(100), + metric_value NUMERIC, + dimensions JSONB DEFAULT '{}', + observed_at 
TIMESTAMPTZ NOT NULL DEFAULT NOW(), + source_event_timestamp TIMESTAMPTZ, + properties JSONB DEFAULT '{}', + created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + CONSTRAINT article_evidence_article_tenant_fk + FOREIGN KEY (article_id, org_id, project_id) REFERENCES articles(id, org_id, project_id) ON DELETE CASCADE +); + +CREATE INDEX IF NOT EXISTS idx_article_evidence_tenant_observed + ON article_evidence(org_id, project_id, observed_at DESC); +CREATE INDEX IF NOT EXISTS idx_article_evidence_article + ON article_evidence(article_id, observed_at DESC); +CREATE INDEX IF NOT EXISTS idx_article_evidence_event + ON article_evidence(org_id, project_id, event_id) WHERE event_id IS NOT NULL; + +CREATE TABLE IF NOT EXISTS tenant_usage_snapshots ( + id BIGSERIAL PRIMARY KEY, + org_id UUID NOT NULL REFERENCES organizations(id) ON DELETE CASCADE, + tenant_id UUID GENERATED ALWAYS AS (org_id) STORED, + project_id UUID, + captured_at TIMESTAMPTZ DEFAULT NOW(), + event_count BIGINT NOT NULL DEFAULT 0, + raw_bytes BIGINT NOT NULL DEFAULT 0, + dashboard_query_count BIGINT NOT NULL DEFAULT 0, + CONSTRAINT tenant_usage_project_org_fk + FOREIGN KEY (project_id, org_id) REFERENCES projects(id, org_id) ON DELETE CASCADE +); + +CREATE INDEX IF NOT EXISTS idx_tenant_usage_tenant_time + ON tenant_usage_snapshots(tenant_id, captured_at DESC); --- Insert demo organization and project -INSERT INTO organizations (id, name) VALUES - ('00000000-0000-0000-0000-000000000001', 'Demo Organization') -ON CONFLICT DO NOTHING; +INSERT INTO organizations (id, name) +VALUES ('00000000-0000-0000-0000-000000000001', 'Demo Organization') +ON CONFLICT (id) DO NOTHING; -INSERT INTO projects (id, org_id, name) VALUES - ('00000000-0000-0000-0000-000000000002', '00000000-0000-0000-0000-000000000001', 'Demo Project') -ON CONFLICT DO NOTHING; +INSERT INTO projects (id, org_id, name) +VALUES ('00000000-0000-0000-0000-000000000002', '00000000-0000-0000-0000-000000000001', 'Demo Project') +ON CONFLICT (id) DO NOTHING; 
--- Create demo API key (hash of 'demo_key_change_this') -INSERT INTO api_keys (org_id, key_hash, name) VALUES - ('00000000-0000-0000-0000-000000000001', '$2b$10$demo_hash_placeholder', 'Demo API Key') -ON CONFLICT DO NOTHING; +-- Actual key: demo_key_change_this +-- key_hash verified by scripts/test-hash.js. +INSERT INTO api_keys (id, org_id, project_id, key_hash, key_fingerprint, name, active) +VALUES ( + '00000000-0000-0000-0000-000000000003', + '00000000-0000-0000-0000-000000000001', + '00000000-0000-0000-0000-000000000002', + '$2b$10$NQbr7kAR4n9lQ1/JwS0jn.sEhYgR4Pi5VejnFudNV/LNpk9DIHSOy', + '3543274f3d56b4b5860dd25cfb944d60e953fbe9c67f3de2d1deaf2befae1b7f', + 'Demo API Key', + TRUE +) +ON CONFLICT (key_fingerprint) DO UPDATE SET + org_id = EXCLUDED.org_id, + project_id = EXCLUDED.project_id, + key_hash = EXCLUDED.key_hash, + name = EXCLUDED.name, + active = EXCLUDED.active; diff --git a/scripts/measure-dashboard-cache.ts b/scripts/measure-dashboard-cache.ts new file mode 100644 index 0000000..34ed58e --- /dev/null +++ b/scripts/measure-dashboard-cache.ts @@ -0,0 +1,202 @@ +#!/usr/bin/env node + +import { mkdir, writeFile } from 'node:fs/promises'; +import { dirname, resolve } from 'node:path'; +import { performance } from 'node:perf_hooks'; +import { config } from 'dotenv'; +import Redis from 'ioredis'; + +interface Options { + graphqlUrl: string; + redisUrl: string; + apiKey: string; + orgId: string; + projectId: string; + startDate: string; + endDate: string; + warmIterations: number; + runId: string; + output: string; +} + +interface Measurement { + label: string; + duration_ms: number; + status: number; + graphql_errors: number; + total_events: number | null; +} + +const repoRoot = resolve(__dirname, '..'); +config({ path: resolve(repoRoot, '.env') }); + +function argValue(argv: string[], name: string, fallback: string) { + const index = argv.indexOf(name); + return index === -1 ? 
fallback : argv[index + 1]; +} + +function parseOptions(argv: string[]): Options { + const runId = argValue( + argv, + '--run-id', + process.env.RUN_ID || new Date().toISOString().replace(/[:.]/g, '-') + ); + + return { + graphqlUrl: argValue(argv, '--graphql-url', process.env.GRAPHQL_URL || 'http://localhost:3002/graphql'), + redisUrl: argValue(argv, '--redis-url', process.env.REDIS_URL || 'redis://localhost:6379'), + apiKey: argValue(argv, '--api-key', process.env.API_KEY || 'demo_key_change_this'), + orgId: argValue(argv, '--org-id', process.env.ORG_ID || '00000000-0000-0000-0000-000000000001'), + projectId: argValue(argv, '--project-id', process.env.PROJECT_ID || '00000000-0000-0000-0000-000000000002'), + startDate: argValue(argv, '--start-date', process.env.START_DATE || '2026-06-01'), + endDate: argValue(argv, '--end-date', process.env.END_DATE || '2026-06-30'), + warmIterations: Number(argValue(argv, '--warm-iterations', process.env.WARM_ITERATIONS || '10')), + runId, + output: argValue(argv, '--output', `docs/benchmarks/evidence/dashboard-cache-${runId}.json`), + }; +} + +const query = ` +query DashboardCacheBenchmark($orgId: ID!, $projectId: ID!, $startDate: Date!, $endDate: Date!) 
{ + metrics(orgId: $orgId, projectId: $projectId, startDate: $startDate, endDate: $endDate) { + totalEvents + dailyActiveUsers { date value } + topEvents { eventName count } + dateRange { start end } + } +}`; + +async function matchingKeys(redis: Redis, pattern: string) { + let cursor = '0'; + const keys: string[] = []; + + do { + const [nextCursor, batch] = await redis.scan(cursor, 'MATCH', pattern, 'COUNT', '100'); + cursor = nextCursor; + keys.push(...batch); + } while (cursor !== '0'); + + return keys; +} + +async function clearDashboardCache(redis: Redis, options: Options) { + const version = (await redis.get(`cache_version:${options.orgId}:${options.projectId}`)) || '0'; + const pattern = `*:${options.orgId}:${options.projectId}:v${version}:*`; + const keys = await matchingKeys(redis, pattern); + + if (keys.length > 0) { + await redis.del(...keys); + } + + return { version, deletedKeys: keys }; +} + +async function measure(options: Options, label: string): Promise { + const startedAt = performance.now(); + const response = await fetch(options.graphqlUrl, { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + 'X-API-Key': options.apiKey, + }, + body: JSON.stringify({ + query, + variables: { + orgId: options.orgId, + projectId: options.projectId, + startDate: options.startDate, + endDate: options.endDate, + }, + }), + }); + const durationMs = performance.now() - startedAt; + const body = await response.json() as any; + + return { + label, + duration_ms: Number(durationMs.toFixed(3)), + status: response.status, + graphql_errors: Array.isArray(body.errors) ? body.errors.length : 0, + total_events: body.data?.metrics?.totalEvents ?? 
null, + }; +} + +function percentile(values: number[], percentileValue: number) { + if (values.length === 0) return null; + const sorted = [...values].sort((a, b) => a - b); + const index = Math.min(sorted.length - 1, Math.ceil((percentileValue / 100) * sorted.length) - 1); + return Number(sorted[index].toFixed(3)); +} + +async function main() { + const options = parseOptions(process.argv.slice(2).filter((arg) => arg !== '--')); + + if (!Number.isInteger(options.warmIterations) || options.warmIterations < 1) { + throw new Error('--warm-iterations must be a positive integer'); + } + + const redis = new Redis(options.redisUrl); + + try { + const before = await clearDashboardCache(redis, options); + const cold = await measure(options, 'cold-cache-miss'); + const keysAfterCold = await matchingKeys( + redis, + `*:${options.orgId}:${options.projectId}:v${before.version}:*` + ); + + const warm: Measurement[] = []; + for (let i = 0; i < options.warmIterations; i += 1) { + warm.push(await measure(options, `warm-cache-hit-${i + 1}`)); + } + + const warmDurations = warm.map((item) => item.duration_ms); + const output = { + run_id: options.runId, + captured_at: new Date().toISOString(), + graphql_url: options.graphqlUrl, + org_id: options.orgId, + project_id: options.projectId, + date_range: { + start: options.startDate, + end: options.endDate, + }, + cache_namespace: { + version: before.version, + deleted_keys_before_cold_run: before.deletedKeys, + keys_after_cold_run: keysAfterCold, + }, + cold, + warm, + summary: { + warm_iterations: options.warmIterations, + warm_min_ms: Number(Math.min(...warmDurations).toFixed(3)), + warm_median_ms: percentile(warmDurations, 50), + warm_p95_ms: percentile(warmDurations, 95), + warm_max_ms: Number(Math.max(...warmDurations).toFixed(3)), + cold_to_warm_median_ratio: percentile(warmDurations, 50) + ? 
Number((cold.duration_ms / (percentile(warmDurations, 50) || 1)).toFixed(2)) + : null, + }, + safe_claim_note: 'This is a local cold-vs-warm GraphQL dashboard cache measurement. It is not a production cache-hit-ratio benchmark.', + }; + + const outputPath = resolve(repoRoot, options.output); + await mkdir(dirname(outputPath), { recursive: true }); + await writeFile(outputPath, `${JSON.stringify(output, null, 2)}\n`, 'utf8'); + console.log(JSON.stringify({ + output: options.output, + cold_ms: output.cold.duration_ms, + warm_median_ms: output.summary.warm_median_ms, + warm_p95_ms: output.summary.warm_p95_ms, + cache_keys_after_cold_run: keysAfterCold.length, + }, null, 2)); + } finally { + await redis.quit(); + } +} + +main().catch((error) => { + console.error(error); + process.exitCode = 1; +}); diff --git a/scripts/measure-hot-tenant-db.ts b/scripts/measure-hot-tenant-db.ts new file mode 100644 index 0000000..fb915ff --- /dev/null +++ b/scripts/measure-hot-tenant-db.ts @@ -0,0 +1,554 @@ +#!/usr/bin/env node + +import { readFile, writeFile, mkdir } from 'node:fs/promises'; +import { existsSync } from 'node:fs'; +import { dirname, resolve } from 'node:path'; +import { execFile } from 'node:child_process'; +import { promisify } from 'node:util'; +import { config } from 'dotenv'; +import { Pool } from 'pg'; + +interface Options { + runId: string; + databaseUrl: string; + output: string; + manifest?: string; + k6Summary: string; + waitForLagZero: boolean; + requireComplete: boolean; + pollMs: number; + timeoutMs: number; +} + +interface PlanSummary { + node_types: Record; + relations: string[]; + actual_rows_sum: number; + plan_rows_sum: number; + shared_hit_blocks: number; + shared_read_blocks: number; +} + +interface LagSnapshot { + captured_at: string; + partitions: Array<{ + topic: string; + partition: number; + current_offset: number; + log_end_offset: number; + lag: number; + }>; + total_lag: number; +} + +const repoRoot = resolve(__dirname, '..'); +const 
execFileAsync = promisify(execFile); +config({ path: resolve(repoRoot, '.env') }); + +function argValue(argv: string[], name: string, fallback?: string) { + const index = argv.indexOf(name); + return index === -1 ? fallback : argv[index + 1]; +} + +function argFlag(argv: string[], name: string) { + return argv.includes(name); +} + +function parseOptions(argv: string[]): Options { + const runId = argValue(argv, '--run-id', process.env.RUN_ID); + if (!runId) { + throw new Error('Provide --run-id or RUN_ID'); + } + + return { + runId, + databaseUrl: argValue( + argv, + '--database-url', + process.env.DATABASE_URL || 'postgresql://pulseops:dev_password_change_in_production@localhost:5432/pulseops_dev' + )!, + manifest: argValue(argv, '--manifest', process.env.TENANT_KEYS_FILE), + k6Summary: argValue(argv, '--k6-summary', `docs/benchmarks/evidence/hot-tenant-${runId}.json`)!, + waitForLagZero: !argFlag(argv, '--no-wait-for-lag-zero'), + requireComplete: argFlag(argv, '--require-complete'), + pollMs: Number(argValue(argv, '--poll-ms', process.env.POLL_MS || '500')), + timeoutMs: Number(argValue(argv, '--timeout-ms', process.env.TIMEOUT_MS || '60000')), + output: argValue(argv, '--output', `docs/benchmarks/evidence/hot-tenant-db-${runId}.json`)!, + }; +} + +function numberValue(value: unknown) { + if (value === null || value === undefined) return null; + const parsed = Number(value); + return Number.isFinite(parsed) ? parsed : null; +} + +function normalizeRows(rows: any[]) { + return rows.map((row) => Object.fromEntries( + Object.entries(row).map(([key, value]) => [key, typeof value === 'bigint' ? 
Number(value) : value]) + )); +} + +function summarizePlan(node: any, summary: PlanSummary) { + if (!node) return summary; + + const nodeType = node['Node Type'] || 'unknown'; + summary.node_types[nodeType] = (summary.node_types[nodeType] || 0) + 1; + + if (node['Relation Name'] && !summary.relations.includes(node['Relation Name'])) { + summary.relations.push(node['Relation Name']); + } + + summary.actual_rows_sum += numberValue(node['Actual Rows']) || 0; + summary.plan_rows_sum += numberValue(node['Plan Rows']) || 0; + summary.shared_hit_blocks += numberValue(node['Shared Hit Blocks']) || 0; + summary.shared_read_blocks += numberValue(node['Shared Read Blocks']) || 0; + + for (const child of node.Plans || []) { + summarizePlan(child, summary); + } + + return summary; +} + +async function explain( + db: Pool, + label: string, + sql: string, + params: unknown[] +) { + const result = await db.query(`EXPLAIN (ANALYZE, BUFFERS, FORMAT JSON) ${sql}`, params); + const json = result.rows[0]['QUERY PLAN'][0]; + const summary = summarizePlan(json.Plan, { + node_types: {}, + relations: [], + actual_rows_sum: 0, + plan_rows_sum: 0, + shared_hit_blocks: 0, + shared_read_blocks: 0, + }); + + return { + label, + planning_ms: Number(json['Planning Time'].toFixed(3)), + execution_ms: Number(json['Execution Time'].toFixed(3)), + plan_summary: summary, + }; +} + +async function readManifest(path?: string) { + if (!path) return null; + const absolute = resolve(repoRoot, path); + if (!existsSync(absolute)) return null; + return JSON.parse(await readFile(absolute, 'utf8')); +} + +async function readJsonIfExists(path: string) { + const absolute = resolve(repoRoot, path); + if (!existsSync(absolute)) return null; + return JSON.parse(await readFile(absolute, 'utf8')); +} + +function sleep(ms: number) { + return new Promise((resolveSleep) => { + setTimeout(resolveSleep, ms); + }); +} + +async function kafkaLag(): Promise { + try { + const { stdout } = await execFileAsync('docker', [ + 
'exec', + 'pulseops-kafka', + '/opt/kafka/bin/kafka-consumer-groups.sh', + '--bootstrap-server', + 'localhost:9092', + '--describe', + '--group', + 'pulseops-aggregators', + ]); + + const partitions = stdout + .split('\n') + .map((line) => line.trim()) + .filter((line) => line.includes('events-raw')) + .map((line) => { + const columns = line.split(/\s+/); + return { + topic: columns[1], + partition: Number(columns[2]), + current_offset: Number(columns[3]), + log_end_offset: Number(columns[4]), + lag: Number(columns[5]), + }; + }) + .filter((partition) => Number.isFinite(partition.partition) && Number.isFinite(partition.lag)); + + return { + captured_at: new Date().toISOString(), + partitions, + total_lag: partitions.reduce((sum, partition) => sum + partition.lag, 0), + }; + } catch (_error) { + return null; + } +} + +async function waitForKafkaLagZero(options: Options) { + if (!options.waitForLagZero) { + return kafkaLag(); + } + + const deadline = Date.now() + options.timeoutMs; + let lastLag: LagSnapshot | null = null; + + while (Date.now() < deadline) { + lastLag = await kafkaLag(); + if (lastLag && lastLag.partitions.length > 0 && lastLag.total_lag === 0) { + return lastLag; + } + await sleep(options.pollMs); + } + + throw new Error(`Timed out waiting for Kafka lag to reach zero before hot-tenant DB measurement. 
/**
 * Reconciles the k6 hot-tenant summary against persisted rows and Kafka lag.
 *
 * The result is `complete` only when a summary exists, it carries a request
 * count, the failed-request rate is exactly zero, consumer-group lag was
 * captured for at least one partition and is zero, and the persisted event
 * count equals the k6 request count. With a summary but any failed condition
 * the status is `incomplete`; without a summary it is `not_checked`.
 *
 * @param params.k6Summary      Parsed k6 summary JSON, or a falsy value when absent.
 * @param params.k6SummaryPath  Path the summary was read from (echoed in the result).
 * @param params.totalEvents    Persisted hot-tenant events counted for the run.
 * @param params.kafkaLagAfter  Lag snapshot taken after measurement, or null.
 * @returns Reconciliation record with status, inputs, and human-readable notes.
 */
function buildReconciliation(params: {
  k6Summary: any;
  k6SummaryPath: string;
  totalEvents: number;
  kafkaLagAfter: LagSnapshot | null;
}) {
  const { k6Summary, k6SummaryPath, totalEvents, kafkaLagAfter } = params;
  const k6RequestCount = numberValue(k6Summary?.metrics?.http_reqs?.values?.count);
  const k6FailedRate = numberValue(k6Summary?.metrics?.http_req_failed?.values?.rate);
  const notes: string[] = [];

  if (!k6Summary) {
    notes.push('k6 hot-tenant summary was not found; persisted-count reconciliation was not checked.');
  }
  if (k6Summary && k6RequestCount === null) {
    notes.push('k6 summary did not include http_reqs count.');
  }
  if (k6Summary && k6FailedRate === null) {
    // A missing metric is not the same as a non-zero failure rate; report it
    // as missing so the note does not claim failures that were never measured.
    // NOTE(review): assumes numberValue yields null for absent values, as the
    // http_reqs check above does — confirm against its definition.
    notes.push('k6 summary did not include http_req_failed rate.');
  } else if (k6Summary && k6FailedRate !== 0) {
    notes.push('k6 http_req_failed rate was not zero, so request count cannot be treated as accepted event count.');
  }

  const lagAvailable = Boolean(kafkaLagAfter && kafkaLagAfter.partitions.length > 0);
  if (!kafkaLagAfter || kafkaLagAfter.partitions.length === 0) {
    notes.push('Kafka consumer-group lag was not available.');
  } else if (kafkaLagAfter.total_lag !== 0) {
    notes.push(`Kafka lag after measurement was ${kafkaLagAfter.total_lag}.`);
  }

  if (k6RequestCount !== null && k6FailedRate === 0 && totalEvents !== k6RequestCount) {
    notes.push(`Persisted hot-tenant events (${totalEvents}) did not match successful k6 requests (${k6RequestCount}).`);
  }

  const complete = Boolean(
    k6Summary &&
    k6RequestCount !== null &&
    k6FailedRate === 0 &&
    lagAvailable &&
    kafkaLagAfter?.total_lag === 0 &&
    totalEvents === k6RequestCount
  );

  let status: 'complete' | 'incomplete' | 'not_checked';
  if (complete) {
    status = 'complete';
  } else if (k6Summary) {
    status = 'incomplete';
  } else {
    status = 'not_checked';
  }

  return {
    status,
    k6_summary_path: k6SummaryPath,
    k6_request_count: k6RequestCount,
    k6_http_req_failed_rate: k6FailedRate,
    persisted_events: totalEvents,
    kafka_lag_after: kafkaLagAfter,
    notes,
  };
}
MAX(timestamp) AS last_event_timestamp + FROM events + WHERE properties->>'run_id' = $1 + AND properties->>'benchmark' = 'hot-tenant' + GROUP BY COALESCE(properties->>'tenant_class', 'unknown'), properties->>'tenant_id', org_id, project_id + ORDER BY events DESC + LIMIT 20`, + [options.runId] + ); + + const aggregatePressure = await db.query( + `WITH aggregate_keys AS ( + SELECT + COALESCE(properties->>'tenant_class', 'unknown') AS tenant_class, + org_id, + project_id, + event_name, + timestamp::date AS event_date, + COUNT(*)::int AS events_per_key + FROM events + WHERE properties->>'run_id' = $1 + AND properties->>'benchmark' = 'hot-tenant' + GROUP BY COALESCE(properties->>'tenant_class', 'unknown'), org_id, project_id, event_name, timestamp::date + ) + SELECT + tenant_class, + COUNT(*)::int AS event_count_aggregate_keys, + SUM(events_per_key)::int AS events, + ROUND(AVG(events_per_key)::numeric, 2)::float AS avg_events_per_event_count_key, + MAX(events_per_key)::int AS max_events_per_event_count_key, + percentile_cont(0.95) WITHIN GROUP (ORDER BY events_per_key)::float AS p95_events_per_event_count_key + FROM aggregate_keys + GROUP BY tenant_class + ORDER BY events DESC`, + [options.runId] + ); + + const activeUserPressure = await db.query( + `WITH user_day_keys AS ( + SELECT + COALESCE(properties->>'tenant_class', 'unknown') AS tenant_class, + org_id, + project_id, + timestamp::date AS event_date, + user_id, + COUNT(*)::int AS events_per_user_day + FROM events + WHERE properties->>'run_id' = $1 + AND properties->>'benchmark' = 'hot-tenant' + AND user_id IS NOT NULL + GROUP BY COALESCE(properties->>'tenant_class', 'unknown'), org_id, project_id, timestamp::date, user_id + ) + SELECT + tenant_class, + COUNT(*)::int AS active_user_keys, + SUM(events_per_user_day)::int AS events_with_user, + ROUND(AVG(events_per_user_day)::numeric, 2)::float AS avg_events_per_user_day_key, + MAX(events_per_user_day)::int AS max_events_per_user_day_key + FROM user_day_keys + GROUP BY 
tenant_class + ORDER BY events_with_user DESC`, + [options.runId] + ); + + const burstSeconds = await db.query( + `SELECT + date_trunc('second', received_at) AS second, + COUNT(*)::int AS events + FROM events + WHERE properties->>'run_id' = $1 + AND properties->>'benchmark' = 'hot-tenant' + GROUP BY date_trunc('second', received_at) + ORDER BY events DESC, second ASC + LIMIT 10`, + [options.runId] + ); + + const partitions = await db.query( + `SELECT + tableoid::regclass::text AS partition, + COUNT(*)::int AS events + FROM events + WHERE properties->>'run_id' = $1 + AND properties->>'benchmark' = 'hot-tenant' + GROUP BY tableoid::regclass::text + ORDER BY events DESC`, + [options.runId] + ); + + const locks = await db.query( + `SELECT + COUNT(*)::int AS locks_total, + COUNT(*) FILTER (WHERE NOT granted)::int AS waiting_locks, + COUNT(DISTINCT pid) FILTER (WHERE NOT granted)::int AS waiting_pids + FROM pg_locks + WHERE database = (SELECT oid FROM pg_database WHERE datname = current_database()) + OR database IS NULL` + ); + + const activity = await db.query( + `SELECT + COUNT(*)::int AS connections, + COUNT(*) FILTER (WHERE state = 'active')::int AS active_connections, + COUNT(*) FILTER (WHERE wait_event_type IS NOT NULL)::int AS waiting_connections, + COUNT(*) FILTER (WHERE wait_event_type = 'Lock')::int AS lock_wait_connections + FROM pg_stat_activity + WHERE datname = current_database()` + ); + + const dbStats = await db.query( + `SELECT + xact_commit, + xact_rollback, + deadlocks, + conflicts, + temp_files, + temp_bytes, + blk_read_time, + blk_write_time + FROM pg_stat_database + WHERE datname = current_database()` + ); + + const tableStats = await db.query( + `SELECT + relname, + n_tup_ins, + n_tup_upd, + n_tup_del, + n_dead_tup, + seq_scan, + idx_scan + FROM pg_stat_user_tables + WHERE relname IN ('events', 'daily_aggregates', 'daily_active_users', 'event_dedup_keys') + ORDER BY relname` + ); + const lagAfterMeasurement = await kafkaLag(); + const 
reconciliation = buildReconciliation({ + k6Summary, + k6SummaryPath: options.k6Summary, + totalEvents, + kafkaLagAfter: lagAfterMeasurement || lagBeforeMeasurement, + }); + + const hotTenant = tenants.rows.find((row) => row.tenant_class === 'hot') || tenants.rows[0]; + const quietTenant = tenants.rows.find((row) => row.tenant_class === 'quiet'); + const firstRange = distribution.rows.reduce((min: string | null, row) => { + if (!row.first_event_timestamp) return min; + const value = new Date(row.first_event_timestamp).toISOString(); + return !min || value < min ? value : min; + }, null); + const lastRange = distribution.rows.reduce((max: string | null, row) => { + if (!row.last_event_timestamp) return max; + const value = new Date(row.last_event_timestamp).toISOString(); + return !max || value > max ? value : max; + }, null); + const rangeStart = firstRange || new Date(Date.now() - 60_000).toISOString(); + const rangeEnd = lastRange || new Date().toISOString(); + + const explainPlans = []; + for (const tenant of [hotTenant, quietTenant].filter(Boolean)) { + explainPlans.push(await explain( + db, + `${tenant.tenant_class}-raw-event-count`, + `SELECT COUNT(*)::int + FROM events + WHERE org_id = $1 + AND project_id = $2 + AND timestamp >= $3 + AND timestamp <= $4`, + [tenant.org_id, tenant.project_id, rangeStart, rangeEnd] + )); + + explainPlans.push(await explain( + db, + `${tenant.tenant_class}-aggregate-total-events`, + `SELECT COALESCE(SUM(metric_value), 0)::int + FROM daily_aggregates + WHERE org_id = $1 + AND project_id = $2 + AND metric_name = 'total_events' + AND date >= $3::date + AND date <= $4::date`, + [tenant.org_id, tenant.project_id, rangeStart, rangeEnd] + )); + } + + const output = { + run_id: options.runId, + captured_at: new Date().toISOString(), + manifest: manifest ? 
{ + path: options.manifest, + counts: manifest.counts, + } : null, + total_events: totalEvents, + tenant_distribution: normalizeRows(distribution.rows), + top_tenants: normalizeRows(tenants.rows), + aggregate_pressure: normalizeRows(aggregatePressure.rows), + active_user_pressure: normalizeRows(activeUserPressure.rows), + burst_seconds: normalizeRows(burstSeconds.rows), + partitions: normalizeRows(partitions.rows), + reconciliation, + postgres_snapshot: { + locks: normalizeRows(locks.rows)[0], + activity: normalizeRows(activity.rows)[0], + database: normalizeRows(dbStats.rows)[0], + table_stats: normalizeRows(tableStats.rows), + note: 'PostgreSQL lock/activity/stat snapshots are captured after the run, not sampled continuously during load.', + }, + representative_query_plans: explainPlans, + safe_claim_note: 'Local hot-tenant PostgreSQL evidence for one run_id. Aggregate pressure is derived from raw events per aggregate key, not from a Postgres ON CONFLICT counter.', + unsafe_claim_note: 'Do not claim production lock behavior or long-duration contention from this after-run snapshot.', + }; + + const outputPath = resolve(repoRoot, options.output); + await mkdir(dirname(outputPath), { recursive: true }); + await writeFile(outputPath, `${JSON.stringify(output, null, 2)}\n`, 'utf8'); + + console.log(JSON.stringify({ + output: options.output, + run_id: options.runId, + total_events: totalEvents, + tenant_classes: output.tenant_distribution.map((row: any) => `${row.tenant_class}:${row.events}`), + reconciliation_status: output.reconciliation.status, + waiting_locks: output.postgres_snapshot.locks?.waiting_locks, + representative_plans: explainPlans.map((plan) => ({ + label: plan.label, + execution_ms: plan.execution_ms, + })), + }, null, 2)); + + if (options.requireComplete && reconciliation.status !== 'complete') { + throw new Error(`Hot-tenant DB evidence is not complete: ${JSON.stringify(reconciliation)}`); + } + } finally { + await db.end(); + } +} + 
+main().catch((error) => { + console.error(error); + process.exitCode = 1; +}); diff --git a/scripts/measure-worker-catchup.ts b/scripts/measure-worker-catchup.ts new file mode 100644 index 0000000..eef519d --- /dev/null +++ b/scripts/measure-worker-catchup.ts @@ -0,0 +1,275 @@ +#!/usr/bin/env node + +import { randomUUID } from 'node:crypto'; +import { mkdir, writeFile } from 'node:fs/promises'; +import { dirname, resolve } from 'node:path'; +import { execFile } from 'node:child_process'; +import { promisify } from 'node:util'; +import { performance } from 'node:perf_hooks'; +import { config } from 'dotenv'; +import { Pool } from 'pg'; + +interface Options { + apiUrl: string; + apiKey: string; + databaseUrl: string; + runId: string; + events: number; + batchSize: number; + pollMs: number; + timeoutMs: number; + output: string; +} + +interface LagSnapshot { + captured_at: string; + partitions: Array<{ + topic: string; + partition: number; + current_offset: number; + log_end_offset: number; + lag: number; + }>; + total_lag: number; +} + +interface PollSample { + elapsed_ms: number; + persisted_events: number; + kafka_lag: number | null; + active_db_connections: number | null; +} + +const execFileAsync = promisify(execFile); +const repoRoot = resolve(__dirname, '..'); +config({ path: resolve(repoRoot, '.env') }); + +function argValue(argv: string[], name: string, fallback: string) { + const index = argv.indexOf(name); + return index === -1 ? 
fallback : argv[index + 1]; +} + +function parseOptions(argv: string[]): Options { + const runId = argValue(argv, '--run-id', process.env.RUN_ID || new Date().toISOString().replace(/[:.]/g, '-')); + return { + apiUrl: argValue(argv, '--api-url', process.env.API_URL || 'http://localhost:3001'), + apiKey: argValue(argv, '--api-key', process.env.API_KEY || 'demo_key_change_this'), + databaseUrl: argValue(argv, '--database-url', process.env.DATABASE_URL || 'postgresql://pulseops:dev_password_change_in_production@localhost:5432/pulseops_dev'), + runId, + events: Number(argValue(argv, '--events', process.env.EVENTS || '1000')), + batchSize: Number(argValue(argv, '--batch-size', process.env.BATCH_SIZE || '100')), + pollMs: Number(argValue(argv, '--poll-ms', process.env.POLL_MS || '500')), + timeoutMs: Number(argValue(argv, '--timeout-ms', process.env.TIMEOUT_MS || '60000')), + output: argValue(argv, '--output', `docs/benchmarks/evidence/worker-catchup-${runId}.json`), + }; +} + +function sleep(ms: number) { + return new Promise((resolveSleep) => { + setTimeout(resolveSleep, ms); + }); +} + +function eventPayload(runId: string, index: number) { + return { + event_name: index % 5 === 0 ? 
/**
 * POSTs one batch of events to the ingest API's batch endpoint.
 *
 * @param options  Script options; `apiUrl` and `apiKey` are read here.
 * @param events   Events to send in a single request.
 * @returns The HTTP status, the number of events counted as accepted, and the
 *          parsed response body (an empty object when the body is not JSON).
 *          `accepted` is 0 for any status other than 202.
 */
async function sendBatch(options: Options, events: ReturnType<typeof eventPayload>[]) {
  const response = await fetch(`${options.apiUrl}/api/v1/events/batch`, {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      'X-API-Key': options.apiKey,
    },
    body: JSON.stringify({ events }),
  });

  // A non-JSON body is tolerated; the status code alone decides acceptance.
  const body = await response.json().catch(() => ({}));

  // Trust the server's count whenever it is a valid non-negative integer,
  // including an explicit 0. Falling back on any falsy value would report a
  // full batch as accepted when the API said none were. `body` may also be a
  // JSON `null`, so read it null-safely.
  const reportedCount = body?.count;
  const acceptedCount =
    Number.isInteger(reportedCount) && reportedCount >= 0 ? reportedCount : events.length;

  return {
    status: response.status,
    accepted: response.status === 202 ? acceptedCount : 0,
    body,
  };
}
/**
 * Sends `--events` synthetic events in batches, then polls PostgreSQL until
 * the persisted row count for this run reaches the accepted count or the
 * timeout elapses, and writes the measurement as JSON under the repo root.
 *
 * The timeout is measured from the start of sending, so time spent in the
 * acceptance phase counts against it.
 *
 * Sets a non-zero exit code when the worker did not catch up, or when no
 * events were accepted at all (nothing was measured in that case).
 *
 * @throws Error when a numeric option is invalid.
 */
async function main() {
  const options = parseOptions(process.argv.slice(2).filter((arg) => arg !== '--'));

  if (!Number.isInteger(options.events) || options.events < 1) {
    throw new Error('--events must be a positive integer');
  }
  if (!Number.isInteger(options.batchSize) || options.batchSize < 1 || options.batchSize > 1000) {
    throw new Error('--batch-size must be an integer between 1 and 1000');
  }
  // A NaN poll interval makes setTimeout fire almost immediately, and a NaN
  // timeout makes the wait loop below never run; reject both up front, before
  // the connection pool is created.
  if (!Number.isFinite(options.pollMs) || options.pollMs <= 0) {
    throw new Error('--poll-ms must be a positive number');
  }
  if (!Number.isFinite(options.timeoutMs) || options.timeoutMs <= 0) {
    throw new Error('--timeout-ms must be a positive number');
  }

  const db = new Pool({ connectionString: options.databaseUrl });
  const beforeLag = await kafkaLag();
  const acceptedStatuses: Record<string, number> = {};
  let acceptedEvents = 0;
  const batches: Array<{ status: number; accepted: number; duration_ms: number }> = [];
  const startedAt = performance.now();

  try {
    // Phase 1: send every batch sequentially, recording status and latency.
    for (let offset = 0; offset < options.events; offset += options.batchSize) {
      const batch = Array.from(
        { length: Math.min(options.batchSize, options.events - offset) },
        (_, index) => eventPayload(options.runId, offset + index)
      );
      const batchStart = performance.now();
      const response = await sendBatch(options, batch);
      const durationMs = performance.now() - batchStart;

      const statusKey = String(response.status);
      acceptedStatuses[statusKey] = (acceptedStatuses[statusKey] || 0) + 1;
      acceptedEvents += response.accepted;
      batches.push({
        status: response.status,
        accepted: response.accepted,
        duration_ms: Number(durationMs.toFixed(3)),
      });
    }

    const acceptedAtMs = performance.now() - startedAt;
    const samples: PollSample[] = [];
    let persisted = await persistedCount(db, options.runId);

    // Phase 2: poll until the worker has persisted everything that was accepted.
    while (persisted < acceptedEvents && performance.now() - startedAt < options.timeoutMs) {
      const lag = await kafkaLag();
      samples.push({
        elapsed_ms: Number((performance.now() - startedAt).toFixed(3)),
        persisted_events: persisted,
        kafka_lag: lag?.total_lag ?? null,
        active_db_connections: await activeConnections(db),
      });

      await sleep(options.pollMs);
      persisted = await persistedCount(db, options.runId);
    }

    const caughtUpAtMs = performance.now() - startedAt;
    const afterLag = await kafkaLag();
    samples.push({
      elapsed_ms: Number(caughtUpAtMs.toFixed(3)),
      persisted_events: persisted,
      kafka_lag: afterLag?.total_lag ?? null,
      active_db_connections: await activeConnections(db),
    });

    const output = {
      run_id: options.runId,
      captured_at: new Date().toISOString(),
      requested_events: options.events,
      accepted_events: acceptedEvents,
      persisted_events: persisted,
      batch_size: options.batchSize,
      batch_statuses: acceptedStatuses,
      acceptance_duration_ms: Number(acceptedAtMs.toFixed(3)),
      catchup_duration_ms: Number(caughtUpAtMs.toFixed(3)),
      accepted_events_per_second: Number((acceptedEvents / (acceptedAtMs / 1000)).toFixed(2)),
      persisted_events_per_second_until_caught_up: Number((persisted / (caughtUpAtMs / 1000)).toFixed(2)),
      kafka_lag_before: beforeLag,
      kafka_lag_after: afterLag,
      batches,
      samples,
      safe_claim_note: 'Local worker catch-up smoke measurement from HTTP acceptance through Kafka to persisted PostgreSQL rows.',
    };

    const outputPath = resolve(repoRoot, options.output);
    await mkdir(dirname(outputPath), { recursive: true });
    await writeFile(outputPath, `${JSON.stringify(output, null, 2)}\n`, 'utf8');
    console.log(JSON.stringify({
      output: options.output,
      accepted_events: acceptedEvents,
      persisted_events: persisted,
      acceptance_duration_ms: output.acceptance_duration_ms,
      catchup_duration_ms: output.catchup_duration_ms,
      accepted_events_per_second: output.accepted_events_per_second,
      persisted_events_per_second_until_caught_up: output.persisted_events_per_second_until_caught_up,
      kafka_lag_after: afterLag?.total_lag ?? null,
    }, null, 2));

    // With zero accepted events the loop above exits immediately; that is a
    // failed measurement, not a successful catch-up.
    if (acceptedEvents === 0 || persisted < acceptedEvents) {
      process.exitCode = 1;
    }
  } finally {
    await db.end();
  }
}
fallback : argv[index + 1]; +} + +function safeRunId(value: string) { + return value.replace(/[^a-zA-Z0-9._-]/g, '-').slice(0, 80); +} + +function parseOptions(argv: string[]): Options { + const defaultRunId = `retry-proof-${new Date().toISOString().replace(/[:.]/g, '-')}-${randomUUID().slice(0, 8)}`; + const runId = safeRunId(argValue(argv, '--run-id', process.env.RUN_ID || defaultRunId)) || defaultRunId; + + return { + apiUrl: argValue(argv, '--api-url', process.env.API_URL || 'http://localhost:3001'), + apiKey: argValue(argv, '--api-key', process.env.API_KEY || 'demo_key_change_this'), + databaseUrl: argValue(argv, '--database-url', process.env.DATABASE_URL || 'postgresql://pulseops:dev_password_change_in_production@localhost:5432/pulseops_dev'), + redisUrl: argValue(argv, '--redis-url', process.env.REDIS_URL || 'redis://localhost:6379'), + kafkaBrokers: argValue(argv, '--kafka-brokers', process.env.KAFKA_BROKERS || 'localhost:9092'), + kafkaGroup: argValue(argv, '--kafka-group', process.env.PROOF_KAFKA_GROUP || `pulseops-retry-proof-${runId}`), + runId, + pollMs: Number(argValue(argv, '--poll-ms', process.env.POLL_MS || '500')), + timeoutMs: Number(argValue(argv, '--timeout-ms', process.env.TIMEOUT_MS || '120000')), + workerMetricsPort: Number(argValue(argv, '--worker-metrics-port', process.env.PROOF_WORKER_METRICS_PORT || '0')), + output: argValue(argv, '--output', `docs/benchmarks/evidence/worker-retry-offsets-${runId}.json`), + }; +} + +function sleep(ms: number) { + return new Promise((resolveSleep) => { + setTimeout(resolveSleep, ms); + }); +} + +function dateOnly(timestamp: string) { + return new Date(timestamp).toISOString().split('T')[0]; +} + +async function waitFor( + description: string, + poll: () => Promise, + timeoutMs: number, + pollMs: number +) { + const deadline = Date.now() + timeoutMs; + let lastValue: T | null = null; + + while (Date.now() < deadline) { + lastValue = await poll(); + if (lastValue) { + return lastValue; + } + await 
sleep(pollMs); + } + + throw new Error(`Timed out waiting for ${description}. Last value: ${JSON.stringify(lastValue)}`); +} + +async function assertNoManagedWorkerContainer() { + const { stdout } = await execFileAsync('docker', [ + 'ps', + '--filter', + 'name=pulseops-worker', + '--format', + '{{.Names}}', + ]); + + const runningWorkers = stdout + .split('\n') + .map((line) => line.trim()) + .filter(Boolean); + + if (runningWorkers.length > 0) { + throw new Error( + `Refusing to run with existing worker container(s): ${runningWorkers.join(', ')}. ` + + 'Run this proof with only Postgres, Redis, Kafka, and ingest-api active so the controlled worker owns the DB side effects.' + ); + } +} + +async function assertIngestHealthy(apiUrl: string) { + const response = await fetch(`${apiUrl}/health`); + if (!response.ok) { + throw new Error(`Ingest API health check failed with HTTP ${response.status}`); + } +} + +async function kafkaLag(groupId: string): Promise { + try { + const { stdout } = await execFileAsync('docker', [ + 'exec', + 'pulseops-kafka', + '/opt/kafka/bin/kafka-consumer-groups.sh', + '--bootstrap-server', + 'localhost:9092', + '--describe', + '--group', + groupId, + ]); + + const partitions = stdout + .split('\n') + .map((line) => line.trim()) + .filter((line) => line.includes('events-raw')) + .map((line) => { + const columns = line.split(/\s+/); + return { + topic: columns[1], + partition: Number(columns[2]), + current_offset: Number(columns[3]), + log_end_offset: Number(columns[4]), + lag: Number(columns[5]), + }; + }) + .filter((partition) => Number.isFinite(partition.partition) && Number.isFinite(partition.lag)); + + return { + partitions, + total_lag: partitions.reduce((sum, partition) => sum + partition.lag, 0), + }; + } catch (_error) { + return null; + } +} + +function startWorker(options: Options, failRunId?: string): ManagedWorker { + const logs: string[] = []; + const env: NodeJS.ProcessEnv = { + ...process.env, + NODE_ENV: 'development', + 
DATABASE_URL: options.databaseUrl, + REDIS_URL: options.redisUrl, + KAFKA_BROKERS: options.kafkaBrokers, + KAFKA_GROUP_ID: options.kafkaGroup, + WORKER_METRICS_HOST: '127.0.0.1', + WORKER_METRICS_PORT: String(options.workerMetricsPort), + }; + + if (failRunId) { + env.WORKER_FAIL_AFTER_PROCESSING_ONCE_RUN_ID = failRunId; + } else { + delete env.WORKER_FAIL_AFTER_PROCESSING_ONCE_RUN_ID; + } + + const child = spawn('pnpm', ['exec', 'tsx', 'services/worker/src/index.ts'], { + cwd: repoRoot, + env, + stdio: ['ignore', 'pipe', 'pipe'], + }); + + child.stdout?.on('data', (chunk) => { + logs.push(chunk.toString()); + }); + child.stderr?.on('data', (chunk) => { + logs.push(chunk.toString()); + }); + + const worker: ManagedWorker = { + child, + logs, + exit: null, + exitPromise: new Promise((resolveExit) => { + child.on('exit', (code, signal) => { + worker.exit = { code, signal }; + resolveExit(worker.exit); + }); + }), + }; + + return worker; +} + +async function waitForWorkerReady(worker: ManagedWorker, timeoutMs: number, pollMs: number) { + await waitFor('worker subscription', async () => { + if (worker.exit) { + throw new Error(`Worker exited before subscribing: ${JSON.stringify(worker.exit)}\n${worker.logs.join('')}`); + } + return worker.logs.join('').includes('Subscribed to events-raw topic') ? 
/**
 * Sends a single proof event through the ingest API.
 *
 * @param options  Script options; `apiUrl`, `apiKey`, and `runId` are read here.
 * @param role     Whether this is the warmup event or the crash probe.
 * @returns The event id reported by the API, plus the event name and
 *          timestamp that were sent.
 * @throws Error when the API does not answer 202, or answers 202 without a
 *         usable `event_id`.
 */
async function sendEvent(options: Options, role: 'warmup' | 'probe') {
  const timestamp = new Date().toISOString();
  const eventName = `retry_proof_${role}_${options.runId}`;
  const payload = {
    event_name: eventName,
    user_id: `retry-proof-user-${options.runId}`,
    session_id: randomUUID(),
    properties: {
      retry_probe_run_id: options.runId,
      retry_probe_role: role,
      proof: 'worker-retry-offsets',
    },
    timestamp,
  };

  const response = await fetch(`${options.apiUrl}/api/v1/events`, {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      'X-API-Key': options.apiKey,
    },
    body: JSON.stringify(payload),
  });

  const body = await response.json().catch(() => ({}));
  if (response.status !== 202) {
    throw new Error(`Ingest ${role} event failed with HTTP ${response.status}: ${JSON.stringify(body)}`);
  }

  // The returned id is used to look the event up in PostgreSQL. Without this
  // check a missing id only shows up later as a poll timeout, so fail here
  // with the response body instead.
  const eventId = body?.event_id;
  if (typeof eventId !== 'string' || eventId === '') {
    throw new Error(`Ingest ${role} event was accepted but the response had no event_id: ${JSON.stringify(body)}`);
  }

  return {
    eventId,
    eventName,
    timestamp,
  };
}
event_dedup_keys + WHERE event_id = $1`, + [eventId] + ), + db.query( + `SELECT COALESCE(sum(metric_value), 0)::int AS value + FROM daily_aggregates + WHERE metric_name = 'event_count' + AND date = $1 + AND dimensions = $2::jsonb`, + [dateOnly(timestamp), JSON.stringify({ event_name: eventName })] + ), + ]); + + return { + events: events.rows[0]?.count ?? 0, + dedupe_keys: dedupe.rows[0]?.count ?? 0, + event_count_aggregate: aggregate.rows[0]?.value ?? 0, + }; +} + +async function waitForEventState( + db: Pool, + eventId: string, + eventName: string, + timestamp: string, + timeoutMs: number, + pollMs: number +) { + return waitFor('Postgres event side effects', async () => { + const state = await eventState(db, eventId, eventName, timestamp); + return state.events === 1 && state.dedupe_keys === 1 && state.event_count_aggregate === 1 ? state : null; + }, timeoutMs, pollMs); +} + +async function waitForLagZero(groupId: string, timeoutMs: number, pollMs: number) { + return waitFor('Kafka lag to reach zero', async () => { + const lag = await kafkaLag(groupId); + return lag && lag.partitions.length > 0 && lag.total_lag === 0 ? 
lag : null; + }, timeoutMs, pollMs); +} + +async function main() { + const options = parseOptions(process.argv.slice(2).filter((arg) => arg !== '--')); + + if (!Number.isFinite(options.pollMs) || options.pollMs < 100) { + throw new Error('--poll-ms must be at least 100'); + } + if (!Number.isFinite(options.timeoutMs) || options.timeoutMs < 1000) { + throw new Error('--timeout-ms must be at least 1000'); + } + + await assertNoManagedWorkerContainer(); + await assertIngestHealthy(options.apiUrl); + + const db = new Pool({ connectionString: options.databaseUrl }); + let worker: ManagedWorker | null = null; + + try { + worker = startWorker(options); + await waitForWorkerReady(worker, options.timeoutMs, options.pollMs); + + const warmup = await sendEvent(options, 'warmup'); + const warmupState = await waitForEventState( + db, + warmup.eventId, + warmup.eventName, + warmup.timestamp, + options.timeoutMs, + options.pollMs + ); + const warmupLag = await waitForLagZero(options.kafkaGroup, options.timeoutMs, options.pollMs); + await stopWorker(worker); + worker = null; + + worker = startWorker(options, options.runId); + await waitForWorkerReady(worker, options.timeoutMs, options.pollMs); + + const probe = await sendEvent(options, 'probe'); + const crashExit = await waitForWorkerExit(worker, options.timeoutMs); + if (crashExit.code !== 86) { + throw new Error(`Expected worker exit code 86, got ${JSON.stringify(crashExit)}\n${worker.logs.join('')}`); + } + const afterCrashState = await waitForEventState( + db, + probe.eventId, + probe.eventName, + probe.timestamp, + options.timeoutMs, + options.pollMs + ); + const lagAfterCrash = await kafkaLag(options.kafkaGroup); + worker = null; + + worker = startWorker(options); + await waitForWorkerReady(worker, options.timeoutMs, options.pollMs); + const replayLag = await waitForLagZero(options.kafkaGroup, options.timeoutMs, options.pollMs); + const afterReplayState = await eventState(db, probe.eventId, probe.eventName, probe.timestamp); 
+ + if ( + afterReplayState.events !== 1 || + afterReplayState.dedupe_keys !== 1 || + afterReplayState.event_count_aggregate !== 1 + ) { + throw new Error(`Replay double-count check failed: ${JSON.stringify(afterReplayState)}`); + } + + const proof = { + run_id: options.runId, + kafka_group_id: options.kafkaGroup, + warmup_event_id: warmup.eventId, + probe_event_id: probe.eventId, + crash_exit_code: crashExit.code, + warmup_state: warmupState, + after_crash_state: afterCrashState, + after_replay_state: afterReplayState, + kafka_lag_after_warmup: warmupLag.total_lag, + kafka_lag_after_crash: lagAfterCrash?.total_lag ?? null, + kafka_lag_after_replay: replayLag.total_lag, + output: options.output, + proof_note: 'HTTP ingest reached Kafka; controlled worker exited after processEvent and before commit; restart replayed the message without double-counting and committed lag to zero.', + }; + + const outputPath = resolve(repoRoot, options.output); + await mkdir(dirname(outputPath), { recursive: true }); + await writeFile(outputPath, `${JSON.stringify(proof, null, 2)}\n`, 'utf8'); + console.log(JSON.stringify(proof, null, 2)); + } finally { + if (worker) { + await stopWorker(worker).catch((error) => { + console.error(`Failed to stop worker: ${error instanceof Error ? 
error.message : String(error)}`); + }); + } + await db.end(); + } +} + +main().catch((error) => { + console.error(error); + process.exitCode = 1; +}); diff --git a/scripts/run-benchmark.js b/scripts/run-benchmark.js new file mode 100644 index 0000000..91876cf --- /dev/null +++ b/scripts/run-benchmark.js @@ -0,0 +1,43 @@ +#!/usr/bin/env node + +const { spawnSync } = require('node:child_process'); +const { mkdirSync, writeFileSync } = require('node:fs'); + +const suites = { + ingest: ['node', 'scripts/run-k6.js', 'tests/load/ingest-throughput.js'], + hot: ['node', 'scripts/run-k6.js', 'tests/load/hot-tenant.js'], + hotDb: ['pnpm', 'exec', 'tsx', 'scripts/measure-hot-tenant-db.ts'], + dashboard: ['node', 'scripts/run-k6.js', 'tests/load/dashboard-query.js'], + cache: ['pnpm', 'exec', 'tsx', 'scripts/measure-dashboard-cache.ts'], + worker: ['pnpm', 'exec', 'tsx', 'scripts/measure-worker-catchup.ts'], + backpressure: ['node', 'scripts/run-k6.js', 'tests/load/backpressure.js'], +}; + +const requested = process.argv.slice(2); +const names = requested.length ? 
requested : Object.keys(suites); +const runId = process.env.RUN_ID || new Date().toISOString().replace(/[:.]/g, '-'); + +mkdirSync('docs/benchmarks/evidence', { recursive: true }); +writeFileSync('docs/benchmarks/evidence/latest-run-id.txt', `${runId}\n`, 'utf8'); + +for (const name of names) { + const command = suites[name]; + if (!command) { + console.error(`Unknown benchmark suite: ${name}`); + console.error(`Available suites: ${Object.keys(suites).join(', ')}`); + process.exit(1); + } + + console.log(`Running ${name} with RUN_ID=${runId}: ${command.join(' ')}`); + const result = spawnSync(command[0], command.slice(1), { + stdio: 'inherit', + env: { + ...process.env, + RUN_ID: runId, + }, + }); + + if (result.status !== 0) { + process.exit(result.status || 1); + } +} diff --git a/scripts/run-k6.js b/scripts/run-k6.js new file mode 100644 index 0000000..e57c447 --- /dev/null +++ b/scripts/run-k6.js @@ -0,0 +1,135 @@ +#!/usr/bin/env node + +const { spawnSync } = require('node:child_process'); +const { existsSync, mkdirSync } = require('node:fs'); +const { isAbsolute, relative, resolve } = require('node:path'); + +const script = process.argv[2]; + +if (!script) { + console.error('Usage: node scripts/run-k6.js [k6 args...]'); + process.exit(1); +} + +if (!existsSync(script)) { + console.error(`k6 test file not found: ${script}`); + process.exit(1); +} + +mkdirSync('docs/benchmarks/evidence', { recursive: true }); + +const extraArgs = process.argv.slice(3); +const k6DockerImage = process.env.K6_DOCKER_IMAGE || 'grafana/k6:2.0.0'; + +function commandExists(command) { + const result = spawnSync('sh', ['-c', `command -v ${command}`], { + stdio: 'ignore', + }); + return result.status === 0; +} + +function run(command, args, options = {}) { + const result = spawnSync(command, args, { + stdio: 'inherit', + env: process.env, + ...options, + }); + + if (result.error) { + console.error(result.error.message); + return 1; + } + + return result.status || 0; +} + +if 
(commandExists('k6')) { + process.exit(run('k6', ['run', script, ...extraArgs])); +} + +if (!commandExists('docker')) { + console.error('k6 is not installed and Docker is not available for the fallback runner.'); + console.error('Install k6 or run Docker before executing PulseOps load tests.'); + process.exit(127); +} + +function dockerTargetUrl(value, fallback) { + return (value || fallback) + .replace('http://localhost:', 'http://host.docker.internal:') + .replace('https://localhost:', 'https://host.docker.internal:') + .replace('http://127.0.0.1:', 'http://host.docker.internal:') + .replace('https://127.0.0.1:', 'https://host.docker.internal:'); +} + +function dockerMountedPath(value) { + if (!value) return value; + if (isAbsolute(value)) { + const relativeToCwd = relative(process.cwd(), value); + return relativeToCwd.startsWith('..') ? value : `/work/${relativeToCwd}`; + } + return `/work/${value}`; +} + +const dockerEnv = { + ...process.env, + API_URL: dockerTargetUrl(process.env.API_URL, 'http://host.docker.internal:3001'), + GRAPHQL_URL: dockerTargetUrl(process.env.GRAPHQL_URL, 'http://host.docker.internal:3002/graphql'), + TENANT_KEYS_FILE: dockerMountedPath(process.env.TENANT_KEYS_FILE), + API_KEY: process.env.API_KEY || 'demo_key_change_this', +}; + +const envArgs = Object.entries(dockerEnv) + .filter(([name, value]) => value !== undefined && [ + 'API_URL', + 'GRAPHQL_URL', + 'API_KEY', + 'RUN_ID', + 'SUMMARY_PATH', + 'RATE', + 'DURATION', + 'BATCH_SIZE', + 'SLEEP_SECONDS', + 'PREALLOCATED_VUS', + 'MAX_VUS', + 'START_RATE', + 'PEAK_RATE', + 'RAMP_DURATION', + 'HOLD_DURATION', + 'RAMP_DOWN_DURATION', + 'TENANTS', + 'HOT_TENANT_RATIO', + 'HOT_TENANT_COUNT', + 'VUS', + 'FILTER_RATIO', + 'START_DATE', + 'END_DATE', + 'ORG_ID', + 'PROJECT_ID', + 'TENANT_KEYS_FILE', + 'DASHBOARD_TENANT_CLASS', + 'BURST_RATE', + 'BURST_RAMP', + 'BURST_HOLD', + 'RECOVERY_RATE', + 'RECOVERY', + 'HTTP_TIMEOUT', + ].includes(name)) + .flatMap(([name, value]) => ['-e', 
`${name}=${value}`]); + +const args = [ + 'run', + '--rm', + '--add-host', + 'host.docker.internal:host-gateway', + '-v', + `${resolve(process.cwd())}:/work`, + '-w', + '/work', + ...envArgs, + k6DockerImage, + 'run', + script, + ...extraArgs, +]; + +process.exit(run('docker', args, { env: dockerEnv })); diff --git a/scripts/seed-benchmark-tenants.ts b/scripts/seed-benchmark-tenants.ts new file mode 100644 index 0000000..0ddb4bf --- /dev/null +++ b/scripts/seed-benchmark-tenants.ts @@ -0,0 +1,228 @@ +#!/usr/bin/env node + +import { createHash } from 'node:crypto'; +import { mkdir, writeFile } from 'node:fs/promises'; +import { dirname, resolve } from 'node:path'; +import { config } from 'dotenv'; +import bcrypt from 'bcryptjs'; +import { Pool } from 'pg'; + +type TenantClass = 'hot' | 'medium' | 'quiet'; + +interface Options { + tenants: number; + hotTenants: number; + mediumTenants: number; + manifest: string; + saltRounds: number; +} + +interface BenchmarkTenant { + tenant_id: string; + tenant_class: TenantClass; + org_id: string; + project_id: string; + api_key: string; +} + +const repoRoot = resolve(__dirname, '..'); +config({ path: resolve(repoRoot, '.env') }); + +function usage(): never { + console.error(`Usage: + pnpm benchmark:seed-tenants -- --tenants 100 --hot-tenants 1 --medium-tenants 10 --manifest tmp/benchmark-tenants.json + +Options: + --tenants Total benchmark tenants to seed. Default: 100 + --hot-tenants Number of hot tenants. Default: 1 + --medium-tenants Number of medium tenants. Default: 10 + --manifest Local manifest for k6 TENANT_KEYS_FILE. Default: tmp/benchmark-tenants.json + --salt-rounds bcryptjs cost for local benchmark keys. 
Default: 4`); + process.exit(1); +} + +function parseNumber(value: string | undefined, name: string): number { + if (!value) usage(); + const parsed = Number(value); + if (!Number.isInteger(parsed) || parsed < 0) { + throw new Error(`${name} must be a non-negative integer`); + } + return parsed; +} + +function parseOptions(argv: string[]): Options { + const options: Options = { + tenants: 100, + hotTenants: 1, + mediumTenants: 10, + manifest: 'tmp/benchmark-tenants.json', + saltRounds: 4, + }; + + for (let i = 0; i < argv.length; i += 1) { + switch (argv[i]) { + case '--': + break; + case '--help': + case '-h': + usage(); + break; + case '--tenants': + options.tenants = parseNumber(argv[++i], '--tenants'); + break; + case '--hot-tenants': + options.hotTenants = parseNumber(argv[++i], '--hot-tenants'); + break; + case '--medium-tenants': + options.mediumTenants = parseNumber(argv[++i], '--medium-tenants'); + break; + case '--manifest': + options.manifest = argv[++i]; + break; + case '--salt-rounds': + options.saltRounds = parseNumber(argv[++i], '--salt-rounds'); + break; + default: + throw new Error(`Unknown argument: ${argv[i]}`); + } + } + + if (options.tenants < 1) throw new Error('--tenants must be at least 1'); + if (options.hotTenants < 1) throw new Error('--hot-tenants must be at least 1'); + if (options.hotTenants + options.mediumTenants > options.tenants) { + throw new Error('--hot-tenants + --medium-tenants cannot exceed --tenants'); + } + if (options.saltRounds < 4) throw new Error('--salt-rounds must be at least 4'); + + return options; +} + +function stableUuid(prefix: number, offset: number): string { + const hex = (prefix * 100000 + offset).toString(16).padStart(12, '0').slice(-12); + return `00000000-0000-4000-8000-${hex}`; +} + +function benchmarkTenants(options: Options): BenchmarkTenant[] { + return Array.from({ length: options.tenants }, (_, index) => { + let tenantClass: TenantClass = 'quiet'; + if (index < options.hotTenants) { + tenantClass = 
'hot'; + } else if (index < options.hotTenants + options.mediumTenants) { + tenantClass = 'medium'; + } + + const number = index + 1; + const tenantId = `tenant_${String(number).padStart(3, '0')}`; + + return { + tenant_id: tenantId, + tenant_class: tenantClass, + org_id: stableUuid(10, number), + project_id: stableUuid(20, number), + api_key: `pulseops_benchmark_key_${tenantId}`, + }; + }); +} + +async function seedTenant(db: Pool, tenant: BenchmarkTenant, saltRounds: number) { + const keyHash = await bcrypt.hash(tenant.api_key, saltRounds); + const fingerprint = createHash('sha256').update(tenant.api_key).digest('hex'); + const number = Number(tenant.tenant_id.slice(-3)); + const apiKeyId = stableUuid(30, number); + + await db.query( + `INSERT INTO organizations (id, name, created_at) + VALUES ($1, $2, NOW()) + ON CONFLICT (id) DO UPDATE SET + name = EXCLUDED.name, + updated_at = NOW()`, + [tenant.org_id, `Benchmark ${tenant.tenant_class} org ${tenant.tenant_id}`] + ); + + await db.query( + `INSERT INTO projects (id, org_id, name, created_at) + VALUES ($1, $2, $3, NOW()) + ON CONFLICT (id) DO UPDATE SET + org_id = EXCLUDED.org_id, + name = EXCLUDED.name, + updated_at = NOW()`, + [tenant.project_id, tenant.org_id, `Benchmark project ${tenant.tenant_id}`] + ); + + await db.query( + `INSERT INTO api_keys ( + id, + org_id, + project_id, + key_hash, + key_fingerprint, + name, + active, + created_at, + last_used_at + ) + VALUES ($1, $2, $3, $4, $5, $6, TRUE, NOW(), NOW()) + ON CONFLICT (key_fingerprint) DO UPDATE SET + org_id = EXCLUDED.org_id, + project_id = EXCLUDED.project_id, + key_hash = EXCLUDED.key_hash, + name = EXCLUDED.name, + active = TRUE, + last_used_at = NOW()`, + [ + apiKeyId, + tenant.org_id, + tenant.project_id, + keyHash, + fingerprint, + `Benchmark API key ${tenant.tenant_id}`, + ] + ); +} + +async function main() { + const options = parseOptions(process.argv.slice(2)); + const tenants = benchmarkTenants(options); + const db = new Pool({ + 
connectionString: process.env.DATABASE_URL || 'postgresql://pulseops:dev_password_change_in_production@localhost:5432/pulseops_dev', + }); + + try { + for (const tenant of tenants) { + await seedTenant(db, tenant, options.saltRounds); + } + + const manifestPath = resolve(repoRoot, options.manifest); + await mkdir(dirname(manifestPath), { recursive: true }); + await writeFile( + manifestPath, + `${JSON.stringify({ + generated_at: new Date().toISOString(), + note: 'Local benchmark credentials only. Do not use for production.', + tenants, + counts: { + total: tenants.length, + hot: tenants.filter((tenant) => tenant.tenant_class === 'hot').length, + medium: tenants.filter((tenant) => tenant.tenant_class === 'medium').length, + quiet: tenants.filter((tenant) => tenant.tenant_class === 'quiet').length, + }, + }, null, 2)}\n`, + 'utf8' + ); + + console.log(JSON.stringify({ + seeded: tenants.length, + hot: tenants.filter((tenant) => tenant.tenant_class === 'hot').length, + medium: tenants.filter((tenant) => tenant.tenant_class === 'medium').length, + quiet: tenants.filter((tenant) => tenant.tenant_class === 'quiet').length, + manifest: options.manifest, + }, null, 2)); + } finally { + await db.end(); + } +} + +main().catch((error) => { + console.error(error); + process.exitCode = 1; +}); diff --git a/scripts/seed-demo-data.sql b/scripts/seed-demo-data.sql index 6c119cf..02d1ce2 100644 --- a/scripts/seed-demo-data.sql +++ b/scripts/seed-demo-data.sql @@ -1,23 +1,54 @@ --- Insert demo organization -INSERT INTO organizations (id, name, created_at) +-- Seed demo tenant and API key. +-- Actual key: demo_key_change_this +-- key_hash verified by scripts/test-hash.js. +-- key_fingerprint is SHA-256("demo_key_change_this"). 
+ +INSERT INTO organizations (id, name, created_at) VALUES ('00000000-0000-0000-0000-000000000001', 'Demo Organization', NOW()) -ON CONFLICT (id) DO NOTHING; +ON CONFLICT (id) DO UPDATE SET + name = EXCLUDED.name, + updated_at = NOW(); --- Insert demo project INSERT INTO projects (id, org_id, name, created_at) -VALUES ('00000000-0000-0000-0000-000000000002', '00000000-0000-0000-0000-000000000001', 'Demo Project', NOW()) -ON CONFLICT (id) DO NOTHING; +VALUES ( + '00000000-0000-0000-0000-000000000002', + '00000000-0000-0000-0000-000000000001', + 'Demo Project', + NOW() +) +ON CONFLICT (id) DO UPDATE SET + org_id = EXCLUDED.org_id, + name = EXCLUDED.name, + updated_at = NOW(); --- Insert demo API key (the actual key is "demo_key_change_this", but we store the bcrypt hash) --- Hash generated with: bcrypt.hash("demo_key_change_this", 10) -INSERT INTO api_keys (org_id, key_hash, active, created_at, last_used_at) +INSERT INTO api_keys ( + id, + org_id, + project_id, + key_hash, + key_fingerprint, + name, + active, + created_at, + last_used_at +) VALUES ( - '00000000-0000-0000-0000-000000000001', - '$2b$10$YourBcryptHashHereChangeThis1234567890abcdefghijklmnopqrstuv', - true, - NOW(), - NOW() + '00000000-0000-0000-0000-000000000003', + '00000000-0000-0000-0000-000000000001', + '00000000-0000-0000-0000-000000000002', + '$2b$10$NQbr7kAR4n9lQ1/JwS0jn.sEhYgR4Pi5VejnFudNV/LNpk9DIHSOy', + '3543274f3d56b4b5860dd25cfb944d60e953fbe9c67f3de2d1deaf2befae1b7f', + 'Demo API Key', + TRUE, + NOW(), + NOW() ) -ON CONFLICT DO NOTHING; +ON CONFLICT (key_fingerprint) DO UPDATE SET + org_id = EXCLUDED.org_id, + project_id = EXCLUDED.project_id, + key_hash = EXCLUDED.key_hash, + name = EXCLUDED.name, + active = EXCLUDED.active, + last_used_at = EXCLUDED.last_used_at; SELECT 'Seed data inserted successfully' AS status; diff --git a/scripts/test-hash.js b/scripts/test-hash.js index d8a2c3e..4616ce2 100644 --- a/scripts/test-hash.js +++ b/scripts/test-hash.js @@ -1,5 +1,5 @@ // Test bcrypt hash 
verification -const bcrypt = require('bcrypt'); +const bcrypt = require('bcryptjs'); const apiKey = 'demo_key_change_this'; const hash = '$2b$10$NQbr7kAR4n9lQ1/JwS0jn.sEhYgR4Pi5VejnFudNV/LNpk9DIHSOy'; diff --git a/scripts/verify-fresh-migration.sh b/scripts/verify-fresh-migration.sh new file mode 100755 index 0000000..b6b4ae5 --- /dev/null +++ b/scripts/verify-fresh-migration.sh @@ -0,0 +1,129 @@ +#!/usr/bin/env bash +set -euo pipefail + +RUN_ID="${RUN_ID:-$(date -u +%Y-%m-%dT%H-%M-%SZ)}" +POSTGRES_IMAGE="${POSTGRES_IMAGE:-postgres:16-alpine}" +HOST_PORT="${HOST_PORT:-}" +CONTAINER_NAME="${CONTAINER_NAME:-pulseops-migration-proof-${RUN_ID//[^a-zA-Z0-9]/-}}" +OUTPUT="${OUTPUT:-docs/migrations/evidence/fresh-migration-${RUN_ID}.txt}" +DB_USER="pulseops" +DB_PASSWORD="dev_password_change_in_production" +DB_NAME="pulseops_dev" + +mkdir -p "$(dirname "$OUTPUT")" +: > "$OUTPUT" + +log() { + printf '%s\n' "$*" | tee -a "$OUTPUT" +} + +cleanup() { + docker stop "$CONTAINER_NAME" >/dev/null 2>&1 || true +} +trap cleanup EXIT + +log "# Fresh Migration Verification" +log "" +log "run_id: ${RUN_ID}" +log "postgres_image: ${POSTGRES_IMAGE}" +log "container: ${CONTAINER_NAME}" +log "host_port_requested: ${HOST_PORT:-auto}" +log "" + +if [[ -n "$HOST_PORT" ]]; then + PORT_SPEC="127.0.0.1:${HOST_PORT}:5432" +else + PORT_SPEC="127.0.0.1::5432" +fi + +docker run --rm -d \ + --name "$CONTAINER_NAME" \ + -e "POSTGRES_USER=${DB_USER}" \ + -e "POSTGRES_PASSWORD=${DB_PASSWORD}" \ + -e "POSTGRES_DB=${DB_NAME}" \ + -p "$PORT_SPEC" \ + "$POSTGRES_IMAGE" >/dev/null + +if [[ -z "$HOST_PORT" ]]; then + HOST_PORT="$(docker port "$CONTAINER_NAME" 5432/tcp | awk -F: 'NR == 1 { print $NF }')" +fi + +log "host_port_actual: ${HOST_PORT}" + +for _ in $(seq 1 60); do + if docker exec "$CONTAINER_NAME" pg_isready -U "$DB_USER" -d "$DB_NAME" >/dev/null 2>&1; then + break + fi + sleep 1 +done + +docker exec "$CONTAINER_NAME" pg_isready -U "$DB_USER" -d "$DB_NAME" >/dev/null + +log "## Migration Output" 
+DATABASE_URL="postgresql://${DB_USER}:${DB_PASSWORD}@localhost:${HOST_PORT}/${DB_NAME}" \ + pnpm --filter @pulseops/ingest-api db:migrate 2>&1 | tee -a "$OUTPUT" + +psql_temp() { + docker exec "$CONTAINER_NAME" psql -X -U "$DB_USER" -d "$DB_NAME" -tAc "$1" +} + +log "" +log "## Schema Checks" + +migrations="$(psql_temp "SELECT string_agg(filename, ', ' ORDER BY filename) FROM schema_migrations;")" +partition_count="$(psql_temp "SELECT count(*) FROM pg_inherits WHERE inhparent = 'events'::regclass;")" +materialized_view="$(psql_temp "SELECT to_regclass('public.mv_dashboard_metrics') IS NOT NULL;")" +expected_indexes="$(psql_temp "SELECT string_agg(indexname, ', ' ORDER BY indexname) FROM pg_indexes WHERE schemaname = 'public' AND indexname IN ('idx_events_org_project_time', 'idx_events_tenant_metric_time', 'idx_events_event_id_time', 'idx_mv_dashboard_unique');")" +unwanted_objects="$(psql_temp " +WITH unwanted(name) AS ( + VALUES + ('idx_api_keys_key_fingerprint'), + ('idx_api_keys_key_hash'), + ('idx_api_keys_org_id'), + ('idx_events_user_id_time'), + ('idx_events_session_id'), + ('events_event_id_timestamp_key') +), +objects AS ( + SELECT indexname AS name FROM pg_indexes WHERE schemaname = 'public' + UNION ALL + SELECT conname AS name FROM pg_constraint WHERE connamespace = 'public'::regnamespace +) +SELECT COALESCE(string_agg(objects.name, ', ' ORDER BY objects.name), '') +FROM objects +JOIN unwanted USING (name); +")" + +log "schema_migrations: ${migrations}" +log "event_partitions: ${partition_count}" +log "mv_dashboard_metrics_exists: ${materialized_view}" +log "expected_indexes: ${expected_indexes}" +log "unwanted_duplicate_objects: ${unwanted_objects:-none}" + +if [[ "$migrations" != "006_performance_optimizations.sql, 007_article_evidence_schema.sql" ]]; then + log "FAIL: migration ledger did not contain the expected migration files" + exit 1 +fi + +if [[ "$partition_count" -lt 6 ]]; then + log "FAIL: expected at least 6 event partitions for the local 
benchmark window" + exit 1 +fi + +if [[ "$materialized_view" != "t" ]]; then + log "FAIL: mv_dashboard_metrics was not created" + exit 1 +fi + +if [[ "$expected_indexes" != "idx_events_event_id_time, idx_events_org_project_time, idx_events_tenant_metric_time, idx_mv_dashboard_unique" ]]; then + log "FAIL: expected benchmark/query-plan indexes were not present" + exit 1 +fi + +if [[ -n "$unwanted_objects" ]]; then + log "FAIL: fresh migration produced duplicate legacy objects: ${unwanted_objects}" + exit 1 +fi + +log "" +log "PASS: fresh migration produced the expected schema without known duplicate index/constraint drift." diff --git a/services/graphql-api/package.json b/services/graphql-api/package.json index 7999578..73e0d83 100644 --- a/services/graphql-api/package.json +++ b/services/graphql-api/package.json @@ -7,7 +7,8 @@ "dev": "tsx watch src/index.ts", "build": "tsc", "start": "node dist/index.js", - "test": "jest", + "test": "vitest run", + "test:unit": "vitest run", "typecheck": "tsc --noEmit", "codegen": "graphql-codegen --config codegen.yml" }, @@ -15,6 +16,7 @@ "@apollo/server": "^4.10.0", "@as-integrations/fastify": "^2.1.1", "@fastify/cors": "^8.5.0", + "bcryptjs": "^3.0.3", "dotenv": "^16.4.5", "fastify": "^4.25.2", "graphql": "^16.8.1", diff --git a/services/graphql-api/src/auth.ts b/services/graphql-api/src/auth.ts new file mode 100644 index 0000000..b43f0b3 --- /dev/null +++ b/services/graphql-api/src/auth.ts @@ -0,0 +1,58 @@ +import { createHash } from 'node:crypto'; +import bcrypt from 'bcryptjs'; +import Redis from 'ioredis'; +import { Pool } from 'pg'; + +export interface AuthenticatedTenant { + orgId: string; + projectId?: string; +} + +export async function authenticateApiKey(db: Pool, redis: Redis, apiKey?: string): Promise { + if (!apiKey) { + return null; + } + + const fingerprint = createHash('sha256').update(apiKey).digest('hex'); + const cacheKey = `apikey:${fingerprint}`; + const cached = await redis.get(cacheKey); + + if (cached) { 
+ const { org_id, project_id } = JSON.parse(cached); + return { + orgId: org_id, + projectId: project_id || undefined, + }; + } + + const result = await db.query( + `SELECT id, org_id, project_id, key_hash, active + FROM api_keys + WHERE active = true + AND key_fingerprint = $1 + LIMIT 1`, + [fingerprint] + ); + + for (const row of result.rows) { + const isValid = await bcrypt.compare(apiKey, row.key_hash); + if (isValid && row.active) { + await db.query( + 'UPDATE api_keys SET last_used_at = NOW() WHERE id = $1', + [row.id] + ); + + await redis.setex(cacheKey, 300, JSON.stringify({ + org_id: row.org_id, + project_id: row.project_id || null, + })); + + return { + orgId: row.org_id, + projectId: row.project_id || undefined, + }; + } + } + + return null; +} diff --git a/services/graphql-api/src/index.ts b/services/graphql-api/src/index.ts index 11957d1..380de09 100644 --- a/services/graphql-api/src/index.ts +++ b/services/graphql-api/src/index.ts @@ -8,18 +8,22 @@ import { ApolloServer } from '@apollo/server'; import fastifyApollo from '@as-integrations/fastify'; import Fastify from 'fastify'; import cors from '@fastify/cors'; +import { GraphQLError } from 'graphql'; import Redis from 'ioredis'; import { Pool } from 'pg'; import { typeDefs } from './schema'; import { resolvers } from './resolvers'; import { logger } from './utils/logger'; +import { authenticateApiKey, type AuthenticatedTenant } from './auth'; +import { + metrics, + metricsHandler, + recordDbPoolGauges, + register as metricsRegister, +} from './metrics'; const PORT = parseInt(process.env.PORT || '3002', 10); -// Debug environment loading -console.log('[DEBUG] DATABASE_URL:', process.env.DATABASE_URL ? 'LOADED' : 'NOT LOADED'); -console.log('[DEBUG] REDIS_URL:', process.env.REDIS_URL ? 
'LOADED' : 'NOT LOADED'); - // Initialize database const db = new Pool({ connectionString: process.env.DATABASE_URL || 'postgresql://pulseops:dev_password_change_in_production@localhost:5432/pulseops_dev', @@ -33,6 +37,8 @@ const redis = new Redis(process.env.REDIS_URL || 'redis://localhost:6379'); interface Context { db: Pool; redis: Redis; + auth: AuthenticatedTenant; + requestId?: string; } // Initialize Apollo Server @@ -66,7 +72,25 @@ const apollo = new ApolloServer({ }); await app.register(fastifyApollo(apollo), { - context: async () => ({ db, redis }), + context: async (request) => { + const header = request.headers['x-api-key']; + const apiKey = Array.isArray(header) ? header[0] : header; + const auth = await authenticateApiKey(db, redis, apiKey); + + if (!auth) { + metrics.graphqlAuthFailures.inc(); + logger.warn({ + request_id: request.id, + trace_id: request.id, + }, 'GraphQL request rejected by API key auth'); + + throw new GraphQLError('Unauthorized', { + extensions: { code: 'UNAUTHENTICATED' }, + }); + } + + return { db, redis, auth, requestId: request.id }; + }, }); // Health check @@ -75,6 +99,13 @@ const apollo = new ApolloServer({ timestamp: new Date().toISOString(), })); + app.get('/metrics', async (_request, reply) => { + recordDbPoolGauges(db); + return reply + .header('Content-Type', metricsRegister.contentType) + .send(await metricsHandler()); + }); + // Graceful shutdown async function shutdown() { app.log.info('Shutting down...'); diff --git a/services/graphql-api/src/metrics.ts b/services/graphql-api/src/metrics.ts new file mode 100644 index 0000000..6974fb4 --- /dev/null +++ b/services/graphql-api/src/metrics.ts @@ -0,0 +1,83 @@ +import { collectDefaultMetrics, Counter, Gauge, Histogram, Registry } from 'prom-client'; +import type { Pool } from 'pg'; + +export const register = new Registry(); + +collectDefaultMetrics({ register }); + +export const metrics = { + graphqlQueryDuration: new Histogram({ + name: 
'graphql_query_duration_seconds', + help: 'GraphQL resolver duration in seconds', + labelNames: ['operation'], + buckets: [0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1, 5], + registers: [register], + }), + + graphqlQueriesTotal: new Counter({ + name: 'graphql_queries_total', + help: 'Total GraphQL resolver executions', + labelNames: ['operation'], + registers: [register], + }), + + graphqlAuthFailures: new Counter({ + name: 'graphql_auth_failures_total', + help: 'Total GraphQL API key authentication failures', + registers: [register], + }), + + redisCacheHits: new Counter({ + name: 'graphql_redis_cache_hits_total', + help: 'Total Redis cache hits from GraphQL resolvers', + labelNames: ['query_type'], + registers: [register], + }), + + redisCacheMisses: new Counter({ + name: 'graphql_redis_cache_misses_total', + help: 'Total Redis cache misses from GraphQL resolvers', + labelNames: ['query_type'], + registers: [register], + }), + + dbConnectionsActive: new Gauge({ + name: 'graphql_db_connections_active', + help: 'Active PostgreSQL connections in the GraphQL API pool', + registers: [register], + }), + + dbConnectionsIdle: new Gauge({ + name: 'graphql_db_connections_idle', + help: 'Idle PostgreSQL connections in the GraphQL API pool', + registers: [register], + }), +}; + +export function cacheNamespace(cacheKey: string) { + return cacheKey.split(':', 1)[0] || 'unknown'; +} + +export function recordCache(cacheKey: string, hit: boolean) { + const queryType = cacheNamespace(cacheKey); + + if (hit) { + metrics.redisCacheHits.inc({ query_type: queryType }); + } else { + metrics.redisCacheMisses.inc({ query_type: queryType }); + } +} + +export function recordGraphqlQuery(operation: string, durationMs: number) { + metrics.graphqlQueriesTotal.inc({ operation }); + metrics.graphqlQueryDuration.observe({ operation }, Math.max(0, durationMs) / 1000); +} + +export function recordDbPoolGauges(db: Pool) { + metrics.dbConnectionsActive.set(db.totalCount - db.idleCount); + 
metrics.dbConnectionsIdle.set(db.idleCount); +} + +export async function metricsHandler() { + return register.metrics(); +} diff --git a/services/graphql-api/src/resolvers.ts b/services/graphql-api/src/resolvers.ts index 4032dab..fef841f 100644 --- a/services/graphql-api/src/resolvers.ts +++ b/services/graphql-api/src/resolvers.ts @@ -1,10 +1,20 @@ import { Pool } from 'pg'; import Redis from 'ioredis'; -import { GraphQLScalarType, Kind } from 'graphql'; +import { GraphQLError, GraphQLScalarType, Kind } from 'graphql'; +import { logger } from './utils/logger'; +import { type AuthenticatedTenant } from './auth'; +import { recordCache, recordGraphqlQuery } from './metrics'; interface Context { db: Pool; redis: Redis; + auth: AuthenticatedTenant; + requestId?: string; +} + +interface TenantScope { + orgId: string; + projectId: string; } // Date scalar @@ -85,6 +95,73 @@ function buildPropertyClauses(filters: Record, params: any[]) { return clauses.length ? ` AND ${clauses.join(' AND ')}` : ''; } +function assertTenantScope(args: { orgId: string; projectId: string }, context: Context): TenantScope { + if (context.auth.orgId !== args.orgId) { + throw new GraphQLError('Forbidden', { + extensions: { code: 'FORBIDDEN' }, + }); + } + + if (context.auth.projectId && context.auth.projectId !== args.projectId) { + throw new GraphQLError('Forbidden', { + extensions: { code: 'FORBIDDEN' }, + }); + } + + return { + orgId: context.auth.orgId, + projectId: context.auth.projectId || args.projectId, + }; +} + +async function cacheVersion(redis: Redis, tenant: TenantScope) { + return (await redis.get(`cache_version:${tenant.orgId}:${tenant.projectId}`)) || '0'; +} + +function scopedCacheKey( + namespace: string, + tenant: TenantScope, + version: string, + parts: Array +) { + return [ + namespace, + tenant.orgId, + tenant.projectId, + `v${version}`, + ...parts.map((part) => part ?? 
'all'), + ].join(':'); +} + +function logCache(context: Context, tenant: TenantScope, cacheKey: string, hit: boolean) { + recordCache(cacheKey, hit); + + logger.info({ + request_id: context.requestId, + trace_id: context.requestId, + org_id: tenant.orgId, + project_id: tenant.projectId, + tenant: `${tenant.orgId}:${tenant.projectId}`, + cache_key: cacheKey, + cache_result: hit ? 'hit' : 'miss', + }, hit ? 'GraphQL cache hit' : 'GraphQL cache miss'); +} + +function logQuery(context: Context, tenant: TenantScope, operation: string, startedAt: number) { + const durationMs = Date.now() - startedAt; + recordGraphqlQuery(operation, durationMs); + + logger.info({ + request_id: context.requestId, + trace_id: context.requestId, + org_id: tenant.orgId, + project_id: tenant.projectId, + tenant: `${tenant.orgId}:${tenant.projectId}`, + graphql_operation: operation, + graphql_query_duration_ms: durationMs, + }, 'GraphQL query resolved'); +} + export const resolvers = { Date: dateScalar, JSON: jsonScalar, @@ -92,23 +169,30 @@ export const resolvers = { Query: { async dailyActiveUsers( _: any, - { orgId, projectId, startDate, endDate, filters }: any, - { db, redis }: Context + args: any, + context: Context ) { + const { db, redis } = context; + const { startDate, endDate, filters } = args; + const tenant = assertTenantScope(args, context); + const startedAt = Date.now(); const normalizedFilters = normalizeFilters(filters); const filtersKey = JSON.stringify(normalizedFilters); - // Try cache first - const cacheKey = `dau:${orgId}:${projectId}:${startDate}:${endDate}:${filtersKey}`; + const version = await cacheVersion(redis, tenant); + const cacheKey = scopedCacheKey('dau', tenant, version, [startDate, endDate, filtersKey]); const cached = await redis.get(cacheKey); if (cached) { + logCache(context, tenant, cacheKey, true); + logQuery(context, tenant, 'dailyActiveUsers', startedAt); return JSON.parse(cached); } + logCache(context, tenant, cacheKey, false); let result; if 
(Object.keys(normalizedFilters).length > 0) { - const params: any[] = [orgId, projectId, startDate, endDate]; + const params: any[] = [tenant.orgId, tenant.projectId, startDate, endDate]; const filterClause = buildPropertyClauses(normalizedFilters, params); result = await db.query( `SELECT DATE(timestamp) as date, COUNT(DISTINCT user_id)::float as value @@ -132,7 +216,7 @@ export const resolvers = { AND date >= $3 AND date <= $4 ORDER BY date ASC`, - [orgId, projectId, startDate, endDate] + [tenant.orgId, tenant.projectId, startDate, endDate] ); } @@ -141,22 +225,31 @@ export const resolvers = { // Cache for 5 minutes await redis.setex(cacheKey, 300, JSON.stringify(data)); + logQuery(context, tenant, 'dailyActiveUsers', startedAt); return data; }, async eventCounts( _: any, - { orgId, projectId, startDate, endDate, eventName, filters }: any, - { db, redis }: Context + args: any, + context: Context ) { + const { db, redis } = context; + const { startDate, endDate, eventName, filters } = args; + const tenant = assertTenantScope(args, context); + const startedAt = Date.now(); const normalizedFilters = normalizeFilters(filters); const filtersKey = JSON.stringify(normalizedFilters); - const cacheKey = `events:${orgId}:${projectId}:${startDate}:${endDate}:${eventName || 'all'}:${filtersKey}`; + const version = await cacheVersion(redis, tenant); + const cacheKey = scopedCacheKey('events', tenant, version, [startDate, endDate, eventName, filtersKey]); const cached = await redis.get(cacheKey); if (cached) { + logCache(context, tenant, cacheKey, true); + logQuery(context, tenant, 'eventCounts', startedAt); return JSON.parse(cached); } + logCache(context, tenant, cacheKey, false); let result; if (Object.keys(normalizedFilters).length > 0) { @@ -168,7 +261,7 @@ export const resolvers = { AND timestamp::date >= $3 AND timestamp::date <= $4 `; - const params: any[] = [orgId, projectId, startDate, endDate]; + const params: any[] = [tenant.orgId, tenant.projectId, startDate, 
endDate]; if (eventName) { params.push(eventName); @@ -181,7 +274,7 @@ export const resolvers = { result = await db.query(query, params); } else { let query = ` - SELECT + SELECT dimensions->>'event_name' as "eventName", SUM(metric_value)::int as count FROM daily_aggregates @@ -192,7 +285,7 @@ export const resolvers = { AND date <= $4 `; - const params: any[] = [orgId, projectId, startDate, endDate]; + const params: any[] = [tenant.orgId, tenant.projectId, startDate, endDate]; if (eventName) { query += ` AND dimensions->>'event_name' = $5`; @@ -207,26 +300,35 @@ export const resolvers = { await redis.setex(cacheKey, 300, JSON.stringify(data)); + logQuery(context, tenant, 'eventCounts', startedAt); return data; }, async totalEvents( _: any, - { orgId, projectId, startDate, endDate, filters }: any, - { db, redis }: Context + args: any, + context: Context ) { + const { db, redis } = context; + const { startDate, endDate, filters } = args; + const tenant = assertTenantScope(args, context); + const startedAt = Date.now(); const normalizedFilters = normalizeFilters(filters); const filtersKey = JSON.stringify(normalizedFilters); - const cacheKey = `total:${orgId}:${projectId}:${startDate}:${endDate}:${filtersKey}`; + const version = await cacheVersion(redis, tenant); + const cacheKey = scopedCacheKey('total', tenant, version, [startDate, endDate, filtersKey]); const cached = await redis.get(cacheKey); if (cached) { + logCache(context, tenant, cacheKey, true); + logQuery(context, tenant, 'totalEvents', startedAt); return parseInt(cached, 10); } + logCache(context, tenant, cacheKey, false); let total = 0; if (Object.keys(normalizedFilters).length > 0) { - const params: any[] = [orgId, projectId, startDate, endDate]; + const params: any[] = [tenant.orgId, tenant.projectId, startDate, endDate]; const filterClause = buildPropertyClauses(normalizedFilters, params); const result = await db.query( `SELECT COUNT(*)::int as total @@ -247,13 +349,14 @@ export const resolvers = { AND 
metric_name = 'total_events' AND date >= $3 AND date <= $4`, - [orgId, projectId, startDate, endDate] + [tenant.orgId, tenant.projectId, startDate, endDate] ); total = result.rows[0]?.total || 0; } await redis.setex(cacheKey, 300, total.toString()); + logQuery(context, tenant, 'totalEvents', startedAt); return total; }, @@ -262,13 +365,15 @@ export const resolvers = { args: any, context: Context ) { + const tenant = assertTenantScope(args, context); + const startedAt = Date.now(); const [totalEvents, dailyActiveUsers, topEvents] = await Promise.all([ resolvers.Query.totalEvents(_, args, context), resolvers.Query.dailyActiveUsers(_, args, context), resolvers.Query.eventCounts(_, args, context), ]); - return { + const data = { totalEvents, dailyActiveUsers, topEvents: topEvents.slice(0, 10), @@ -277,21 +382,32 @@ export const resolvers = { end: args.endDate, }, }; + + logQuery(context, tenant, 'metrics', startedAt); + return data; }, async eventCountsOverTime( _: any, - { orgId, projectId, startDate, endDate, eventName, filters }: any, - { db, redis }: Context + args: any, + context: Context ) { + const { db, redis } = context; + const { startDate, endDate, eventName, filters } = args; + const tenant = assertTenantScope(args, context); + const startedAt = Date.now(); const normalizedFilters = normalizeFilters(filters); const filtersKey = JSON.stringify(normalizedFilters); - const cacheKey = `events-series:${orgId}:${projectId}:${startDate}:${endDate}:${eventName || 'all'}:${filtersKey}`; + const version = await cacheVersion(redis, tenant); + const cacheKey = scopedCacheKey('events-series', tenant, version, [startDate, endDate, eventName, filtersKey]); const cached = await redis.get(cacheKey); if (cached) { + logCache(context, tenant, cacheKey, true); + logQuery(context, tenant, 'eventCountsOverTime', startedAt); return JSON.parse(cached); } + logCache(context, tenant, cacheKey, false); let result; if (Object.keys(normalizedFilters).length > 0) { @@ -303,7 +419,7 @@ 
export const resolvers = { AND timestamp::date >= $3 AND timestamp::date <= $4 `; - const params: any[] = [orgId, projectId, startDate, endDate]; + const params: any[] = [tenant.orgId, tenant.projectId, startDate, endDate]; if (eventName) { params.push(eventName); @@ -325,7 +441,7 @@ export const resolvers = { AND date <= $4 `; - const params: any[] = [orgId, projectId, startDate, endDate]; + const params: any[] = [tenant.orgId, tenant.projectId, startDate, endDate]; if (eventName) { query += ` AND dimensions->>'event_name' = $5`; @@ -340,16 +456,21 @@ export const resolvers = { await redis.setex(cacheKey, 300, JSON.stringify(data)); + logQuery(context, tenant, 'eventCountsOverTime', startedAt); return data; }, async recentEvents( _: any, - { orgId, projectId, limit, filters }: any, - { db }: Context + args: any, + context: Context ) { + const { db } = context; + const { limit, filters } = args; + const tenant = assertTenantScope(args, context); + const startedAt = Date.now(); const normalizedFilters = normalizeFilters(filters); - const params: any[] = [orgId, projectId]; + const params: any[] = [tenant.orgId, tenant.projectId]; let query = ` SELECT id::text as id, event_name as "eventName", user_id as "userId", timestamp, properties FROM events @@ -369,6 +490,7 @@ export const resolvers = { params ); + logQuery(context, tenant, 'recentEvents', startedAt); return result.rows; }, }, diff --git a/services/graphql-api/tests/auth.test.ts b/services/graphql-api/tests/auth.test.ts new file mode 100644 index 0000000..5d2d989 --- /dev/null +++ b/services/graphql-api/tests/auth.test.ts @@ -0,0 +1,53 @@ +import bcrypt from 'bcryptjs'; +import { describe, expect, it, vi } from 'vitest'; +import { authenticateApiKey } from '../src/auth'; + +describe('GraphQL API key auth', () => { + it('authenticates active keys and caches the tenant scope', async () => { + const keyHash = await bcrypt.hash('test_key', 4); + const db = { + query: vi.fn() + .mockResolvedValueOnce({ + rows: [{ + 
id: 'key_1', + org_id: 'org_1', + project_id: 'project_1', + key_hash: keyHash, + active: true, + }], + }) + .mockResolvedValueOnce({ rows: [] }), + } as any; + const redis = { + get: vi.fn().mockResolvedValue(null), + setex: vi.fn().mockResolvedValue('OK'), + } as any; + + const auth = await authenticateApiKey(db, redis, 'test_key'); + + expect(auth).toEqual({ orgId: 'org_1', projectId: 'project_1' }); + expect(db.query).toHaveBeenCalledTimes(2); + expect(redis.setex).toHaveBeenCalledWith( + expect.stringMatching(/^apikey:/), + 300, + JSON.stringify({ org_id: 'org_1', project_id: 'project_1' }) + ); + }); + + it('uses cached tenant scope without scanning API keys', async () => { + const db = { + query: vi.fn(), + } as any; + const redis = { + get: vi.fn().mockResolvedValue(JSON.stringify({ + org_id: 'org_1', + project_id: 'project_1', + })), + } as any; + + const auth = await authenticateApiKey(db, redis, 'test_key'); + + expect(auth).toEqual({ orgId: 'org_1', projectId: 'project_1' }); + expect(db.query).not.toHaveBeenCalled(); + }); +}); diff --git a/services/graphql-api/tests/metrics.test.ts b/services/graphql-api/tests/metrics.test.ts new file mode 100644 index 0000000..c645dbe --- /dev/null +++ b/services/graphql-api/tests/metrics.test.ts @@ -0,0 +1,31 @@ +import { beforeEach, describe, expect, it } from 'vitest'; +import { + cacheNamespace, + recordCache, + recordGraphqlQuery, + register, +} from '../src/metrics'; + +describe('GraphQL metrics', () => { + beforeEach(() => { + register.resetMetrics(); + }); + + it('derives low-cardinality cache namespaces from scoped cache keys', () => { + expect(cacheNamespace('total:org:project:v1:2026-06-01')).toBe('total'); + expect(cacheNamespace('events-series:org:project:v1:2026-06-01')).toBe('events-series'); + }); + + it('records cache and resolver metrics', async () => { + recordCache('total:org:project:v1:2026-06-01', true); + recordCache('events:org:project:v1:2026-06-01', false); + 
recordGraphqlQuery('totalEvents', 12); + + const output = await register.metrics(); + + expect(output).toContain('graphql_redis_cache_hits_total{query_type="total"} 1'); + expect(output).toContain('graphql_redis_cache_misses_total{query_type="events"} 1'); + expect(output).toContain('graphql_queries_total{operation="totalEvents"} 1'); + expect(output).toContain('graphql_query_duration_seconds_count{operation="totalEvents"} 1'); + }); +}); diff --git a/services/graphql-api/tests/resolvers.test.ts b/services/graphql-api/tests/resolvers.test.ts new file mode 100644 index 0000000..0bc5a35 --- /dev/null +++ b/services/graphql-api/tests/resolvers.test.ts @@ -0,0 +1,73 @@ +import { describe, expect, it, vi } from 'vitest'; +import { resolvers } from '../src/resolvers'; + +const orgId = '00000000-0000-0000-0000-000000000001'; +const projectId = '00000000-0000-0000-0000-000000000002'; +const otherOrgId = '00000000-0000-0000-0000-000000000003'; +const otherProjectId = '00000000-0000-0000-0000-000000000004'; + +function createContext(overrides: Record = {}) { + return { + auth: { orgId, projectId }, + requestId: 'req_1', + db: { + query: vi.fn().mockResolvedValue({ rows: [{ total: 3 }] }), + }, + redis: { + get: vi.fn().mockImplementation(async (key: string) => { + if (key === `cache_version:${orgId}:${projectId}`) return '7'; + return null; + }), + setex: vi.fn().mockResolvedValue('OK'), + }, + ...overrides, + } as any; +} + +describe('GraphQL resolver authz and cache keys', () => { + it('rejects cross-org arguments', async () => { + const context = createContext(); + + await expect(resolvers.Query.totalEvents(null, { + orgId: otherOrgId, + projectId, + startDate: '2026-06-01', + endDate: '2026-06-16', + }, context)).rejects.toThrow('Forbidden'); + }); + + it('rejects cross-project arguments for project-scoped keys', async () => { + const context = createContext(); + + await expect(resolvers.Query.totalEvents(null, { + orgId, + projectId: otherProjectId, + startDate: 
'2026-06-01', + endDate: '2026-06-16', + }, context)).rejects.toThrow('Forbidden'); + }); + + it('uses authenticated tenant and cache version in cache keys', async () => { + const context = createContext(); + + const total = await resolvers.Query.totalEvents(null, { + orgId, + projectId, + startDate: '2026-06-01', + endDate: '2026-06-16', + }, context); + + expect(total).toBe(3); + expect(context.db.query).toHaveBeenCalledWith(expect.stringContaining('daily_aggregates'), [ + orgId, + projectId, + '2026-06-01', + '2026-06-16', + ]); + expect(context.redis.setex).toHaveBeenCalledWith( + `total:${orgId}:${projectId}:v7:2026-06-01:2026-06-16:{}`, + 300, + '3' + ); + }); +}); diff --git a/services/graphql-api/tsconfig.json b/services/graphql-api/tsconfig.json index d78b0b6..fe14a06 100644 --- a/services/graphql-api/tsconfig.json +++ b/services/graphql-api/tsconfig.json @@ -1,11 +1,11 @@ { "compilerOptions": { "target": "ES2022", - "module": "CommonJS", + "module": "Node16", "lib": [ "ES2022" ], - "moduleResolution": "node", + "moduleResolution": "Node16", "rootDir": "./src", "outDir": "./dist", "esModuleInterop": true, diff --git a/services/ingest-api/package.json b/services/ingest-api/package.json index 700ecbb..bb1a3cb 100644 --- a/services/ingest-api/package.json +++ b/services/ingest-api/package.json @@ -20,7 +20,7 @@ "dependencies": { "@fastify/cors": "^8.5.0", "@fastify/rate-limit": "^9.1.0", - "bcrypt": "^5.1.1", + "bcryptjs": "^3.0.3", "dotenv": "^16.4.5", "fastify": "^4.25.2", "ioredis": "^5.3.2", @@ -34,7 +34,6 @@ }, "devDependencies": { "@jest/globals": "^29.7.0", - "@types/bcrypt": "^5.0.2", "@types/jest": "^29.5.11", "@types/jsonwebtoken": "^9.0.5", "@types/node": "^20.0.0", diff --git a/services/ingest-api/scripts/migrate.ts b/services/ingest-api/scripts/migrate.ts new file mode 100644 index 0000000..f8fc606 --- /dev/null +++ b/services/ingest-api/scripts/migrate.ts @@ -0,0 +1,62 @@ +import { config } from 'dotenv'; +import { resolve } from 'node:path'; 
+import { readdir, readFile } from 'node:fs/promises'; +import { Pool } from 'pg'; + +const repoRoot = resolve(__dirname, '../../..'); +config({ path: resolve(repoRoot, '.env') }); + +const db = new Pool({ + connectionString: process.env.DATABASE_URL || 'postgresql://pulseops:dev_password_change_in_production@localhost:5432/pulseops_dev', +}); + +async function runSqlFile(path: string) { + const sql = await readFile(path, 'utf8'); + if (!sql.trim()) return; + + console.log(`Applying ${path.replace(repoRoot + '/', '')}`); + await db.query(sql); +} + +async function main() { + await db.query(` + CREATE TABLE IF NOT EXISTS schema_migrations ( + filename TEXT PRIMARY KEY, + applied_at TIMESTAMPTZ DEFAULT NOW() + ) + `); + + await runSqlFile(resolve(repoRoot, 'scripts/init-db.sql')); + + const migrationsDir = resolve(repoRoot, 'migrations'); + const files = (await readdir(migrationsDir)) + .filter((file) => file.endsWith('.sql')) + .sort(); + + for (const file of files) { + const applied = await db.query( + 'SELECT 1 FROM schema_migrations WHERE filename = $1', + [file] + ); + + if (applied.rowCount) { + console.log(`Skipping ${file}; already applied`); + continue; + } + + await runSqlFile(resolve(migrationsDir, file)); + await db.query( + 'INSERT INTO schema_migrations (filename) VALUES ($1) ON CONFLICT DO NOTHING', + [file] + ); + } +} + +main() + .catch((error) => { + console.error(error); + process.exitCode = 1; + }) + .finally(async () => { + await db.end(); + }); diff --git a/services/ingest-api/scripts/seed.ts b/services/ingest-api/scripts/seed.ts new file mode 100644 index 0000000..e2768aa --- /dev/null +++ b/services/ingest-api/scripts/seed.ts @@ -0,0 +1,26 @@ +import { config } from 'dotenv'; +import { resolve } from 'node:path'; +import { readFile } from 'node:fs/promises'; +import { Pool } from 'pg'; + +const repoRoot = resolve(__dirname, '../../..'); +config({ path: resolve(repoRoot, '.env') }); + +const db = new Pool({ + connectionString: 
process.env.DATABASE_URL || 'postgresql://pulseops:dev_password_change_in_production@localhost:5432/pulseops_dev', +}); + +async function main() { + const sql = await readFile(resolve(repoRoot, 'scripts/seed-demo-data.sql'), 'utf8'); + await db.query(sql); + console.log('Seed data inserted successfully'); +} + +main() + .catch((error) => { + console.error(error); + process.exitCode = 1; + }) + .finally(async () => { + await db.end(); + }); diff --git a/services/ingest-api/src/events.ts b/services/ingest-api/src/events.ts new file mode 100644 index 0000000..69eb041 --- /dev/null +++ b/services/ingest-api/src/events.ts @@ -0,0 +1,73 @@ +import { randomUUID } from 'node:crypto'; +import { z } from 'zod'; +import { eventValidationSchema, type Event } from './schemas/event'; + +export interface AuthenticatedTenant { + orgId: string; + projectId?: string; +} + +export interface IngestedEvent { + id: string; + org_id: string; + project_id: string; + event_name: string; + user_id?: string; + session_id: string; + properties: Record; + timestamp: string; +} + +export class ProjectMismatchError extends Error { + constructor() { + super('Project does not match authenticated API key'); + this.name = 'ProjectMismatchError'; + } +} + +export class MissingProjectError extends Error { + constructor() { + super('Missing project_id'); + this.name = 'MissingProjectError'; + } +} + +export function parseEventPayload(payload: Event): z.infer { + return eventValidationSchema.parse(payload); +} + +export function bindEventToTenant( + payload: Event, + tenant: AuthenticatedTenant, + eventId: string = randomUUID() +): IngestedEvent { + const validatedEvent = parseEventPayload(payload); + const projectId = tenant.projectId || validatedEvent.project_id; + + if (!projectId) { + throw new MissingProjectError(); + } + + if (tenant.projectId && validatedEvent.project_id && tenant.projectId !== validatedEvent.project_id) { + throw new ProjectMismatchError(); + } + + return { + id: eventId, + 
org_id: tenant.orgId, + project_id: projectId, + event_name: validatedEvent.event_name, + user_id: validatedEvent.user_id, + session_id: validatedEvent.session_id || randomUUID(), + properties: validatedEvent.properties || {}, + timestamp: validatedEvent.timestamp || new Date().toISOString(), + }; +} + +export function kafkaKeyForEvent(event: Pick) { + return [ + event.org_id, + event.project_id, + event.user_id || event.session_id || event.id, + ].join(':'); +} diff --git a/services/ingest-api/src/index.ts b/services/ingest-api/src/index.ts index 1aee3d8..7414327 100644 --- a/services/ingest-api/src/index.ts +++ b/services/ingest-api/src/index.ts @@ -10,9 +10,23 @@ import rateLimit from '@fastify/rate-limit'; import Redis from 'ioredis'; import { Kafka } from 'kafkajs'; import { Pool } from 'pg'; -import { randomUUID } from 'node:crypto'; -import { eventValidationSchema, type Event } from './schemas/event'; +import { createHash, randomUUID } from 'node:crypto'; +import { ZodError } from 'zod'; +import { type Event } from './schemas/event'; import { apiKeyAuth } from './middleware/auth'; +import { + bindEventToTenant, + kafkaKeyForEvent, + MissingProjectError, + ProjectMismatchError, +} from './events'; +import { + metrics, + metricsHandler, + observeHttpRequest, + recordDbPoolGauges, + register as metricsRegister, +} from './metrics'; const PORT = parseInt(process.env.PORT || '3001', 10); const KAFKA_BROKERS = (process.env.KAFKA_BROKERS || 'localhost:9092').split(','); @@ -62,13 +76,24 @@ const app = Fastify({ timeWindow: parseInt(process.env.RATE_LIMIT_WINDOW_MS || '60000', 10), redis, keyGenerator: (req) => { - const apiKey = req.headers['x-api-key'] as string; - return apiKey || req.ip; + const header = req.headers['x-api-key']; + const apiKey = Array.isArray(header) ? header[0] : header; + return apiKey ? 
createHash('sha256').update(apiKey).digest('hex') : req.ip; }, }); + app.addHook('onRequest', async (request) => { + (request as any).metricsStartedAt = process.hrtime.bigint(); + }); + + app.addHook('onResponse', async (request, reply) => { + observeHttpRequest(request, reply); + }); + // Health check endpoint app.get('/health', async () => { + recordDbPoolGauges(db); + return { status: 'healthy', timestamp: new Date().toISOString(), @@ -80,49 +105,59 @@ const app = Fastify({ }; }); + app.get('/metrics', async (_req, reply) => { + recordDbPoolGauges(db); + return reply + .header('Content-Type', metricsRegister.contentType) + .send(await metricsHandler()); + }); + // Event ingestion endpoint app.post<{ Body: Event }>('/api/v1/events', async (req, reply) => { // Auth check - await apiKeyAuth(db, redis)(req, reply); + const authenticated = await apiKeyAuth(db, redis)(req, reply); + if (!authenticated) { + return reply; + } const startTime = Date.now(); try { - // Validate event payload - const validatedEvent = eventValidationSchema.parse(req.body); - - // Attach org_id and project_id from auth context - const event = { - id: randomUUID(), - org_id: req.orgId, - project_id: req.projectId || validatedEvent.project_id, - event_name: validatedEvent.event_name, - user_id: validatedEvent.user_id, - session_id: validatedEvent.session_id || randomUUID(), - properties: validatedEvent.properties || {}, - timestamp: validatedEvent.timestamp || new Date().toISOString(), - }; + const event = bindEventToTenant(req.body, { + orgId: req.orgId, + projectId: req.projectId, + }); // Publish to Kafka topic - await producer.send({ + const metadata = await producer.send({ topic: 'events-raw', messages: [ { - key: event.org_id, + key: kafkaKeyForEvent(event), value: JSON.stringify(event), headers: { trace_id: req.id, + request_id: req.id, }, }, ], }); const latency = Date.now() - startTime; + metrics.eventsIngested.inc({ org_id: event.org_id, project_id: event.project_id }); + 
metrics.eventIngestionDuration.observe({ type: 'single' }, latency / 1000); + metrics.kafkaMessagesProduced.inc({ topic: 'events-raw' }); req.log.info({ event_id: event.id, org_id: event.org_id, + project_id: event.project_id, + tenant: `${event.org_id}:${event.project_id}`, event_name: event.event_name, + request_id: req.id, + trace_id: req.id, + kafka_topic: metadata[0]?.topicName || 'events-raw', + kafka_partition: metadata[0]?.partition, latency_ms: latency, }, 'Event ingested'); @@ -132,15 +167,30 @@ const app = Fastify({ event_id: event.id, }); } catch (error) { + const errorType = error instanceof Error ? error.name : 'UnknownError'; + metrics.eventIngestionErrors.inc({ error_type: errorType }); + if (!(error instanceof ZodError) && + !(error instanceof ProjectMismatchError) && + !(error instanceof MissingProjectError)) { + metrics.kafkaProduceErrors.inc({ topic: 'events-raw', error_type: errorType }); + } req.log.error({ error }, 'Event ingestion failed'); - if (error instanceof Error && 'issues' in error) { + if (error instanceof ZodError) { return reply.code(400).send({ error: 'Validation failed', - details: error, + details: error.issues, }); } + if (error instanceof ProjectMismatchError) { + return reply.code(403).send({ error: error.message }); + } + + if (error instanceof MissingProjectError) { + return reply.code(400).send({ error: error.message }); + } + return reply.code(500).send({ error: 'Internal server error', }); @@ -150,9 +200,14 @@ const app = Fastify({ // Batch event ingestion endpoint app.post<{ Body: { events: Event[] } }>('/api/v1/events/batch', async (req, reply) => { // Auth check - await apiKeyAuth(db, redis)(req, reply); + const authenticated = await apiKeyAuth(db, redis)(req, reply); + if (!authenticated) { + return reply; + } const { events } = req.body; + const batchId = randomUUID(); + const startTime = Date.now(); if (!Array.isArray(events) || events.length === 0) { return reply.code(400).send({ @@ -168,33 +223,45 @@ const app = 
Fastify({ try { const messages = events.map((eventData) => { - const validatedEvent = eventValidationSchema.parse(eventData); - - const event = { - id: randomUUID(), - org_id: req.orgId, - project_id: req.projectId || validatedEvent.project_id, - event_name: validatedEvent.event_name, - user_id: validatedEvent.user_id, - session_id: validatedEvent.session_id || randomUUID(), - properties: validatedEvent.properties || {}, - timestamp: validatedEvent.timestamp || new Date().toISOString(), - }; + const event = bindEventToTenant(eventData, { + orgId: req.orgId, + projectId: req.projectId, + }); return { - key: event.org_id, + key: kafkaKeyForEvent(event), value: JSON.stringify(event), + headers: { + trace_id: req.id, + request_id: req.id, + batch_id: batchId, + }, }; }); - await producer.send({ + const metadata = await producer.send({ topic: 'events-raw', messages, }); + const latency = Date.now() - startTime; + metrics.eventsIngestedBatch.inc( + { org_id: req.orgId, project_id: req.projectId || 'none', batch_size: String(events.length) }, + events.length + ); + metrics.eventIngestionDuration.observe({ type: 'batch' }, latency / 1000); + metrics.kafkaMessagesProduced.inc({ topic: 'events-raw' }, events.length); + req.log.info({ org_id: req.orgId, + project_id: req.projectId, + tenant: req.projectId ? `${req.orgId}:${req.projectId}` : req.orgId, + request_id: req.id, + trace_id: req.id, + batch_id: batchId, batch_size: events.length, + kafka_topic: metadata[0]?.topicName || 'events-raw', + kafka_partition: metadata[0]?.partition, }, 'Batch ingested'); return reply.code(202).send({ @@ -202,7 +269,30 @@ const app = Fastify({ count: events.length, }); } catch (error) { + const errorType = error instanceof Error ? 
error.name : 'UnknownError'; + metrics.eventIngestionErrors.inc({ error_type: errorType }); + if (!(error instanceof ZodError) && + !(error instanceof ProjectMismatchError) && + !(error instanceof MissingProjectError)) { + metrics.kafkaProduceErrors.inc({ topic: 'events-raw', error_type: errorType }); + } req.log.error({ error }, 'Batch ingestion failed'); + + if (error instanceof ZodError) { + return reply.code(400).send({ + error: 'Validation failed', + details: error.issues, + }); + } + + if (error instanceof ProjectMismatchError) { + return reply.code(403).send({ error: error.message }); + } + + if (error instanceof MissingProjectError) { + return reply.code(400).send({ error: error.message }); + } + return reply.code(500).send({ error: 'Internal server error' }); } }); diff --git a/services/ingest-api/src/metrics.ts b/services/ingest-api/src/metrics.ts index e199760..d1f0efb 100644 --- a/services/ingest-api/src/metrics.ts +++ b/services/ingest-api/src/metrics.ts @@ -1,4 +1,6 @@ import { Registry, Counter, Histogram, Gauge } from 'prom-client'; +import type { FastifyReply, FastifyRequest } from 'fastify'; +import type { Pool } from 'pg'; // Create registry export const register = new Registry(); @@ -140,29 +142,29 @@ export const metrics = { }), }; -// Middleware to track HTTP metrics -export function metricsMiddleware() { - return async (request: any, reply: any) => { - const start = Date.now(); - - reply.addHook('onSend', async () => { - const duration = (Date.now() - start) / 1000; - const route = request.routerPath || request.url; - const method = request.method; - const statusCode = reply.statusCode; - - metrics.httpRequestDuration.observe( - { method, route, status_code: statusCode }, - duration - ); - - metrics.httpRequestsTotal.inc({ - method, - route, - status_code: statusCode, - }); - }); +function routeLabel(request: FastifyRequest) { + return request.routeOptions?.url || request.url; +} + +export function observeHttpRequest(request: FastifyRequest, 
reply: FastifyReply) { + const startedAt = (request as any).metricsStartedAt; + const duration = typeof startedAt === 'bigint' + ? Number(process.hrtime.bigint() - startedAt) / 1_000_000_000 + : 0; + + const labels = { + method: request.method, + route: routeLabel(request), + status_code: String(reply.statusCode), }; + + metrics.httpRequestDuration.observe(labels, duration); + metrics.httpRequestsTotal.inc(labels); +} + +export function recordDbPoolGauges(db: Pool) { + metrics.dbConnectionsActive.set(db.totalCount - db.idleCount); + metrics.dbConnectionsIdle.set(db.idleCount); } // Metrics endpoint handler diff --git a/services/ingest-api/src/middleware/auth.ts b/services/ingest-api/src/middleware/auth.ts index ed9a431..2301c88 100644 --- a/services/ingest-api/src/middleware/auth.ts +++ b/services/ingest-api/src/middleware/auth.ts @@ -1,74 +1,69 @@ import { FastifyRequest, FastifyReply } from 'fastify'; import { Pool } from 'pg'; import Redis from 'ioredis'; +import bcrypt from 'bcryptjs'; +import { createHash } from 'node:crypto'; -export function apiKeyAuth(_db: Pool, _redis: Redis) { - return async (req: FastifyRequest, _reply: FastifyReply): Promise => { - // TEMPORARY: Skip API key validation for initial testing - req.orgId = '00000000-0000-0000-0000-000000000001'; - req.projectId = '00000000-0000-0000-0000-000000000002'; - req.log.warn('API KEY AUTH DISABLED FOR TESTING - DO NOT USE IN PRODUCTION'); - return; - - // Original auth logic commented out temporarily - /* - const apiKey = req.headers['x-api-key'] as string; +export function apiKeyAuth(db: Pool, redis: Redis) { + return async (req: FastifyRequest, reply: FastifyReply): Promise => { + const header = req.headers['x-api-key']; + const apiKey = Array.isArray(header) ? 
header[0] : header; if (!apiKey) { - return reply.code(401).send({ error: 'Missing API key' }); + reply.code(401).send({ error: 'Missing API key' }); + return false; } try { // Check cache first - const cacheKey = `apikey:${apiKey}`; + const fingerprint = createHash('sha256').update(apiKey).digest('hex'); + const cacheKey = `apikey:${fingerprint}`; const cached = await redis.get(cacheKey); if (cached) { const { org_id, project_id } = JSON.parse(cached); req.orgId = org_id; - req.projectId = project_id; - return; + req.projectId = project_id || undefined; + return true; } - // Validate API key from database const result = await db.query( - `SELECT org_id, key_hash, active - FROM api_keys - WHERE active = true - LIMIT 100` + `SELECT id, org_id, project_id, key_hash, active + FROM api_keys + WHERE active = true + AND key_fingerprint = $1 + LIMIT 1`, + [fingerprint] ); - let authenticated = false; - let orgId = ''; - for (const row of result.rows) { const isValid = await bcrypt.compare(apiKey, row.key_hash); if (isValid && row.active) { - authenticated = true; - orgId = row.org_id; + req.orgId = row.org_id; + req.projectId = row.project_id || undefined; // Update last_used_at await db.query( - 'UPDATE api_keys SET last_used_at = NOW() WHERE key_hash = $1', - [row.key_hash] + 'UPDATE api_keys SET last_used_at = NOW() WHERE id = $1', + [row.id] ); - break; - } - } + // Cache for 5 minutes without storing the raw key + await redis.setex(cacheKey, 300, JSON.stringify({ + org_id: row.org_id, + project_id: row.project_id || null, + })); - if (!authenticated) { - return reply.code(403).send({ error: 'Invalid API key' }); + return true; + } } - req.orgId = orgId; - - // Cache for 5 minutes - await redis.setex(cacheKey, 300, JSON.stringify({ org_id: orgId })); + reply.code(403).send({ error: 'Invalid API key' }); + return false; } catch (error) { req.log.error({ error }, 'API key validation failed'); - return reply.code(500).send({ error: 'Internal server error' }); + 
reply.code(500).send({ error: 'Internal server error' }); + return false; } - */ }; } diff --git a/services/ingest-api/tests/integration/ingest.test.ts b/services/ingest-api/tests/integration/ingest.test.ts index 712ceb6..0640868 100644 --- a/services/ingest-api/tests/integration/ingest.test.ts +++ b/services/ingest-api/tests/integration/ingest.test.ts @@ -33,6 +33,20 @@ describe('Ingest API Integration Tests', () => { expect(response.status).toBe(401); }); + it('should reject cross-project event writes for project-scoped API keys', async () => { + const response = await request(INGEST_API_URL) + .post('/api/v1/events') + .set('X-API-Key', API_KEY) + .send({ + event_name: 'cross_project_attempt', + project_id: '00000000-0000-0000-0000-000000000099', + user_id: 'test_user_123', + }); + + expect(response.status).toBe(403); + expect(response.body.error).toContain('Project does not match'); + }); + it('should reject invalid event data', async () => { const response = await request(INGEST_API_URL) .post('/api/v1/events') @@ -45,6 +59,24 @@ describe('Ingest API Integration Tests', () => { expect(response.status).toBe(400); }); + it('should reject cross-project batch writes for project-scoped API keys', async () => { + const response = await request(INGEST_API_URL) + .post('/api/v1/events/batch') + .set('X-API-Key', API_KEY) + .send({ + events: [ + { + event_name: 'cross_project_batch_attempt', + project_id: '00000000-0000-0000-0000-000000000099', + user_id: 'test_user_123', + }, + ], + }); + + expect(response.status).toBe(403); + expect(response.body.error).toContain('Project does not match'); + }); + it('should accept batch events', async () => { const events = Array.from({ length: 10 }, (_, i) => ({ event_name: 'batch_event', @@ -74,4 +106,14 @@ describe('Ingest API Integration Tests', () => { expect(response.status).toBe(400); }); + + it('should expose Prometheus metrics for ingest coordination', async () => { + const response = await 
request(INGEST_API_URL).get('/metrics'); + + expect(response.status).toBe(200); + expect(response.text).toContain('http_requests_total'); + expect(response.text).toContain('events_ingested_total'); + expect(response.text).toContain('kafka_messages_produced_total'); + expect(response.text).toContain('db_connections_active'); + }); }); diff --git a/services/ingest-api/tests/unit/auth.test.ts b/services/ingest-api/tests/unit/auth.test.ts new file mode 100644 index 0000000..5fd9c4b --- /dev/null +++ b/services/ingest-api/tests/unit/auth.test.ts @@ -0,0 +1,49 @@ +import { describe, it, expect, jest } from '@jest/globals'; +import { apiKeyAuth } from '../../src/middleware/auth'; + +describe('apiKeyAuth', () => { + it('short-circuits missing API keys', async () => { + const db = { query: jest.fn() } as any; + const redis = { get: jest.fn() } as any; + const reply = { + code: jest.fn().mockReturnThis(), + send: jest.fn(), + } as any; + const req = { + headers: {}, + log: { error: jest.fn() }, + } as any; + + const authenticated = await apiKeyAuth(db, redis)(req, reply); + + expect(authenticated).toBe(false); + expect(reply.code).toHaveBeenCalledWith(401); + expect(reply.send).toHaveBeenCalledWith({ error: 'Missing API key' }); + expect(db.query).not.toHaveBeenCalled(); + }); + + it('hydrates tenant from cached API key auth', async () => { + const db = { query: jest.fn() } as any; + const redis = { + get: jest.fn(async () => JSON.stringify({ + org_id: 'org_1', + project_id: 'project_1', + })), + } as any; + const reply = { + code: jest.fn().mockReturnThis(), + send: jest.fn(), + } as any; + const req = { + headers: { 'x-api-key': 'test_key' }, + log: { error: jest.fn() }, + } as any; + + const authenticated = await apiKeyAuth(db, redis)(req, reply); + + expect(authenticated).toBe(true); + expect(req.orgId).toBe('org_1'); + expect(req.projectId).toBe('project_1'); + expect(db.query).not.toHaveBeenCalled(); + }); +}); diff --git a/services/ingest-api/tests/unit/events.test.ts 
b/services/ingest-api/tests/unit/events.test.ts new file mode 100644 index 0000000..7802348 --- /dev/null +++ b/services/ingest-api/tests/unit/events.test.ts @@ -0,0 +1,62 @@ +import { describe, it, expect } from '@jest/globals'; +import { + bindEventToTenant, + kafkaKeyForEvent, + MissingProjectError, + ProjectMismatchError, +} from '../../src/events'; + +describe('ingest event binding', () => { + const orgId = '00000000-0000-0000-0000-000000000001'; + const projectId = '00000000-0000-0000-0000-000000000002'; + const otherProjectId = '00000000-0000-0000-0000-000000000003'; + + it('binds events to the authenticated org and project', () => { + const event = bindEventToTenant( + { + event_name: 'signup', + project_id: projectId, + user_id: 'user_1', + }, + { orgId, projectId }, + 'event_1' + ); + + expect(event.id).toBe('event_1'); + expect(event.org_id).toBe(orgId); + expect(event.project_id).toBe(projectId); + }); + + it('rejects payload project mismatch for project-scoped keys', () => { + expect(() => bindEventToTenant( + { + event_name: 'signup', + project_id: otherProjectId, + }, + { orgId, projectId }, + 'event_1' + )).toThrow(ProjectMismatchError); + }); + + it('requires a project when the API key is not project scoped', () => { + expect(() => bindEventToTenant( + { event_name: 'signup' }, + { orgId }, + 'event_1' + )).toThrow(MissingProjectError); + }); + + it('partitions Kafka keys by tenant plus user/session/event identity', () => { + const event = bindEventToTenant( + { + event_name: 'signup', + project_id: projectId, + user_id: 'user_1', + }, + { orgId, projectId }, + 'event_1' + ); + + expect(kafkaKeyForEvent(event)).toBe(`${orgId}:${projectId}:user_1`); + }); +}); diff --git a/services/ingest-api/tsconfig.json b/services/ingest-api/tsconfig.json index a2d6ea5..1844673 100644 --- a/services/ingest-api/tsconfig.json +++ b/services/ingest-api/tsconfig.json @@ -1,11 +1,11 @@ { "compilerOptions": { "target": "ES2022", - "module": "CommonJS", + "module": 
"Node16", "lib": [ "ES2022" ], - "moduleResolution": "node", + "moduleResolution": "Node16", "rootDir": "./src", "outDir": "./dist", "esModuleInterop": true, diff --git a/services/worker/Dockerfile b/services/worker/Dockerfile index bba31d9..90951a6 100644 --- a/services/worker/Dockerfile +++ b/services/worker/Dockerfile @@ -10,4 +10,6 @@ RUN pnpm install --no-frozen-lockfile COPY . . RUN pnpm build +EXPOSE 3003 + CMD ["pnpm", "start"] diff --git a/services/worker/package.json b/services/worker/package.json index 9bd928e..3e367e9 100644 --- a/services/worker/package.json +++ b/services/worker/package.json @@ -8,15 +8,18 @@ "build": "tsc", "start": "node dist/index.js", "test": "jest --config jest.config.js", + "test:unit": "jest --config jest.config.js --testPathPattern=unit", "test:integration": "jest --config jest.config.js --testPathPattern=integration", "typecheck": "tsc --noEmit" }, "dependencies": { "dotenv": "^16.4.5", + "ioredis": "^5.3.2", "kafkajs": "^2.2.4", "pg": "^8.11.3", "pino": "^8.17.2", - "pino-pretty": "^10.3.1" + "pino-pretty": "^10.3.1", + "prom-client": "^15.1.3" }, "devDependencies": { "@types/pg": "^8.10.9", diff --git a/services/worker/src/aggregators/daily.ts b/services/worker/src/aggregators/daily.ts index e368808..6ccabd9 100644 --- a/services/worker/src/aggregators/daily.ts +++ b/services/worker/src/aggregators/daily.ts @@ -1,4 +1,8 @@ -import { Pool } from 'pg'; +import { logger } from '../utils/logger'; + +interface Queryable { + query: (text: string, params?: any[]) => Promise; +} interface Event { org_id: string; @@ -9,21 +13,30 @@ interface Event { properties: Record; } -export async function computeDailyAggregates(db: Pool, event: Event) { +export async function computeDailyAggregates(db: Queryable, event: Event) { const date = new Date(event.timestamp).toISOString().split('T')[0]; + const start = Date.now(); // DAU (Daily Active Users) if (event.user_id) { - await db.query( - `INSERT INTO daily_aggregates (org_id, project_id, 
metric_name, metric_value, date, dimensions) - VALUES ($1, $2, 'dau', 1, $3, '{}') - ON CONFLICT (org_id, project_id, metric_name, date, dimensions) - DO UPDATE SET metric_value = daily_aggregates.metric_value + 1`, - [event.org_id, event.project_id, date] + const activeUserResult = await db.query( + `INSERT INTO daily_active_users (org_id, project_id, date, user_id) + VALUES ($1, $2, $3, $4) + ON CONFLICT (org_id, project_id, date, user_id) DO NOTHING + RETURNING user_id`, + [event.org_id, event.project_id, date, event.user_id] ); - } - + if ((activeUserResult.rowCount || 0) > 0) { + await db.query( + `INSERT INTO daily_aggregates (org_id, project_id, metric_name, metric_value, date, dimensions) + VALUES ($1, $2, 'dau', 1, $3, '{}') + ON CONFLICT (org_id, project_id, metric_name, date, dimensions) + DO UPDATE SET metric_value = daily_aggregates.metric_value + 1`, + [event.org_id, event.project_id, date] + ); + } + } // Event count by name await db.query( @@ -42,4 +55,12 @@ export async function computeDailyAggregates(db: Pool, event: Event) { DO UPDATE SET metric_value = daily_aggregates.metric_value + 1`, [event.org_id, event.project_id, date] ); + + logger.info({ + org_id: event.org_id, + project_id: event.project_id, + tenant: `${event.org_id}:${event.project_id}`, + event_name: event.event_name, + db_write_duration_ms: Date.now() - start, + }, 'Daily aggregates updated'); } diff --git a/services/worker/src/index.ts b/services/worker/src/index.ts index 38ac589..55ef09a 100644 --- a/services/worker/src/index.ts +++ b/services/worker/src/index.ts @@ -1,16 +1,26 @@ import { config } from 'dotenv'; import { resolve } from 'path'; +import type { Server } from 'node:http'; // Load environment variables from project root config({ path: resolve(__dirname, '../../../.env') }); import { Kafka } from 'kafkajs'; import { Pool } from 'pg'; +import Redis from 'ioredis'; import { logger } from './utils/logger'; -import { computeDailyAggregates } from './aggregators/daily'; 
+import { processEvent } from './processing'; +import { + recordProcessedEvent, + recordProcessingError, + startMetricsServer, + stopMetricsServer, +} from './metrics'; const KAFKA_BROKERS = (process.env.KAFKA_BROKERS || 'localhost:9092').split(','); const KAFKA_GROUP_ID = process.env.KAFKA_GROUP_ID || 'pulseops-aggregators'; +const WORKER_METRICS_HOST = process.env.WORKER_METRICS_HOST || '0.0.0.0'; +const WORKER_METRICS_PORT = parseInt(process.env.WORKER_METRICS_PORT || process.env.METRICS_PORT || '3003', 10); // Initialize database const db = new Pool({ @@ -18,6 +28,8 @@ const db = new Pool({ max: 10, }); +const redis = new Redis(process.env.REDIS_URL || 'redis://localhost:6379'); + // Initialize Kafka consumer const kafka = new Kafka({ clientId: 'pulseops-worker', @@ -33,46 +45,69 @@ const consumer = kafka.consumer({ // Track processing stats let processedCount = 0; let errorCount = 0; +let metricsServer: Server | undefined; + +async function failAfterProcessingOnceIfRequested(params: { + event: any; + topic: string; + partition: number; + offset: string; + traceId?: string; + requestId?: string; + batchId?: string; +}) { + const runId = process.env.WORKER_FAIL_AFTER_PROCESSING_ONCE_RUN_ID; + + if (!runId || runId !== params.event.properties?.retry_probe_run_id) { + return; + } + + const proofKey = `retry-proof:${KAFKA_GROUP_ID}:${runId}`; -async function processEvent(event: any) { + let setResult: string | null; try { - // Store raw event (optional, for debugging/replay) - await db.query( - `INSERT INTO events (org_id, project_id, event_name, user_id, session_id, properties, timestamp) - VALUES ($1, $2, $3, $4, $5, $6, $7) - ON CONFLICT DO NOTHING`, - [ - event.org_id, - event.project_id, - event.event_name, - event.user_id, - event.session_id, - event.properties, - event.timestamp, - ] - ); - - // Compute aggregates - await computeDailyAggregates(db, event); - - processedCount++; - - if (processedCount % 100 === 0) { - logger.info({ - processed: 
processedCount, - errors: errorCount, - error_rate: (errorCount / processedCount * 100).toFixed(2) + '%', - }, 'Processing stats'); - } + setResult = await redis.set(proofKey, '1', 'EX', 3600, 'NX'); } catch (error) { - errorCount++; - logger.error({ error, event }, 'Failed to process event'); - throw error; // Let Kafka handle retry + logger.warn({ + error, + proof_key: proofKey, + retry_probe_run_id: runId, + }, 'Retry proof hook Redis guard failed; continuing to commit offset'); + return; + } + + if (setResult !== 'OK') { + return; } + + logger.warn({ + proof_key: proofKey, + retry_probe_run_id: runId, + kafka_group_id: KAFKA_GROUP_ID, + kafka_topic: params.topic, + kafka_partition: params.partition, + offset: params.offset, + event_id: params.event.id || params.event.event_id, + org_id: params.event.org_id, + project_id: params.event.project_id, + tenant: `${params.event.org_id}:${params.event.project_id}`, + trace_id: params.traceId, + request_id: params.requestId, + batch_id: params.batchId, + exit_code: 86, + }, 'Retry proof hook exiting after processing and before offset commit'); + + process.exit(86); } async function start() { try { + metricsServer = await startMetricsServer({ + host: WORKER_METRICS_HOST, + port: WORKER_METRICS_PORT, + db, + }); + // Connect to Kafka await consumer.connect(); logger.info('Kafka consumer connected'); @@ -89,18 +124,40 @@ async function start() { await consumer.run({ autoCommit: false, eachMessage: async ({ topic, partition, message }) => { - const event = JSON.parse(message.value?.toString() || '{}'); + const startedAt = Date.now(); + let event: any = {}; const traceId = message.headers?.trace_id?.toString(); - - logger.info({ - trace_id: traceId, - event_id: event.id, - event_name: event.event_name, - offset: message.offset, - }, 'Processing event'); + const requestId = message.headers?.request_id?.toString(); + const batchId = message.headers?.batch_id?.toString(); try { - await processEvent(event); + event = 
JSON.parse(message.value?.toString() || '{}'); + + logger.info({ + trace_id: traceId, + request_id: requestId, + batch_id: batchId, + event_id: event.id, + org_id: event.org_id, + project_id: event.project_id, + tenant: `${event.org_id}:${event.project_id}`, + event_name: event.event_name, + kafka_topic: topic, + kafka_partition: partition, + offset: message.offset, + }, 'Processing event'); + + const result = await processEvent(db, redis, event, traceId); + + await failAfterProcessingOnceIfRequested({ + event, + topic, + partition, + offset: message.offset, + traceId, + requestId, + batchId, + }); // Commit offset after successful processing await consumer.commitOffsets([{ @@ -108,14 +165,55 @@ async function start() { partition, offset: (parseInt(message.offset) + 1).toString(), }]); + + processedCount++; + recordProcessedEvent({ + topic, + partition, + result: result.inserted ? 'inserted' : 'duplicate', + durationMs: Date.now() - startedAt, + cacheInvalidated: result.cacheInvalidated, + }); + + if (processedCount % 100 === 0) { + logger.info({ + processed: processedCount, + errors: errorCount, + duplicates_skipped: result.inserted ? 
0 : 1, + error_rate: (errorCount / processedCount * 100).toFixed(2) + '%', + }, 'Processing stats'); + } } catch (error) { - logger.error({ error, trace_id: traceId }, 'Event processing failed, will retry'); + errorCount++; + recordProcessingError({ + topic, + partition, + error, + durationMs: Date.now() - startedAt, + }); + logger.error({ + error, + trace_id: traceId, + request_id: requestId, + batch_id: batchId, + event_id: event.id, + org_id: event.org_id, + project_id: event.project_id, + tenant: `${event.org_id}:${event.project_id}`, + kafka_topic: topic, + kafka_partition: partition, + }, 'Event processing failed, will retry'); // Do not commit offset, allowing retry } }, }); } catch (error) { logger.error({ error }, 'Worker failed to start'); + if (metricsServer) { + await stopMetricsServer(metricsServer).catch((shutdownError) => { + logger.error({ error: shutdownError }, 'Failed to stop worker metrics server after startup failure'); + }); + } process.exit(1); } } @@ -125,6 +223,10 @@ async function shutdown() { logger.info('Shutting down worker...'); await consumer.disconnect(); + if (metricsServer) { + await stopMetricsServer(metricsServer); + } + await redis.quit(); await db.end(); logger.info({ diff --git a/services/worker/src/metrics.ts b/services/worker/src/metrics.ts new file mode 100644 index 0000000..3f8008f --- /dev/null +++ b/services/worker/src/metrics.ts @@ -0,0 +1,182 @@ +import { createServer, type Server } from 'node:http'; +import { collectDefaultMetrics, Counter, Gauge, Histogram, Registry } from 'prom-client'; +import type { Pool } from 'pg'; +import { logger } from './utils/logger'; + +export const register = new Registry(); + +collectDefaultMetrics({ register }); + +export const metrics = { + eventsProcessed: new Counter({ + name: 'worker_events_processed_total', + help: 'Total number of Kafka events processed by the worker', + labelNames: ['topic', 'partition', 'result'], + registers: [register], + }), + + processingErrors: new 
Counter({ + name: 'worker_processing_errors_total', + help: 'Total number of worker event processing errors', + labelNames: ['topic', 'partition', 'error_type'], + registers: [register], + }), + + eventProcessingDuration: new Histogram({ + name: 'worker_event_processing_duration_seconds', + help: 'Duration of worker event processing in seconds', + labelNames: ['topic', 'partition', 'result'], + buckets: [0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1, 2, 5, 10], + registers: [register], + }), + + cacheInvalidations: new Counter({ + name: 'worker_cache_invalidations_total', + help: 'Total number of tenant cache invalidations completed by the worker', + registers: [register], + }), + + dbConnectionsActive: new Gauge({ + name: 'worker_db_connections_active', + help: 'Active PostgreSQL connections in the worker pool', + registers: [register], + }), + + dbConnectionsIdle: new Gauge({ + name: 'worker_db_connections_idle', + help: 'Idle PostgreSQL connections in the worker pool', + registers: [register], + }), +}; + +function partitionLabel(partition: number) { + return String(partition); +} + +function secondsFromMs(durationMs: number) { + return Math.max(0, durationMs) / 1000; +} + +export function errorType(error: unknown) { + return error instanceof Error ? 
error.name : 'UnknownError'; +} + +export function recordProcessedEvent(params: { + topic: string; + partition: number; + result: 'inserted' | 'duplicate'; + durationMs: number; + cacheInvalidated: boolean; +}) { + const labels = { + topic: params.topic, + partition: partitionLabel(params.partition), + result: params.result, + }; + + metrics.eventsProcessed.inc(labels); + metrics.eventProcessingDuration.observe(labels, secondsFromMs(params.durationMs)); + + if (params.cacheInvalidated) { + metrics.cacheInvalidations.inc(); + } +} + +export function recordProcessingError(params: { + topic: string; + partition: number; + error: unknown; + durationMs: number; +}) { + const labels = { + topic: params.topic, + partition: partitionLabel(params.partition), + }; + + metrics.processingErrors.inc({ + ...labels, + error_type: errorType(params.error), + }); + metrics.eventProcessingDuration.observe({ + ...labels, + result: 'error', + }, secondsFromMs(params.durationMs)); +} + +export function recordDbPoolGauges(db: Pool) { + metrics.dbConnectionsActive.set(db.totalCount - db.idleCount); + metrics.dbConnectionsIdle.set(db.idleCount); +} + +export async function metricsHandler() { + return register.metrics(); +} + +export async function startMetricsServer(params: { + host: string; + port: number; + db: Pool; +}) { + const server = createServer(async (request, response) => { + try { + const path = new URL(request.url || '/', `http://${request.headers.host || 'localhost'}`).pathname; + + if (request.method === 'GET' && path === '/metrics') { + recordDbPoolGauges(params.db); + response.writeHead(200, { 'Content-Type': register.contentType }); + response.end(await metricsHandler()); + return; + } + + response.writeHead(404, { 'Content-Type': 'text/plain; charset=utf-8' }); + response.end('Not found\n'); + } catch (error) { + logger.error({ error }, 'Worker metrics request failed'); + response.writeHead(500, { 'Content-Type': 'text/plain; charset=utf-8' }); + response.end('Internal 
server error\n'); + } + }); + + await listen(server, params.host, params.port); + + server.on('error', (error) => { + logger.error({ error }, 'Worker metrics server error'); + }); + + logger.info({ + host: params.host, + port: params.port, + }, 'Worker metrics server listening'); + + return server; +} + +export async function stopMetricsServer(server: Server) { + await new Promise((resolve, reject) => { + server.close((error) => { + if (error) { + reject(error); + return; + } + + resolve(); + }); + }); +} + +async function listen(server: Server, host: string, port: number) { + await new Promise((resolve, reject) => { + const onError = (error: Error) => { + server.off('listening', onListening); + reject(error); + }; + const onListening = () => { + server.off('error', onError); + resolve(); + }; + + server.once('error', onError); + server.once('listening', onListening); + server.listen(port, host); + }); +} diff --git a/services/worker/src/processing.ts b/services/worker/src/processing.ts new file mode 100644 index 0000000..fc5d033 --- /dev/null +++ b/services/worker/src/processing.ts @@ -0,0 +1,125 @@ +import Redis from 'ioredis'; +import { Pool } from 'pg'; +import { computeDailyAggregates } from './aggregators/daily'; +import { logger } from './utils/logger'; + +export interface RawEvent { + id?: string; + event_id?: string; + org_id: string; + project_id: string; + event_name: string; + user_id?: string; + session_id?: string; + properties?: Record; + timestamp: string; +} + +export async function insertRawEventIfNew(db: Pool, event: RawEvent) { + const eventId = event.id || event.event_id; + + if (!eventId) { + throw new Error('Missing event_id'); + } + + const start = Date.now(); + const client = await db.connect(); + + try { + await client.query('BEGIN'); + + const dedupeResult = await client.query( + `INSERT INTO event_dedup_keys (org_id, project_id, event_id) + VALUES ($1, $2, $3) + ON CONFLICT (org_id, project_id, event_id) DO NOTHING + RETURNING event_id`, 
+ [event.org_id, event.project_id, eventId] + ); + + if ((dedupeResult.rowCount || 0) === 0) { + await client.query('ROLLBACK'); + return { + inserted: false, + durationMs: Date.now() - start, + }; + } + + await client.query( + `INSERT INTO events (event_id, org_id, project_id, event_name, user_id, session_id, properties, timestamp) + VALUES ($1, $2, $3, $4, $5, $6, $7, $8) + ON CONFLICT (org_id, project_id, event_id, timestamp) DO NOTHING`, + [ + eventId, + event.org_id, + event.project_id, + event.event_name, + event.user_id, + event.session_id, + event.properties || {}, + event.timestamp, + ] + ); + + await computeDailyAggregates(client, { + org_id: event.org_id, + project_id: event.project_id, + event_name: event.event_name, + user_id: event.user_id, + timestamp: event.timestamp, + properties: event.properties || {}, + }); + + await client.query('COMMIT'); + + return { + inserted: true, + durationMs: Date.now() - start, + }; + } catch (error) { + await client.query('ROLLBACK'); + throw error; + } finally { + client.release(); + } +} + +export async function invalidateTenantCache(redis: Redis, event: RawEvent) { + const key = `cache_version:${event.org_id}:${event.project_id}`; + const version = await redis.incr(key); + return { key, version }; +} + +export async function processEvent(db: Pool, redis: Redis, event: RawEvent, traceId?: string) { + const eventId = event.id || event.event_id; + const rawInsert = await insertRawEventIfNew(db, event); + + logger.info({ + trace_id: traceId, + event_id: eventId, + org_id: event.org_id, + project_id: event.project_id, + tenant: `${event.org_id}:${event.project_id}`, + db_write_duration_ms: rawInsert.durationMs, + inserted: rawInsert.inserted, + }, 'Raw event insert complete'); + + if (!rawInsert.inserted) { + return { inserted: false, cacheInvalidated: false }; + } + + const aggregateStart = Date.now(); + const cache = await invalidateTenantCache(redis, event); + + logger.info({ + trace_id: traceId, + event_id: eventId, 
+ org_id: event.org_id, + project_id: event.project_id, + tenant: `${event.org_id}:${event.project_id}`, + db_write_duration_ms: Date.now() - aggregateStart, + cache_version_key: cache.key, + cache_version: cache.version, + }, 'Aggregates updated and cache invalidated'); + + return { inserted: true, cacheInvalidated: true }; +} diff --git a/services/worker/tests/integration/aggregates.test.ts b/services/worker/tests/integration/aggregates.test.ts index 293d00c..4a22d35 100644 --- a/services/worker/tests/integration/aggregates.test.ts +++ b/services/worker/tests/integration/aggregates.test.ts @@ -1,24 +1,38 @@ -import { computeDailyAggregates } from '../src/aggregators/daily'; +import { computeDailyAggregates } from '../../src/aggregators/daily'; import { describe, it, expect, beforeEach, afterEach } from '@jest/globals'; import { Pool } from 'pg'; describe('Daily Aggregates', () => { let db: Pool; + const orgId = '00000000-0000-0000-0000-000000000001'; + const projectId = '00000000-0000-0000-0000-000000000002'; beforeEach(() => { db = new Pool({ - connectionString: process.env.TEST_DATABASE_URL || 'postgresql://pulseops:dev_password_change_in_production@localhost:5432/pulseops_test', + connectionString: process.env.TEST_DATABASE_URL + || process.env.DATABASE_URL + || 'postgresql://pulseops:dev_password_change_in_production@localhost:5432/pulseops_dev', }); }); afterEach(async () => { + await db.query( + `DELETE FROM daily_aggregates + WHERE org_id = $1 AND project_id = $2 AND date = CURRENT_DATE`, + [orgId, projectId] + ); + await db.query( + `DELETE FROM daily_active_users + WHERE org_id = $1 AND project_id = $2 AND date = CURRENT_DATE`, + [orgId, projectId] + ); await db.end(); }); it('should compute DAU aggregate', async () => { const event = { - org_id: '00000000-0000-0000-0000-000000000001', - project_id: '00000000-0000-0000-0000-000000000002', + org_id: orgId, + project_id: projectId, event_name: 'page_view', user_id: 'test_user', timestamp: new 
Date().toISOString(), @@ -28,9 +42,9 @@ describe('Daily Aggregates', () => { await computeDailyAggregates(db, event); const result = await db.query( - `SELECT metric_value FROM daily_aggregates - WHERE org_id = $1 AND metric_name = 'dau' AND date = CURRENT_DATE`, - [event.org_id] + `SELECT metric_value FROM daily_aggregates + WHERE org_id = $1 AND project_id = $2 AND metric_name = 'dau' AND date = CURRENT_DATE`, + [event.org_id, event.project_id] ); expect(result.rows.length).toBeGreaterThan(0); @@ -39,8 +53,8 @@ describe('Daily Aggregates', () => { it('should compute event count aggregate', async () => { const event = { - org_id: '00000000-0000-0000-0000-000000000001', - project_id: '00000000-0000-0000-0000-000000000002', + org_id: orgId, + project_id: projectId, event_name: 'button_click', timestamp: new Date().toISOString(), properties: {}, @@ -49,9 +63,9 @@ describe('Daily Aggregates', () => { await computeDailyAggregates(db, event); const result = await db.query( - `SELECT metric_value FROM daily_aggregates - WHERE org_id = $1 AND metric_name = 'event_count' AND date = CURRENT_DATE`, - [event.org_id] + `SELECT metric_value FROM daily_aggregates + WHERE org_id = $1 AND project_id = $2 AND metric_name = 'event_count' AND date = CURRENT_DATE`, + [event.org_id, event.project_id] ); expect(result.rows.length).toBeGreaterThan(0); @@ -59,17 +73,17 @@ describe('Daily Aggregates', () => { it('should increment total_events', async () => { const event = { - org_id: '00000000-0000-0000-0000-000000000001', - project_id: '00000000-0000-0000-0000-000000000002', + org_id: orgId, + project_id: projectId, event_name: 'test_event', timestamp: new Date().toISOString(), properties: {}, }; const beforeResult = await db.query( - `SELECT COALESCE(metric_value, 0) as value FROM daily_aggregates - WHERE org_id = $1 AND metric_name = 'total_events' AND date = CURRENT_DATE`, - [event.org_id] + `SELECT COALESCE(metric_value, 0) as value FROM daily_aggregates + WHERE org_id = $1 AND 
project_id = $2 AND metric_name = 'total_events' AND date = CURRENT_DATE`, + [event.org_id, event.project_id] ); const beforeValue = beforeResult.rows[0]?.value || 0; @@ -77,9 +91,9 @@ describe('Daily Aggregates', () => { await computeDailyAggregates(db, event); const afterResult = await db.query( - `SELECT metric_value as value FROM daily_aggregates - WHERE org_id = $1 AND metric_name = 'total_events' AND date = CURRENT_DATE`, - [event.org_id] + `SELECT metric_value as value FROM daily_aggregates + WHERE org_id = $1 AND project_id = $2 AND metric_name = 'total_events' AND date = CURRENT_DATE`, + [event.org_id, event.project_id] ); const afterValue = Number(afterResult.rows[0].value); diff --git a/services/worker/tests/unit/processing.test.ts b/services/worker/tests/unit/processing.test.ts new file mode 100644 index 0000000..5f44ec3 --- /dev/null +++ b/services/worker/tests/unit/processing.test.ts @@ -0,0 +1,98 @@ +import { describe, it, expect, jest } from '@jest/globals'; +import { processEvent } from '../../src/processing'; + +function createEvent(overrides: Record = {}) { + return { + id: 'event_1', + org_id: '00000000-0000-0000-0000-000000000001', + project_id: '00000000-0000-0000-0000-000000000002', + event_name: 'signup', + user_id: 'user_1', + session_id: '00000000-0000-0000-0000-000000000003', + properties: {}, + timestamp: '2026-06-16T10:00:00.000Z', + ...overrides, + }; +} + +function createDb(responses: Array<{ rowCount: number; rows: any[] }>) { + const client = { + query: jest.fn(async (sql: string, _params?: any[]) => { + if (sql === 'BEGIN' || sql === 'COMMIT' || sql === 'ROLLBACK') { + return { rowCount: 0, rows: [] }; + } + return responses.shift() || { rowCount: 1, rows: [] }; + }), + release: jest.fn(), + }; + + return { + db: { + connect: jest.fn(async () => client), + } as any, + client, + }; +} + +describe('worker event processing', () => { + it('skips aggregation and cache invalidation for duplicate raw events', async () => { + const { db, 
client } = createDb([{ rowCount: 0, rows: [] }]); + const redis = { + incr: jest.fn(), + } as any; + + const result = await processEvent(db, redis, createEvent(), 'trace_1'); + + expect(result).toEqual({ inserted: false, cacheInvalidated: false }); + expect(client.query).toHaveBeenCalledWith('ROLLBACK'); + expect(redis.incr).not.toHaveBeenCalled(); + expect(client.release).toHaveBeenCalled(); + }); + + it('aggregates late events for the event timestamp date and invalidates tenant cache', async () => { + const { db, client } = createDb([ + { rowCount: 1, rows: [{ event_id: 'event_1' }] }, + { rowCount: 1, rows: [] }, + { rowCount: 1, rows: [{ user_id: 'user_1' }] }, + { rowCount: 1, rows: [] }, + { rowCount: 1, rows: [] }, + { rowCount: 1, rows: [] }, + ]); + const redis = { + incr: jest.fn(async () => 2), + } as any; + + const result = await processEvent(db, redis, createEvent({ + timestamp: '2026-05-10T23:30:00.000Z', + }), 'trace_1'); + + expect(result).toEqual({ inserted: true, cacheInvalidated: true }); + expect(client.query).toHaveBeenCalledWith('COMMIT'); + expect(client.query.mock.calls[3][1]).toEqual([ + '00000000-0000-0000-0000-000000000001', + '00000000-0000-0000-0000-000000000002', + '2026-05-10', + 'user_1', + ]); + expect(redis.incr).toHaveBeenCalledWith( + 'cache_version:00000000-0000-0000-0000-000000000001:00000000-0000-0000-0000-000000000002' + ); + }); + + it('does not increment DAU when the user is already active for the day', async () => { + const { db, client } = createDb([ + { rowCount: 1, rows: [{ event_id: 'event_1' }] }, + { rowCount: 1, rows: [] }, + { rowCount: 0, rows: [] }, + { rowCount: 1, rows: [] }, + { rowCount: 1, rows: [] }, + ]); + const redis = { + incr: jest.fn(async () => 2), + } as any; + + await processEvent(db, redis, createEvent(), 'trace_1'); + + expect(client.query.mock.calls.some((call: unknown[]) => String(call[0]).includes("VALUES ($1, $2, 'dau'"))).toBe(false); + }); +}); diff --git a/services/worker/tsconfig.json 
b/services/worker/tsconfig.json index d78b0b6..fe14a06 100644 --- a/services/worker/tsconfig.json +++ b/services/worker/tsconfig.json @@ -1,11 +1,11 @@ { "compilerOptions": { "target": "ES2022", - "module": "CommonJS", + "module": "Node16", "lib": [ "ES2022" ], - "moduleResolution": "node", + "moduleResolution": "Node16", "rootDir": "./src", "outDir": "./dist", "esModuleInterop": true, diff --git a/tests/integration/cache-correctness.test.ts b/tests/integration/cache-correctness.test.ts new file mode 100644 index 0000000..9778836 --- /dev/null +++ b/tests/integration/cache-correctness.test.ts @@ -0,0 +1,150 @@ +import { randomUUID } from 'node:crypto'; +import { afterAll, beforeAll, describe, expect, it, vi } from 'vitest'; +import { resolvers } from '../../services/graphql-api/src/resolvers'; +import { processEvent } from '../../services/worker/src/processing'; +import { + cleanupTestTenant, + createDb, + createRedis, + createTestTenant, + dateOnly, +} from './helpers'; + +const orgA = '00000000-0000-0000-0000-000000000001'; +const projectA = '00000000-0000-0000-0000-000000000002'; +const orgB = '00000000-0000-0000-0000-000000000003'; +const projectB = '00000000-0000-0000-0000-000000000004'; + +function contextFor(orgId: string, projectId: string) { + const cache = new Map(); + return { + auth: { orgId, projectId }, + requestId: `cache-${orgId}`, + db: { + query: vi.fn().mockResolvedValue({ rows: [{ total: 42 }] }), + }, + redis: { + get: vi.fn(async (key: string) => cache.get(key) ?? 
null), + setex: vi.fn(async (key: string, _ttl: number, value: string) => { + cache.set(key, value); + return 'OK'; + }), + }, + } as any; +} + +describe('GraphQL cache correctness evidence', () => { + it('uses cache keys that include tenant and project scope plus a TTL', async () => { + const context = contextFor(orgA, projectA); + + await resolvers.Query.totalEvents(null, { + orgId: orgA, + projectId: projectA, + startDate: '2026-06-01', + endDate: '2026-06-16', + }, context); + + expect(context.redis.setex).toHaveBeenCalledWith( + `total:${orgA}:${projectA}:v0:2026-06-01:2026-06-16:{}`, + 300, + '42' + ); + }); + + it('does not reuse tenant A cached data for tenant B', async () => { + const contextA = contextFor(orgA, projectA); + const contextB = contextFor(orgB, projectB); + + await resolvers.Query.totalEvents(null, { + orgId: orgA, + projectId: projectA, + startDate: '2026-06-01', + endDate: '2026-06-16', + }, contextA); + await resolvers.Query.totalEvents(null, { + orgId: orgB, + projectId: projectB, + startDate: '2026-06-01', + endDate: '2026-06-16', + }, contextB); + + expect(contextA.redis.setex.mock.calls[0][0]).toContain(`${orgA}:${projectA}`); + expect(contextB.redis.setex.mock.calls[0][0]).toContain(`${orgB}:${projectB}`); + expect(contextA.redis.setex.mock.calls[0][0]).not.toBe(contextB.redis.setex.mock.calls[0][0]); + }); + + it('serves a cache hit without another database query', async () => { + const context = contextFor(orgA, projectA); + const args = { + orgId: orgA, + projectId: projectA, + startDate: '2026-06-01', + endDate: '2026-06-16', + }; + + await resolvers.Query.totalEvents(null, args, context); + await resolvers.Query.totalEvents(null, args, context); + + expect(context.db.query).toHaveBeenCalledTimes(1); + }); +}); + +describe('GraphQL cache invalidation integration evidence', () => { + const db = createDb(); + const redis = createRedis(); + let orgId: string; + let projectId: string; + + beforeAll(async () => { + ({ orgId, projectId 
} = await createTestTenant(db)); + await redis.del(`cache_version:${orgId}:${projectId}`); + }); + + afterAll(async () => { + await redis.del(`cache_version:${orgId}:${projectId}`); + await cleanupTestTenant(db, orgId, projectId); + await redis.quit(); + await db.end(); + }); + + it('uses a fresh cache namespace after a worker aggregate write', async () => { + const timestamp = new Date().toISOString(); + const date = dateOnly(timestamp); + const args = { + orgId, + projectId, + startDate: date, + endDate: date, + }; + const context = { + auth: { orgId, projectId }, + requestId: `cache-invalidation-${randomUUID()}`, + db, + redis, + } as any; + + const coldTotal = await resolvers.Query.totalEvents(null, args, context); + const v0Key = `total:${orgId}:${projectId}:v0:${date}:${date}:{}`; + + await processEvent(db, redis, { + id: randomUUID(), + org_id: orgId, + project_id: projectId, + event_name: `cache_invalidation_${randomUUID()}`, + user_id: `user_${randomUUID()}`, + session_id: randomUUID(), + properties: { test: 'cache-invalidation' }, + timestamp, + }, 'cache-invalidation-test'); + + const version = await redis.get(`cache_version:${orgId}:${projectId}`); + const freshTotal = await resolvers.Query.totalEvents(null, args, context); + const v1Key = `total:${orgId}:${projectId}:v${version}:${date}:${date}:{}`; + + expect(coldTotal).toBe(0); + expect(version).toBe('1'); + expect(freshTotal).toBe(1); + expect(await redis.get(v0Key)).toBe('0'); + expect(await redis.get(v1Key)).toBe('1'); + }); +}); diff --git a/tests/integration/graphql-authz.test.ts b/tests/integration/graphql-authz.test.ts new file mode 100644 index 0000000..14f7358 --- /dev/null +++ b/tests/integration/graphql-authz.test.ts @@ -0,0 +1,48 @@ +import { describe, expect, it } from 'vitest'; + +const graphqlUrl = process.env.GRAPHQL_URL || 'http://localhost:3002/graphql'; +const apiKey = process.env.TEST_API_KEY || process.env.API_KEY || 'demo_key_change_this'; +const orgId = 
'00000000-0000-0000-0000-000000000001'; +const projectId = '00000000-0000-0000-0000-000000000002'; + +const query = ` +query Total($orgId: ID!, $projectId: ID!, $startDate: Date!, $endDate: Date!) { + totalEvents(orgId: $orgId, projectId: $projectId, startDate: $startDate, endDate: $endDate) +}`; + +async function postGraphql(headers: Record, variables: Record) { + return fetch(graphqlUrl, { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + ...headers, + }, + body: JSON.stringify({ query, variables }), + }); +} + +describe('GraphQL API authz evidence', () => { + it('requires tenant context from X-API-Key', async () => { + const response = await postGraphql({}, { + orgId, + projectId, + startDate: '2026-06-01', + endDate: '2026-06-16', + }); + const body = await response.json(); + + expect(body.errors?.[0]?.extensions?.code).toBe('UNAUTHENTICATED'); + }); + + it('prevents tenant A from querying tenant B arguments', async () => { + const response = await postGraphql({ 'X-API-Key': apiKey }, { + orgId: '00000000-0000-0000-0000-000000000003', + projectId, + startDate: '2026-06-01', + endDate: '2026-06-16', + }); + const body = await response.json(); + + expect(body.errors?.[0]?.extensions?.code).toBe('FORBIDDEN'); + }); +}); diff --git a/tests/integration/helpers.ts b/tests/integration/helpers.ts new file mode 100644 index 0000000..2cdfe2d --- /dev/null +++ b/tests/integration/helpers.ts @@ -0,0 +1,53 @@ +import { randomUUID } from 'node:crypto'; +import Redis from 'ioredis'; +import { Pool } from 'pg'; + +export const databaseUrl = process.env.TEST_DATABASE_URL + || process.env.DATABASE_URL + || 'postgresql://pulseops:dev_password_change_in_production@localhost:5432/pulseops_dev'; + +export const redisUrl = process.env.TEST_REDIS_URL + || process.env.REDIS_URL + || 'redis://localhost:6379'; + +export function createDb() { + return new Pool({ connectionString: databaseUrl }); +} + +export function createRedis() { + return new Redis(redisUrl); +} + 
+export async function createTestTenant(db: Pool) { + const orgId = randomUUID(); + const projectId = randomUUID(); + + await db.query( + `INSERT INTO organizations (id, name) + VALUES ($1, $2)`, + [orgId, `Test Org ${orgId}`] + ); + + await db.query( + `INSERT INTO projects (id, org_id, name) + VALUES ($1, $2, $3)`, + [projectId, orgId, `Test Project ${projectId}`] + ); + + return { orgId, projectId }; +} + +export async function cleanupTestTenant(db: Pool, orgId: string, projectId: string) { + await db.query('DELETE FROM daily_active_users WHERE org_id = $1 AND project_id = $2', [orgId, projectId]); + await db.query('DELETE FROM daily_aggregates WHERE org_id = $1 AND project_id = $2', [orgId, projectId]); + await db.query('DELETE FROM hourly_aggregates WHERE org_id = $1 AND project_id = $2', [orgId, projectId]); + await db.query('DELETE FROM event_dedup_keys WHERE org_id = $1 AND project_id = $2', [orgId, projectId]); + await db.query('DELETE FROM events WHERE org_id = $1 AND project_id = $2', [orgId, projectId]); + await db.query('DELETE FROM api_keys WHERE org_id = $1 AND project_id = $2', [orgId, projectId]); + await db.query('DELETE FROM projects WHERE id = $1 AND org_id = $2', [projectId, orgId]); + await db.query('DELETE FROM organizations WHERE id = $1', [orgId]); +} + +export function dateOnly(timestamp: string) { + return new Date(timestamp).toISOString().split('T')[0]; +} diff --git a/tests/integration/idempotency.test.ts b/tests/integration/idempotency.test.ts new file mode 100644 index 0000000..b2f45d6 --- /dev/null +++ b/tests/integration/idempotency.test.ts @@ -0,0 +1,66 @@ +import { randomUUID } from 'node:crypto'; +import { afterAll, beforeAll, describe, expect, it } from 'vitest'; +import { processEvent } from '../../services/worker/src/processing'; +import { + cleanupTestTenant, + createDb, + createRedis, + createTestTenant, + dateOnly, +} from './helpers'; + +describe('worker idempotency evidence', () => { + const db = createDb(); + const redis 
= createRedis(); + let orgId: string; + let projectId: string; + + beforeAll(async () => { + ({ orgId, projectId } = await createTestTenant(db)); + }); + + afterAll(async () => { + await cleanupTestTenant(db, orgId, projectId); + await redis.quit(); + await db.end(); + }); + + it('does not double-count duplicate event_id messages', async () => { + const timestamp = new Date().toISOString(); + const event = { + id: randomUUID(), + org_id: orgId, + project_id: projectId, + event_name: `idempotency_${randomUUID()}`, + user_id: `user_${randomUUID()}`, + session_id: randomUUID(), + properties: { test: 'idempotency' }, + timestamp, + }; + + const first = await processEvent(db, redis, event, 'first-attempt'); + const second = await processEvent(db, redis, event, 'retry-attempt'); + + const raw = await db.query( + `SELECT count(*)::int AS count + FROM events + WHERE org_id = $1 AND project_id = $2 AND event_id = $3`, + [orgId, projectId, event.id] + ); + const aggregate = await db.query( + `SELECT metric_value::int AS value + FROM daily_aggregates + WHERE org_id = $1 + AND project_id = $2 + AND metric_name = 'event_count' + AND date = $3 + AND dimensions = $4::jsonb`, + [orgId, projectId, dateOnly(timestamp), JSON.stringify({ event_name: event.event_name })] + ); + + expect(first).toEqual({ inserted: true, cacheInvalidated: true }); + expect(second).toEqual({ inserted: false, cacheInvalidated: false }); + expect(raw.rows[0].count).toBe(1); + expect(aggregate.rows[0].value).toBe(1); + }); +}); diff --git a/tests/integration/late-events.test.ts b/tests/integration/late-events.test.ts new file mode 100644 index 0000000..0a7ef35 --- /dev/null +++ b/tests/integration/late-events.test.ts @@ -0,0 +1,57 @@ +import { randomUUID } from 'node:crypto'; +import { afterAll, beforeAll, describe, expect, it } from 'vitest'; +import { processEvent } from '../../services/worker/src/processing'; +import { + cleanupTestTenant, + createDb, + createRedis, + createTestTenant, + dateOnly, +} from 
'./helpers'; + +describe('late and out-of-order event evidence', () => { + const db = createDb(); + const redis = createRedis(); + let orgId: string; + let projectId: string; + + beforeAll(async () => { + ({ orgId, projectId } = await createTestTenant(db)); + }); + + afterAll(async () => { + await cleanupTestTenant(db, orgId, projectId); + await redis.quit(); + await db.end(); + }); + + it('updates the bucket for the event timestamp, not processing time', async () => { + const timestamp = new Date(Date.now() - 2 * 24 * 60 * 60 * 1000).toISOString(); + const eventName = `late_${randomUUID()}`; + + await processEvent(db, redis, { + id: randomUUID(), + org_id: orgId, + project_id: projectId, + event_name: eventName, + user_id: `late_user_${randomUUID()}`, + session_id: randomUUID(), + properties: { test: 'late-event' }, + timestamp, + }); + + const result = await db.query( + `SELECT date, metric_value::int AS value + FROM daily_aggregates + WHERE org_id = $1 + AND project_id = $2 + AND metric_name = 'event_count' + AND dimensions = $3::jsonb`, + [orgId, projectId, JSON.stringify({ event_name: eventName })] + ); + + expect(result.rows).toHaveLength(1); + expect(result.rows[0].date.toISOString().split('T')[0]).toBe(dateOnly(timestamp)); + expect(result.rows[0].value).toBe(1); + }); +}); diff --git a/tests/integration/tenant-isolation.test.ts b/tests/integration/tenant-isolation.test.ts new file mode 100644 index 0000000..40076cd --- /dev/null +++ b/tests/integration/tenant-isolation.test.ts @@ -0,0 +1,59 @@ +import { describe, expect, it, vi } from 'vitest'; +import { resolvers } from '../../services/graphql-api/src/resolvers'; + +const orgId = '00000000-0000-0000-0000-000000000001'; +const projectId = '00000000-0000-0000-0000-000000000002'; +const otherOrgId = '00000000-0000-0000-0000-000000000003'; +const otherProjectId = '00000000-0000-0000-0000-000000000004'; + +function context() { + return { + auth: { orgId, projectId }, + requestId: 'tenant-isolation-test', + db: 
{ + query: vi.fn().mockResolvedValue({ rows: [{ total: 1 }] }), + }, + redis: { + get: vi.fn(async (key: string) => (key === `cache_version:${orgId}:${projectId}` ? '0' : null)), + setex: vi.fn(async () => 'OK'), + }, + } as any; +} + +describe('tenant isolation resolver evidence', () => { + it('rejects cross-organization dashboard reads', async () => { + await expect(resolvers.Query.totalEvents(null, { + orgId: otherOrgId, + projectId, + startDate: '2026-06-01', + endDate: '2026-06-16', + }, context())).rejects.toThrow('Forbidden'); + }); + + it('rejects cross-project dashboard reads for project-scoped API keys', async () => { + await expect(resolvers.Query.totalEvents(null, { + orgId, + projectId: otherProjectId, + startDate: '2026-06-01', + endDate: '2026-06-16', + }, context())).rejects.toThrow('Forbidden'); + }); + + it('passes authenticated tenant scope into SQL parameters', async () => { + const testContext = context(); + + await resolvers.Query.totalEvents(null, { + orgId, + projectId, + startDate: '2026-06-01', + endDate: '2026-06-16', + }, testContext); + + expect(testContext.db.query).toHaveBeenCalledWith(expect.stringContaining('WHERE org_id = $1'), [ + orgId, + projectId, + '2026-06-01', + '2026-06-16', + ]); + }); +}); diff --git a/tests/integration/worker-restart.test.ts b/tests/integration/worker-restart.test.ts new file mode 100644 index 0000000..44d7567 --- /dev/null +++ b/tests/integration/worker-restart.test.ts @@ -0,0 +1,66 @@ +import { randomUUID } from 'node:crypto'; +import { afterAll, beforeAll, describe, expect, it } from 'vitest'; +import { Pool } from 'pg'; +import { processEvent } from '../../services/worker/src/processing'; +import { + cleanupTestTenant, + createDb, + createRedis, + createTestTenant, + databaseUrl, + dateOnly, +} from './helpers'; + +describe('worker restart retry evidence', () => { + const setupDb = createDb(); + const redis = createRedis(); + let orgId: string; + let projectId: string; + + beforeAll(async () => { + 
({ orgId, projectId } = await createTestTenant(setupDb)); + }); + + afterAll(async () => { + await cleanupTestTenant(setupDb, orgId, projectId); + await redis.quit(); + await setupDb.end(); + }); + + it('continues safely when the same Kafka message is retried after a new worker pool starts', async () => { + const timestamp = new Date().toISOString(); + const event = { + id: randomUUID(), + org_id: orgId, + project_id: projectId, + event_name: `restart_${randomUUID()}`, + user_id: `restart_user_${randomUUID()}`, + session_id: randomUUID(), + properties: { test: 'worker-restart' }, + timestamp, + }; + + const firstPool = new Pool({ connectionString: databaseUrl }); + const first = await processEvent(firstPool, redis, event, 'before-restart'); + await firstPool.end(); + + const restartedPool = new Pool({ connectionString: databaseUrl }); + const retry = await processEvent(restartedPool, redis, event, 'after-restart'); + await restartedPool.end(); + + const aggregate = await setupDb.query( + `SELECT metric_value::int AS value + FROM daily_aggregates + WHERE org_id = $1 + AND project_id = $2 + AND metric_name = 'event_count' + AND date = $3 + AND dimensions = $4::jsonb`, + [orgId, projectId, dateOnly(timestamp), JSON.stringify({ event_name: event.event_name })] + ); + + expect(first.inserted).toBe(true); + expect(retry).toEqual({ inserted: false, cacheInvalidated: false }); + expect(aggregate.rows[0].value).toBe(1); + }); +}); diff --git a/tests/load/backpressure.js b/tests/load/backpressure.js new file mode 100644 index 0000000..33dd1c6 --- /dev/null +++ b/tests/load/backpressure.js @@ -0,0 +1,117 @@ +import http from 'k6/http'; +import { check, sleep } from 'k6'; +import exec from 'k6/execution'; + +const apiUrl = __ENV.API_URL || 'http://localhost:3001'; +const apiKey = __ENV.API_KEY || 'demo_key_change_this'; +const runId = __ENV.RUN_ID || new Date().toISOString().replace(/[:.]/g, '-'); +const batchSize = Number(__ENV.BATCH_SIZE || 250); +const tenantKeysFile = 
__ENV.TENANT_KEYS_FILE; +const seededTenants = tenantKeysFile ? JSON.parse(open(tenantKeysFile)).tenants : []; +const hotTenants = seededTenants.filter((tenant) => tenant.tenant_class === 'hot'); +const mediumTenants = seededTenants.filter((tenant) => tenant.tenant_class === 'medium'); + +export const options = { + summaryTrendStats: ['avg', 'min', 'med', 'p(90)', 'p(95)', 'p(99)', 'max'], + scenarios: { + burst: { + executor: 'ramping-arrival-rate', + startRate: Number(__ENV.START_RATE || 50), + timeUnit: '1s', + preAllocatedVUs: Number(__ENV.PREALLOCATED_VUS || 100), + maxVUs: Number(__ENV.MAX_VUS || 500), + stages: [ + { target: Number(__ENV.BURST_RATE || 500), duration: __ENV.BURST_RAMP || '30s' }, + { target: Number(__ENV.BURST_RATE || 500), duration: __ENV.BURST_HOLD || '1m' }, + { target: Number(__ENV.RECOVERY_RATE || 50), duration: __ENV.RECOVERY || '1m' }, + ], + tags: { benchmark: 'backpressure', scenario_part: 'burst', run_id: runId }, + }, + }, + thresholds: { + checks: ['rate>0.80'], + }, +}; + +function tenantForBatch(iteration) { + if (seededTenants.length > 0) { + if (iteration % 2 === 0 && hotTenants.length > 0) { + return hotTenants[iteration % hotTenants.length]; + } + if (mediumTenants.length > 0) { + return mediumTenants[iteration % mediumTenants.length]; + } + return seededTenants[iteration % seededTenants.length]; + } + + return { + tenant_id: `tenant_${String((iteration % 10) + 1).padStart(3, '0')}`, + tenant_class: iteration % 10 === 0 ? 'hot' : 'medium', + api_key: apiKey, + }; +} + +function eventPayload(index, tenant) { + const late = index % 20 === 0; + const timestamp = late + ? new Date(Date.now() - 14 * 24 * 60 * 60 * 1000 - index * 1000).toISOString() + : new Date().toISOString(); + + return { + event_name: index % 7 === 0 ? 
'purchase' : 'page_view', + project_id: tenant.project_id, + user_id: `pressure_user_${index % 20000}`, + properties: { + tenant_id: tenant.tenant_id, + tenant_class: tenant.tenant_class, + segment: ['pro', 'team', 'enterprise'][index % 3], + region: ['na', 'eu', 'apac'][index % 3], + device: ['web', 'mobile', 'desktop'][index % 3], + product: ['core', 'insights', 'automation'][index % 3], + late_arrival: late, + run_id: runId, + benchmark: 'backpressure', + }, + timestamp, + }; +} + +export default function () { + const base = exec.scenario.iterationInTest * batchSize; + const tenant = tenantForBatch(exec.scenario.iterationInTest); + const events = Array.from({ length: batchSize }, (_, offset) => eventPayload(base + offset, tenant)); + const response = http.post(`${apiUrl}/api/v1/events/batch`, JSON.stringify({ events }), { + headers: { + 'Content-Type': 'application/json', + 'X-API-Key': tenant.api_key || apiKey, + }, + tags: { + benchmark: 'backpressure', + endpoint: 'batch', + tenant_class: tenant.tenant_class, + tenant_id: tenant.tenant_id, + run_id: runId, + }, + timeout: __ENV.HTTP_TIMEOUT || '30s', + }); + + check(response, { + 'accepted, limited, or saturated': (res) => [202, 429, 500, 502, 503, 504].includes(res.status), + 'not a validation failure': (res) => res.status !== 400, + }); + + sleep(Number(__ENV.SLEEP_SECONDS || 0)); +} + +export function handleSummary(data) { + const path = __ENV.SUMMARY_PATH || `docs/benchmarks/evidence/backpressure-${runId}.json`; + return { + stdout: JSON.stringify({ + benchmark: 'backpressure', + run_id: runId, + summary_path: path, + note: 'Backpressure evidence should be reviewed with service logs, Kafka lag, and database metrics.', + }, null, 2), + [path]: JSON.stringify(data, null, 2), + }; +} diff --git a/tests/load/dashboard-query.js b/tests/load/dashboard-query.js new file mode 100644 index 0000000..8e2999e --- /dev/null +++ b/tests/load/dashboard-query.js @@ -0,0 +1,121 @@ +import http from 'k6/http'; +import { 
check, sleep } from 'k6'; +import exec from 'k6/execution'; + +const graphqlUrl = __ENV.GRAPHQL_URL || 'http://localhost:3002/graphql'; +const apiKey = __ENV.API_KEY || 'demo_key_change_this'; +const orgId = __ENV.ORG_ID || '00000000-0000-0000-0000-000000000001'; +const projectId = __ENV.PROJECT_ID || '00000000-0000-0000-0000-000000000002'; +const runId = __ENV.RUN_ID || new Date().toISOString().replace(/[:.]/g, '-'); +const startDate = __ENV.START_DATE || '2026-06-01'; +const endDate = __ENV.END_DATE || '2026-06-30'; +const tenantKeysFile = __ENV.TENANT_KEYS_FILE; +const dashboardTenantClass = __ENV.DASHBOARD_TENANT_CLASS; +const seededTenants = tenantKeysFile ? JSON.parse(open(tenantKeysFile)).tenants : []; +const dashboardTenants = dashboardTenantClass + ? seededTenants.filter((tenant) => tenant.tenant_class === dashboardTenantClass) + : seededTenants; + +export const options = { + summaryTrendStats: ['avg', 'min', 'med', 'p(90)', 'p(95)', 'p(99)', 'max'], + scenarios: { + dashboard_query: { + executor: 'constant-vus', + vus: Number(__ENV.VUS || 20), + duration: __ENV.DURATION || '2m', + tags: { benchmark: 'dashboard-query', run_id: runId }, + }, + }, + thresholds: { + http_req_failed: ['rate<0.05'], + }, +}; + +const query = ` +query DashboardMetrics($orgId: ID!, $projectId: ID!, $startDate: Date!, $endDate: Date!, $filters: JSON) { + metrics(orgId: $orgId, projectId: $projectId, startDate: $startDate, endDate: $endDate, filters: $filters) { + totalEvents + dailyActiveUsers { date value } + topEvents { eventName count trend } + dateRange { start end } + } + eventCountsOverTime(orgId: $orgId, projectId: $projectId, startDate: $startDate, endDate: $endDate, filters: $filters) { + date + value + } + recentEvents(orgId: $orgId, projectId: $projectId, limit: 10, filters: $filters) { + id + eventName + timestamp + } +}`; + +function tenantForIteration(iteration) { + if (dashboardTenants.length > 0) { + return dashboardTenants[iteration % dashboardTenants.length]; + } 
+ + return { + org_id: orgId, + project_id: projectId, + api_key: apiKey, + tenant_id: 'demo', + tenant_class: 'demo', + }; +} + +export default function () { + const useFilter = Math.random() < Number(__ENV.FILTER_RATIO || 0.25); + const tenant = tenantForIteration(exec.scenario.iterationInTest); + const payload = { + query, + variables: { + orgId: tenant.org_id, + projectId: tenant.project_id, + startDate, + endDate, + filters: useFilter ? { segment: 'pro' } : null, + }, + }; + + const response = http.post(graphqlUrl, JSON.stringify(payload), { + headers: { + 'Content-Type': 'application/json', + 'X-API-Key': tenant.api_key || apiKey, + }, + tags: { + benchmark: 'dashboard-query', + endpoint: 'graphql', + cache_path: useFilter ? 'filtered' : 'aggregate', + tenant_class: tenant.tenant_class, + tenant_id: tenant.tenant_id, + run_id: runId, + }, + }); + + check(response, { + 'graphql status ok': (res) => res.status === 200, + 'graphql has no errors': (res) => { + try { + return !JSON.parse(res.body).errors; + } catch (_error) { + return false; + } + }, + }); + + sleep(Number(__ENV.SLEEP_SECONDS || 1)); +} + +export function handleSummary(data) { + const path = __ENV.SUMMARY_PATH || `docs/benchmarks/evidence/dashboard-query-${runId}.json`; + return { + stdout: JSON.stringify({ + benchmark: 'dashboard-query', + run_id: runId, + summary_path: path, + note: 'Raw k6 summary only; cache state and query plans are required for article claims.', + }, null, 2), + [path]: JSON.stringify(data, null, 2), + }; +} diff --git a/tests/load/hot-tenant.js b/tests/load/hot-tenant.js new file mode 100644 index 0000000..73e45a4 --- /dev/null +++ b/tests/load/hot-tenant.js @@ -0,0 +1,116 @@ +import http from 'k6/http'; +import { check } from 'k6'; +import exec from 'k6/execution'; + +const apiUrl = __ENV.API_URL || 'http://localhost:3001'; +const apiKey = __ENV.API_KEY || 'demo_key_change_this'; +const runId = __ENV.RUN_ID || new Date().toISOString().replace(/[:.]/g, '-'); +const 
hotTenantRatio = Number(__ENV.HOT_TENANT_RATIO || 0.8); +const hotTenantCount = Number(__ENV.HOT_TENANT_COUNT || 5); +const tenantCount = Number(__ENV.TENANTS || 100); +const tenantKeysFile = __ENV.TENANT_KEYS_FILE; +const seededTenants = tenantKeysFile ? JSON.parse(open(tenantKeysFile)).tenants : []; +const hotTenants = seededTenants.filter((tenant) => tenant.tenant_class === 'hot'); +const mediumTenants = seededTenants.filter((tenant) => tenant.tenant_class === 'medium'); +const quietTenants = seededTenants.filter((tenant) => tenant.tenant_class === 'quiet'); + +export const options = { + summaryTrendStats: ['avg', 'min', 'med', 'p(90)', 'p(95)', 'p(99)', 'max'], + scenarios: { + hot_tenant: { + executor: 'ramping-arrival-rate', + startRate: Number(__ENV.START_RATE || 25), + timeUnit: '1s', + preAllocatedVUs: Number(__ENV.PREALLOCATED_VUS || 50), + maxVUs: Number(__ENV.MAX_VUS || 250), + stages: [ + { target: Number(__ENV.PEAK_RATE || 250), duration: __ENV.RAMP_DURATION || '1m' }, + { target: Number(__ENV.PEAK_RATE || 250), duration: __ENV.HOLD_DURATION || '2m' }, + { target: Number(__ENV.START_RATE || 25), duration: __ENV.RAMP_DOWN_DURATION || '30s' }, + ], + tags: { benchmark: 'hot-tenant', run_id: runId }, + }, + }, + thresholds: { + http_req_failed: ['rate<0.10'], + }, +}; + +function tenantForIteration(iteration) { + if (seededTenants.length > 0) { + const isHot = Math.random() < hotTenantRatio; + if (isHot && hotTenants.length > 0) { + return hotTenants[iteration % hotTenants.length]; + } + + if (Math.random() < 0.25 && mediumTenants.length > 0) { + return mediumTenants[iteration % mediumTenants.length]; + } + + return quietTenants.length > 0 + ? quietTenants[iteration % quietTenants.length] + : seededTenants[iteration % seededTenants.length]; + } + + const isHot = Math.random() < hotTenantRatio; + const tenantNumber = isHot + ? 
(iteration % hotTenantCount) + 1 + : hotTenantCount + ((iteration % Math.max(1, tenantCount - hotTenantCount)) + 1); + return { + tenant_id: `tenant_${String(tenantNumber).padStart(3, '0')}`, + tenant_class: isHot ? 'hot' : tenantNumber % 4 === 0 ? 'medium' : 'quiet', + api_key: apiKey, + }; +} + +export default function () { + const iteration = exec.scenario.iterationInTest; + const tenant = tenantForIteration(iteration); + const event = { + event_name: iteration % 5 === 0 ? 'purchase' : 'page_view', + project_id: tenant.project_id, + user_id: `${tenant.tenant_id}_user_${iteration % 5000}`, + properties: { + tenant_id: tenant.tenant_id, + tenant_class: tenant.tenant_class, + segment: tenant.tenant_class === 'hot' ? 'enterprise' : 'pro', + region: ['na', 'eu', 'apac'][iteration % 3], + device: ['web', 'mobile', 'desktop'][iteration % 3], + product: ['core', 'insights', 'automation'][iteration % 3], + run_id: runId, + benchmark: 'hot-tenant', + }, + timestamp: new Date().toISOString(), + }; + + const response = http.post(`${apiUrl}/api/v1/events`, JSON.stringify(event), { + headers: { + 'Content-Type': 'application/json', + 'X-API-Key': tenant.api_key || apiKey, + }, + tags: { + benchmark: 'hot-tenant', + endpoint: 'single', + tenant_class: tenant.tenant_class, + tenant_id: tenant.tenant_id, + run_id: runId, + }, + }); + + check(response, { + 'accepted or rate limited': (res) => res.status === 202 || res.status === 429, + }); +} + +export function handleSummary(data) { + const path = __ENV.SUMMARY_PATH || `docs/benchmarks/evidence/hot-tenant-${runId}.json`; + return { + stdout: JSON.stringify({ + benchmark: 'hot-tenant', + run_id: runId, + summary_path: path, + note: 'Raw k6 summary only; compare with Kafka and PostgreSQL evidence before publishing.', + }, null, 2), + [path]: JSON.stringify(data, null, 2), + }; +} diff --git a/tests/load/ingest-throughput.js b/tests/load/ingest-throughput.js new file mode 100644 index 0000000..cf76cb7 --- /dev/null +++ 
b/tests/load/ingest-throughput.js @@ -0,0 +1,98 @@ +import http from 'k6/http'; +import { check, sleep } from 'k6'; +import exec from 'k6/execution'; + +const apiUrl = __ENV.API_URL || 'http://localhost:3001'; +const apiKey = __ENV.API_KEY || 'demo_key_change_this'; +const runId = __ENV.RUN_ID || new Date().toISOString().replace(/[:.]/g, '-'); +const batchSize = Number(__ENV.BATCH_SIZE || 50); +const tenantKeysFile = __ENV.TENANT_KEYS_FILE; +const seededTenants = tenantKeysFile ? JSON.parse(open(tenantKeysFile)).tenants : []; + +export const options = { + summaryTrendStats: ['avg', 'min', 'med', 'p(90)', 'p(95)', 'p(99)', 'max'], + scenarios: { + ingest_throughput: { + executor: 'constant-arrival-rate', + rate: Number(__ENV.RATE || 100), + timeUnit: '1s', + duration: __ENV.DURATION || '2m', + preAllocatedVUs: Number(__ENV.PREALLOCATED_VUS || 50), + maxVUs: Number(__ENV.MAX_VUS || 200), + tags: { benchmark: 'ingest-throughput', run_id: runId }, + }, + }, + thresholds: { + http_req_failed: ['rate<0.05'], + }, +}; + +function tenantForBatch(iteration) { + if (seededTenants.length > 0) { + return seededTenants[iteration % seededTenants.length]; + } + + return { + tenant_id: `tenant_${String((iteration % 100) + 1).padStart(3, '0')}`, + tenant_class: iteration % 10 === 0 ? 'hot' : iteration % 3 === 0 ? 'medium' : 'quiet', + api_key: apiKey, + }; +} + +function eventPayload(index, tenant) { + const tenantClass = index % 10 === 0 ? 'hot' : index % 3 === 0 ? 
'medium' : 'quiet'; + return { + event_name: ['page_view', 'button_click', 'signup', 'purchase'][index % 4], + project_id: tenant.project_id, + user_id: `load_user_${index % 10000}`, + properties: { + tenant_id: tenant.tenant_id, + tenant_class: tenant.tenant_class || tenantClass, + segment: ['free', 'pro', 'team', 'enterprise'][index % 4], + region: ['na', 'eu', 'apac'][index % 3], + device: ['web', 'mobile', 'desktop'][index % 3], + product: ['core', 'insights', 'automation'][index % 3], + run_id: runId, + benchmark: 'ingest-throughput', + }, + timestamp: new Date(Date.now() - (index % 86400) * 1000).toISOString(), + }; +} + +export default function () { + const base = exec.scenario.iterationInTest * batchSize; + const tenant = tenantForBatch(exec.scenario.iterationInTest); + const events = Array.from({ length: batchSize }, (_, offset) => eventPayload(base + offset, tenant)); + const response = http.post(`${apiUrl}/api/v1/events/batch`, JSON.stringify({ events }), { + headers: { + 'Content-Type': 'application/json', + 'X-API-Key': tenant.api_key || apiKey, + }, + tags: { + benchmark: 'ingest-throughput', + endpoint: 'batch', + tenant_class: tenant.tenant_class, + tenant_id: tenant.tenant_id, + run_id: runId, + }, + }); + + check(response, { + 'accepted or rate limited': (res) => res.status === 202 || res.status === 429, + }); + + sleep(Number(__ENV.SLEEP_SECONDS || 0)); +} + +export function handleSummary(data) { + const path = __ENV.SUMMARY_PATH || `docs/benchmarks/evidence/ingest-throughput-${runId}.json`; + return { + stdout: JSON.stringify({ + benchmark: 'ingest-throughput', + run_id: runId, + summary_path: path, + note: 'Raw k6 summary only; do not publish claims until reviewed with environment details.', + }, null, 2), + [path]: JSON.stringify(data, null, 2), + }; +} diff --git a/web/src/lib/graphql.ts b/web/src/lib/graphql.ts index 2d0c6ce..960293e 100644 --- a/web/src/lib/graphql.ts +++ b/web/src/lib/graphql.ts @@ -1,8 +1,12 @@ import { GraphQLClient } 
from 'graphql-request'; -export const graphqlClient = new GraphQLClient('http://localhost:3002/graphql', { +const graphqlUrl = import.meta.env.VITE_GRAPHQL_URL || 'http://localhost:3002/graphql'; +const apiKey = import.meta.env.VITE_PULSEOPS_API_KEY; + +export const graphqlClient = new GraphQLClient(graphqlUrl, { headers: { 'Content-Type': 'application/json', + ...(apiKey ? { 'X-API-Key': apiKey } : {}), }, }); diff --git a/web/src/vite-env.d.ts b/web/src/vite-env.d.ts new file mode 100644 index 0000000..11f02fe --- /dev/null +++ b/web/src/vite-env.d.ts @@ -0,0 +1 @@ +///