From 96b3c0f12f8d2391feb41cd55edde082b20be176 Mon Sep 17 00:00:00 2001 From: bdclaw2026 <262853276+bdclaw2026@users.noreply.github.com> Date: Tue, 10 Mar 2026 22:47:48 -0700 Subject: [PATCH 1/2] feat: define v1 event schema fixtures --- README.md | 4 + docs/event-schema-v1.md | 78 +++++++++++++ fixtures/events/v1/json-checkout-failure.json | 22 ++++ fixtures/events/v1/key-value-retry.json | 22 ++++ fixtures/events/v1/logfmt-auth-success.json | 23 ++++ .../events/v1/plain-text-worker-stall.json | 15 +++ pkg/event/v1.go | 108 ++++++++++++++++++ pkg/event/v1_test.go | 36 ++++++ 8 files changed, 308 insertions(+) create mode 100644 docs/event-schema-v1.md create mode 100644 fixtures/events/v1/json-checkout-failure.json create mode 100644 fixtures/events/v1/key-value-retry.json create mode 100644 fixtures/events/v1/logfmt-auth-success.json create mode 100644 fixtures/events/v1/plain-text-worker-stall.json create mode 100644 pkg/event/v1.go create mode 100644 pkg/event/v1_test.go diff --git a/README.md b/README.md index f61a940..fe9f843 100644 --- a/README.md +++ b/README.md @@ -58,6 +58,10 @@ Workspace Notes / Analyze | `workspace add-log --topic ` | Add log file and rebuild patterns/notes | | `workspace analyze --topic [question]` | Run AI analysis (`--acp claude|codex|gemini`) | +## Event Schema + +The initial normalized event contract is documented in [docs/event-schema-v1.md](docs/event-schema-v1.md), with representative fixtures under `fixtures/events/v1/` for JSON, logfmt, `key=value`, and plain text logs. 
+ ## Development ```bash diff --git a/docs/event-schema-v1.md b/docs/event-schema-v1.md new file mode 100644 index 0000000..9ca82db --- /dev/null +++ b/docs/event-schema-v1.md @@ -0,0 +1,78 @@ +# Event Schema v1 + +`lapp` treats every parsed log record as a normalized event with three concerns kept separate: + +- `text`: the raw source line that came in +- `attrs`: structured attributes parsed directly from the source +- `inferred`: metadata synthesized after parsing, such as a generalized pattern or owning entity + +This keeps ingestion lossless while giving downstream steps a stable shape to work with even when log formats vary. + +## Canonical Shape + +```json +{ + "ts": "2026-03-10T21:00:00Z", + "text": "ts=2026-03-10T21:00:00Z level=info service=auth-api request_id=req_123 msg=\"user user_456 authenticated\"", + "attrs": { + "level": "info", + "service": "auth-api", + "request_id": "req_123", + "msg": "user user_456 authenticated" + }, + "inferred": { + "pattern": "user <*> authenticated", + "entity": "auth-api" + } +} +``` + +## Top-Level Fields + +| Field | Type | Required | Notes | +|---|---|---|---| +| `ts` | RFC3339 timestamp string | No | Optional because plain text logs may not expose a trustworthy timestamp. | +| `text` | string | Yes | Raw log line, preserved verbatim as the source of truth. | +| `attrs` | object of string to string | Yes | Parsed key/value attributes extracted directly from the log line. Use `{}` when nothing can be extracted. | +| `inferred` | object | Yes | Metadata derived from parsing or later enrichment. Use `{}` when nothing is inferred yet. | + +## Parsed Attributes + +`attrs` stays intentionally flat in v1. Values are strings so the schema remains stable across JSON, logfmt, `key=value`, and plain text sources. + +Recommended canonical keys when they can be recovered confidently: + +| Key | Required | Meaning | +|---|---|---| +| `level` | No | Severity such as `debug`, `info`, `warn`, or `error`. 
| +| `service` | No | Service, worker, or subsystem name. | +| `env` | No | Deployment environment such as `prod` or `staging`. | +| `request_id` | No | Request-scoped identifier. | +| `trace_id` | No | Distributed trace identifier. | +| `span_id` | No | Distributed tracing span identifier. | +| `correlation_id` | No | Cross-system correlation token when `request_id` is not the right semantic fit. | +| `user_id` | No | User identifier present in the source line. | +| `endpoint` | No | HTTP or RPC target when available. | +| `method` | No | HTTP or RPC verb when available. | + +Additional keys are allowed when they represent source fields that are useful to preserve. + +## Inferred Metadata + +`inferred` is reserved for values that are not copied verbatim from the source. + +| Key | Required | Meaning | +|---|---|---| +| `pattern` | No | Generalized event template such as `user <*> authenticated`. | +| `entity` | No | Owning component, domain object, or actor inferred from context. | + +## Fixture Coverage + +Representative fixtures live in `fixtures/events/v1/`: + +- `json-checkout-failure.json` +- `logfmt-auth-success.json` +- `key-value-retry.json` +- `plain-text-worker-stall.json` + +Each fixture wraps a normalized event with `name`, `source_format`, and `description` metadata so future parser and schema tests can consume them directly. 
diff --git a/fixtures/events/v1/json-checkout-failure.json b/fixtures/events/v1/json-checkout-failure.json new file mode 100644 index 0000000..f3c7bb5 --- /dev/null +++ b/fixtures/events/v1/json-checkout-failure.json @@ -0,0 +1,22 @@ +{ + "name": "json_checkout_failure", + "source_format": "json", + "description": "Structured JSON log with canonical request metadata and inferred service ownership.", + "event": { + "ts": "2026-03-10T21:00:00Z", + "text": "{\"ts\":\"2026-03-10T21:00:00Z\",\"level\":\"ERROR\",\"service\":\"payments-api\",\"env\":\"prod\",\"request_id\":\"req_123\",\"trace_id\":\"trace_456\",\"endpoint\":\"/checkout\",\"message\":\"checkout failed for user user_789\"}", + "attrs": { + "level": "error", + "service": "payments-api", + "env": "prod", + "request_id": "req_123", + "trace_id": "trace_456", + "endpoint": "/checkout", + "message": "checkout failed for user user_789" + }, + "inferred": { + "pattern": "checkout failed for user <*>", + "entity": "payments-api" + } + } +} diff --git a/fixtures/events/v1/key-value-retry.json b/fixtures/events/v1/key-value-retry.json new file mode 100644 index 0000000..a96be0b --- /dev/null +++ b/fixtures/events/v1/key-value-retry.json @@ -0,0 +1,22 @@ +{ + "name": "key_value_retry", + "source_format": "key_value", + "description": "Space-delimited key=value log line normalized into canonical service and correlation fields.", + "event": { + "ts": "2026-03-10T21:02:45Z", + "text": "timestamp=2026-03-10T21:02:45Z severity=WARN service_name=billing-worker environment=prod correlation_id=corr_123 tenant_id=tenant_456 action=retrying charge_id=ch_789", + "attrs": { + "level": "warn", + "service": "billing-worker", + "env": "prod", + "correlation_id": "corr_123", + "tenant_id": "tenant_456", + "action": "retrying", + "charge_id": "ch_789" + }, + "inferred": { + "pattern": "retrying charge <*>", + "entity": "billing-worker" + } + } +} diff --git a/fixtures/events/v1/logfmt-auth-success.json 
b/fixtures/events/v1/logfmt-auth-success.json new file mode 100644 index 0000000..dbd7a3c --- /dev/null +++ b/fixtures/events/v1/logfmt-auth-success.json @@ -0,0 +1,23 @@ +{ + "name": "logfmt_auth_success", + "source_format": "logfmt", + "description": "logfmt line with request and trace identifiers plus a normalized auth pattern.", + "event": { + "ts": "2026-03-10T21:01:12Z", + "text": "ts=2026-03-10T21:01:12Z level=INFO service=auth-api env=staging request_id=req_456 trace_id=trace_789 method=POST endpoint=/login msg=\"user user_123 authenticated\"", + "attrs": { + "level": "info", + "service": "auth-api", + "env": "staging", + "request_id": "req_456", + "trace_id": "trace_789", + "method": "POST", + "endpoint": "/login", + "msg": "user user_123 authenticated" + }, + "inferred": { + "pattern": "user <*> authenticated", + "entity": "auth-api" + } + } +} diff --git a/fixtures/events/v1/plain-text-worker-stall.json b/fixtures/events/v1/plain-text-worker-stall.json new file mode 100644 index 0000000..1b7e7cf --- /dev/null +++ b/fixtures/events/v1/plain-text-worker-stall.json @@ -0,0 +1,15 @@ +{ + "name": "plain_text_worker_stall", + "source_format": "plain_text", + "description": "Unstructured plain text log that keeps the raw line and only a small amount of inferred metadata.", + "event": { + "text": "ERROR worker pool stalled after 3 retries while draining queue payments", + "attrs": { + "level": "error" + }, + "inferred": { + "pattern": "worker pool stalled after <*> retries while draining queue <*>", + "entity": "worker-pool" + } + } +} diff --git a/pkg/event/v1.go b/pkg/event/v1.go new file mode 100644 index 0000000..f34de16 --- /dev/null +++ b/pkg/event/v1.go @@ -0,0 +1,108 @@ +package event + +import ( + "encoding/json" + "os" + "path/filepath" + "strings" + "time" + + goerrors "github.com/go-errors/errors" +) + +const ( + SourceFormatJSON = "json" + SourceFormatLogfmt = "logfmt" + SourceFormatKeyValue = "key_value" + SourceFormatPlainText = "plain_text" +) + 
+var allowedSourceFormats = map[string]struct{}{ + SourceFormatJSON: {}, + SourceFormatLogfmt: {}, + SourceFormatKeyValue: {}, + SourceFormatPlainText: {}, +} + +// Event is the v1 normalized log event shape. +type Event struct { + Timestamp *time.Time `json:"ts,omitempty"` + Text string `json:"text"` + Attrs map[string]string `json:"attrs"` + Inferred *Inferred `json:"inferred"` +} + +// Inferred contains metadata derived after parsing. +type Inferred struct { + Pattern string `json:"pattern,omitempty"` + Entity string `json:"entity,omitempty"` +} + +// Fixture wraps an example event with provenance metadata. +type Fixture struct { + Name string `json:"name"` + SourceFormat string `json:"source_format"` + Description string `json:"description"` + Event Event `json:"event"` +} + +// Validate checks that the fixture satisfies the documented v1 contract. +func (f Fixture) Validate() error { + if strings.TrimSpace(f.Name) == "" { + return goerrors.New("name is required") + } + if strings.TrimSpace(f.Description) == "" { + return goerrors.New("description is required") + } + if _, ok := allowedSourceFormats[f.SourceFormat]; !ok { + return goerrors.Errorf("validate source_format: expected one of %q, %q, %q, %q", SourceFormatJSON, SourceFormatLogfmt, SourceFormatKeyValue, SourceFormatPlainText) + } + if strings.TrimSpace(f.Event.Text) == "" { + return goerrors.New("event.text is required") + } + if f.Event.Attrs == nil { + return goerrors.New("event.attrs is required") + } + if f.Event.Inferred == nil { + return goerrors.New("event.inferred is required") + } + for key := range f.Event.Attrs { + if strings.TrimSpace(key) == "" { + return goerrors.New("event.attrs contains an empty key") + } + } + return nil +} + +// LoadFixtures reads all JSON fixture files from a directory. 
+func LoadFixtures(dir string) ([]Fixture, error) { + entries, err := os.ReadDir(dir) + if err != nil { + return nil, goerrors.Errorf("read fixtures dir: %w", err) + } + + fixtures := make([]Fixture, 0, len(entries)) + for _, entry := range entries { + if entry.IsDir() || filepath.Ext(entry.Name()) != ".json" { + continue + } + + path := filepath.Join(dir, entry.Name()) + data, err := os.ReadFile(path) + if err != nil { + return nil, goerrors.Errorf("read fixture %s: %w", path, err) + } + + var fixture Fixture + if err := json.Unmarshal(data, &fixture); err != nil { + return nil, goerrors.Errorf("decode fixture %s: %w", path, err) + } + if err := fixture.Validate(); err != nil { + return nil, goerrors.Errorf("validate fixture %s: %w", path, err) + } + + fixtures = append(fixtures, fixture) + } + + return fixtures, nil +} diff --git a/pkg/event/v1_test.go b/pkg/event/v1_test.go new file mode 100644 index 0000000..523d3c2 --- /dev/null +++ b/pkg/event/v1_test.go @@ -0,0 +1,36 @@ +package event + +import ( + "path/filepath" + "testing" +) + +func TestLoadFixtures(t *testing.T) { + dir := filepath.Join("..", "..", "fixtures", "events", "v1") + + fixtures, err := LoadFixtures(dir) + if err != nil { + t.Fatalf("LoadFixtures: %v", err) + } + + if len(fixtures) != 4 { + t.Fatalf("expected 4 fixtures, got %d", len(fixtures)) + } + + seen := make(map[string]bool, len(fixtures)) + for _, fixture := range fixtures { + seen[fixture.SourceFormat] = true + } + + required := []string{ + SourceFormatJSON, + SourceFormatLogfmt, + SourceFormatKeyValue, + SourceFormatPlainText, + } + for _, format := range required { + if !seen[format] { + t.Fatalf("missing fixture for %s", format) + } + } +} From d5b7fc8bf3bd02887c7782d96ab89bcaf589c41a Mon Sep 17 00:00:00 2001 From: bdclaw2026 <262853276+bdclaw2026@users.noreply.github.com> Date: Tue, 10 Mar 2026 23:49:46 -0700 Subject: [PATCH 2/2] fix: make event schema protobuf canonical --- README.md | 2 +- docs/event-schema-v1.md | 26 
+++++++++++++++- pkg/event/v1.go | 16 ++++++++-- pkg/event/v1_test.go | 18 +++++------ proto/lapp/event/v1/event.proto | 54 +++++++++++++++++++++++++++++++++ 5 files changed, 102 insertions(+), 14 deletions(-) create mode 100644 proto/lapp/event/v1/event.proto diff --git a/README.md b/README.md index fe9f843..7cd9356 100644 --- a/README.md +++ b/README.md @@ -60,7 +60,7 @@ Workspace Notes / Analyze ## Event Schema -The initial normalized event contract is documented in [docs/event-schema-v1.md](docs/event-schema-v1.md), with representative fixtures under `fixtures/events/v1/` for JSON, logfmt, `key=value`, and plain text logs. +The initial normalized event contract is defined in [proto/lapp/event/v1/event.proto](proto/lapp/event/v1/event.proto) and documented in [docs/event-schema-v1.md](docs/event-schema-v1.md). Representative fixtures live under `fixtures/events/v1/` for JSON, logfmt, `key=value`, and plain text logs. ## Development diff --git a/docs/event-schema-v1.md b/docs/event-schema-v1.md index 9ca82db..12db94a 100644 --- a/docs/event-schema-v1.md +++ b/docs/event-schema-v1.md @@ -8,8 +8,32 @@ This keeps ingestion lossless while giving downstream steps a stable shape to work with even when log formats vary. +The canonical schema definition lives in `proto/lapp/event/v1/event.proto`. This document explains how to use that schema and how the JSON fixtures map onto it. 
+ ## Canonical Shape +### Protobuf + +```proto +message Event { + google.protobuf.Timestamp ts = 1; + string text = 2; + map<string, string> attrs = 3; + Inferred inferred = 4; +} +``` + +```proto +message Fixture { + string name = 1; + SourceFormat source_format = 2; + string description = 3; + Event event = 4; +} +``` + +### JSON Fixture Encoding + ```json { "ts": "2026-03-10T21:00:00Z", @@ -75,4 +99,4 @@ Representative fixtures live in `fixtures/events/v1/`: - `key-value-retry.json` - `plain-text-worker-stall.json` -Each fixture wraps a normalized event with `name`, `source_format`, and `description` metadata so future parser and schema tests can consume them directly. +Each fixture wraps a normalized event with `name`, `source_format`, and `description` metadata. This allows future parser and schema tests to consume them directly. diff --git a/pkg/event/v1.go b/pkg/event/v1.go index f34de16..4b319ab 100644 --- a/pkg/event/v1.go +++ b/pkg/event/v1.go @@ -4,6 +4,7 @@ import ( "encoding/json" "os" "path/filepath" + "sort" "strings" "time" @@ -24,7 +25,7 @@ var allowedSourceFormats = map[string]struct{}{ SourceFormatPlainText: {}, } -// Event is the v1 normalized log event shape. +// Event mirrors the canonical v1 schema in proto/lapp/event/v1/event.proto. type Event struct { Timestamp *time.Time `json:"ts,omitempty"` Text string `json:"text"` @@ -38,7 +39,7 @@ type Inferred struct { Entity string `json:"entity,omitempty"` } -// Fixture wraps an example event with provenance metadata. +// Fixture mirrors the protobuf fixture contract for JSON-backed examples. 
type Fixture struct { Name string `json:"name"` SourceFormat string `json:"source_format"` @@ -55,7 +56,7 @@ func (f Fixture) Validate() error { return goerrors.New("description is required") } if _, ok := allowedSourceFormats[f.SourceFormat]; !ok { - return goerrors.Errorf("validate source_format: expected one of %q, %q, %q, %q", SourceFormatJSON, SourceFormatLogfmt, SourceFormatKeyValue, SourceFormatPlainText) + return goerrors.Errorf("validate source_format: invalid format %q, must be one of %s", f.SourceFormat, strings.Join(allowedSourceFormatNames(), ", ")) } if strings.TrimSpace(f.Event.Text) == "" { return goerrors.New("event.text is required") @@ -74,6 +75,15 @@ func (f Fixture) Validate() error { return nil } +func allowedSourceFormatNames() []string { + formats := make([]string, 0, len(allowedSourceFormats)) + for format := range allowedSourceFormats { + formats = append(formats, format) + } + sort.Strings(formats) + return formats +} + // LoadFixtures reads all JSON fixture files from a directory. 
func LoadFixtures(dir string) ([]Fixture, error) { entries, err := os.ReadDir(dir) diff --git a/pkg/event/v1_test.go b/pkg/event/v1_test.go index 523d3c2..e808368 100644 --- a/pkg/event/v1_test.go +++ b/pkg/event/v1_test.go @@ -7,27 +7,27 @@ import ( func TestLoadFixtures(t *testing.T) { dir := filepath.Join("..", "..", "fixtures", "events", "v1") + required := []string{ + SourceFormatJSON, + SourceFormatLogfmt, + SourceFormatKeyValue, + SourceFormatPlainText, + } fixtures, err := LoadFixtures(dir) if err != nil { t.Fatalf("LoadFixtures: %v", err) } - if len(fixtures) != 4 { - t.Fatalf("expected 4 fixtures, got %d", len(fixtures)) + if len(fixtures) != len(required) { + t.Fatalf("expected %d fixtures, got %d", len(required), len(fixtures)) } - seen := make(map[string]bool, len(fixtures)) + seen := make(map[string]bool, len(required)) for _, fixture := range fixtures { seen[fixture.SourceFormat] = true } - required := []string{ - SourceFormatJSON, - SourceFormatLogfmt, - SourceFormatKeyValue, - SourceFormatPlainText, - } for _, format := range required { if !seen[format] { t.Fatalf("missing fixture for %s", format) diff --git a/proto/lapp/event/v1/event.proto b/proto/lapp/event/v1/event.proto new file mode 100644 index 0000000..7c6da94 --- /dev/null +++ b/proto/lapp/event/v1/event.proto @@ -0,0 +1,54 @@ +syntax = "proto3"; + +package lapp.event.v1; + +option go_package = "github.com/STRRL/lapp/pkg/event/eventpb;eventpb"; + +import "google/protobuf/timestamp.proto"; + +// Event is the canonical v1 normalized log event. +message Event { + // Optional because plain text logs may not provide a trustworthy timestamp. + google.protobuf.Timestamp ts = 1; + + // Required. Preserve the raw log line verbatim as the source of truth. + string text = 2; + + // Required. Parsed source attributes remain flat string pairs in v1. + map<string, string> attrs = 3; + + // Required. Inferred metadata stays separate from source-derived attributes. 
+ Inferred inferred = 4; +} + +// Inferred contains metadata synthesized after parsing. +message Inferred { + // Optional generalized template such as "user <*> authenticated". + string pattern = 1; + + // Optional owning component, domain object, or actor inferred from context. + string entity = 2; +} + +enum SourceFormat { + SOURCE_FORMAT_UNSPECIFIED = 0; + SOURCE_FORMAT_JSON = 1; + SOURCE_FORMAT_LOGFMT = 2; + SOURCE_FORMAT_KEY_VALUE = 3; + SOURCE_FORMAT_PLAIN_TEXT = 4; +} + +// Fixture wraps a representative event example with provenance metadata. +message Fixture { + // Required stable fixture identifier. + string name = 1; + + // Required source representation used to produce the normalized event. + SourceFormat source_format = 2; + + // Required short explanation of the scenario the fixture covers. + string description = 3; + + // Required normalized event payload. + Event event = 4; +}