From 8d0f34cf59e4b085dff449959f3931b728523389 Mon Sep 17 00:00:00 2001 From: Daniel Gordos Date: Tue, 24 Feb 2026 17:28:20 +1100 Subject: [PATCH 1/2] feat: propagate OTEL env var attributes as default log fields Read OTEL_SERVICE_NAME and OTEL_RESOURCE_ATTRIBUTES at logger init and inject them as default zap fields so logs carry the same resource context as traces and metrics. Also fixes stale ldflags path in Makefile and updates README with corrected examples, span/meter usage docs. --- CLAUDE.md | 72 ++++++++++++++++++++ Makefile | 2 +- README.md | 61 ++++++++++++++--- pkg/logger/configure.go | 28 +++++++- pkg/logger/configure_test.go | 126 +++++++++++++++++++++++++++++++++++ 5 files changed, 276 insertions(+), 13 deletions(-) create mode 100644 CLAUDE.md create mode 100644 pkg/logger/configure_test.go diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..51d825b --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,72 @@ +# CLAUDE.md + +## What is this? + +A shared Go logging library (`github.com/nullify-platform/logger`) used across Nullify services. It wraps Uber Zap with context-based logging, OpenTelemetry tracing/metrics, and HTTP middleware. There is also a parallel Python implementation in `pylogtracer/`. + +## Commands + +```bash +make build # Build (CGO_ENABLED=0, linux/amd64) +make unit # Run all Go tests +make cov # Tests with coverage (generates coverage.html, coverage.xml, coverage.txt) +make lint # golangci-lint v2.10.1 (gofmt, stylecheck, gosec) +make format # gofmt + +# Run a single test +go test ./pkg/logger/... -run TestName -v + +# Python +make lint-python # ruff format + lint check +make fix-python # ruff auto-fix +``` + +## Architecture + +All logging is context-bound (`logger.L(ctx)`), never global. `ConfigureProductionLogger` / `ConfigureDevelopmentLogger` inject the logger into context and set up OTEL providers. + +- **`pkg/logger/`** - Core package: `Logger` interface, configuration, field builders, chunking for oversized Loki entries +- **`pkg/logger/tracer/`** - OTEL tracer wrapping: span creation, AWS service context extraction (SQS, SNS, Lambda) +- **`pkg/logger/meter/`** - OTEL meter provider context management +- **`pkg/logger/middleware/`** - HTTP middleware for request logging, tracing, and metrics (redacts sensitive headers, skips healthchecks) + +### Key patterns + +- `logger.L(ctx)` auto-injects trace-id and span-id into every log line and records errors on the current span +- Field builders in `fields.go` provide both direct helpers (`logger.String()`, `logger.Err()`) and a fluent `LogFields` builder for domain-specific fields (agent, repository, tool calls) +- OTEL env vars (`OTEL_SERVICE_NAME`, `OTEL_RESOURCE_ATTRIBUTES`) are propagated as default log fields in addition to trace/metric resources +- Metrics use cumulative temporality (required by Grafana Mimir) +- OTLP headers can be fetched from AWS SSM Parameter Store via `OTEL_EXPORTER_OTLP_HEADERS_NAME` + +### Spans (tracer sub-package) + +Both tracer and meter are automatically injected into context by the logger configuration functions. + +```go +ctx, span := tracer.StartNewSpan(ctx, "operation-name") // child span +defer span.End() + +// or + +ctx, span := tracer.FromContext(ctx).Start(ctx, "operation-name") +defer span.End() + + +ctx, span := tracer.StartNewRootSpan(ctx, "handler") // root span (no parent) +defer span.End() + +tracer.ForceFlush(ctx) // flush before shutdown - also done by logger.L(ctx).Sync() +``` + +### Metrics (meter sub-package) + +```go +m := meter.FromContext(ctx) +counter, _ := m.Int64Counter("requests.total") +counter.Add(ctx, 1, metric.WithAttributes(attribute.String("method", "GET"))) +meter.ForceFlush(ctx) // flush before shutdown - also done by logger.L(ctx).Sync() +``` + +## Testing + +Tests use `stretchr/testify`. Logger output tests capture to a `bytes.Buffer` writer and verify JSON structure. diff --git a/Makefile b/Makefile index 91cf60a..0bff4d1 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION ?= $(shell git rev-list -1 HEAD) GOENV = CGO_ENABLED=0 GOOS=linux GOARCH=amd64 -GOFLAGS = -ldflags "-X 'moseisleycantina/internal/logger.Version=$(VERSION)'" +GOFLAGS = -ldflags "-X 'github.com/nullify-platform/logger/pkg/logger.Version=$(VERSION)'" GOLANGCI_LINT_VERSION := v2.10.1 build: diff --git a/README.md b/README.md index b96897a..25b687d 100644 --- a/README.md +++ b/README.md @@ -27,7 +27,6 @@ func main() { ctx, span := tracer.FromContext(ctx).Start(ctx, "main") defer span.End() - // You can add more contextual event information to spans through the span.AddEvent method. This is analogous to adding a log message, and has an associated timestamp that is recorded. span.AddEvent("a piece of work is starting within the span") @@ -42,17 +41,57 @@ func main() { } func anotherFunction(ctx context.Context) { - // Retrieve the tracer from the context + // Start a child span - the parent span is automatically linked via context. ctx, span := tracer.FromContext(ctx).Start(ctx, "some-other-work") defer span.End() - // This will automatically set the parent function span to be the parent span of this new span that has started, within this trace. + // To record errors, use logger.L(ctx).Error(). This automatically sets the span to errored, so it is highlighted in Grafana. + err := errors.New("this is an error") + logger.L(ctx).Error("this is an error", logger.Err(err)) +} +``` - // To record errors, you can use the logger.L(ctx).Error() call. This will automatically capture any errors that you pass into it and pass them to GlitchTip. It will also set the span to errored, so it is highlighted in Grafana. +### Spans - error := errors.New("this is an error") - logger.L(err).Error("this is an error", error) -} +Spans are created via the `tracer` sub-package. Both the tracer and meter are automatically injected into context by `ConfigureProductionLogger` / `ConfigureDevelopmentLogger`. + +```go +// Start a child span (inherits parent from context) +ctx, span := tracer.StartNewSpan(ctx, "span-name") +defer span.End() + +// Start a root span (no parent, e.g. at the entry point of an API handler) +ctx, span := tracer.StartNewRootSpan(ctx, "request-handler") +defer span.End() + +// With options +ctx, span := tracer.StartNewSpan(ctx, "span-name", + trace.WithAttributes(attribute.String("key", "value")), + trace.WithSpanKind(trace.SpanKindServer), +) +defer span.End() + +// Flush traces before shutdown (e.g. in Lambda handlers) +tracer.ForceFlush(ctx) +``` + +### Metrics + +Metrics are created via the `meter` sub-package. The meter is retrieved from context. + +```go +m := meter.FromContext(ctx) + +// Create instruments +counter, _ := m.Int64Counter("requests.total") +histogram, _ := m.Float64Histogram("request.duration_ms") + +// Record measurements +counter.Add(ctx, 1, metric.WithAttributes(attribute.String("method", "GET"))) +histogram.Record(ctx, 42.5) + +// Flush metrics before shutdown +meter.ForceFlush(ctx) ``` ## OpenTelemetry Exporting @@ -62,8 +101,8 @@ To actually have your traces exported, you need to set a few environment variabl - `OTEL_EXPORTER_OTLP_PROTOCOL`: typically set to `http/protobuf`; this is the protocol that the traces are sent over. - `OTEL_EXPORTER_OTLP_ENDPOINT`: the endpoint that the traces are sent to. - `OTEL_EXPORTER_OTLP_HEADERS_NAME`: the name of the parameter in aws parameter store that contains the headers for the OTLP exporter. -- `OTEL_RESOURCE_ATTRIBUTES`: the attributes that are associated with the service. Typically, you would set the environment here like `deployment.environment=production`. -- `OTEL_SERVICE_NAME`: the name of the service that is being traced. +- `OTEL_RESOURCE_ATTRIBUTES`: comma-separated `key=value` attributes associated with the service (e.g. `deployment.environment=production`). These are propagated to traces, metrics, and as default log fields. +- `OTEL_SERVICE_NAME`: the name of the service. Propagated to traces, metrics, and as a default `service.name` log field. ## Install @@ -89,13 +128,13 @@ make ## Test ``` -make test # without coverage +make unit # without coverage make cov # with coverage ``` ## Lint -Run the golangci-lint docker image with the following tools: +Run golangci-lint with the following tools: - gofmt - stylecheck diff --git a/pkg/logger/configure.go b/pkg/logger/configure.go index 2fc45e7..5798607 100644 --- a/pkg/logger/configure.go +++ b/pkg/logger/configure.go @@ -105,11 +105,14 @@ func configureLogger(ctx context.Context, level, scopeName string, encoder zapco version = BuildInfoRevision } + defaultFields := []zapcore.Field{zap.String("service.version", version)} + defaultFields = append(defaultFields, otelEnvFields()...) + zapLogger := zap.New( zapcore.NewCore(encoder, multiSync, zapLevel), zap.AddCaller(), zap.AddCallerSkip(1), - zap.Fields(zap.String("version", version)), + zap.Fields(defaultFields...), ) zap.ReplaceGlobals(zapLogger) @@ -229,6 +232,29 @@ func newMetricExporter(ctx context.Context, headers map[string]string) (sdkmetri return nil, nil } +// otelEnvFields reads OTEL_SERVICE_NAME and OTEL_RESOURCE_ATTRIBUTES env vars +// and returns them as zap fields, mirroring the resource attributes that the +// OTEL SDK picks up for traces and metrics. +func otelEnvFields() []zapcore.Field { + var fields []zapcore.Field + + if serviceName := os.Getenv("OTEL_SERVICE_NAME"); serviceName != "" { + fields = append(fields, zap.String("service.name", serviceName)) + } + + if attrs := os.Getenv("OTEL_RESOURCE_ATTRIBUTES"); attrs != "" { + for entry := range strings.SplitSeq(attrs, ",") { + parts := strings.SplitN(entry, "=", 2) + if len(parts) != 2 { + continue + } + fields = append(fields, zap.String(parts[0], parts[1])) + } + } + + return fields +} + func getSecretFromParamStore(ctx context.Context, varName string) *string { paramName := os.Getenv(varName) if paramName == "" { diff --git a/pkg/logger/configure_test.go b/pkg/logger/configure_test.go new file mode 100644 index 0000000..31f3913 --- /dev/null +++ b/pkg/logger/configure_test.go @@ -0,0 +1,126 @@ +package logger + +import ( + "bytes" + "context" + "encoding/json" + "testing" + + "go.uber.org/zap/zapcore" +) + +func TestOtelEnvFields(t *testing.T) { + tests := []struct { + name string + serviceName string + resourceAttrs string + expectedFields map[string]string + }{ + { + name: "no env vars set", + expectedFields: map[string]string{}, + }, + { + name: "only OTEL_SERVICE_NAME set", + serviceName: "my-service", + expectedFields: map[string]string{ + "service.name": "my-service", + }, + }, + { + name: "only OTEL_RESOURCE_ATTRIBUTES set", + resourceAttrs: "env=prod,region=us-east-1", + expectedFields: map[string]string{ + "env": "prod", + "region": "us-east-1", + }, + }, + { + name: "both set", + serviceName: "my-service", + resourceAttrs: "env=staging", + expectedFields: map[string]string{ + "service.name": "my-service", + "env": "staging", + }, + }, + { + name: "malformed entries are skipped", + resourceAttrs: "valid=yes,malformed,also-bad,good=ok", + expectedFields: map[string]string{ + "valid": "yes", + "good": "ok", + }, + }, + { + name: "value containing equals sign", + resourceAttrs: "key=val=ue", + expectedFields: map[string]string{ + "key": "val=ue", + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Setenv("OTEL_SERVICE_NAME", tt.serviceName) + t.Setenv("OTEL_RESOURCE_ATTRIBUTES", tt.resourceAttrs) + + fields := otelEnvFields() + + if len(fields) != len(tt.expectedFields) { + t.Fatalf("expected %d fields, got %d: %v", len(tt.expectedFields), len(fields), fields) + } + + for _, f := range fields { + expected, ok := tt.expectedFields[f.Key] + if !ok { + t.Errorf("unexpected field key %q", f.Key) + continue + } + if f.Type != zapcore.StringType { + t.Errorf("expected string type for field %q, got %v", f.Key, f.Type) + continue + } + if f.String != expected { + t.Errorf("field %q: expected %q, got %q", f.Key, expected, f.String) + } + } + }) + } +} + +func TestLoggerOutputContainsOtelFields(t *testing.T) { + t.Setenv("OTEL_SERVICE_NAME", "test-svc") + t.Setenv("OTEL_RESOURCE_ATTRIBUTES", "deployment.environment=test,team=platform") + + var buf bytes.Buffer + ctx, err := ConfigureProductionLogger(context.Background(), "info", &buf) + if err != nil { + t.Fatalf("failed to configure logger: %v", err) + } + + l := L(ctx) + l.Info("hello") + + var entry map[string]any + if err := json.Unmarshal(buf.Bytes(), &entry); err != nil { + t.Fatalf("failed to parse log output: %v\nraw: %s", err, buf.String()) + } + + expected := map[string]string{ + "service.name": "test-svc", + "deployment.environment": "test", + "team": "platform", + } + for key, want := range expected { + got, ok := entry[key] + if !ok { + t.Errorf("expected field %q in log output, not found", key) + continue + } + if got != want { + t.Errorf("field %q: expected %q, got %q", key, want, got) + } + } +} From ade015c600e65ee82d81d505331d08a425011897 Mon Sep 17 00:00:00 2001 From: Daniel Gordos Date: Tue, 24 Feb 2026 17:31:41 +1100 Subject: [PATCH 2/2] fix: update tests for version -> service.version field rename --- tests/development_test.go | 2 +- tests/production_test.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/development_test.go b/tests/development_test.go index f250548..6a8ea74 100644 --- a/tests/development_test.go +++ b/tests/development_test.go @@ -30,7 +30,7 @@ func TestDevelopmentLogger(t *testing.T) { assert.True(t, strings.Contains(output.String(), "INFO"), "stdout didn't include INFO") assert.True(t, strings.Contains(output.String(), "test"), "stdout didn't include the 'test' log message") assert.True(t, strings.Contains(output.String(), "tests/development_test.go:25"), "stdout didn't include the file path and line number") - assert.True(t, strings.Contains(output.String(), `{"version": "0.0.0"}`), "stdout didn't include version") + assert.True(t, strings.Contains(output.String(), `{"service.version": "0.0.0"}`), "stdout didn't include service.version") } func TestDevelopmentLoggerMeterAvailable(t *testing.T) { diff --git a/tests/production_test.go b/tests/production_test.go index e456e70..ceef7de 100644 --- a/tests/production_test.go +++ b/tests/production_test.go @@ -39,7 +39,7 @@ func TestProductionLogger(t *testing.T) { assert.NoError(t, err, "did not get working directory successfully") assert.Equal(t, pwd+"/production_test.go:26", jsonOutput["caller"], "stdout didn't include the file path and line number") - assert.Equal(t, "0.0.0", jsonOutput["version"], "stdout didn't include version") + assert.Equal(t, "0.0.0", jsonOutput["service.version"], "stdout didn't include service.version") } func TestProductionLoggerMeterAvailable(t *testing.T) {