diff --git a/bun.lock b/bun.lock index 1f47e46..dd3d60b 100644 --- a/bun.lock +++ b/bun.lock @@ -1,10 +1,12 @@ { "lockfileVersion": 1, + "configVersion": 0, "workspaces": { "": { "name": "judgeval", "dependencies": { "@opentelemetry/api": "^1.9.0", + "@opentelemetry/context-async-hooks": "^2.2.0", "@opentelemetry/core": "^2.2.0", "@opentelemetry/exporter-trace-otlp-http": "^0.207.0", "@opentelemetry/resources": "^2.2.0", diff --git a/examples/simple_chat/instrumentation.ts b/examples/simple_chat/instrumentation.ts index 1255dcb..b61c0c4 100644 --- a/examples/simple_chat/instrumentation.ts +++ b/examples/simple_chat/instrumentation.ts @@ -3,16 +3,10 @@ import { Judgeval, type NodeTracer } from "judgeval"; export const client = Judgeval.create(); -const initPromise = client.nodeTracer - .create({ - projectName: "auto_instrumentation_example", - enableEvaluation: true, - enableMonitoring: true, - instrumentations: [new OpenAIInstrumentation()], - }) - .then((t: NodeTracer) => { - return t; - }); +const initPromise = client.nodeTracer.create({ + projectName: "auto_instrumentation_example", + instrumentations: [new OpenAIInstrumentation()], +}); export async function getTracer(): Promise { return await initPromise; diff --git a/package.json b/package.json index 69c1d7f..6a43f4a 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "judgeval", - "version": "0.7.2", + "version": "0.7.3", "description": "JavaScript/TypeScript client for Judgment evaluation platform", "main": "./dist/index.cjs", "module": "./dist/index.mjs", @@ -26,7 +26,7 @@ "lint:fix": "eslint src/**/*.ts --fix --no-warn-ignored", "format": "prettier --write \"src/**/*.ts\"", "generate-client": "bunx tsx scripts/generate-client.ts && bun run format", - "test": "bun test", + "test": "bun test --coverage", "clean": "rimraf dist", "prepublishOnly": "bun run clean && bun run build", "example": "bun scripts/run-example.ts" @@ -48,6 +48,7 @@ }, "dependencies": { "@opentelemetry/api": "^1.9.0", + "@opentelemetry/context-async-hooks": "^2.2.0", "@opentelemetry/core": "^2.2.0", "@opentelemetry/exporter-trace-otlp-http": "^0.207.0", "@opentelemetry/resources": "^2.2.0", diff --git a/src/env.ts b/src/env.ts index d23193c..f59ba78 100644 --- a/src/env.ts +++ b/src/env.ts @@ -14,13 +14,9 @@ export const JUDGMENT_API_URL = getEnvVar( "JUDGMENT_API_URL", "https://api.judgmentlabs.ai", ); -export const JUDGMENT_LLM_PROXY_URL = getEnvVar( - "JUDGMENT_LLM_PROXY_URL", - "https://api.judgmentlabs.ai/llm/proxy/v1", -); export const JUDGMENT_DEFAULT_GPT_MODEL = getEnvVar( "JUDGMENT_DEFAULT_GPT_MODEL", - "gpt-4.1", + "gpt-5-mini", ); export const JUDGMENT_ENABLE_MONITORING = getEnvVar( "JUDGMENT_ENABLE_MONITORING", diff --git a/src/scorers/promptScorer/PromptScorer.ts b/src/scorers/promptScorer/PromptScorer.ts index 11ced65..f133d1a 100644 --- a/src/scorers/promptScorer/PromptScorer.ts +++ b/src/scorers/promptScorer/PromptScorer.ts @@ -1,5 +1,5 @@ -import type { ScorerConfig } from "../../internal/api/models"; import { APIScorerType } from "../../data/APIScorerType"; +import type { ScorerConfig } from "../../internal/api/models"; import { BaseScorer } from "../BaseScorer"; export interface PromptScorerConfig { @@ -91,9 +91,11 @@ export class PromptScorer extends BaseScorer { if (this._options) { kwargs.options = this._options; } + if (this._model) { kwargs.model = this._model; } + if (this._description) { kwargs.description = this._description; } diff --git a/src/scorers/promptScorer/PromptScorerFactory.ts b/src/scorers/promptScorer/PromptScorerFactory.ts index 898c362..fd186a2 100644 --- a/src/scorers/promptScorer/PromptScorerFactory.ts +++ b/src/scorers/promptScorer/PromptScorerFactory.ts @@ -5,7 +5,8 @@ import type { FetchPromptScorersRequest, FetchPromptScorersResponse, } from "../../internal/api/models"; -import { PromptScorer, type PromptScorerConfig } from "./PromptScorer"; +import { Logger } from "../../utils"; +import { PromptScorer } from "./PromptScorer"; export class PromptScorerFactory { private readonly client: JudgmentApiClient; @@ -17,12 +18,12 @@ export class PromptScorerFactory { this.isTrace = isTrace; } - async get(name: string): Promise { + async get(name: string): Promise { const cacheKey = this.getCacheKey(name); const cached = PromptScorerFactory.cache.get(cacheKey); if (cached) { - return this.createFromModel(cached, name); + return this._create(cached, name); } try { @@ -51,34 +52,18 @@ export class PromptScorerFactory { } PromptScorerFactory.cache.set(cacheKey, scorer); - return this.createFromModel(scorer, name); + return this._create(scorer, name); } catch (error) { - if (error instanceof Error) { - throw error; - } - throw new Error(`Failed to fetch prompt scorer '${name}': ${error}`); - } - } - - create(config: PromptScorerConfig): PromptScorer { - if (!config.name) { - throw new Error("Name is required"); - } - if (!config.prompt) { - throw new Error("Prompt is required"); + Logger.error(`Failed to fetch prompt scorer '${name}': ${error}`); + return null; } - - return new PromptScorer({ - ...config, - isTrace: this.isTrace, - }); } - private createFromModel(model: APIPromptScorer, name: string): PromptScorer { + private _create(scorer: APIPromptScorer, name: string): PromptScorer { let options: Record | undefined; - if (model.options && typeof model.options === "object") { + if (scorer.options && typeof scorer.options === "object") { options = {}; - for (const [key, value] of Object.entries(model.options)) { + for (const [key, value] of Object.entries(scorer.options)) { if (typeof value === "number") { options[key] = value; } @@ -87,11 +72,11 @@ export class PromptScorerFactory { return new PromptScorer({ name, - prompt: model.prompt, - threshold: model.threshold, + prompt: scorer.prompt, + threshold: scorer.threshold, options: options ?? {}, - model: model.model ?? JUDGMENT_DEFAULT_GPT_MODEL, - description: model.description ?? "", + model: scorer.model ?? JUDGMENT_DEFAULT_GPT_MODEL, + description: scorer.description ?? "", isTrace: this.isTrace, }); } diff --git a/src/tracer/BaseTracer.test.ts b/src/tracer/BaseTracer.test.ts new file mode 100644 index 0000000..bac0809 --- /dev/null +++ b/src/tracer/BaseTracer.test.ts @@ -0,0 +1,536 @@ +/* eslint-disable @typescript-eslint/no-empty-function */ +import { context, SpanStatusCode, trace } from "@opentelemetry/api"; +import { AsyncLocalStorageContextManager } from "@opentelemetry/context-async-hooks"; +import { + BasicTracerProvider, + InMemorySpanExporter, + type ReadableSpan, + SimpleSpanProcessor, +} from "@opentelemetry/sdk-trace-base"; +import { afterEach, beforeEach, describe, expect, test } from "bun:test"; +import { JudgmentApiClient } from "../internal/api"; +import { BaseTracer } from "./BaseTracer"; + +class TestTracer extends BaseTracer { + initialize(): Promise { + return Promise.resolve(); + } + shutdown(): Promise { + return Promise.resolve(); + } +} + +describe("BaseTracer - Span Linking and Context Propagation", () => { + let tracer: TestTracer; + let mockApiClient: JudgmentApiClient; + let provider: BasicTracerProvider; + let exporter: InMemorySpanExporter; + + beforeEach(() => { + const contextManager = new AsyncLocalStorageContextManager(); + contextManager.enable(); + context.setGlobalContextManager(contextManager); + + exporter = new InMemorySpanExporter(); + provider = new BasicTracerProvider({ + spanProcessors: [new SimpleSpanProcessor(exporter)], + }); + trace.setGlobalTracerProvider(provider); + + mockApiClient = { + getOrganizationId: () => "test-org", + getBaseUrl: () => "https://test.example.com", + getApiKey: () => "test-key", + } as JudgmentApiClient; + + tracer = Reflect.construct(TestTracer, [ + "test-project", + false, + mockApiClient, + JSON.stringify, + ]) as TestTracer; + + tracer.getTracer = () => provider.getTracer(BaseTracer.TRACER_NAME); + }); + + afterEach(async () => { + await provider.forceFlush(); + exporter.reset(); + context.disable(); + }); + + async function getSpans(): Promise { + await provider.forceFlush(); + return exporter.getFinishedSpans(); + } + + async function findSpanByName( + name: string + ): Promise { + const spans = await getSpans(); + return spans.find((s) => s.name === name); + } + + describe("span() - manual spans without auto-linking", () => { + test("simple parent-child with startActiveSpan", async () => { + const testTracer = provider.getTracer("test"); + + testTracer.startActiveSpan("parent", (parentSpan) => { + testTracer.startActiveSpan("child", (childSpan) => { + childSpan.end(); + }); + parentSpan.end(); + }); + + const spans = await getSpans(); + const parent = spans.find((s) => s.name === "parent"); + const child = spans.find((s) => s.name === "child"); + + expect(parent).toBeDefined(); + expect(child).toBeDefined(); + expect(child?.spanContext().traceId).toBe(parent?.spanContext().traceId); + expect(child?.parentSpanContext?.spanId).toBe( + parent?.spanContext().spanId + ); + }); + + test("manual span created inside with() DOES auto-link", async () => { + tracer.with("parent", () => { + const manualSpan = tracer.span("manual-child"); + manualSpan.end(); + }); + + const spans = await getSpans(); + expect(spans.length).toBe(2); + + const parent = await findSpanByName("parent"); + const child = await findSpanByName("manual-child"); + + expect(parent).toBeDefined(); + expect(child).toBeDefined(); + expect(child?.parentSpanContext?.spanId).toBe( + parent?.spanContext().spanId + ); + }); + + test("manual span links to parent when wrapped in context.with", async () => { + const manualParent = tracer.span("manual-parent"); + + context.with(trace.setSpan(context.active(), manualParent), () => { + tracer.with("auto-child", () => {}); + }); + + manualParent.end(); + + const spans = await getSpans(); + expect(spans.length).toBe(2); + + const parent = await findSpanByName("manual-parent"); + const child = await findSpanByName("auto-child"); + + expect(parent).toBeDefined(); + expect(child).toBeDefined(); + expect(child?.parentSpanContext?.spanId).toBe( + parent?.spanContext().spanId + ); + }); + + test("nested manual spans with explicit context management properly link", async () => { + const span1 = tracer.span("manual-1"); + + context.with(trace.setSpan(context.active(), span1), () => { + const span2 = tracer.span("manual-2"); + + context.with(trace.setSpan(context.active(), span2), () => { + const span3 = tracer.span("manual-3"); + span3.end(); + }); + + span2.end(); + }); + + span1.end(); + + const spans = await getSpans(); + expect(spans.length).toBe(3); + + const s1 = await findSpanByName("manual-1"); + const s2 = await findSpanByName("manual-2"); + const s3 = await findSpanByName("manual-3"); + + expect(s1?.parentSpanContext?.spanId).toBeUndefined(); + expect(s2?.parentSpanContext?.spanId).toBe(s1?.spanContext().spanId); + expect(s3?.parentSpanContext?.spanId).toBe(s2?.spanContext().spanId); + }); + }); + + describe("with() - automatic span lifecycle and linking", () => { + test("creates root span when no parent context exists", async () => { + tracer.with("root", () => {}); + + const spans = await getSpans(); + expect(spans.length).toBe(1); + expect(spans[0].name).toBe("root"); + expect(spans[0].parentSpanContext?.spanId).toBeUndefined(); + }); + + test("child span auto-links to parent", async () => { + tracer.with("parent", () => { + tracer.with("child", () => {}); + }); + + const spans = await getSpans(); + expect(spans.length).toBe(2); + + const parent = await findSpanByName("parent"); + const child = await findSpanByName("child"); + + expect(child?.parentSpanContext?.spanId).toBe( + parent?.spanContext().spanId + ); + }); + + test("deeply nested spans maintain correct hierarchy", async () => { + tracer.with("level-1", () => { + tracer.with("level-2", () => { + tracer.with("level-3", () => {}); + }); + }); + + const spans = await getSpans(); + expect(spans.length).toBe(3); + + const l1 = await findSpanByName("level-1"); + const l2 = await findSpanByName("level-2"); + const l3 = await findSpanByName("level-3"); + + expect(l1?.parentSpanContext?.spanId).toBeUndefined(); + expect(l2?.parentSpanContext?.spanId).toBe(l1?.spanContext().spanId); + expect(l3?.parentSpanContext?.spanId).toBe(l2?.spanContext().spanId); + }); + + test("sibling spans share same parent", async () => { + tracer.with("parent", () => { + tracer.with("child-1", () => {}); + tracer.with("child-2", () => {}); + }); + + const spans = await getSpans(); + expect(spans.length).toBe(3); + + const parent = await findSpanByName("parent"); + const child1 = await findSpanByName("child-1"); + const child2 = await findSpanByName("child-2"); + + expect(child1?.parentSpanContext?.spanId).toBe( + parent?.spanContext().spanId + ); + expect(child2?.parentSpanContext?.spanId).toBe( + parent?.spanContext().spanId + ); + }); + + test("async operations maintain context correctly", async () => { + await tracer.with("async-parent", async () => { + await tracer.with("async-child", async () => { + await Promise.resolve(); + }); + }); + + const spans = await getSpans(); + expect(spans.length).toBe(2); + + const parent = await findSpanByName("async-parent"); + const child = await findSpanByName("async-child"); + + expect(child?.parentSpanContext?.spanId).toBe( + parent?.spanContext().spanId + ); + }); + + test("parallel async spans maintain correct context", async () => { + await tracer.with("parent", async () => { + await Promise.all([ + tracer.with("parallel-1", async () => { + await Promise.resolve(); + }), + tracer.with("parallel-2", async () => { + await Promise.resolve(); + }), + ]); + }); + + const spans = await getSpans(); + expect(spans.length).toBe(3); + + const parent = await findSpanByName("parent"); + const p1 = await findSpanByName("parallel-1"); + const p2 = await findSpanByName("parallel-2"); + + expect(p1?.parentSpanContext?.spanId).toBe(parent?.spanContext().spanId); + expect(p2?.parentSpanContext?.spanId).toBe(parent?.spanContext().spanId); + }); + + test("records error status and exception on failure", async () => { + try { + void tracer.with("error-span", () => { + throw new Error("test error"); + }); + } catch { + // Expected error + } + + const spans = await getSpans(); + expect(spans.length).toBe(1); + expect(spans[0].status.code).toBe(SpanStatusCode.ERROR); + expect(spans[0].events.length).toBeGreaterThan(0); + expect(spans[0].events[0].name).toBe("exception"); + }); + + test("async error handling maintains context", async () => { + try { + await tracer.with("parent", async () => { + await tracer.with("child-error", async () => { + await Promise.resolve(); + throw new Error("child error"); + }); + }); + } catch { + // Expected error + } + + const spans = await getSpans(); + expect(spans.length).toBe(2); + + const parent = await findSpanByName("parent"); + const child = await findSpanByName("child-error"); + + expect(child?.parentSpanContext?.spanId).toBe( + parent?.spanContext().spanId + ); + expect(child?.status.code).toBe(SpanStatusCode.ERROR); + }); + }); + + describe("observe() - function wrapping with auto-linking", () => { + test("creates root span when called without parent context", async () => { + const func = () => 42; + const wrapped = tracer.observe(func, "span", "observed-root"); + + wrapped(); + + const spans = await getSpans(); + expect(spans.length).toBe(1); + expect(spans[0].name).toBe("observed-root"); + expect(spans[0].parentSpanContext?.spanId).toBeUndefined(); + }); + + test("observed function auto-links to active parent", async () => { + const func = () => "result"; + const wrapped = tracer.observe(func, "span", "observed-child"); + + tracer.with("parent", () => { + wrapped(); + }); + + const spans = await getSpans(); + expect(spans.length).toBe(2); + + const parent = await findSpanByName("parent"); + const child = await findSpanByName("observed-child"); + + expect(child?.parentSpanContext?.spanId).toBe( + parent?.spanContext().spanId + ); + }); + + test("nested observed calls maintain hierarchy", async () => { + const inner = (x: number) => x * 2; + const outer = (x: number) => wrappedInner(x) + 1; + + const wrappedInner = tracer.observe(inner, "span", "inner"); + const wrappedOuter = tracer.observe(outer, "span", "outer"); + + wrappedOuter(5); + + const spans = await getSpans(); + expect(spans.length).toBe(2); + + const outerSpan = await findSpanByName("outer"); + const innerSpan = await findSpanByName("inner"); + + expect(innerSpan?.parentSpanContext?.spanId).toBe( + outerSpan?.spanContext().spanId + ); + }); + + test("async observed functions maintain context", async () => { + const asyncFunc = async (n: number) => { + await Promise.resolve(); + return n * 2; + }; + + const wrapped = tracer.observe(asyncFunc, "span", "async-observed"); + + await tracer.with("parent", async () => { + await wrapped(5); + }); + + const spans = await getSpans(); + expect(spans.length).toBe(2); + + const parent = await findSpanByName("parent"); + const child = await findSpanByName("async-observed"); + + expect(child?.parentSpanContext?.spanId).toBe( + parent?.spanContext().spanId + ); + }); + + test("multiple invocations create separate spans with same parent", async () => { + const func = () => Math.random(); + const wrapped = tracer.observe(func, "span", "multi-call"); + + tracer.with("parent", () => { + wrapped(); + wrapped(); + wrapped(); + }); + + const spans = await getSpans(); + expect(spans.length).toBe(4); + + const parent = await findSpanByName("parent"); + const children = spans.filter((s) => s.name === "multi-call"); + + expect(children.length).toBe(3); + children.forEach((child) => { + expect(child.parentSpanContext?.spanId).toBe( + parent?.spanContext().spanId + ); + }); + }); + }); + + describe("complex integration scenarios", () => { + test("span() + with() + observe() mixed correctly", async () => { + const manualSpan = tracer.span("manual-root"); + + context.with(trace.setSpan(context.active(), manualSpan), () => { + tracer.with("with-child", () => { + const func = () => "result"; + const wrapped = tracer.observe(func, "span", "observe-grandchild"); + wrapped(); + }); + }); + + manualSpan.end(); + + const spans = await getSpans(); + expect(spans.length).toBe(3); + + const root = await findSpanByName("manual-root"); + const child = await findSpanByName("with-child"); + const grandchild = await findSpanByName("observe-grandchild"); + + expect(child?.parentSpanContext?.spanId).toBe(root?.spanContext().spanId); + expect(grandchild?.parentSpanContext?.spanId).toBe( + child?.spanContext().spanId + ); + }); + + test("with() inside observe() inside with() maintains hierarchy", async () => { + const func = () => { + return tracer.with("inner-with", () => "nested"); + }; + + const wrapped = tracer.observe(func, "span", "middle-observe"); + + tracer.with("outer-with", () => { + wrapped(); + }); + + const spans = await getSpans(); + expect(spans.length).toBe(3); + + const outer = await findSpanByName("outer-with"); + const middle = await findSpanByName("middle-observe"); + const inner = await findSpanByName("inner-with"); + + expect(middle?.parentSpanContext?.spanId).toBe( + outer?.spanContext().spanId + ); + expect(inner?.parentSpanContext?.spanId).toBe( + middle?.spanContext().spanId + ); + }); + + test("error in nested context maintains span hierarchy", async () => { + try { + await tracer.with("root", async () => { + await tracer.with("middle", async () => { + await tracer.with("error-leaf", async () => { + await Promise.resolve(); + throw new Error("nested error"); + }); + }); + }); + } catch { + // Expected error + } + + const spans = await getSpans(); + expect(spans.length).toBe(3); + + const root = await findSpanByName("root"); + const middle = await findSpanByName("middle"); + const leaf = await findSpanByName("error-leaf"); + + expect(middle?.parentSpanContext?.spanId).toBe( + root?.spanContext().spanId + ); + expect(leaf?.parentSpanContext?.spanId).toBe( + middle?.spanContext().spanId + ); + expect(leaf?.status.code).toBe(SpanStatusCode.ERROR); + }); + }); + + describe("span attributes and metadata", () => { + test("with() allows setting custom attributes", async () => { + tracer.with("custom-attrs", (span) => { + span.setAttribute("custom.key", "value"); + span.setAttribute("custom.number", 42); + }); + + await getSpans(); + const span = await findSpanByName("custom-attrs"); + + expect(span?.attributes["custom.key"]).toBe("value"); + expect(span?.attributes["custom.number"]).toBe(42); + }); + + test("observe() captures input and output", async () => { + const func = (a: number, b: number) => a + b; + const wrapped = tracer.observe(func, "span", "math-add"); + + wrapped(5, 10); + + const span = await findSpanByName("math-add"); + + expect(span?.attributes["judgment.input"]).toBeDefined(); + expect(span?.attributes["judgment.output"]).toBe("15"); + }); + + test("observe() sets span kind attribute", async () => { + const func = () => "llm response"; + const wrapped = tracer.observe(func, "llm", "llm-call"); + + wrapped(); + + const span = await findSpanByName("llm-call"); + + expect(span?.attributes["judgment.span_kind"]).toBe("llm"); + }); + }); +}); diff --git a/src/tracer/BaseTracer.ts b/src/tracer/BaseTracer.ts index 97ae0d1..ac0ba8c 100644 --- a/src/tracer/BaseTracer.ts +++ b/src/tracer/BaseTracer.ts @@ -21,6 +21,8 @@ import { AttributeKeys } from "../judgmentAttributeKeys"; import { BaseScorer } from "../scorers/BaseScorer"; import { Logger } from "../utils/logger"; import { JudgmentSpanExporter, NoOpSpanExporter } from "./exporters"; +import { JudgmentSpanProcessor } from "./processors/JudgmentSpanProcessor"; +import { NoOpSpanProcessor } from "./processors/NoOpJudgmentSpanProcessor"; export type Serializer = (obj: unknown) => string; @@ -39,7 +41,7 @@ export abstract class BaseTracer { enableEvaluation: boolean, apiClient: JudgmentApiClient, serializer: Serializer, - jsonEncoder: (obj: unknown) => string = JSON.stringify, + jsonEncoder: (obj: unknown) => string = JSON.stringify ) { this.projectName = projectName; this.enableEvaluation = enableEvaluation; @@ -57,7 +59,7 @@ export abstract class BaseTracer { Logger.error( `Failed to resolve project ${this.projectName}, ` + `please create it first at https://app.judgmentlabs.ai/org/${this.apiClient.getOrganizationId()}/projects. ` + - "Skipping Judgment export.", + "Skipping Judgment export." ); this.projectId = null; } @@ -68,14 +70,29 @@ export abstract class BaseTracer { getSpanExporter(): SpanExporter { if (this.projectId !== null) { - return this.createJudgmentSpanExporter(this.projectId); + return new JudgmentSpanExporter( + this.buildEndpoint(this.apiClient.getBaseUrl()), + this.apiClient.getApiKey(), + this.apiClient.getOrganizationId(), + this.projectId + ); } Logger.error( - "Project not resolved; cannot create exporter, returning NoOpSpanExporter", + "Project not resolved; cannot create exporter, returning NoOpSpanExporter" ); return new NoOpSpanExporter(); } + getSpanProcessor(): JudgmentSpanProcessor { + if (this.projectId !== null) { + return new JudgmentSpanProcessor(this, this.getSpanExporter()); + } + Logger.error( + "Project not resolved; cannot create processor, returning NoOpSpanProcessor" + ); + return new NoOpSpanProcessor(this); + } + getTracer(): Tracer { return trace.getTracer(BaseTracer.TRACER_NAME); } @@ -153,14 +170,14 @@ export abstract class BaseTracer { "asyncEvaluate", traceId, spanId, - scorer.getName(), + scorer.getName() ); const evaluationRun = this.createEvaluationRun( scorer, example, traceId, - spanId, + spanId ); this.enqueueEvaluation(evaluationRun).catch((e: unknown) => { @@ -188,19 +205,19 @@ export abstract class BaseTracer { "asyncTraceEvaluate", traceId, spanId, - scorer.getName(), + scorer.getName() ); const evaluationRun = this.createTraceEvaluationRun( scorer, traceId, - spanId, + spanId ); try { const traceEvalJson = JSON.stringify(evaluationRun); currentSpan.setAttribute( AttributeKeys.JUDGMENT_PENDING_TRACE_EVAL, - traceEvalJson, + traceEvalJson ); } catch (e) { Logger.error(`Failed to serialize trace evaluation: ${e}`); @@ -208,12 +225,71 @@ export abstract class BaseTracer { }); } - span( + /** + * Creates a new span for manual instrumentation. + * + * WARNING: You probably don't want this method. Use with() instead for most cases. + * + * This returns a span that is NOT active in the context, meaning child operations + * will NOT automatically link to it as a parent. You must manually manage the span + * lifecycle including calling span.end() and handling errors. + * + * To make the span active so child operations auto-link to it: + * ``` + * import { context, trace } from "@opentelemetry/api"; + * const span = tracer.span("my-span"); + * context.with(trace.setSpan(context.active(), span), () => { + * // span is active here, child ops auto-link + * }); + * span.end(); + * ``` + * + * Consider using with() instead, which handles context and lifecycle automatically. + * + * @param spanName - The name of the span + * @param options - Optional span configuration (attributes, links, etc) + * @param ctx - Optional context to use as parent. Defaults to the active context + * @returns A Span object that must be manually ended + */ + span(spanName: string, options?: SpanOptions, ctx?: Context): Span { + const tracer = this.getTracer(); + return tracer.startSpan(spanName, options ?? {}, ctx ?? context.active()); + } + + /** + * Wraps a function execution in a span with automatic lifecycle management. + * + * Automatically handles span creation, ending, and error recording. The span is passed to + * your callback function, allowing you to add custom attributes or access span properties. + * The span will be ended automatically when the function completes or throws an error. + * + * Supports both synchronous and asynchronous functions. For async functions, the span + * remains active until the Promise resolves or rejects. + * + * @param spanName - The name of the span + * @param callableFunc - The function to execute within the span. Receives the span as a parameter + * @param options - Optional span configuration + * @param ctx - Optional context to use as parent. Defaults to the active context + * @returns The return value of callableFunc (Promise if async, direct value if sync) + */ + with( + spanName: string, + callableFunc: (span: Span) => Promise, + options?: SpanOptions, + ctx?: Context + ): Promise; + with( spanName: string, - callableFunc: () => T, + callableFunc: (span: Span) => T, options?: SpanOptions, - ctx?: Context, - ): T { + ctx?: Context + ): T; + with( + spanName: string, + callableFunc: (span: Span) => T | Promise, + options?: SpanOptions, + ctx?: Context + ): T | Promise { const tracer = this.getTracer(); return tracer.startActiveSpan( spanName, @@ -221,25 +297,58 @@ export abstract class BaseTracer { ctx ?? context.active(), (span) => { try { - return callableFunc(); + const result = callableFunc(span); + + if (result instanceof Promise) { + return result + .catch((err: unknown) => { + span.recordException(err as Error); + span.setStatus({ + code: SpanStatusCode.ERROR, + message: String(err), + }); + throw err; + }) + .finally(() => { + span.end(); + }); + } + + span.end(); + return result; } catch (e) { span.setStatus({ code: SpanStatusCode.ERROR }); span.recordException(e as Error); - throw e; - } finally { span.end(); + throw e; } - }, + } ); } - // eslint-disable-next-line @typescript-eslint/no-explicit-any - observe( + /** + * Wraps a function to automatically trace all its invocations. + * + * Returns a new function that, when called, will automatically create a span, capture input + * arguments, execute the original function, capture the output, and handle errors. The span + * is automatically ended after the function completes. + * + * Supports both synchronous and asynchronous functions. Input arguments are serialized and + * stored as span attributes, and the return value is captured as output. + * + * @param func - The function to wrap with automatic tracing + * @param spanType - The type of span to create (default: "span"). Common values: "span", "llm", "tool" + * @param spanName - Optional custom name for the span. Defaults to the function name + * @param options - Optional span configuration + * @param ctx - Optional context to use as parent. Defaults to the active context + * @returns A wrapped version of the function that creates spans on each invocation + */ + observe( func: (...args: TArgs) => TResult, spanType = "span", spanName?: string | null, options?: SpanOptions, - ctx?: Context, + ctx?: Context ): (...args: TArgs) => TResult { const tracer = this.getTracer(); const name = spanName ?? func.name; @@ -257,11 +366,11 @@ export abstract class BaseTracer { try { const inputData = this.formatInputs( func as (...args: unknown[]) => unknown, - args as unknown[], + args as unknown[] ); span.setAttribute( AttributeKeys.JUDGMENT_INPUT, - this.serializer(inputData), + this.serializer(inputData) ); const result = func(...args); @@ -271,7 +380,7 @@ export abstract class BaseTracer { .then((res: TResult) => { span.setAttribute( AttributeKeys.JUDGMENT_OUTPUT, - this.serializer(res), + this.serializer(res) ); return res; }) @@ -290,7 +399,7 @@ export abstract class BaseTracer { span.setAttribute( AttributeKeys.JUDGMENT_OUTPUT, - this.serializer(result), + this.serializer(result) ); span.end(); return result; @@ -300,7 +409,7 @@ export abstract class BaseTracer { span.end(); throw e; } - }, + } ); }; } @@ -319,7 +428,7 @@ export abstract class BaseTracer { return projectId; } catch (error) { throw new Error( - `Failed to resolve project ID: ${error instanceof Error ? error.message : String(error)}`, + `Failed to resolve project ID: ${error instanceof Error ? error.message : String(error)}` ); } } @@ -330,15 +439,6 @@ export abstract class BaseTracer { : baseUrl + "/otel/v1/traces"; } - private createJudgmentSpanExporter(projectId: string): SpanExporter { - return new JudgmentSpanExporter( - this.buildEndpoint(this.apiClient.getBaseUrl()), - this.apiClient.getApiKey(), - this.apiClient.getOrganizationId(), - projectId, - ); - } - private generateRunId(prefix: string, spanId?: string | null): string { return prefix + (spanId ?? Date.now().toString()); } @@ -347,7 +447,7 @@ export abstract class BaseTracer { scorer: BaseScorer, example: Example, traceId: string, - spanId: string, + spanId: string ): ExampleEvaluationRun { const runId = this.generateRunId("async_evaluate_", spanId); @@ -365,7 +465,7 @@ export abstract class BaseTracer { private createTraceEvaluationRun( scorer: BaseScorer, traceId: string, - spanId: string, + spanId: string ): TraceEvaluationRun { const evalName = this.generateRunId("async_trace_evaluate_", spanId); @@ -380,7 +480,7 @@ export abstract class BaseTracer { } private async enqueueEvaluation( - evaluationRun: ExampleEvaluationRun, + evaluationRun: ExampleEvaluationRun ): Promise { try { await this.apiClient.addToRunEvalQueueExamples(evaluationRun); @@ -417,10 +517,10 @@ export abstract class BaseTracer { method: string, traceId: string, spanId: string, - scorerName: string, + scorerName: string ): void { Logger.info( - `${method}: project=${this.projectName}, traceId=${traceId}, spanId=${spanId}, scorer=${scorerName}`, + `${method}: project=${this.projectName}, traceId=${traceId}, spanId=${spanId}, scorer=${scorerName}` ); } @@ -438,7 +538,7 @@ export abstract class BaseTracer { private formatInputs( f: (...args: unknown[]) => unknown, - args: unknown[], + args: unknown[] ): Record { try { const funcStr = f.toString(); diff --git a/src/tracer/BrowserTracer.ts b/src/tracer/BrowserTracer.ts index f79b9c3..1ced8c7 100644 --- a/src/tracer/BrowserTracer.ts +++ b/src/tracer/BrowserTracer.ts @@ -1,8 +1,6 @@ import { resourceFromAttributes } from "@opentelemetry/resources"; -import { - BatchSpanProcessor, - WebTracerProvider, -} from "@opentelemetry/sdk-trace-web"; +import type { Sampler } from "@opentelemetry/sdk-trace-base"; +import { WebTracerProvider } from "@opentelemetry/sdk-trace-web"; import { JudgmentApiClient } from "../internal/api"; import { Logger } from "../utils/logger"; import { VERSION } from "../version"; @@ -14,17 +12,22 @@ export interface BrowserTracerConfig { enableMonitoring?: boolean; serializer?: Serializer; resourceAttributes?: Record; + sampler?: Sampler; initialize?: boolean; } interface InternalBrowserTracerConfig - extends Required> { + extends Required< + Omit + > { resourceAttributes: Record; + sampler?: Sampler; } export class BrowserTracer extends BaseTracer { private webTracerProvider: WebTracerProvider | null = null; private resourceAttributes: Record; + private sampler?: Sampler; private constructor( projectName: string, @@ -32,9 +35,11 @@ export class BrowserTracer extends BaseTracer { apiClient: JudgmentApiClient, serializer: Serializer, resourceAttributes: Record, + sampler?: Sampler, ) { super(projectName, enableEvaluation, apiClient, serializer); this.resourceAttributes = resourceAttributes; + this.sampler = sampler; } static async create( @@ -47,6 +52,7 @@ export class BrowserTracer extends BaseTracer { apiClient, config.serializer, config.resourceAttributes, + config.sampler, ); await tracer.resolveAndSetProjectId(); @@ -72,11 +78,12 @@ export class BrowserTracer extends BaseTracer { ...this.resourceAttributes, }; - const spanExporter = this.getSpanExporter(); + const spanProcessor = this.getSpanProcessor(); this.webTracerProvider = new WebTracerProvider({ resource: resourceFromAttributes(attributes), - spanProcessors: [new BatchSpanProcessor(spanExporter)], + spanProcessors: [spanProcessor], + sampler: this.sampler, }); this.webTracerProvider.register(); diff --git a/src/tracer/BrowserTracerFactory.ts b/src/tracer/BrowserTracerFactory.ts index efb55fd..7786e07 100644 --- a/src/tracer/BrowserTracerFactory.ts +++ b/src/tracer/BrowserTracerFactory.ts @@ -20,6 +20,7 @@ export class BrowserTracerFactory { enableMonitoring: config.enableMonitoring ?? false, serializer: config.serializer ?? JSON.stringify, resourceAttributes: config.resourceAttributes ?? {}, + sampler: config.sampler, initialize: config.initialize ?? true, }, this.client, diff --git a/src/tracer/JudgmentNodeTracerProvider.ts b/src/tracer/JudgmentNodeTracerProvider.ts new file mode 100644 index 0000000..89d4473 --- /dev/null +++ b/src/tracer/JudgmentNodeTracerProvider.ts @@ -0,0 +1,62 @@ +import { Tracer } from "@opentelemetry/api"; +import { + NodeTracerConfig, + NodeTracerProvider, +} from "@opentelemetry/sdk-trace-node"; +import { Logger } from "../utils"; +import { BaseTracer } from "./BaseTracer"; +import { NoOpTracer } from "./NoOpTracer"; + +export interface filterTracerParams { + name: string; + version?: string; + options?: { schemaUrl?: string }; +} + +interface JudgmentNodeTracerProviderConfig extends NodeTracerConfig { + /** + * Filters what tracers are allowed to be created. This is useful when you want to disable any instrumentation / control instrumentation + * that is automatically created by auto-instrumentations or other libraries. + * + * If set to false, the caller will receive a NoOpTracer. + * + * @param params The parameters of the tracer to check if it should be allowed. + * @returns Whether the tracer should be allowed. + */ + filterTracer?: (params: filterTracerParams) => boolean; +} + +export class JudgmentNodeTracerProvider extends NodeTracerProvider { + private readonly filterTracer: (params: filterTracerParams) => boolean; + + constructor(config: JudgmentNodeTracerProviderConfig) { + super(config); + this.filterTracer = config.filterTracer ?? (() => true); + } + + getTracer( + name: string, + version?: string, + options?: { schemaUrl?: string }, + ): Tracer { + if (name === BaseTracer.TRACER_NAME) { + return super.getTracer(name, version, options); + } + + try { + if (this.filterTracer({ name, version, options })) { + return super.getTracer(name, version, options); + } else { + Logger.debug( + `[JudgmentNodeTracerProvider] Returning NoOpTracer for tracer ${name} as it is disallowed by the filterTracer callback.`, + ); + return new NoOpTracer(); + } + } catch (error: unknown) { + Logger.error( + `[JudgmentNodeTracerProvider] Failed to filter tracer ${name}: ${error}.`, + ); + return super.getTracer(name, version, options); + } + } +} diff --git a/src/tracer/NoOpSpan.ts b/src/tracer/NoOpSpan.ts new file mode 100644 index 0000000..ad9f66b --- /dev/null +++ b/src/tracer/NoOpSpan.ts @@ -0,0 +1,61 @@ +import { + INVALID_SPAN_CONTEXT, + type Attributes, + type AttributeValue, + type Exception, + type Link, + type Span, + type SpanContext, + type SpanStatus, + type TimeInput, +} from "@opentelemetry/api"; + +export class NoOpSpan implements Span { + setAttribute(_key: string, _value: AttributeValue): this { + return this; + } + + setAttributes(_attributes: Attributes): this { + return this; + } + + addEvent( + _name: string, + _attributesOrStartTime?: Attributes | TimeInput, + _startTime?: TimeInput, + ): this { + return this; + } + + addLink(_link: Link): this { + return this; + } + + addLinks(_links: Link[]): this { + return this; + } + + setStatus(_status: SpanStatus): this { + return this; + } + + updateName(_name: string): this { + return this; + } + + end(_endTime?: TimeInput): void { + return; + } + + isRecording(): boolean { + return false; + } + + recordException(_exception: Exception, _time?: TimeInput): void { + return; + } + + spanContext(): SpanContext { + return INVALID_SPAN_CONTEXT; + } +} diff --git a/src/tracer/NoOpTracer.ts b/src/tracer/NoOpTracer.ts new file mode 100644 index 0000000..612445d --- /dev/null +++ b/src/tracer/NoOpTracer.ts @@ -0,0 +1,38 @@ +import { + type Context, + type Span, + type SpanOptions, + type Tracer, +} from "@opentelemetry/api"; +import { NoOpSpan } from "./NoOpSpan"; + +export class NoOpTracer implements Tracer { + private readonly noopSpan = new NoOpSpan(); + + startSpan(_name: string, _options?: SpanOptions, _context?: Context): Span { + return this.noopSpan; + } + + startActiveSpan unknown>( + _name: string, + fn: F, + ): ReturnType; + startActiveSpan unknown>( + _name: string, + _options: SpanOptions, + fn: F, + ): ReturnType; + startActiveSpan unknown>( + _name: string, + _options: SpanOptions, + _context: Context, + fn: F, + ): ReturnType; + startActiveSpan unknown>( + _name: string, + ...args: unknown[] + ): ReturnType { + const fn = args[args.length - 1] as F; + return fn(this.noopSpan) as ReturnType; + } +} diff --git a/src/tracer/NodeTracer.ts b/src/tracer/NodeTracer.ts index 6b34368..9b93754 100644 --- a/src/tracer/NodeTracer.ts +++ b/src/tracer/NodeTracer.ts @@ -1,10 +1,24 @@ -import type { Instrumentation } from "@opentelemetry/instrumentation"; -import { resourceFromAttributes } from "@opentelemetry/resources"; -import { NodeSDK } from "@opentelemetry/sdk-node"; +import { context } from "@opentelemetry/api"; +import { AsyncLocalStorageContextManager } from "@opentelemetry/context-async-hooks"; +import { + registerInstrumentations, + type Instrumentation, +} from "@opentelemetry/instrumentation"; +import { + defaultResource, + resourceFromAttributes, +} from "@opentelemetry/resources"; +import { type Sampler } from "@opentelemetry/sdk-trace-base"; +import { NodeTracerProvider } from "@opentelemetry/sdk-trace-node"; import { JudgmentApiClient } from "../internal/api"; +import { ResourceKeys } from "../judgmentAttributeKeys"; import { Logger } from "../utils/logger"; import { VERSION } from "../version"; import { BaseTracer, type Serializer } from "./BaseTracer"; +import { + JudgmentNodeTracerProvider, + filterTracerParams, +} from "./JudgmentNodeTracerProvider"; export interface NodeTracerConfig { projectName: string; @@ -13,21 +27,31 @@ export interface NodeTracerConfig { serializer?: Serializer; resourceAttributes?: Record; instrumentations?: Instrumentation[]; + sampler?: Sampler; initialize?: boolean; + filterTracer?: (params: filterTracerParams) => boolean; } interface InternalNodeTracerConfig extends Required< - Omit + Omit< + NodeTracerConfig, + "resourceAttributes" | "instrumentations" | "sampler" | "filterTracer" + > > { resourceAttributes: Record; instrumentations: Instrumentation[]; + sampler?: Sampler; + filterTracer?: (params: filterTracerParams) => boolean; } export class NodeTracer extends BaseTracer { - private nodeSDK: NodeSDK | null = null; + private tracerProvider: NodeTracerProvider | null = null; + private contextManager: AsyncLocalStorageContextManager | null = null; private resourceAttributes: Record; private instrumentations: Instrumentation[]; + private sampler?: Sampler; + private filterTracer?: (params: filterTracerParams) => boolean; private constructor( projectName: string, @@ -36,10 +60,14 @@ export class NodeTracer extends BaseTracer { serializer: Serializer, resourceAttributes: Record, instrumentations: Instrumentation[], + sampler?: Sampler, + filterTracer?: (params: filterTracerParams) => boolean, ) { super(projectName, enableEvaluation, apiClient, serializer); this.resourceAttributes = resourceAttributes; this.instrumentations = instrumentations; + this.sampler = sampler; + this.filterTracer = filterTracer; } static async create( @@ -53,6 +81,8 @@ export class NodeTracer extends BaseTracer { config.serializer, config.resourceAttributes, config.instrumentations, + config.sampler, + config.filterTracer, ); await tracer.resolveAndSetProjectId(); @@ -66,27 +96,39 @@ export class NodeTracer extends BaseTracer { /* eslint-disable @typescript-eslint/require-await */ async initialize(): Promise { - if (this.nodeSDK !== null) { + if (this.tracerProvider !== null) { Logger.warn("NodeTracer already initialized"); return; } try { - const attributes = { - "service.name": this.projectName, - "telemetry.sdk.version": VERSION, - ...this.resourceAttributes, - }; - - const spanExporter = this.getSpanExporter(); - - this.nodeSDK = new NodeSDK({ - resource: resourceFromAttributes(attributes), - traceExporter: spanExporter, - instrumentations: this.instrumentations, + this.contextManager = new AsyncLocalStorageContextManager(); + this.contextManager.enable(); + context.setGlobalContextManager(this.contextManager); + + const resource = defaultResource().merge( + resourceFromAttributes({ + [ResourceKeys.SERVICE_NAME]: this.projectName, + [ResourceKeys.TELEMETRY_SDK_NAME]: BaseTracer.TRACER_NAME, + [ResourceKeys.TELEMETRY_SDK_VERSION]: VERSION, + ...this.resourceAttributes, + }), + ); + + this.tracerProvider = new JudgmentNodeTracerProvider({ + resource, + sampler: this.sampler, + spanProcessors: [this.getSpanProcessor()], + filterTracer: this.filterTracer, }); - this.nodeSDK.start(); + this.tracerProvider.register(); + + if (this.instrumentations.length > 0) { + registerInstrumentations({ + instrumentations: this.instrumentations, + }); + } Logger.info("NodeTracer initialized successfully"); } catch (error) { throw new Error( @@ -96,13 +138,19 @@ export class NodeTracer extends BaseTracer { } async shutdown(): Promise { - if (!this.nodeSDK) { + if (!this.tracerProvider) { Logger.warn("NodeTracer not initialized, skipping shutdown"); return; } try { - await this.nodeSDK.shutdown(); - this.nodeSDK = null; + await this.tracerProvider.shutdown(); + this.tracerProvider = null; + + if (this.contextManager) { + this.contextManager.disable(); + this.contextManager = null; + } + Logger.info("NodeTracer shut down successfully"); } catch (error) { Logger.error(`Failed to shutdown NodeTracer: ${error}`); diff --git a/src/tracer/NodeTracerFactory.ts b/src/tracer/NodeTracerFactory.ts index 7ba5bdf..1ab5918 100644 --- a/src/tracer/NodeTracerFactory.ts +++ b/src/tracer/NodeTracerFactory.ts @@ -21,6 +21,8 @@ export class NodeTracerFactory { serializer: config.serializer ?? JSON.stringify, resourceAttributes: config.resourceAttributes ?? {}, instrumentations: config.instrumentations ?? [], + sampler: config.sampler, + filterTracer: config.filterTracer, initialize: config.initialize ?? true, }, this.client, diff --git a/src/tracer/processors/JudgmentSpanProcessor.ts b/src/tracer/processors/JudgmentSpanProcessor.ts new file mode 100644 index 0000000..34b93b7 --- /dev/null +++ b/src/tracer/processors/JudgmentSpanProcessor.ts @@ -0,0 +1,33 @@ +import { + BatchSpanProcessor, + type BatchSpanProcessorBrowserConfig, + type SpanExporter, +} from "@opentelemetry/sdk-trace-base"; +import type { BaseTracer } from "../BaseTracer"; + +export interface JudgmentSpanProcessorConfig { + maxQueueSize?: number; + scheduledDelayMillis?: number; + maxExportBatchSize?: number; + exportTimeoutMillis?: number; +} + +export class JudgmentSpanProcessor extends BatchSpanProcessor { + // The span processor does have custom functionality that at the moment is not ported over and still in discussion. + // Current implementation enforces that tracer will always exist for predicted compatibility in the future. + private tracer: BaseTracer; + constructor( + _tracer: BaseTracer, + exporter: SpanExporter, + config?: JudgmentSpanProcessorConfig, + ) { + const batchConfig: BatchSpanProcessorBrowserConfig = { + maxQueueSize: config?.maxQueueSize, + scheduledDelayMillis: config?.scheduledDelayMillis, + maxExportBatchSize: config?.maxExportBatchSize, + exportTimeoutMillis: config?.exportTimeoutMillis, + }; + super(exporter, batchConfig); + this.tracer = _tracer; + } +} diff --git a/src/tracer/processors/NoOpJudgmentSpanProcessor.ts b/src/tracer/processors/NoOpJudgmentSpanProcessor.ts new file mode 100644 index 0000000..8d8eb03 --- /dev/null +++ b/src/tracer/processors/NoOpJudgmentSpanProcessor.ts @@ -0,0 +1,31 @@ +import { type Context } from "@opentelemetry/api"; +import type { ReadableSpan, Span } from "@opentelemetry/sdk-trace-base"; +import { type BaseTracer } from "../BaseTracer"; +import { NoOpSpanExporter } from "../exporters/NoOpSpanExporter"; +import { JudgmentSpanProcessor } from "./JudgmentSpanProcessor"; + +export class NoOpSpanProcessor extends JudgmentSpanProcessor { + constructor(tracer: BaseTracer) { + super(tracer, new NoOpSpanExporter()); + } + + onStart(_span: Span, _parentContext: Context): void { + return; + } + + onEnd(_span: ReadableSpan): void { + return; + } + + forceFlush(): Promise { + return Promise.resolve(); + } + + shutdown(): Promise { + return Promise.resolve(); + } + + protected onShutdown(): void { + return; + } +} diff --git a/yarn.lock b/yarn.lock index 4f2c9cc..a94e396 100644 --- a/yarn.lock +++ b/yarn.lock @@ -174,7 +174,7 @@ dependencies: "@opentelemetry/api" "^1.3.0" -"@opentelemetry/context-async-hooks@2.2.0": +"@opentelemetry/context-async-hooks@2.2.0", "@opentelemetry/context-async-hooks@^2.2.0": version "2.2.0" resolved "https://registry.npmjs.org/@opentelemetry/context-async-hooks/-/context-async-hooks-2.2.0.tgz" integrity sha512-qRkLWiUEZNAmYapZ7KGS5C4OmBLcP/H2foXeOEaowYCR0wi89fHejrfYfbuLVCMLp/dWZXKvQusdbUEZjERfwQ==