45 changes: 39 additions & 6 deletions packages/core/src/tracing/langchain/utils.ts
@@ -25,6 +25,7 @@ import {
GEN_AI_USAGE_OUTPUT_TOKENS_ATTRIBUTE,
GEN_AI_USAGE_TOTAL_TOKENS_ATTRIBUTE,
} from '../ai/gen-ai-attributes';
import { isContentMedia, stripInlineMediaFromSingleMessage } from '../ai/mediaStripping';
import { truncateGenAiMessages } from '../ai/messageTruncation';
import { extractSystemInstructions } from '../ai/utils';
import { LANGCHAIN_ORIGIN, ROLE_MAP } from './constants';
@@ -62,6 +63,38 @@ function asString(v: unknown): string {
}
}

/**
* Converts message content to a string, stripping inline media (base64 images, audio, etc.)
* from multimodal array content before stringification, so raw base64 payloads never land in span attributes.
*
* @example
* // String content passes through unchanged:
* normalizeContent("Hello") // => "Hello"
*
* // Multimodal array content — media is replaced with "[Blob substitute]" before JSON.stringify:
* normalizeContent([
* { type: "text", text: "What color?" },
* { type: "image_url", image_url: { url: "data:image/png;base64,iVBOR..." } }
* ])
* // => '[{"type":"text","text":"What color?"},{"type":"image_url","image_url":{"url":"[Blob substitute]"}}]'
*
* // Without this, asString() would JSON.stringify the raw array and the base64 blob
* // would end up in span attributes, since downstream stripping only works on objects.
*/
function normalizeContent(v: unknown): string {
if (Array.isArray(v)) {
try {
const stripped = v.map(part =>
part && typeof part === 'object' && isContentMedia(part) ? stripInlineMediaFromSingleMessage(part) : part,
);
return JSON.stringify(stripped);
} catch {
return String(v);
}
}
return asString(v);
}
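
/*
 * isContentMedia and stripInlineMediaFromSingleMessage are imported from
 * ../ai/mediaStripping, whose source is not part of this diff. The sketch below is
 * only a rough approximation of their shape, inferred from the test expectations
 * in this PR — the shared module may cover more shapes or handle URLs differently:
 *
 *   const BLOB_SUBSTITUTE = '[Blob substitute]';
 *
 *   // A part counts as media when it carries inline data in a known provider shape:
 *   // OpenAI image_url / input_audio, Anthropic base64 source blocks, Google inlineData.
 *   function isContentMedia(part: object): boolean {
 *     const p = part as Record<string, unknown>;
 *     return 'image_url' in p || 'input_audio' in p || 'source' in p || 'inlineData' in p;
 *   }
 *
 *   // Shallow-copies the part, replacing any inline payload with the substitute marker.
 *   function stripInlineMediaFromSingleMessage(part: object): object {
 *     const p = { ...(part as Record<string, any>) };
 *     if (p.image_url?.url) p.image_url = { ...p.image_url, url: BLOB_SUBSTITUTE };
 *     if (p.input_audio?.data) p.input_audio = { ...p.input_audio, data: BLOB_SUBSTITUTE };
 *     if (p.source?.data) p.source = { ...p.source, data: BLOB_SUBSTITUTE };
 *     if (p.inlineData?.data) p.inlineData = { ...p.inlineData, data: BLOB_SUBSTITUTE };
 *     return p;
 *   }
 */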

/**
* Normalizes a single role token to our canonical set.
*
@@ -123,7 +156,7 @@ export function normalizeLangChainMessages(messages: LangChainMessage[]): Array<
const messageType = maybeGetType.call(message);
return {
role: normalizeMessageRole(messageType),
- content: asString(message.content),
+ content: normalizeContent(message.content),
};
}

@@ -136,7 +169,7 @@ export function normalizeLangChainMessages(messages: LangChainMessage[]): Array<

return {
role: normalizeMessageRole(role),
- content: asString(message.kwargs?.content),
+ content: normalizeContent(message.kwargs?.content),
};
}

@@ -145,7 +178,7 @@ export function normalizeLangChainMessages(messages: LangChainMessage[]): Array<
const role = String(message.type).toLowerCase();
return {
role: normalizeMessageRole(role),
- content: asString(message.content),
+ content: normalizeContent(message.content),
};
}

@@ -154,7 +187,7 @@ export function normalizeLangChainMessages(messages: LangChainMessage[]): Array<
if (message.role) {
return {
role: normalizeMessageRole(String(message.role)),
- content: asString(message.content),
+ content: normalizeContent(message.content),
};
}

@@ -164,14 +197,14 @@ export function normalizeLangChainMessages(messages: LangChainMessage[]): Array<
if (ctor && ctor !== 'Object') {
return {
role: normalizeMessageRole(normalizeRoleNameFromCtor(ctor)),
- content: asString(message.content),
+ content: normalizeContent(message.content),
};
}

// 6) Fallback: treat as user text
return {
role: 'user',
- content: asString(message.content),
+ content: normalizeContent(message.content),
};
});
}
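
Taken together, normalizeLangChainMessages now preserves the multimodal structure while redacting inline payloads. An illustrative run (message shape borrowed from the tests below; output abridged):

const normalized = normalizeLangChainMessages([
  {
    role: 'user',
    content: [
      { type: 'text', text: 'What color?' },
      { type: 'image_url', image_url: { url: 'data:image/png;base64,iVBORw0KGgo...' } },
    ],
  },
] as unknown as LangChainMessage[]);
// => [{
// =>   role: 'user',
// =>   content: '[{"type":"text","text":"What color?"},{"type":"image_url","image_url":{"url":"[Blob substitute]"}}]',
// => }]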
248 changes: 248 additions & 0 deletions packages/core/test/lib/tracing/langchain-utils.test.ts
@@ -0,0 +1,248 @@
import { describe, expect, it } from 'vitest';
import { GEN_AI_INPUT_MESSAGES_ATTRIBUTE } from '../../../src/tracing/ai/gen-ai-attributes';
import type { LangChainMessage } from '../../../src/tracing/langchain/types';
import { extractChatModelRequestAttributes, normalizeLangChainMessages } from '../../../src/tracing/langchain/utils';

describe('normalizeLangChainMessages', () => {
it('normalizes messages with _getType()', () => {
const messages = [
{
_getType: () => 'human',
content: 'Hello',
},
{
_getType: () => 'ai',
content: 'Hi there!',
},
] as unknown as LangChainMessage[];

const result = normalizeLangChainMessages(messages);
expect(result).toEqual([
{ role: 'user', content: 'Hello' },
{ role: 'assistant', content: 'Hi there!' },
]);
});

it('normalizes messages with type property', () => {
const messages: LangChainMessage[] = [
{ type: 'human', content: 'Hello' },
{ type: 'ai', content: 'Hi!' },
];

const result = normalizeLangChainMessages(messages);
expect(result).toEqual([
{ role: 'user', content: 'Hello' },
{ role: 'assistant', content: 'Hi!' },
]);
});

it('normalizes messages with role property', () => {
const messages: LangChainMessage[] = [
{ role: 'user', content: 'Hello' },
{ role: 'assistant', content: 'Hi!' },
];

const result = normalizeLangChainMessages(messages);
expect(result).toEqual([
{ role: 'user', content: 'Hello' },
{ role: 'assistant', content: 'Hi!' },
]);
});

it('normalizes serialized LangChain format', () => {
const messages: LangChainMessage[] = [
{
lc: 1,
id: ['langchain_core', 'messages', 'HumanMessage'],
kwargs: { content: 'Hello from serialized' },
},
];

const result = normalizeLangChainMessages(messages);
expect(result).toEqual([{ role: 'user', content: 'Hello from serialized' }]);
});

describe('multimodal content media stripping', () => {
const b64Data = `iVBORw0KGgoAAAANSUhEUgAAAAUA${'A'.repeat(200)}`;
const BLOB_SUBSTITUTE = '[Blob substitute]';

it('strips base64 image_url from multimodal array content via _getType()', () => {
const messages = [
{
_getType: () => 'human',
content: [
{ type: 'text', text: 'What color is in this image?' },
{ type: 'image_url', image_url: { url: `data:image/png;base64,${b64Data}` } },
],
},
] as unknown as LangChainMessage[];

const result = normalizeLangChainMessages(messages);
expect(result).toHaveLength(1);
expect(result[0]!.role).toBe('user');

const parsed = JSON.parse(result[0]!.content);
expect(parsed).toHaveLength(2);
expect(parsed[0]).toEqual({ type: 'text', text: 'What color is in this image?' });
expect(parsed[1].image_url.url).toBe(BLOB_SUBSTITUTE);
expect(result[0]!.content).not.toContain(b64Data);
});

it('strips base64 data from Anthropic-style source blocks', () => {
const messages = [
{
_getType: () => 'human',
content: [
{ type: 'text', text: 'Describe this image' },
{
type: 'image',
source: {
type: 'base64',
media_type: 'image/png',
data: b64Data,
},
},
],
},
] as unknown as LangChainMessage[];

const result = normalizeLangChainMessages(messages);
const parsed = JSON.parse(result[0]!.content);
expect(parsed[1].source.data).toBe(BLOB_SUBSTITUTE);
expect(result[0]!.content).not.toContain(b64Data);
});

it('strips base64 from inlineData (Google GenAI style)', () => {
const messages: LangChainMessage[] = [
{
type: 'human',
content: [
{ type: 'text', text: 'Describe' },
{ inlineData: { mimeType: 'image/png', data: b64Data } },
] as unknown as string,
},
];

const result = normalizeLangChainMessages(messages);
const parsed = JSON.parse(result[0]!.content);
expect(parsed[1].inlineData.data).toBe(BLOB_SUBSTITUTE);
expect(result[0]!.content).not.toContain(b64Data);
});

it('strips base64 from input_audio content parts', () => {
const messages = [
{
_getType: () => 'human',
content: [
{ type: 'text', text: 'What do you hear?' },
{ type: 'input_audio', input_audio: { data: b64Data } },
],
},
] as unknown as LangChainMessage[];

const result = normalizeLangChainMessages(messages);
const parsed = JSON.parse(result[0]!.content);
expect(parsed[1].input_audio.data).toBe(BLOB_SUBSTITUTE);
expect(result[0]!.content).not.toContain(b64Data);
});

it('preserves text-only array content without modification', () => {
const messages = [
{
_getType: () => 'human',
content: [
{ type: 'text', text: 'First part' },
{ type: 'text', text: 'Second part' },
],
},
] as unknown as LangChainMessage[];

const result = normalizeLangChainMessages(messages);
const parsed = JSON.parse(result[0]!.content);
expect(parsed).toEqual([
{ type: 'text', text: 'First part' },
{ type: 'text', text: 'Second part' },
]);
});

it('strips media from serialized LangChain format with array content', () => {
const messages: LangChainMessage[] = [
{
lc: 1,
id: ['langchain_core', 'messages', 'HumanMessage'],
kwargs: {
content: [
{ type: 'text', text: 'Describe this' },
{ type: 'image_url', image_url: { url: `data:image/png;base64,${b64Data}` } },
] as unknown as string,
},
},
];

const result = normalizeLangChainMessages(messages);
const parsed = JSON.parse(result[0]!.content);
expect(parsed[1].image_url.url).toBe(BLOB_SUBSTITUTE);
expect(result[0]!.content).not.toContain(b64Data);
});

it('strips media from messages with role property and array content', () => {
const messages: LangChainMessage[] = [
{
role: 'user',
content: [
{ type: 'text', text: 'Look at this' },
{ type: 'image_url', image_url: { url: `data:image/jpeg;base64,${b64Data}` } },
] as unknown as string,
},
];

const result = normalizeLangChainMessages(messages);
const parsed = JSON.parse(result[0]!.content);
expect(parsed[1].image_url.url).toBe(BLOB_SUBSTITUTE);
expect(result[0]!.content).not.toContain(b64Data);
});

it('strips media from messages with type property and array content', () => {
const messages: LangChainMessage[] = [
{
type: 'human',
content: [
{ type: 'text', text: 'Check this' },
{ type: 'image_url', image_url: { url: `data:image/png;base64,${b64Data}` } },
] as unknown as string,
},
];

const result = normalizeLangChainMessages(messages);
const parsed = JSON.parse(result[0]!.content);
expect(parsed[1].image_url.url).toBe(BLOB_SUBSTITUTE);
});
});
});

describe('extractChatModelRequestAttributes with multimodal content', () => {
const b64Data = `iVBORw0KGgoAAAANSUhEUgAAAAUA${'A'.repeat(200)}`;

it('strips base64 from input messages attribute', () => {
const serialized = { id: ['langchain', 'chat_models', 'openai'], name: 'ChatOpenAI' };
const messages: LangChainMessage[][] = [
[
{
_getType: () => 'human',
content: [
{ type: 'text', text: 'What is in this image?' },
{ type: 'image_url', image_url: { url: `data:image/png;base64,${b64Data}` } },
],
} as unknown as LangChainMessage,
],
];

const attrs = extractChatModelRequestAttributes(serialized, messages, true);
const inputMessages = attrs[GEN_AI_INPUT_MESSAGES_ATTRIBUTE] as string | undefined;

expect(inputMessages).toBeDefined();
expect(inputMessages).not.toContain(b64Data);
expect(inputMessages).toContain('[Blob substitute]');
expect(inputMessages).toContain('What is in this image?');
});
});