Skip to content
This repository was archived by the owner on Sep 29, 2025. It is now read-only.

Commit c639fc3

Browse files
mongodben and Ben Perlmutter authored
(EAI-1104 and EAI-1094): metadata in tracing, scrub PII before keywords (#779)
tracing metadata + err catching

Co-authored-by: Ben Perlmutter <mongodben@mongodb.com>
1 parent b287568 commit c639fc3

File tree

8 files changed: +191 additions, -146 deletions

packages/chatbot-server-mongodb-public/src/tracing/extractTracingData.test.ts

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -52,21 +52,6 @@ describe("extractTracingData", () => {
5252
expect(tracingData.rejectQuery).toBe(true);
5353
expect(tracingData.tags.includes("rejected_query")).toBe(true);
5454
});
55-
test("should extract metadata", () => {
56-
const messages: Message[] = [
57-
{
58-
...baseUserMessage,
59-
customData: {
60-
programmingLanguage: "javascript",
61-
mongoDbProduct: "MongoDB Atlas",
62-
},
63-
},
64-
baseAssistantMessage,
65-
];
66-
const tracingData = extractTracingData(messages, msgId, conversationId);
67-
expect(tracingData.tags.includes("javascript")).toBe(true);
68-
expect(tracingData.tags.includes("mongodb_atlas")).toBe(true);
69-
});
7055
test("should get number of retrieved chunks", () => {
7156
const messagesNoContext: Message[] = [
7257
{

packages/chatbot-server-mongodb-public/src/tracing/extractTracingData.ts

Lines changed: 12 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ import { strict as assert } from "assert";
1111
import { SEARCH_TOOL_NAME } from "../tools/search";
1212
import { logRequest } from "../utils";
1313
import { OriginCode } from "mongodb-chatbot-server";
14+
import { tagify } from "./tagify";
1415

1516
export function extractTracingData(
1617
messages: Message[],
@@ -29,31 +30,22 @@ export function extractTracingData(
2930
const previousUserMessageIdx = messages
3031
.slice(0, evalAssistantMessageIdx)
3132
.findLastIndex((m): m is DbMessage<UserMessage> => m.role === "user");
32-
const previousUserMessage = messages[previousUserMessageIdx] as
33-
| DbMessage<UserMessage>
34-
| undefined;
3533
assert(previousUserMessageIdx !== -1, "User message not found");
34+
const previousUserMessage = messages[
35+
previousUserMessageIdx
36+
] as DbMessage<UserMessage>;
3637

3738
const tags = [];
3839

39-
const rejectQuery = previousUserMessage?.rejectQuery;
40+
const rejectQuery = previousUserMessage.rejectQuery;
4041
if (rejectQuery === true) {
4142
tags.push("rejected_query");
4243
}
43-
const programmingLanguage = previousUserMessage?.customData
44-
?.programmingLanguage as string | undefined;
45-
const mongoDbProduct = previousUserMessage?.customData?.mongoDbProduct as
46-
| string
47-
| undefined;
48-
const requestOriginCode = previousUserMessage?.customData?.originCode as
44+
45+
const requestOriginCode = previousUserMessage.customData?.originCode as
4946
| OriginCode
5047
| undefined;
51-
if (programmingLanguage) {
52-
tags.push(tagify(programmingLanguage));
53-
}
54-
if (mongoDbProduct) {
55-
tags.push(tagify(mongoDbProduct));
56-
}
48+
5749
if (requestOriginCode) {
5850
tags.push(tagify(requestOriginCode));
5951
}
@@ -68,21 +60,21 @@ export function extractTracingData(
6860
}
6961

7062
const isVerifiedAnswer =
71-
evalAssistantMessage?.metadata?.verifiedAnswer !== undefined
63+
evalAssistantMessage.metadata?.verifiedAnswer !== undefined
7264
? true
7365
: undefined;
7466
if (isVerifiedAnswer) {
7567
tags.push("verified_answer");
7668
}
77-
const llmDoesNotKnow = evalAssistantMessage?.content.includes(
69+
const llmDoesNotKnow = evalAssistantMessage.content.includes(
7870
llmDoesNotKnowMessage
7971
);
8072
if (llmDoesNotKnow) {
8173
tags.push("llm_does_not_know");
8274
}
8375

84-
const rating = evalAssistantMessage?.rating;
85-
const comment = evalAssistantMessage?.userComment;
76+
const rating = evalAssistantMessage.rating;
77+
const comment = evalAssistantMessage.userComment;
8678

8779
return {
8880
conversationId: conversationId,
@@ -101,10 +93,6 @@ export function extractTracingData(
10193
};
10294
}
10395

104-
function tagify(s: string) {
105-
return s.replaceAll(/ /g, "_").toLowerCase();
106-
}
107-
10896
export function getContextsFromMessages(
10997
messages: Message[],
11098
reqId: string

packages/chatbot-server-mongodb-public/src/tracing/routesUpdateTraceHandlers.ts

Lines changed: 35 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@ import { LanguageModel } from "mongodb-rag-core/aiSdk";
2222
import { makeScrubbedMessagesFromTracingData } from "./scrubbedMessages/makeScrubbedMessagesFromTracingData";
2323
import { redactPii } from "./scrubbedMessages/redactPii";
2424
import { MessageAnalysis } from "./scrubbedMessages/analyzeMessage";
25+
import { classifyMongoDbMetadata } from "mongodb-rag-core/mongoDbMetadata";
26+
import { tagify } from "./tagify";
2527

2628
export function makeAddMessageToConversationUpdateTrace({
2729
k,
@@ -93,6 +95,37 @@ export function makeAddMessageToConversationUpdateTrace({
9395
});
9496
}
9597

98+
// classify metadata
99+
try {
100+
const metadata = await classifyMongoDbMetadata(
101+
analyzerModel,
102+
`The following is a back and forth conversation between a user and an assistant. The user is asking a question about MongoDB. The assistant is trying to answer the user's question. The user's message is in <user_message_content> tags and the assistant's message is in <assistant_message_content> tags.
103+
<user_message_content>
104+
${tracingData.userMessage?.content}
105+
</user_message_content>
106+
<assistant_message_content>
107+
${tracingData.assistantMessage?.content}
108+
</assistant_message_content>`
109+
);
110+
// update tags
111+
for (const tag of Object.values(metadata)) {
112+
if (tag !== null) {
113+
tracingData.tags.push(tagify(tag));
114+
}
115+
}
116+
// Add metadata to user message
117+
tracingData.userMessage.metadata = {
118+
...tracingData.userMessage.metadata,
119+
...metadata,
120+
};
121+
} catch (error) {
122+
logRequest({
123+
reqId,
124+
message: `Error classifying metadata while adding message ${error}`,
125+
type: "error",
126+
});
127+
}
128+
96129
// Send Segment events
97130
try {
98131
if (segmentTrackUserSentMessage) {
@@ -384,9 +417,9 @@ export function makeCommentMessageUpdateTrace({
384417
assert(userMessage?.id, "Missing user message for comment");
385418
await scrubbedMessageStore.updateScrubbedMessage({
386419
id: userMessage.id,
387-
message: {
420+
message: {
388421
"response.userCommented": true,
389-
"response.userComment": userComment
422+
"response.userComment": userComment,
390423
} as Partial<Omit<ScrubbedMessage, "_id">>,
391424
});
392425
} catch (error) {

packages/chatbot-server-mongodb-public/src/tracing/scrubbedMessages/analyzeMessage.ts

Lines changed: 26 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import { mongoDbTopics } from "mongodb-rag-core/mongoDbMetadata";
22
import { z } from "zod";
33
import { generateObject, LanguageModel } from "mongodb-rag-core/aiSdk";
4+
import { wrapTraced } from "mongodb-rag-core/braintrust";
45

56
export const MessageAnalysisSchema = z.object({
67
topics: z
@@ -44,23 +45,28 @@ For all nullable fields, set to \`null\` if it is unclear or unknown.`;
4445

4546
export type MessageAnalysis = z.infer<typeof MessageAnalysisSchema>;
4647

47-
export async function analyzeMessage(
48-
messageContent: string,
49-
model: LanguageModel
50-
): Promise<MessageAnalysis> {
51-
const result = await generateObject({
52-
model,
53-
schema: MessageAnalysisSchema,
54-
messages: [
55-
{
56-
role: "system",
57-
content: systemPrompt,
58-
},
59-
{
60-
role: "user",
61-
content: messageContent,
62-
},
63-
],
64-
});
65-
return result.object;
66-
}
48+
export const analyzeMessage = wrapTraced(
49+
async function (
50+
messageContent: string,
51+
model: LanguageModel
52+
): Promise<MessageAnalysis> {
53+
const result = await generateObject({
54+
model,
55+
schema: MessageAnalysisSchema,
56+
messages: [
57+
{
58+
role: "system",
59+
content: systemPrompt,
60+
},
61+
{
62+
role: "user",
63+
content: messageContent,
64+
},
65+
],
66+
});
67+
return result.object;
68+
},
69+
{
70+
name: "analyzeMessage",
71+
}
72+
);

packages/chatbot-server-mongodb-public/src/tracing/scrubbedMessages/makeScrubbedMessagesFromTracingData.ts

Lines changed: 20 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,11 @@ export async function makeScrubbedMessagesFromTracingData({
2424
throw new Error("User message not found");
2525
}
2626

27+
const { redactedText: redactedUserContent, piiFound: userMessagePii } =
28+
redactPii(userMessage.content);
29+
2730
const userAnalysis = analysis
28-
? await analyzeMessage(userMessage.content, analysis.model).catch(
31+
? await analyzeMessage(redactedUserContent, analysis.model).catch(
2932
(error) => {
3033
logRequest({
3134
reqId,
@@ -39,9 +42,6 @@ export async function makeScrubbedMessagesFromTracingData({
3942
)
4043
: undefined;
4144

42-
const { redactedText: redactedUserContent, piiFound: userMessagePii } =
43-
redactPii(userMessage.content);
44-
4545
const scrubbedUserMessage = {
4646
_id: userMessage.id,
4747
conversationId: tracingData.conversationId,
@@ -64,15 +64,27 @@ export async function makeScrubbedMessagesFromTracingData({
6464
} satisfies ScrubbedMessage<MessageAnalysis>;
6565

6666
// Assistant message scrubbing
67-
const assistantAnalysis =
68-
analysis && !tracingData.isVerifiedAnswer
69-
? await analyzeMessage(assistantMessage.content, analysis.model)
70-
: undefined;
7167
const {
7268
redactedText: redactedAssistantContent,
7369
piiFound: assistantMessagePii,
7470
} = redactPii(assistantMessage.content);
7571

72+
const assistantAnalysis =
73+
analysis && !tracingData.isVerifiedAnswer
74+
? await analyzeMessage(redactedAssistantContent, analysis.model).catch(
75+
(error) => {
76+
logRequest({
77+
reqId,
78+
message: `Error analyzing scrubbed assistant message in tracing: ${JSON.stringify(
79+
error
80+
)}`,
81+
type: "error",
82+
});
83+
return undefined;
84+
}
85+
)
86+
: undefined;
87+
7688
const scrubbedAssistantMessage = {
7789
_id: assistantMessage.id,
7890
conversationId: tracingData.conversationId,
packages/chatbot-server-mongodb-public/src/tracing/tagify.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
export function tagify(s: string) {
2+
return s.replaceAll(/ /g, "_").toLowerCase();
3+
}

packages/mongodb-rag-core/src/mongoDbMetadata/classifyMetadata.eval.ts

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,10 @@ import { Scorer } from "autoevals";
55
import { classifyMongoDbMetadata, MongoDbTag } from "./";
66
import { createOpenAI } from "@ai-sdk/openai";
77
import { getOpenAiEndpointAndApiKey, models } from "../models";
8+
import { wrapAISDKModel } from "../braintrust";
89

910
async function main() {
10-
const modelLabel = "gpt-4.1";
11+
const modelLabel = "gpt-4.1-mini";
1112
const modelConfig = models.find((m) => m.label === modelLabel);
1213
assert(modelConfig, `Model ${modelLabel} not found`);
1314

@@ -392,7 +393,7 @@ async function main() {
392393
async task(input) {
393394
try {
394395
return await classifyMongoDbMetadata(
395-
openai.languageModel(modelLabel),
396+
wrapAISDKModel(openai.languageModel(modelLabel)),
396397
input
397398
);
398399
} catch (error) {

0 commit comments

Comments
 (0)