Skip to content
This repository was archived by the owner on Sep 29, 2025. It is now read-only.

Commit 236316c

Browse files
mongodben and Ben Perlmutter authored
(EAI-998): Low-hanging improvements to existing tracing metrics (#691)
* update tracing metrics * clarify metric names * code cleanup * fix type err in tests --------- Co-authored-by: Ben Perlmutter <mongodben@mongodb.com>
1 parent b949bb7 commit 236316c

File tree

4 files changed

+31
-11
lines changed

4 files changed

+31
-11
lines changed

packages/chatbot-server-mongodb-public/src/tracing/extractTracingData.ts

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,9 @@ export function extractTracingData(
6363
tags.push("llm_does_not_know");
6464
}
6565

66+
const rating = evalAssistantMessage?.rating;
67+
const comment = evalAssistantMessage?.userComment;
68+
6669
return {
6770
tags,
6871
rejectQuery,
@@ -71,6 +74,8 @@ export function extractTracingData(
7174
numRetrievedChunks,
7275
userMessage: previousUserMessage,
7376
assistantMessage: evalAssistantMessage,
77+
rating,
78+
comment,
7479
};
7580
}
7681

packages/chatbot-server-mongodb-public/src/tracing/getLlmAsAJudgeScores.test.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,8 @@ describe("getLlmAsAJudgeScores", () => {
4949
llmDoesNotKnow: false,
5050
numRetrievedChunks: 1,
5151
rejectQuery: false,
52+
rating: undefined,
53+
comment: undefined,
5254
} satisfies Parameters<typeof getLlmAsAJudgeScores>[1];
5355

5456
it("shouldn't judge verified answer", async () => {

packages/chatbot-server-mongodb-public/src/tracing/routesUpdateTraceHandlers.ts

Lines changed: 24 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -145,13 +145,25 @@ function getTracingScores(
145145
k: number
146146
) {
147147
return {
148-
RejectedQuery: tracingData.rejectQuery === true ? 1 : null,
149-
VerifiedAnswer: tracingData.isVerifiedAnswer === true ? 1 : null,
150-
LlmDoesNotKnow: tracingData.llmDoesNotKnow === true ? 1 : null,
151-
[`RetrievedChunksOver${k}`]:
152-
tracingData.isVerifiedAnswer !== true
153-
? tracingData.numRetrievedChunks / k
154-
: null,
148+
// These metrics should start at 0,
149+
// and are updated in other update trace handlers as needed
150+
HasRating: tracingData.rating !== undefined ? 1 : 0,
151+
HasComment: tracingData.comment !== undefined ? 1 : 0,
152+
VerifiedAnswer: tracingData.isVerifiedAnswer === true ? 1 : 0,
153+
// Only calculate these metrics if the answer is not verified
154+
InputGuardrailPass: tracingData.isVerifiedAnswer
155+
? null
156+
: tracingData.rejectQuery === true
157+
? 0
158+
: 1,
159+
LlmAnswerAttempted: tracingData.isVerifiedAnswer
160+
? null
161+
: tracingData.llmDoesNotKnow === true
162+
? 0
163+
: 1,
164+
[`RetrievedChunksOver${k}`]: tracingData.isVerifiedAnswer
165+
? null
166+
: tracingData.numRetrievedChunks / k,
155167
};
156168
}
157169

@@ -214,7 +226,10 @@ export function makeRateMessageUpdateTrace({
214226
try {
215227
logger.updateSpan({
216228
id: traceId,
217-
scores: await getLlmAsAJudgeScores(llmAsAJudge, tracingData),
229+
scores: {
230+
...(await getLlmAsAJudgeScores(llmAsAJudge, tracingData)),
231+
HasRating: 1,
232+
},
218233
});
219234
} catch (error) {
220235
logRequest({
@@ -309,6 +324,7 @@ export function makeCommentMessageUpdateTrace({
309324
logger.updateSpan({
310325
id: traceId,
311326
scores: {
327+
HasComment: 1,
312328
CommentSentiment: (
313329
await judgeMongoDbChatbotCommentSentiment({
314330
judgeLlm,

packages/mongodb-chatbot-server/src/routes/conversations/commentMessage.ts

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -150,9 +150,6 @@ export function makeCommentMessageRoute({
150150
braintrustLogger.logFeedback({
151151
id: traceId,
152152
comment,
153-
scores: {
154-
HasComment: 1,
155-
},
156153
});
157154
await updateTraceIfExists({
158155
updateTrace,

0 commit comments

Comments
 (0)