Skip to content
This repository was archived by the owner on Sep 29, 2025. It is now read-only.

Commit 2e42cf4

Browse files
mongodbenBen Perlmutter
andauthored
(EAI-649): Draft code example classifier type (#595)
* migrate classifier to core * start creating docs team classifier * configurable model * working e2e * fix build err --------- Co-authored-by: Ben Perlmutter <mongodben@mongodb.com>
1 parent c734096 commit 2e42cf4

File tree

11 files changed

+265
-59
lines changed

11 files changed

+265
-59
lines changed

packages/datasets/.env.example

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
OPENAI_API_KEY=<YOUR_API_KEY>
2-
OPENAI_DEPLOYMENT_NAME=<YOUR_DEPLOYMENT_NAME>
2+
OPENAI_CHAT_COMPLETION_DEPLOYMENT="gpt-4o-mini"
3+
OPENAI_CHAT_COMPLETION_MODEL_VERSION="placeholder...must include but value doesn't matter"
34
OPENAI_ENDPOINT=<YOUR_ENDPOINT>
4-
OPENAI_GPT_4o_COMPLETION_DEPLOYMENT="<some deployment>"
5+
OPENAI_API_VERSION="2024-06-01"
6+
BRAINTRUST_API_KEY="<some key>"
Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
import "dotenv/config";
2+
import { EvalCase, Eval, EvalScorer } from "mongodb-rag-core/braintrust";
3+
import {
4+
CodeExampleClassification,
5+
makeClassifyCodeExampleDocsTeam,
6+
} from "./classifyCodeExampleDocsTeam.js";
7+
import {
8+
assertEnvVars,
9+
Classification,
10+
CORE_OPENAI_CHAT_COMPLETION_ENV_VARS,
11+
} from "mongodb-rag-core";
12+
import { AzureOpenAI } from "mongodb-rag-core/openai";
13+
14+
// Azure OpenAI connection settings for this eval script, read through
// `assertEnvVars` using the core chat-completion env var spec.
// NOTE(review): presumably `assertEnvVars` throws when a variable is missing — confirm.
const chatCompletionEnv = assertEnvVars(CORE_OPENAI_CHAT_COMPLETION_ENV_VARS);
const OPENAI_API_KEY = chatCompletionEnv.OPENAI_API_KEY;
const OPENAI_API_VERSION = chatCompletionEnv.OPENAI_API_VERSION;
const OPENAI_CHAT_COMPLETION_DEPLOYMENT =
  chatCompletionEnv.OPENAI_CHAT_COMPLETION_DEPLOYMENT;
const OPENAI_ENDPOINT = chatCompletionEnv.OPENAI_ENDPOINT;
20+
21+
/** Input of one eval case: the raw text of the code example to classify. */
type ClassifyCodeExampleEvalCaseInput = { text: string };

/** What the eval task produces: the classifier's `Classification`. */
type ClassifyCodeExampleEvalCaseOutput = Classification;

/** Expected label: one of the docs-team classification type names. */
type ClassifyCodeExampleEvalCaseExpected = CodeExampleClassification;

/**
 * A single eval case pairing an input with its expected label.
 * NOTE(review): the third type argument is presumably the case metadata — confirm
 * against the `EvalCase` definition.
 */
type ClassifyCodeExampleEvalCase = EvalCase<
  ClassifyCodeExampleEvalCaseInput,
  ClassifyCodeExampleEvalCaseExpected,
  Record<string, unknown> | undefined
>;
34+
35+
// TODO: add eval cases

// Markdown-fenced Ruby snippet used as the only eval input so far.
const rubyHelloWorldSnippet = `\`\`\`ruby
puts "Hello, World!"
\`\`\``;

/** Eval dataset: each entry pairs a fenced code example with its expected label. */
const evalCases = [
  { input: { text: rubyHelloWorldSnippet }, expected: "unknown" },
] satisfies ClassifyCodeExampleEvalCase[];
46+
47+
/**
 * Exact-match scorer: scores 1 when the classifier's `type` equals the expected
 * label, otherwise 0. The score is reported under the name "CorrectClassification".
 */
const isCorrectClassification: EvalScorer<
  ClassifyCodeExampleEvalCaseInput,
  ClassifyCodeExampleEvalCaseOutput,
  ClassifyCodeExampleEvalCaseExpected
> = ({ output, expected }) => {
  const labelsMatch = output.type === expected;
  return {
    name: "CorrectClassification",
    score: labelsMatch ? 1 : 0,
  };
};
57+
58+
// Azure OpenAI client configured from the env vars read at the top of this script.
const azureOpenAiClient = new AzureOpenAI({
  endpoint: OPENAI_ENDPOINT,
  apiKey: OPENAI_API_KEY,
  apiVersion: OPENAI_API_VERSION,
});

/** Docs-team code example classifier under evaluation; the deployment name is passed as the model. */
const classifyCodeExample = makeClassifyCodeExampleDocsTeam({
  model: OPENAI_CHAT_COMPLETION_DEPLOYMENT,
  openAiClient: azureOpenAiClient,
});
66+
67+
// Register the eval: classify each case's text and score it with the exact-match scorer.
Eval("classify-code-example-docs-team", {
  experimentName: "classify-code-example",
  maxConcurrency: 10,
  data: evalCases,
  scores: [isCorrectClassification],
  // The task returns only the classification; the rest of the classifier
  // result is discarded.
  task: async ({ text }) => {
    const classifierResult = await classifyCodeExample({ input: text });
    return classifierResult.classification;
  },
});
Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
/**
2+
@fileoverview Contains classification as defined by MongoDB Docs team (https://docs.google.com/document/d/199fTaNlvMXb1GmloQRkh8MYPfYyuQakaYIsAuZvgKvU/)
3+
*/
4+
import { ClassificationType, makeClassifier } from "mongodb-rag-core";
5+
6+
/**
 * Code example categories the classifier can assign, each with a description
 * and optional few-shot examples (`text` plus the `reason` for the label).
 *
 * Declared `as const` so the `type` strings stay literal and can be used to
 * derive a union type; `satisfies` checks the shape against `ClassificationType`.
 *
 * Example `text` values are Markdown fenced code blocks. Opening and closing
 * fences both use three backticks, matching each other and the fence style of
 * the input the classifier is evaluated on.
 */
const classificationTypes = [
  {
    type: "api_method_signature",
    description:
      "API method signature code block showing an API method name and arguments",
    examples: [
      {
        // From https://www.mongodb.com/docs/languages/python/pymongo-driver/current/write/insert/#insert-one-document
        text: `\`\`\`python
sample_restaurants.restaurants.insert_one({"name" : "Mongo's Burgers"})
\`\`\``,
        reason:
          'Shows the minimum required arguments for the "insert_one" method. Therefore this is an API method signature.',
      },
      {
        // From https://www.mongodb.com/docs/manual/reference/method/db.collection.insertOne/#syntax
        text: `\`\`\`javascript
db.collection.insertOne(
<document>,
{
writeConcern: <document>
}
)
\`\`\``,
        reason:
          "Complete API method signature, showing all possible arguments in an abstract form. Therefore this is an API method signature.",
      },
    ],
  },
  {
    type: "return_example",
    description:
      "A JSON blob, example document, or other return object type demonstrating what a user might expect from executing a piece of code",
    // TODO: add other examples
    examples: [],
  },
  {
    type: "example_configuration_object",
    description:
      "Example object, often represented in YAML or JSON, enumerating parameters and their types",
    // TODO: add other examples
    examples: [],
  },
  {
    type: "usage_example",
    description:
      "A longer code snippet that establishes parameters, performs basic set-up code, and includes the larger context to demonstrate how to accomplish a task",
    // TODO: add other examples
    examples: [],
  },
  {
    type: "sample_application",
    description:
      "Runnable applications that connect more discrete pieces of code, and may include error handling, framework integrations, or User Interface elements",
    // TODO: add other examples
    examples: [],
  },
  // Note: adding this b/c there will certainly be types of code examples that don't cleanly fit into these buckets.
  // For example, if there was a file tree in a code block.
  {
    type: "unknown",
    description:
      "Unknown classification type. The code example doesn't easily fit into any of the other categories.",
  },
] as const satisfies ClassificationType[];
71+
72+
export type CodeExampleClassification =
73+
(typeof classificationTypes)[number]["type"];
74+
75+
/**
 * Builds a classifier for the docs-team code example categories defined in
 * this module, with chain-of-thought enabled.
 *
 * @param options - The `openAiClient` and `model` forwarded to `makeClassifier`.
 * @returns The classifier produced by `makeClassifier`.
 */
export const makeClassifyCodeExampleDocsTeam = (
  options: Pick<
    Parameters<typeof makeClassifier>[0],
    "openAiClient" | "model"
  >
) => {
  const { openAiClient, model } = options;
  return makeClassifier({
    classificationTypes,
    chainOfThought: true,
    openAiClient,
    model,
  });
};
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
import { Classifier, makeClassifier } from "mongodb-rag-core";
2+
import { RunLogger } from "../runlogger";
3+
import { stripIndents } from "common-tags";
4+
5+
/**
 * Wraps `makeClassifier` so that every classification is also recorded as a
 * run artifact when a logger is supplied.
 *
 * @param args - The `makeClassifier` arguments plus an optional `logger`.
 *   The logger is stripped before the remaining arguments reach `makeClassifier`.
 * @returns A `Classifier` that returns the underlying classifier's result unchanged.
 */
export function makeClassifierWithLogger(
  args: Parameters<typeof makeClassifier>[0] & { logger?: RunLogger }
) {
  const { logger, ...classifierBuilder } = args;
  const baseClassifier = makeClassifier(classifierBuilder);

  const classifyAndLog: Classifier = async (
    classifyArgs: Parameters<typeof baseClassifier>[0]
  ) => {
    const result = await baseClassifier(classifyArgs);

    // Without a logger nothing is evaluated or written.
    if (logger != null) {
      // Artifact name is keyed by the current timestamp in milliseconds.
      const artifactPath = `chatTemplates/classifier-${Date.now()}.json`;
      // Records the first input message (labelled as the system message)
      // together with the resulting classification.
      const artifactBody = stripIndents`
        <SystemMessage>
        ${result.inputMessages[0].content}
        </SystemMessage>
        <Classification>
        ${JSON.stringify(result.classification)}
        </Classification>
      `;
      logger.appendArtifact(artifactPath, artifactBody);
    }

    return result;
  };

  return classifyAndLog;
}

packages/mongodb-artifact-generator/src/commands/generateReleaseNotes.ts

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ import {
2020
ClassifiedChangelog,
2121
makeClassifyChangelogs,
2222
} from "../release-notes/classifyChangelog";
23+
import { assertEnvVars } from "mongodb-rag-core";
2324

2425
let logger: RunLogger;
2526

@@ -29,6 +30,9 @@ type GenerateReleaseNotesCommandArgs = {
2930
llmMaxConcurrency: number;
3031
};
3132

33+
const { OPENAI_CHAT_COMPLETION_DEPLOYMENT } = assertEnvVars({
34+
OPENAI_CHAT_COMPLETION_DEPLOYMENT: "",
35+
});
3236
export default createCommand<GenerateReleaseNotesCommandArgs>({
3337
command: "generateReleaseNotes",
3438
builder(args) {
@@ -175,6 +179,7 @@ export const action = createConfiguredAction<GenerateReleaseNotesCommandArgs>(
175179

176180
const classifyChangelogs = makeClassifyChangelogs({
177181
openAiClient,
182+
model: OPENAI_CHAT_COMPLETION_DEPLOYMENT,
178183
logger,
179184
});
180185

packages/mongodb-artifact-generator/src/release-notes/classifyChangelog.ts

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ import { makeClassifyChangelogAudience } from "./classifyChangelogAudience";
44
import { makeClassifyChangelogScope } from "./classifyChangelogScope";
55
import { iOfN } from "../utils";
66
import { RunLogger } from "../runlogger";
7-
import { Classification } from "../chat/makeClassifier";
7+
import { Classification } from "mongodb-rag-core";
88

99
export type ClassifiedChangelog = {
1010
audience: Classification;
@@ -14,19 +14,23 @@ export type ClassifiedChangelog = {
1414

1515
export type MakeClassifyChangelogScope = {
1616
openAiClient: OpenAI;
17+
model: string;
1718
logger?: RunLogger;
1819
};
1920

2021
export function makeClassifyChangelog({
2122
openAiClient,
23+
model,
2224
logger,
2325
}: MakeClassifyChangelogScope) {
2426
const classifyChangelogAudience = makeClassifyChangelogAudience({
2527
openAiClient,
28+
model,
2629
logger,
2730
});
2831
const classifyChangelogScope = makeClassifyChangelogScope({
2932
openAiClient,
33+
model,
3034
logger,
3135
});
3236

@@ -35,8 +39,12 @@ export function makeClassifyChangelog({
3539
}: {
3640
changelog: string;
3741
}): Promise<ClassifiedChangelog> {
38-
const audience = await classifyChangelogAudience({ input: changelog });
39-
const scope = await classifyChangelogScope({ input: changelog });
42+
const { classification: audience } = await classifyChangelogAudience({
43+
input: changelog,
44+
});
45+
const { classification: scope } = await classifyChangelogScope({
46+
input: changelog,
47+
});
4048

4149
return {
4250
audience,
@@ -49,8 +57,13 @@ export function makeClassifyChangelog({
4957
export function makeClassifyChangelogs({
5058
openAiClient,
5159
logger,
60+
model,
5261
}: MakeClassifyChangelogScope) {
53-
const classifyChangelog = makeClassifyChangelog({ openAiClient, logger });
62+
const classifyChangelog = makeClassifyChangelog({
63+
openAiClient,
64+
logger,
65+
model,
66+
});
5467

5568
return async function classifyChangelogs({
5669
changelogs,

packages/mongodb-artifact-generator/src/release-notes/classifyChangelogAudience.ts

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import { OpenAI } from "mongodb-rag-core/openai";
2-
import { makeClassifier } from "../chat/makeClassifier";
2+
import { makeClassifierWithLogger } from "../chat/makeClassifierWithLogger";
33
import { RunLogger } from "../runlogger";
44

55
const classificationTypes = [
@@ -41,14 +41,17 @@ const classificationTypes = [
4141

4242
export type MakeClassifyChangelogAudienceArgs = {
4343
openAiClient: OpenAI;
44+
model: string;
4445
logger?: RunLogger;
4546
};
4647

4748
export function makeClassifyChangelogAudience({
4849
openAiClient,
50+
model,
4951
}: MakeClassifyChangelogAudienceArgs) {
50-
return makeClassifier({
52+
return makeClassifierWithLogger({
5153
openAiClient,
54+
model,
5255
classificationTypes,
5356
chainOfThought: true,
5457
});

packages/mongodb-artifact-generator/src/release-notes/classifyChangelogScope.ts

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import { OpenAI } from "mongodb-rag-core/openai";
2-
import { makeClassifier } from "../chat/makeClassifier";
2+
import { makeClassifierWithLogger } from "../chat/makeClassifierWithLogger";
33
import { RunLogger } from "../runlogger";
44

55
const classificationTypes = [
@@ -86,17 +86,20 @@ const classificationTypes = [
8686

8787
export type MakeClassifyChangelogScope = {
8888
openAiClient: OpenAI;
89+
model: string;
8990
logger?: RunLogger;
9091
};
9192

9293
export function makeClassifyChangelogScope({
9394
openAiClient,
95+
model,
9496
logger,
9597
}: MakeClassifyChangelogScope) {
96-
return makeClassifier({
98+
return makeClassifierWithLogger({
9799
openAiClient,
98100
logger,
99101
classificationTypes,
100102
chainOfThought: true,
103+
model,
101104
});
102105
}

packages/mongodb-rag-core/src/index.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ export * from "./DatabaseConnection";
2020
export * from "./DataStreamer";
2121
export * from "./logger";
2222
export * from "./conversations/MongoDbConversations";
23+
export * from "./makeClassifier";
2324
export * from "./References";
2425
export * from "./VectorStore";
2526
export * from "./arrayFilters";

0 commit comments

Comments
 (0)