Commit d5d662c

fix: set correct model for custom analysis
Currently we use the same model for the custom analysis categories as for the eval itself. This is incorrect, because the eval model may not be available for the analysis. These changes add an option to set the analysis model per category, defaulting to Gemini 2.5 Flash Lite.
1 parent 7e953e8 commit d5d662c

File tree: 5 files changed, +13 −5 lines

runner/configuration/constants.ts

Lines changed: 3 additions & 0 deletions

```diff
@@ -17,6 +17,9 @@ export const DEFAULT_MODEL_NAME = 'gemini-2.5-pro'; // slower than `flash`, but
  */
 export const DEFAULT_AUTORATER_MODEL_NAME = 'gemini-2.5-flash'; // use less expensive model
 
+/** Model used for AI summarization by default. */
+export const DEFAULT_SUMMARY_MODEL = 'gemini-2.5-flash-lite';
+
 /** Name of the root folder where we store LLM-generated code for debugging */
 export const LLM_OUTPUT_DIR = join(rootDir, 'llm-output');
 
```

runner/configuration/environment-config.ts

Lines changed: 1 addition & 0 deletions

```diff
@@ -108,6 +108,7 @@ export const environmentConfigSchema = z.object({
     z.object({
       name: z.string(),
       path: z.string(),
+      model: z.string().optional(),
       reportsFilter: z
         .enum([ReportContextFilter.AllReports, ReportContextFilter.NonPerfectReports])
         .optional(),
```

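For context, a minimal sketch of how an `analysisPrompts` entry would validate against the schema fragment above. The `name` and `path` values are invented for illustration, and `reportsFilter`/`ratingsFilter` are omitted for brevity:

```ts
import {z} from 'zod';

// Re-creation of the schema fragment from the diff above, trimmed to the
// fields relevant to this commit.
const analysisPromptSchema = z.object({
  name: z.string(),
  path: z.string(),
  model: z.string().optional(), // new: per-category model override
});

// With an explicit model override...
analysisPromptSchema.parse({
  name: 'error-patterns', // hypothetical category name
  path: './prompts/error-patterns.md', // hypothetical prompt path
  model: 'gemini-2.5-flash-lite',
});

// ...and without one; the runner then falls back to DEFAULT_SUMMARY_MODEL
// (see environment.ts below).
analysisPromptSchema.parse({
  name: 'error-patterns',
  path: './prompts/error-patterns.md',
});
```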
runner/configuration/environment.ts

Lines changed: 4 additions & 1 deletion

```diff
@@ -18,6 +18,7 @@ import {EnvironmentConfig} from './environment-config.js';
 import {EvalPromptWithMetadata, MultiStepPrompt} from './prompts.js';
 import {renderPromptTemplate} from './prompt-templating.js';
 import {getSha256Hash} from '../utils/hashing.js';
+import {DEFAULT_SUMMARY_MODEL} from './constants.js';
 
 interface CategoryConfig {
   name: string;
@@ -27,6 +28,7 @@ interface CategoryConfig {
 interface AnalysisPrompt {
   name: string;
   prompt: string;
+  model: string;
   reportsFilter: ReportContextFilter;
   ratingsFilter: RatingContextFilter;
 }
@@ -463,12 +465,13 @@ export class Environment {
   private resolveAnalysisPrompts(config: EnvironmentConfig): AnalysisPrompt[] {
     const result: AnalysisPrompt[] = [];
 
-    config.analysisPrompts?.forEach(({name, path, reportsFilter, ratingsFilter}) => {
+    config.analysisPrompts?.forEach(({name, path, model, reportsFilter, ratingsFilter}) => {
       const prompt = this.renderEnvironmentPrompt(path).result;
 
       result.push({
         name,
         prompt,
+        model: model || DEFAULT_SUMMARY_MODEL,
         reportsFilter: reportsFilter ?? ReportContextFilter.NonPerfectReports,
         ratingsFilter: ratingsFilter ?? RatingContextFilter.NonPerfectRatings,
       });
```

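Note that the fallback uses `||` rather than `??`, so an empty-string `model` in the config also falls back to the default, not just a missing one. A self-contained sketch of the resolution behavior:

```ts
const DEFAULT_SUMMARY_MODEL = 'gemini-2.5-flash-lite';

// Mirrors `model: model || DEFAULT_SUMMARY_MODEL` from the diff. With `||`,
// both `undefined` (field omitted) and `''` (empty string) trigger the
// fallback; `??` would only catch `undefined` and `null`.
function resolveAnalysisModel(model?: string): string {
  return model || DEFAULT_SUMMARY_MODEL;
}

console.log(resolveAnalysisModel('gemini-2.5-flash')); // 'gemini-2.5-flash'
console.log(resolveAnalysisModel(undefined)); // 'gemini-2.5-flash-lite'
console.log(resolveAnalysisModel('')); // 'gemini-2.5-flash-lite'
```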
runner/orchestration/generate-summary.ts

Lines changed: 3 additions & 3 deletions

```diff
@@ -12,7 +12,7 @@ import {AssessmentResult, CompletionStats, RunSummary} from '../shared-interfaces.js';
 export async function prepareSummary(
   generateAiSummaryLlm: GenkitRunner | null,
   abortSignal: AbortSignal,
-  model: string,
+  evalRunModel: string,
   env: Environment,
   assessments: AssessmentResult[],
   completionStats: CompletionStats,
@@ -75,7 +75,7 @@ export async function prepareSummary(
       abortSignal,
       assessments,
       [],
-      model,
+      config.model,
       {
         reportContextFilter: config.reportsFilter,
         ratingContextFilter: config.ratingsFilter,
@@ -101,7 +101,7 @@ export async function prepareSummary(
   const executorInfo = await env.executor.getExecutorInfo?.();
 
   return {
-    model,
+    model: evalRunModel,
     environmentId: env.id,
     displayName: env.displayName,
     framework: {
```

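The rename from `model` to `evalRunModel` makes the two roles explicit: the eval-run model is only recorded in the summary metadata, while each custom analysis now runs with its own `config.model`. A rough sketch of that separation, with invented types and names beyond those in the diff:

```ts
// Assumed shape, for illustration only.
interface ResolvedAnalysisPrompt {
  name: string;
  model: string; // resolved earlier via `model || DEFAULT_SUMMARY_MODEL`
}

function modelsInPlay(evalRunModel: string, analyses: ResolvedAnalysisPrompt[]) {
  return {
    // Reported in the run summary: the model the eval itself ran with.
    summaryMetadata: evalRunModel,
    // Used to execute each custom analysis prompt.
    perAnalysis: analyses.map((a) => a.model),
  };
}
```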
runner/reporting/report-ai-summary.ts

Lines changed: 2 additions & 1 deletion

```diff
@@ -1,4 +1,5 @@
 import {GenkitRunner} from '../codegen/genkit/genkit-runner.js';
+import {DEFAULT_SUMMARY_MODEL} from '../configuration/constants.js';
 import {AssessmentResult, ReportContextFilter, RatingContextFilter} from '../shared-interfaces.js';
 import {chatWithReportAI} from './report-ai-chat.js';
 
@@ -7,7 +8,7 @@ export async function summarizeReportWithAI(
   abortSignal: AbortSignal,
   assessments: AssessmentResult[],
 ) {
-  const model = 'gemini-2.5-flash-lite';
+  const model = DEFAULT_SUMMARY_MODEL;
 
   if (!llm.getSupportedModels().includes(model)) {
     throw new Error(`Unable to generate AI summary due to unsupported model: ${model}`);
```

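`summarizeReportWithAI` keeps its fail-fast guard; only the hard-coded model string moved into the shared constant. A standalone sketch of the same guard, with an invented model list standing in for `GenkitRunner`:

```ts
const DEFAULT_SUMMARY_MODEL = 'gemini-2.5-flash-lite';

// Stand-in for GenkitRunner; the supported-model list here is invented.
const llm = {
  getSupportedModels: (): string[] => ['gemini-2.5-flash', 'gemini-2.5-flash-lite'],
};

const model = DEFAULT_SUMMARY_MODEL;
if (!llm.getSupportedModels().includes(model)) {
  throw new Error(`Unable to generate AI summary due to unsupported model: ${model}`);
}
```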