diff --git a/packages/global/core/evaluation/api.d.ts b/packages/global/core/evaluation/api.d.ts index 07cb6822c188..e15509ef2699 100644 --- a/packages/global/core/evaluation/api.d.ts +++ b/packages/global/core/evaluation/api.d.ts @@ -5,7 +5,8 @@ import type { EvaluationItemSchemaType, EvaluationDisplayType, EvaluationItemDisplayType, - EvaluationDataItemType + EvaluationDataItemType, + EvaluationStatistics } from './type'; // ===== Common Types ===== @@ -51,7 +52,6 @@ export type EvaluationStatsResponse = { evaluating: number; queuing: number; error: number; - avgScore?: number; }; // Export Evaluation Items @@ -112,12 +112,7 @@ export type DataItemGroupedItem = { dataItemId: string; dataItem: EvaluationDataItemType; items: EvaluationItemDisplayType[]; - summary: { - totalItems: number; - completedItems: number; - errorItems: number; - avgScore?: number; - }; + statistics?: EvaluationStatistics; }; export type DataItemListResponse = PaginationResponse; diff --git a/packages/global/core/evaluation/type.d.ts b/packages/global/core/evaluation/type.d.ts index f70c6175f052..6c54bb85d985 100644 --- a/packages/global/core/evaluation/type.d.ts +++ b/packages/global/core/evaluation/type.d.ts @@ -143,7 +143,6 @@ export type EvaluationDisplayType = Pick< | 'statistics' > & { _id: string; - avgScore?: number; datasetName?: string; target: EvalTarget; // Complete target object with extended config metricNames: string[]; @@ -161,6 +160,7 @@ export interface CreateEvaluationParams { datasetId: string; target: EvalTarget; // Only supports workflow type target configuration evaluators: EvaluatorSchema[]; // Replace metricIds with evaluators + autoStart?: boolean; // Whether to automatically start the evaluation task after creation (default: true) } // Queue job data types diff --git a/packages/service/core/evaluation/task/index.ts b/packages/service/core/evaluation/task/index.ts index 08e8b53773f4..15577e724cde 100644 --- a/packages/service/core/evaluation/task/index.ts +++ b/packages/service/core/evaluation/task/index.ts @@ -8,6 +8,7 @@ import type { EvaluationDataItemType, EvaluationDisplayType } from '@fastgpt/global/core/evaluation/type'; +import type { DataItemListResponse } from '@fastgpt/global/core/evaluation/api'; import type { MetricResult } from '@fastgpt/global/core/evaluation/metric/type'; import { Types } from 'mongoose'; import { EvaluationStatusEnum } from '@fastgpt/global/core/evaluation/constants'; @@ -28,83 +29,6 @@ import { type ClientSession } from '../../../common/mongo'; // Constants const MAX_EXPORT_PAGE_SIZE = 100000; -// ===== Service Layer Response Types ===== - -// List response type for evaluations -export interface EvaluationListResponse { - list: EvaluationDisplayType[]; - total: number; -} - -// List response type for evaluation items -export interface EvaluationItemListResponse { - items: EvaluationItemDisplayType[]; - total: number; -} - -// Statistics response for evaluation task -export interface EvaluationStatsResponse { - total: number; - completed: number; - evaluating: number; - queuing: number; - error: number; - avgScore?: number; -} - -// Result response for individual evaluation item -export interface EvaluationItemResultResponse { - item: EvaluationItemSchemaType; - dataItem: EvaluationDataItemType; - response?: string; - result?: MetricResult; - score?: number; -} - -// Export response for evaluation results -export interface EvaluationExportResponse { - results: Buffer; - total: number; -} - -// Export response for grouped data items -export interface EvaluationExportByDataItemResponse { - results: Buffer; - totalItems: number; -} - -// Grouped data item response -export interface DataItemGroupedResponse { - list: DataItemGroupedType[]; - total: number; -} - -// Individual grouped data item type -export interface DataItemGroupedType { - dataItemId: string; - dataItem: EvaluationDataItemType; - items: EvaluationItemDisplayType[]; - summary: { - totalItems: number; - completedItems: number; - errorItems: number; - avgScore?: number; - }; -} - -// Batch operation response types -export interface BatchDeleteResponse { - deletedCount: number; -} - -export interface BatchRetryResponse { - retriedCount: number; -} - -export interface BatchUpdateResponse { - updatedCount: number; -} - export class EvaluationTaskService { static async createEvaluation( params: CreateEvaluationParams & { @@ -112,7 +36,7 @@ export class EvaluationTaskService { tmbId: string; } ): Promise { - const { teamId, tmbId, ...evaluationParams } = params; + const { teamId, tmbId, autoStart = true, ...evaluationParams } = params; // Create usage record const { billId } = await createEvaluationUsage({ @@ -124,17 +48,50 @@ export class EvaluationTaskService { // Apply default configuration to evaluators (weights, thresholds, etc.) const evaluatorsWithDefaultConfig = buildEvalDataConfig(evaluationParams.evaluators); - const evaluation = await MongoEvaluation.create({ - ...evaluationParams, - evaluators: evaluatorsWithDefaultConfig, - teamId, - tmbId, - usageId: billId, - status: EvaluationStatusEnum.queuing, - createTime: new Date() - }); + const createAndStart = async (session: ClientSession) => { + // Create evaluation within transaction + const evaluation = await MongoEvaluation.create( + [ + { + ...evaluationParams, + evaluators: evaluatorsWithDefaultConfig, + teamId, + tmbId, + usageId: billId, + status: EvaluationStatusEnum.queuing, + createTime: new Date() + } + ], + { session } + ); - return evaluation.toObject(); + const evaluationObject = evaluation[0].toObject(); + + // Auto-start the evaluation if autoStart is true + if (autoStart) { + // Update status to evaluating within transaction + await MongoEvaluation.updateOne( + { _id: evaluationObject._id }, + { $set: { status: EvaluationStatusEnum.evaluating } }, + { session } + ); + + // Queue operation within transaction - if it fails, transaction will rollback + await evaluationTaskQueue.add(`eval_task_${evaluationObject._id}`, { + evalId: evaluationObject._id.toString() + }); + + // Update status in returned object + evaluationObject.status = EvaluationStatusEnum.evaluating; + addLog.debug(`[Evaluation] Task created and auto-started: ${evaluationObject._id}`); + } else { + addLog.debug(`[Evaluation] Task created: ${evaluationObject._id}`); + } + + return evaluationObject; + }; + + return await mongoSessionRun(createAndStart); } static async getEvaluation(evalId: string, teamId: string): Promise { @@ -213,7 +170,7 @@ export class EvaluationTaskService { accessibleIds?: string[], tmbId?: string, isOwner: boolean = false - ): Promise { + ): Promise<{ list: EvaluationDisplayType[]; total: number }> { // Build basic filter and pagination const filter: any = { teamId: new Types.ObjectId(teamId) }; if (searchKey) { @@ -275,6 +232,29 @@ export class EvaluationTaskService { as: 'appVersion' } }, + // Add real-time statistics lookup + { + $lookup: { + from: 'eval_items', + localField: '_id', + foreignField: 'evalId', + pipeline: [ + { + $group: { + _id: null, + totalItems: { $sum: 1 }, + completedItems: { + $sum: { $cond: [{ $eq: ['$status', EvaluationStatusEnum.completed] }, 1, 0] } + }, + errorItems: { + $sum: { $cond: [{ $eq: ['$status', EvaluationStatusEnum.error] }, 1, 0] } + } + } + } + ], + as: 'realTimeStats' + } + }, { $addFields: { datasetName: { $arrayElemAt: ['$dataset.name', 0] }, @@ -288,6 +268,23 @@ export class EvaluationTaskService { as: 'evaluator', in: '$$evaluator.metric.name' } + }, + // Use real-time statistics if available, otherwise fallback to stored statistics + statistics: { + $cond: { + if: { $gt: [{ $size: '$realTimeStats' }, 0] }, + then: { + $let: { + vars: { stats: { $arrayElemAt: ['$realTimeStats', 0] } }, + in: { + totalItems: '$$stats.totalItems', + completedItems: '$$stats.completedItems', + errorItems: '$$stats.errorItems' + } + } + }, + else: '$statistics' + } } } }, @@ -299,7 +296,6 @@ export class EvaluationTaskService { finishTime: 1, status: 1, errorMessage: 1, - avgScore: 1, datasetName: 1, target: { type: '$target.type', @@ -359,11 +355,51 @@ export class EvaluationTaskService { as: 'appVersion' } }, + // Add real-time statistics lookup + { + $lookup: { + from: 'eval_items', + localField: '_id', + foreignField: 'evalId', + pipeline: [ + { + $group: { + _id: null, + totalItems: { $sum: 1 }, + completedItems: { + $sum: { $cond: [{ $eq: ['$status', EvaluationStatusEnum.completed] }, 1, 0] } + }, + errorItems: { + $sum: { $cond: [{ $eq: ['$status', EvaluationStatusEnum.error] }, 1, 0] } + } + } + } + ], + as: 'realTimeStats' + } + }, { $addFields: { 'target.config.appName': { $arrayElemAt: ['$app.name', 0] }, 'target.config.avatar': { $arrayElemAt: ['$app.avatar', 0] }, - 'target.config.versionName': { $arrayElemAt: ['$appVersion.versionName', 0] } + 'target.config.versionName': { $arrayElemAt: ['$appVersion.versionName', 0] }, + // Use real-time statistics if available, otherwise fallback to stored statistics + statistics: { + $cond: { + if: { $gt: [{ $size: '$realTimeStats' }, 0] }, + then: { + $let: { + vars: { stats: { $arrayElemAt: ['$realTimeStats', 0] } }, + in: { + totalItems: '$$stats.totalItems', + completedItems: '$$stats.completedItems', + errorItems: '$$stats.errorItems' + } + } + }, + else: '$statistics' + } + } } }, { @@ -408,7 +444,7 @@ export class EvaluationTaskService { teamId: string, offset: number = 0, pageSize: number = 20 - ): Promise { + ): Promise<{ items: EvaluationItemDisplayType[]; total: number }> { const evaluation = await this.getEvaluation(evalId, teamId); const skip = offset; @@ -552,7 +588,13 @@ export class EvaluationTaskService { static async getEvaluationStats( evalId: string, teamId: string - ): Promise { + ): Promise<{ + total: number; + completed: number; + evaluating: number; + queuing: number; + error: number; + }> { const evaluation = await this.getEvaluation(evalId, teamId); const [statsResult] = await MongoEvalItem.aggregate([ @@ -572,33 +614,18 @@ export class EvaluationTaskService { }, error: { $sum: { $cond: [{ $eq: ['$status', EvaluationStatusEnum.error] }, 1, 0] } - }, - avgScore: { - $avg: { - $cond: [ - { - $and: [ - { $eq: ['$status', EvaluationStatusEnum.completed] }, - { $ne: ['$evaluatorOutput.data.score', null] } - ] - }, - '$evaluatorOutput.data.score', - null - ] - } } } } ]); // Return stats with defaults for empty results - const result: EvaluationStatsResponse = { + const result = { total: statsResult?.total || 0, completed: statsResult?.completed || 0, evaluating: statsResult?.evaluating || 0, queuing: statsResult?.queuing || 0, - error: statsResult?.error || 0, - avgScore: statsResult?.avgScore ? Math.round(statsResult.avgScore * 100) / 100 : undefined + error: statsResult?.error || 0 }; return result; @@ -856,13 +883,21 @@ export class EvaluationTaskService { return itemsToRetry.length; }; - return await mongoSessionRun(retryItems); + const retriedCount = await mongoSessionRun(retryItems); + + return retriedCount; } static async getEvaluationItemResult( itemId: string, teamId: string - ): Promise { + ): Promise<{ + item: EvaluationItemSchemaType; + dataItem: EvaluationDataItemType; + response?: string; + result?: MetricResult; + score?: number; + }> { const item = await this.getEvaluationItem(itemId, teamId); return { @@ -886,7 +921,7 @@ export class EvaluationTaskService { page?: number; pageSize?: number; } = {} - ): Promise { + ): Promise<{ items: EvaluationItemDisplayType[]; total: number }> { const evaluation = await this.getEvaluation(evalId, teamId); const { status, hasError, scoreRange, keyword, page = 1, pageSize = 20 } = options; @@ -950,7 +985,7 @@ export class EvaluationTaskService { evalId: string, teamId: string, format: 'csv' | 'json' = 'json' - ): Promise { + ): Promise<{ results: Buffer; total: number }> { const evaluation = await this.getEvaluation(evalId, teamId); const items = await MongoEvalItem.find({ evalId: evaluation._id }) @@ -1022,7 +1057,7 @@ export class EvaluationTaskService { offset?: number; pageSize?: number; } - ): Promise { + ): Promise { const { evalId, status, keyword, offset = 0, pageSize = 20 } = options; // Verify team access to the evaluation task @@ -1058,17 +1093,15 @@ export class EvaluationTaskService { }, errorItems: { $sum: { $cond: [{ $eq: ['$status', EvaluationStatusEnum.error] }, 1, 0] } - }, - avgScore: { $avg: '$evaluatorOutput.data.score' } + } } }, { $addFields: { dataItemId: '$_id', - 'summary.totalItems': '$totalItems', - 'summary.completedItems': '$completedItems', - 'summary.errorItems': '$errorItems', - 'summary.avgScore': { $round: ['$avgScore', 2] } + 'statistics.totalItems': '$totalItems', + 'statistics.completedItems': '$completedItems', + 'statistics.errorItems': '$errorItems' } }, { $sort: { totalItems: -1 as const, _id: 1 as const } } @@ -1094,11 +1127,10 @@ export class EvaluationTaskService { } } }, - summary: { + statistics: { totalItems: '$totalItems', completedItems: '$completedItems', - errorItems: '$errorItems', - avgScore: '$summary.avgScore' + errorItems: '$errorItems' } } } @@ -1119,12 +1151,12 @@ export class EvaluationTaskService { dataItemId: string, teamId: string, evalId: string - ): Promise { + ): Promise<{ deletedCount: number }> { // Verify team access to the evaluation task await this.getEvaluation(evalId, teamId); const filter: any = { - 'dataItem._id': dataItemId, + 'dataItem._id': new Types.ObjectId(dataItemId), evalId: new Types.ObjectId(evalId) }; @@ -1167,12 +1199,12 @@ export class EvaluationTaskService { dataItemId: string, teamId: string, evalId: string - ): Promise { + ): Promise<{ retriedCount: number }> { // Verify evaluation access first await this.getEvaluation(evalId, teamId); const filter: any = { - 'dataItem._id': dataItemId, + 'dataItem._id': new Types.ObjectId(dataItemId), evalId: new Types.ObjectId(evalId), status: EvaluationStatusEnum.error }; @@ -1253,7 +1285,7 @@ export class EvaluationTaskService { }, teamId: string, evalId: string - ): Promise { + ): Promise<{ updatedCount: number }> { // Verify evaluation access first await this.getEvaluation(evalId, teamId); @@ -1264,7 +1296,7 @@ export class EvaluationTaskService { } const filter: any = { - 'dataItem._id': dataItemId, + 'dataItem._id': new Types.ObjectId(dataItemId), evalId: new Types.ObjectId(evalId) }; @@ -1281,7 +1313,7 @@ export class EvaluationTaskService { teamId: string, evalId: string, format: 'csv' | 'json' = 'json' - ): Promise { + ): Promise<{ results: Buffer; totalItems: number }> { // Get evaluation config for metric names const evaluation = await this.getEvaluation(evalId, teamId); diff --git a/packages/service/core/evaluation/task/processor.ts b/packages/service/core/evaluation/task/processor.ts index 8d6aea7839d9..eb819a8ba8c3 100644 --- a/packages/service/core/evaluation/task/processor.ts +++ b/packages/service/core/evaluation/task/processor.ts @@ -214,16 +214,6 @@ const finishEvaluationTask = async (evalId: string) => { }, queuingCount: { $sum: { $cond: [{ $eq: ['$status', EvaluationStatusEnum.queuing] }, 1, 0] } - }, - // Calculate average score only for successfully completed items - avgScore: { - $avg: { - $cond: [ - { $eq: ['$status', EvaluationStatusEnum.completed] }, - '$evaluatorOutput?.data?.score', - null - ] - } } } } @@ -240,43 +230,42 @@ const finishEvaluationTask = async (evalId: string) => { completedCount = 0, errorCount = 0, evaluatingCount = 0, - queuingCount = 0, - avgScore = 0 + queuingCount = 0 } = statsResult; // Check if truly completed const pendingCount = evaluatingCount + queuingCount; + + // Task status is always completed when all items are finished + let taskStatus: EvaluationStatusEnum = EvaluationStatusEnum.completed; if (pendingCount > 0) { addLog.debug( `[Evaluation] Task not yet completed: ${evalId}, pending items: ${pendingCount}` ); - return; // Still have incomplete items, do not update task status + taskStatus = EvaluationStatusEnum.evaluating; } - // Task status is always completed when all items are finished - const taskStatus = EvaluationStatusEnum.completed; - // Update task status with statistical fields - await MongoEvaluation.updateOne( - { _id: new Types.ObjectId(evalId) }, - { - $set: { - finishTime: new Date(), - avgScore: avgScore != null ? Math.round(avgScore * 100) / 100 : undefined, - status: taskStatus, - // Use statistics object to store execution statistics - statistics: { - totalItems: totalCount, - completedItems: completedCount, - errorItems: errorCount - } - } + const updateFields: any = { + status: taskStatus, + // Use statistics object to store execution statistics + statistics: { + totalItems: totalCount, + completedItems: completedCount, + errorItems: errorCount } - ); + }; + + // Only set finishTime if the task is actually completed + if (taskStatus === EvaluationStatusEnum.completed) { + updateFields.finishTime = new Date(); + } + + await MongoEvaluation.updateOne({ _id: new Types.ObjectId(evalId) }, { $set: updateFields }); addLog.debug( `[Evaluation] Task completed: ${evalId}, status: ${taskStatus}, total: ${totalCount}, ` + - `success: ${completedCount}, failed: ${errorCount}, avg score: ${avgScore ? avgScore.toFixed(2) : 'N/A'}` + `success: ${completedCount}, failed: ${errorCount}` ); } catch (error) { addLog.error(`[Evaluation] Error occurred while completing task: ${evalId}`, error); @@ -672,14 +661,7 @@ const evaluationItemProcessor = async (job: Job) => { // After try-catch, check if all evaluation items are completed try { - const pendingCount = await MongoEvalItem.countDocuments({ - evalId: new Types.ObjectId(evalId), - status: { $in: [EvaluationStatusEnum.queuing, EvaluationStatusEnum.evaluating] } - }); - - if (pendingCount === 0) { - await finishEvaluationTask(evalId); - } + await finishEvaluationTask(evalId); } catch (finishError) { addLog.error( `[Evaluation] Error occurred while checking task completion status: ${evalId}`, diff --git a/projects/app/src/pageComponents/dashboard/evaluation/DetailModal.tsx b/projects/app/src/pageComponents/dashboard/evaluation/DetailModal.tsx index 7175c60b2e5a..08ec378b1e15 100644 --- a/projects/app/src/pageComponents/dashboard/evaluation/DetailModal.tsx +++ b/projects/app/src/pageComponents/dashboard/evaluation/DetailModal.tsx @@ -281,7 +281,7 @@ const EvaluationDetailModal = ({ fontSize={14} color={'myGray.900'} fontWeight={'medium'} - >{`${t('dashboard_evaluation:data_list')}: ${evalDetail?.totalCount}`} + >{`${t('dashboard_evaluation:data_list')}: ${evalDetail?.totalCount}`}