From f4132e2dc8faf608fc8dcf94531e165a7cbeacdd Mon Sep 17 00:00:00 2001 From: chanzhi82020 Date: Mon, 8 Sep 2025 19:16:45 +0800 Subject: [PATCH] feat: add error code for eval task --- .../global/common/error/code/evaluation.ts | 246 +++++++++++- packages/global/core/evaluation/type.d.ts | 4 +- packages/global/core/evaluation/utils.ts | 46 --- packages/service/core/evaluation/common.ts | 126 +----- .../core/evaluation/dataset/dataQualityMq.ts | 6 +- .../evaluation/dataset/dataSynthesizeMq.ts | 6 +- .../evaluation/dataset/smartGenerateMq.ts | 6 +- .../service/core/evaluation/task/index.ts | 118 +++++- packages/service/core/evaluation/task/mq.ts | 88 ++++- .../service/core/evaluation/task/processor.ts | 366 ++++++++++++------ .../service/core/evaluation/utils/index.ts | 142 +++++++ .../{dataset => utils}/jobCleanup.ts | 0 .../support/permission/evaluation/auth.ts | 35 +- packages/service/type/env.d.ts | 1 + packages/web/i18n/en/evaluation.json | 48 ++- packages/web/i18n/zh-CN/evaluation.json | 48 ++- packages/web/i18n/zh-Hant/evaluation.json | 48 ++- projects/app/.env.template | 1 + .../pages/api/core/evaluation/task/create.ts | 40 +- .../api/core/evaluation/task/item/update.ts | 2 +- .../pages/api/core/evaluation/task/update.ts | 22 +- .../api/core/evaluation/task/create.test.ts | 35 +- .../evaluation/dataset/jobCleanup.test.ts | 4 +- .../service/core/evaluation/task.test.ts | 82 ++-- 24 files changed, 1100 insertions(+), 420 deletions(-) delete mode 100644 packages/global/core/evaluation/utils.ts create mode 100644 packages/service/core/evaluation/utils/index.ts rename packages/service/core/evaluation/{dataset => utils}/jobCleanup.ts (100%) diff --git a/packages/global/common/error/code/evaluation.ts b/packages/global/common/error/code/evaluation.ts index 367aa9420450..3af6a872ca44 100644 --- a/packages/global/common/error/code/evaluation.ts +++ b/packages/global/common/error/code/evaluation.ts @@ -5,11 +5,63 @@ import { i18nT } from '../../../../web/i18n/utils'; export enum EvaluationErrEnum { // Dataset related errors evalDatasetCollectionNotFound = 'evaluationDatasetCollectionNotFound', - evalDatasetDataNotFound = 'evaluationDatasetDataNotFound' + evalDatasetDataNotFound = 'evaluationDatasetDataNotFound', + + // Validation errors (510002-510049) + evalNameRequired = 'evaluationNameRequired', + evalNameTooLong = 'evaluationNameTooLong', + evalDescriptionTooLong = 'evaluationDescriptionTooLong', + evalDatasetIdRequired = 'evaluationDatasetIdRequired', + evalTargetRequired = 'evaluationTargetRequired', + evalTargetInvalidConfig = 'evaluationTargetInvalidConfig', + evalTargetAppIdMissing = 'evaluationTargetAppIdMissing', + evalEvaluatorsRequired = 'evaluationEvaluatorsRequired', + evalEvaluatorInvalidConfig = 'evaluationEvaluatorInvalidConfig', + evalCollectionIdRequired = 'evaluationCollectionIdRequired', + evalUserInputRequired = 'evaluationUserInputRequired', + evalExpectedOutputRequired = 'evaluationExpectedOutputRequired', + evalInvalidPageNumber = 'evaluationInvalidPageNumber', + evalInvalidPageSize = 'evaluationInvalidPageSize', + evalMetricNameRequired = 'evaluationMetricNameRequired', + evalMetricNameTooLong = 'evaluationMetricNameTooLong', + evalMetricPromptRequired = 'evaluationMetricPromptRequired', + evalMetricPromptTooLong = 'evaluationMetricPromptTooLong', + evalInvalidFormat = 'evaluationInvalidFormat', + evalCountMustBePositive = 'evaluationCountMustBePositive', + evalInvalidContext = 'evaluationInvalidContext', + evalInvalidRetrievalContext = 'evaluationInvalidRetrievalContext', + + // Authentication errors (510050-510069) + evalInsufficientPermission = 'evaluationInsufficientPermission', + evalAppNotFound = 'evaluationAppNotFound', + evalAppNoPermission = 'evaluationAppNoPermission', + evalTaskNotFound = 'evaluationTaskNotFound', + evalItemNotFound = 'evaluationItemNotFound', + evalMetricNotFound = 'evaluationMetricNotFound', + evalMetricBuiltinCannotModify = 'evaluationMetricBuiltinCannotModify', + evalMetricBuiltinCannotDelete = 'evaluationMetricBuiltinCannotDelete', + + // Business logic errors (510070-510099) + evalInvalidStatus = 'evaluationInvalidStatus', + evalOnlyQueuingCanStart = 'evaluationOnlyQueuingCanStart', + evalOnlyRunningCanStop = 'evaluationOnlyRunningCanStop', + evalOnlyFailedCanRetry = 'evaluationOnlyFailedCanRetry', + evalItemNoErrorToRetry = 'evaluationItemNoErrorToRetry', + evalTargetOutputRequired = 'evaluationTargetOutputRequired', + evalEvaluatorOutputRequired = 'evaluationEvaluatorOutputRequired', + evalDatasetLoadFailed = 'evaluationDatasetLoadFailed', + evalTargetConfigInvalid = 'evaluationTargetConfigInvalid', + evalEvaluatorsConfigInvalid = 'evaluationEvaluatorsConfigInvalid', + evalUnsupportedTargetType = 'evaluationUnsupportedTargetType', + evalAppVersionNotFound = 'evaluationAppVersionNotFound', + evalDuplicateDatasetName = 'evaluationDuplicateDatasetName', + evalNoDataInCollections = 'evaluationNoDataInCollections', + evalUpdateFailed = 'evaluationUpdateFailed', + evalLockAcquisitionFailed = 'evaluationLockAcquisitionFailed' } const evaluationErrList = [ - // Evaluation Dataset related errors + // Dataset related errors { statusText: EvaluationErrEnum.evalDatasetCollectionNotFound, message: i18nT('evaluation:dataset_collection_not_found') @@ -17,6 +69,196 @@ const evaluationErrList = [ { statusText: EvaluationErrEnum.evalDatasetDataNotFound, message: i18nT('evaluation:dataset_data_not_found') + }, + + // Validation errors + { + statusText: EvaluationErrEnum.evalNameRequired, + message: i18nT('evaluation:name_required') + }, + { + statusText: EvaluationErrEnum.evalNameTooLong, + message: i18nT('evaluation:name_too_long') + }, + { + statusText: EvaluationErrEnum.evalDescriptionTooLong, + message: i18nT('evaluation:description_too_long') + }, + { + statusText: EvaluationErrEnum.evalDatasetIdRequired, + message: i18nT('evaluation:dataset_id_required') + }, + { + statusText: EvaluationErrEnum.evalTargetRequired, + message: i18nT('evaluation:target_required') + }, + { + statusText: EvaluationErrEnum.evalTargetInvalidConfig, + message: i18nT('evaluation:target_invalid_config') + }, + { + statusText: EvaluationErrEnum.evalTargetAppIdMissing, + message: i18nT('evaluation:target_app_id_missing') + }, + { + statusText: EvaluationErrEnum.evalEvaluatorsRequired, + message: i18nT('evaluation:evaluators_required') + }, + { + statusText: EvaluationErrEnum.evalEvaluatorInvalidConfig, + message: i18nT('evaluation:evaluator_invalid_config') + }, + { + statusText: EvaluationErrEnum.evalCollectionIdRequired, + message: i18nT('evaluation:collection_id_required') + }, + { + statusText: EvaluationErrEnum.evalUserInputRequired, + message: i18nT('evaluation:user_input_required') + }, + { + statusText: EvaluationErrEnum.evalExpectedOutputRequired, + message: i18nT('evaluation:expected_output_required') + }, + { + statusText: EvaluationErrEnum.evalInvalidPageNumber, + message: i18nT('evaluation:invalid_page_number') + }, + { + statusText: EvaluationErrEnum.evalInvalidPageSize, + message: i18nT('evaluation:invalid_page_size') + }, + { + statusText: EvaluationErrEnum.evalMetricNameRequired, + message: i18nT('evaluation:metric_name_required') + }, + { + statusText: EvaluationErrEnum.evalMetricNameTooLong, + message: i18nT('evaluation:metric_name_too_long') + }, + { + statusText: EvaluationErrEnum.evalMetricPromptRequired, + message: i18nT('evaluation:metric_prompt_required') + }, + { + statusText: EvaluationErrEnum.evalMetricPromptTooLong, + message: i18nT('evaluation:metric_prompt_too_long') + }, + { + statusText: EvaluationErrEnum.evalInvalidFormat, + message: i18nT('evaluation:invalid_format') + }, + { + statusText: EvaluationErrEnum.evalCountMustBePositive, + message: i18nT('evaluation:count_must_be_positive') + }, + { + statusText: EvaluationErrEnum.evalInvalidContext, + message: i18nT('evaluation:invalid_context') + }, + { + statusText: EvaluationErrEnum.evalInvalidRetrievalContext, + message: i18nT('evaluation:invalid_retrieval_context') + }, + + // Authentication errors + { + statusText: EvaluationErrEnum.evalInsufficientPermission, + message: i18nT('evaluation:insufficient_permission') + }, + { + statusText: EvaluationErrEnum.evalAppNotFound, + message: i18nT('evaluation:app_not_found') + }, + { + statusText: EvaluationErrEnum.evalAppNoPermission, + message: i18nT('evaluation:app_no_permission') + }, + { + statusText: EvaluationErrEnum.evalTaskNotFound, + message: i18nT('evaluation:task_not_found') + }, + { + statusText: EvaluationErrEnum.evalItemNotFound, + message: i18nT('evaluation:item_not_found') + }, + { + statusText: EvaluationErrEnum.evalMetricNotFound, + message: i18nT('evaluation:metric_not_found') + }, + { + statusText: EvaluationErrEnum.evalMetricBuiltinCannotModify, + message: i18nT('evaluation:metric_builtin_cannot_modify') + }, + { + statusText: EvaluationErrEnum.evalMetricBuiltinCannotDelete, + message: i18nT('evaluation:metric_builtin_cannot_delete') + }, + + // Business logic errors + { + statusText: EvaluationErrEnum.evalInvalidStatus, + message: i18nT('evaluation:invalid_status') + }, + { + statusText: EvaluationErrEnum.evalOnlyQueuingCanStart, + message: i18nT('evaluation:only_queuing_can_start') + }, + { + statusText: EvaluationErrEnum.evalOnlyRunningCanStop, + message: i18nT('evaluation:only_running_can_stop') + }, + { + statusText: EvaluationErrEnum.evalOnlyFailedCanRetry, + message: i18nT('evaluation:only_failed_can_retry') + }, + { + statusText: EvaluationErrEnum.evalItemNoErrorToRetry, + message: i18nT('evaluation:item_no_error_to_retry') + }, + { + statusText: EvaluationErrEnum.evalTargetOutputRequired, + message: i18nT('evaluation:target_output_required') + }, + { + statusText: EvaluationErrEnum.evalEvaluatorOutputRequired, + message: i18nT('evaluation:evaluator_output_required') + }, + { + statusText: EvaluationErrEnum.evalDatasetLoadFailed, + message: i18nT('evaluation:dataset_load_failed') + }, + { + statusText: EvaluationErrEnum.evalTargetConfigInvalid, + message: i18nT('evaluation:target_config_invalid') + }, + { + statusText: EvaluationErrEnum.evalEvaluatorsConfigInvalid, + message: i18nT('evaluation:evaluators_config_invalid') + }, + { + statusText: EvaluationErrEnum.evalUnsupportedTargetType, + message: i18nT('evaluation:unsupported_target_type') + }, + { + statusText: EvaluationErrEnum.evalAppVersionNotFound, + message: i18nT('evaluation:app_version_not_found') + }, + { + statusText: EvaluationErrEnum.evalDuplicateDatasetName, + message: i18nT('evaluation:duplicate_dataset_name') + }, + { + statusText: EvaluationErrEnum.evalNoDataInCollections, + message: i18nT('evaluation:no_data_in_collections') + }, + { + statusText: EvaluationErrEnum.evalUpdateFailed, + message: i18nT('evaluation:update_failed') + }, + { + statusText: EvaluationErrEnum.evalLockAcquisitionFailed, + message: i18nT('evaluation:lock_acquisition_failed') } ]; diff --git a/packages/global/core/evaluation/type.d.ts b/packages/global/core/evaluation/type.d.ts index 0f76aeccf845..cb487e8e19e3 100644 --- a/packages/global/core/evaluation/type.d.ts +++ b/packages/global/core/evaluation/type.d.ts @@ -84,8 +84,8 @@ export interface TargetInput { } export interface TargetOutput { - actualOutput: string; - retrievalContext?: string[]; + [EvalDatasetDataKeyEnum.ActualOutput]: string; + [EvalDatasetDataKeyEnum.RetrievalContext]?: string[]; usage?: any; responseTime: number; } diff --git a/packages/global/core/evaluation/utils.ts b/packages/global/core/evaluation/utils.ts deleted file mode 100644 index 92ebb5bf5c08..000000000000 --- a/packages/global/core/evaluation/utils.ts +++ /dev/null @@ -1,46 +0,0 @@ -export interface ValidationResult { - success: boolean; - message?: string; -} - -export interface EvaluationValidationParams { - name?: string; - description?: string; -} - -export interface EvaluationValidationOptions { - namePrefix?: string; // e.g., 'Dataset', 'Metric', 'Evaluation' -} - -export function validateEvaluationParams( - params: EvaluationValidationParams, - options: EvaluationValidationOptions = {} -): ValidationResult { - const { name, description } = params; - const { namePrefix = 'Name' } = options; - - if (name !== undefined) { - if (!name || !name.trim()) { - return { - success: false, - message: `${namePrefix} name is required` - }; - } - - if (name.length > 100) { - return { - success: false, - message: `${namePrefix} name cannot exceed 100 characters` - }; - } - } - - if (description !== undefined && description && description.length > 100) { - return { - success: false, - message: 'Description cannot exceed 100 characters' - }; - } - - return { success: true }; -} diff --git a/packages/service/core/evaluation/common.ts b/packages/service/core/evaluation/common.ts index 6e68301ec387..7e6c5387c755 100644 --- a/packages/service/core/evaluation/common.ts +++ b/packages/service/core/evaluation/common.ts @@ -21,8 +21,8 @@ import { MongoEvalDatasetData } from './dataset/evalDatasetDataSchema'; import { MongoResourcePermission } from '../../support/permission/schema'; import { getGroupsByTmbId } from '../../support/permission/memberGroup/controllers'; import { getOrgIdSetWithParentByTmbId } from '../../support/permission/org/controllers'; +import { EvaluationErrEnum } from '@fastgpt/global/common/error/code/evaluation'; -// Generic validation functions removed - replaced with resource-specific functions below export const buildListQuery = ( teamId: string, searchKey?: string, @@ -38,27 +38,13 @@ export const buildListQuery = ( return filter; }; -// Generic list validation removed - replaced with resource-specific functions below + export const buildPaginationOptions = (page: number = 1, pageSize: number = 20) => ({ skip: (page - 1) * pageSize, limit: pageSize, sort: { createTime: -1 as const } }); -export const checkUpdateResult = (result: any, resourceName: string = 'Resource') => { - if (result.matchedCount === 0) { - throw new Error(`${resourceName} not found`); - } -}; -export const checkDeleteResult = (result: any, resourceName: string = 'Resource') => { - if (result.deletedCount === 0) { - throw new Error(`${resourceName} not found`); - } -}; - -/** - * 获取用户的评估权限聚合信息(用于列表查询) - */ export const getEvaluationPermissionAggregation = async ( auth: AuthModeType ): Promise<{ @@ -69,7 +55,7 @@ export const getEvaluationPermissionAggregation = async ( myGroupMap: Map; myOrgSet: Set; }> => { - // Auth user permission - 支持API Key和Token认证 + // Auth user permission - supports API Key and Token authentication const { tmbId, teamId, @@ -114,12 +100,8 @@ export const getEvaluationPermissionAggregation = async ( }; }; -// ================ 评估模块专用权限验证函数 ================ +// ================ Evaluation module authorization functions ================ -/** - * 验证评估任务创建权限 - * 包含: 团队创建权限 + target关联APP读权限 - */ export const authEvaluationTaskCreate = async ( target: EvalTarget, auth: AuthModeType @@ -134,12 +116,12 @@ export const authEvaluationTaskCreate = async ( if (target.type == 'workflow') { if (!target.config?.appId) { - return Promise.reject('Invalid target configuration: missing appId'); + return Promise.reject(EvaluationErrEnum.evalTargetAppIdMissing); } await authApp({ ...auth, appId: target.config.appId, - per: ReadPermissionVal // APP需要读权限才能被评估调用 + per: ReadPermissionVal }); } @@ -149,9 +131,6 @@ export const authEvaluationTaskCreate = async ( }; }; -/** - * 验证评估任务读取权限 - */ export const authEvaluationTaskRead = async ( evaluationId: string, auth: AuthModeType @@ -169,9 +148,6 @@ export const authEvaluationTaskRead = async ( return { evaluation, teamId, tmbId }; }; -/** - * 验证评估任务写入权限 - */ export const authEvaluationTaskWrite = async ( evaluationId: string, auth: AuthModeType @@ -189,10 +165,6 @@ export const authEvaluationTaskWrite = async ( return { evaluation, teamId, tmbId }; }; -/** - * 验证评估任务执行权限 - * 包含: 评估写权限 + target关联APP读权限 - */ export const authEvaluationTaskExecution = async ( evaluationId: string, auth: AuthModeType @@ -201,22 +173,20 @@ export const authEvaluationTaskExecution = async ( teamId: string; tmbId: string; }> => { - // 验证评估任务的写权限并获取详情 const { evaluation, teamId, tmbId } = await authEvaluation({ ...auth, evaluationId, per: WritePermissionVal }); - // 验证target关联APP的读权限 if (evaluation.target.type == 'workflow') { if (!evaluation.target.config?.appId) { - return Promise.reject('Invalid target configuration: missing appId'); + return Promise.reject(EvaluationErrEnum.evalTargetAppIdMissing); } await authApp({ ...auth, appId: evaluation.target.config.appId, - per: ReadPermissionVal // APP需要读权限才能被评估调用 + per: ReadPermissionVal }); } @@ -227,11 +197,8 @@ export const authEvaluationTaskExecution = async ( }; }; -// ================ 评估项目(EvaluationItem)专用权限验证函数 ================ +// ================ Evaluation item authorization functions ================ -/** - * 验证评估项目读取权限 - */ export const authEvaluationItemRead = async ( evalItemId: string, auth: AuthModeType @@ -241,13 +208,11 @@ export const authEvaluationItemRead = async ( teamId: string; tmbId: string; }> => { - // 根据evalItemId获取完整的evalItem信息 const evaluationItem = await MongoEvalItem.findById(evalItemId).lean(); if (!evaluationItem) { - throw new Error('Evaluation item not found'); + throw new Error(EvaluationErrEnum.evalItemNotFound); } - // 验证评估任务的读权限并获取evaluation const { teamId, tmbId, evaluation } = await authEvaluationTaskRead(evaluationItem.evalId, auth); return { @@ -258,9 +223,6 @@ export const authEvaluationItemRead = async ( }; }; -/** - * 验证评估项目写入权限 - */ export const authEvaluationItemWrite = async ( evalItemId: string, auth: AuthModeType @@ -270,13 +232,11 @@ export const authEvaluationItemWrite = async ( teamId: string; tmbId: string; }> => { - // 根据evalItemId获取完整的evalItem信息 const evaluationItem = await MongoEvalItem.findById(evalItemId).lean(); if (!evaluationItem) { - throw new Error('Evaluation item not found'); + throw new Error(EvaluationErrEnum.evalItemNotFound); } - // 验证评估任务的写权限并获取evaluation const { teamId, tmbId, evaluation } = await authEvaluationTaskWrite(evaluationItem.evalId, auth); return { @@ -287,9 +247,6 @@ export const authEvaluationItemWrite = async ( }; }; -/** - * 验证评估项目重试权限 - */ export const authEvaluationItemRetry = async ( evalItemId: string, auth: AuthModeType @@ -299,22 +256,17 @@ export const authEvaluationItemRetry = async ( teamId: string; tmbId: string; }> => { - // 重试权限等同于写入权限 return await authEvaluationItemWrite(evalItemId, auth); }; -// ================ 评估数据集(EvaluationDataset)专用权限验证函数 ================ +// ================ Evaluation dataset authorization functions ================ -/** - * 验证评估数据集创建权限 - */ export const authEvaluationDatasetCreate = async ( auth: AuthModeType ): Promise<{ teamId: string; tmbId: string; }> => { - // 评估数据集创建需要团队评估创建权限 const { teamId, tmbId } = await authUserPer({ ...auth, per: TeamEvaluationCreatePermissionVal @@ -322,10 +274,6 @@ export const authEvaluationDatasetCreate = async ( return { teamId, tmbId }; }; - -/** - * 验证评估数据集读取权限 - */ export const authEvaluationDatasetRead = async ( datasetId: string, auth: AuthModeType @@ -343,9 +291,6 @@ export const authEvaluationDatasetRead = async ( return { teamId, tmbId, datasetId }; }; -/** - * 验证评估数据集写入权限 - */ export const authEvaluationDatasetWrite = async ( datasetId: string, auth: AuthModeType @@ -363,9 +308,6 @@ export const authEvaluationDatasetWrite = async ( return { teamId, tmbId, datasetId }; }; -/** - * 验证从知识库生成评估数据集的权限 - */ export const authEvaluationDatasetGenFromKnowledgeBase = async ( datasetId: string, kbCollectionIds: string[], @@ -376,7 +318,6 @@ export const authEvaluationDatasetGenFromKnowledgeBase = async ( }> => { const { teamId, tmbId } = await authEvaluationDatasetRead(datasetId, auth); - // 验证知识库的读权限 await Promise.all( kbCollectionIds.map((collectionId) => authDatasetCollection({ @@ -393,18 +334,14 @@ export const authEvaluationDatasetGenFromKnowledgeBase = async ( }; }; -// ================ 评估指标(EvaluationMetric)专用权限验证函数 ================ +// ================ Evaluation metric authorization functions ================ -/** - * 验证评估指标创建权限 - */ export const authEvaluationMetricCreate = async ( auth: AuthModeType ): Promise<{ teamId: string; tmbId: string; }> => { - // 评估指标创建需要团队评估创建权限 const { teamId, tmbId } = await authUserPer({ ...auth, per: TeamEvaluationCreatePermissionVal @@ -413,9 +350,6 @@ export const authEvaluationMetricCreate = async ( return { teamId, tmbId }; }; -/** - * 验证评估指标读取权限 - */ export const authEvaluationMetricRead = async ( metricId: string, auth: AuthModeType @@ -432,10 +366,6 @@ export const authEvaluationMetricRead = async ( return { teamId, tmbId, metricId }; }; - -/** - * 验证评估指标写入权限 - */ export const authEvaluationMetricWrite = async ( metricId: string, auth: AuthModeType @@ -453,11 +383,7 @@ export const authEvaluationMetricWrite = async ( return { teamId, tmbId, metricId }; }; -// ================ 评估数据集数据(EvaluationDatasetData)专用权限验证函数 ================ - -/** - * 验证评估数据集数据读取权限 - */ +// ================ Evaluation dataset data authorization functions ================ export const authEvaluationDatasetDataRead = async ( collectionId: string, auth: AuthModeType @@ -466,15 +392,11 @@ export const authEvaluationDatasetDataRead = async ( tmbId: string; collectionId: string; }> => { - // 数据读取需要数据集的读权限 const { teamId, tmbId } = await authEvaluationDatasetRead(collectionId, auth); return { teamId, tmbId, collectionId }; }; -/** - * 验证评估数据集数据写入权限 - */ export const authEvaluationDatasetDataWrite = async ( collectionId: string, auth: AuthModeType @@ -483,15 +405,11 @@ export const authEvaluationDatasetDataWrite = async ( tmbId: string; collectionId: string; }> => { - // 数据写入需要数据集的写权限 const { teamId, tmbId } = await authEvaluationDatasetWrite(collectionId, auth); return { teamId, tmbId, collectionId }; }; -/** - * 验证评估数据集数据创建权限 - */ export const authEvaluationDatasetDataCreate = async ( collectionId: string, auth: AuthModeType @@ -500,13 +418,9 @@ export const authEvaluationDatasetDataCreate = async ( tmbId: string; collectionId: string; }> => { - // 数据创建需要数据集的写权限 return await authEvaluationDatasetDataWrite(collectionId, auth); }; -/** - * 验证评估数据集数据删除权限 - */ export const authEvaluationDatasetDataDelete = async ( collectionId: string, auth: AuthModeType @@ -515,13 +429,9 @@ export const authEvaluationDatasetDataDelete = async ( tmbId: string; collectionId: string; }> => { - // 数据删除需要数据集的写权限 return await authEvaluationDatasetDataWrite(collectionId, auth); }; -/** - * 验证评估数据集数据更新权限 - */ export const authEvaluationDatasetDataUpdate = async ( collectionId: string, auth: AuthModeType @@ -530,13 +440,9 @@ export const authEvaluationDatasetDataUpdate = async ( tmbId: string; collectionId: string; }> => { - // 数据更新需要数据集的写权限 return await authEvaluationDatasetDataWrite(collectionId, auth); }; -/** - * 通过数据项ID验证评估数据集数据更新权限 - */ export const authEvaluationDatasetDataUpdateById = async ( dataId: string, auth: AuthModeType @@ -545,13 +451,11 @@ export const authEvaluationDatasetDataUpdateById = async ( tmbId: string; collectionId: string; }> => { - // 根据dataId获取collectionId const dataItem = await MongoEvalDatasetData.findById(dataId).select('datasetId').lean(); if (!dataItem) { - throw new Error('Dataset data not found'); + throw new Error(EvaluationErrEnum.evalDatasetDataNotFound); } - // 使用collectionId进行权限验证 const collectionId = String(dataItem.datasetId); return await authEvaluationDatasetDataUpdate(collectionId, auth); }; diff --git a/packages/service/core/evaluation/dataset/dataQualityMq.ts b/packages/service/core/evaluation/dataset/dataQualityMq.ts index 9771af64d84d..4f3420f8a0df 100644 --- a/packages/service/core/evaluation/dataset/dataQualityMq.ts +++ b/packages/service/core/evaluation/dataset/dataQualityMq.ts @@ -1,7 +1,11 @@ import { getQueue, getWorker, QueueNames } from '../../../common/bullmq'; import { type Processor } from 'bullmq'; import { addLog } from '../../../common/system/log'; -import { createJobCleaner, type JobCleanupResult, type JobCleanupOptions } from './jobCleanup'; +import { + createJobCleaner, + type JobCleanupResult, + type JobCleanupOptions +} from '../utils/jobCleanup'; export type EvalDatasetDataQualityData = { dataId: string; diff --git a/packages/service/core/evaluation/dataset/dataSynthesizeMq.ts b/packages/service/core/evaluation/dataset/dataSynthesizeMq.ts index 0342f4cb415e..7c4316f57b36 100644 --- a/packages/service/core/evaluation/dataset/dataSynthesizeMq.ts +++ b/packages/service/core/evaluation/dataset/dataSynthesizeMq.ts @@ -1,7 +1,11 @@ import { getQueue, getWorker, QueueNames } from '../../../common/bullmq'; import { type Processor } from 'bullmq'; import { addLog } from '../../../common/system/log'; -import { createJobCleaner, type JobCleanupResult, type JobCleanupOptions } from './jobCleanup'; +import { + createJobCleaner, + type JobCleanupResult, + type JobCleanupOptions +} from '../utils/jobCleanup'; export type EvalDatasetDataSynthesizeData = { dataId: string; diff --git a/packages/service/core/evaluation/dataset/smartGenerateMq.ts b/packages/service/core/evaluation/dataset/smartGenerateMq.ts index 19f7fada2fd8..9eacfc789f86 100644 --- a/packages/service/core/evaluation/dataset/smartGenerateMq.ts +++ b/packages/service/core/evaluation/dataset/smartGenerateMq.ts @@ -1,7 +1,11 @@ import { getQueue, getWorker, QueueNames } from '../../../common/bullmq'; import { type Processor } from 'bullmq'; import { addLog } from '../../../common/system/log'; -import { createJobCleaner, type JobCleanupResult, type JobCleanupOptions } from './jobCleanup'; +import { + createJobCleaner, + type JobCleanupResult, + type JobCleanupOptions +} from '../utils/jobCleanup'; export type EvalDatasetSmartGenerateData = { datasetCollectionIds: string[]; diff --git a/packages/service/core/evaluation/task/index.ts b/packages/service/core/evaluation/task/index.ts index b06a5721b0b0..9a8d61cb4f22 100644 --- a/packages/service/core/evaluation/task/index.ts +++ b/packages/service/core/evaluation/task/index.ts @@ -5,19 +5,20 @@ import type { CreateEvaluationParams, EvaluationItemDisplayType } from '@fastgpt/global/core/evaluation/type'; -import { checkUpdateResult, checkDeleteResult } from '../common'; import { Types } from 'mongoose'; import { EvaluationStatusEnum } from '@fastgpt/global/core/evaluation/constants'; import { evaluationTaskQueue, evaluationItemQueue, removeEvaluationTaskJob, - removeEvaluationItemJobs + removeEvaluationItemJobs, + removeEvaluationItemJobsByItemId } from './mq'; import { createTrainingUsage } from '../../../support/wallet/usage/controller'; import { UsageSourceEnum } from '@fastgpt/global/support/wallet/usage/constants'; import { addLog } from '../../../common/system/log'; import { checkTeamAIPoints } from '../../../support/permission/teamLimit'; +import { EvaluationErrEnum } from '@fastgpt/global/common/error/code/evaluation'; export class EvaluationTaskService { static async createEvaluation( @@ -57,7 +58,7 @@ export class EvaluationTaskService { teamId: new Types.ObjectId(teamId) }).lean(); if (!evaluation) { - throw new Error('Evaluation not found'); + throw new Error(EvaluationErrEnum.evalTaskNotFound); } return evaluation; } @@ -71,13 +72,31 @@ export class EvaluationTaskService { { _id: new Types.ObjectId(evalId), teamId: new Types.ObjectId(teamId) }, { $set: updates } ); - - checkUpdateResult(result, 'Evaluation'); + if (result.matchedCount === 0) { + throw new Error(EvaluationErrEnum.evalTaskNotFound); + } } static async deleteEvaluation(evalId: string, teamId: string): Promise { // Remove related tasks from queue to prevent further processing - await Promise.all([removeEvaluationTaskJob(evalId), removeEvaluationItemJobs(evalId)]); + const [taskCleanupResult, itemCleanupResult] = await Promise.all([ + removeEvaluationTaskJob(evalId, { + forceCleanActiveJobs: true, + retryAttempts: 3, + retryDelay: 200 + }), + removeEvaluationItemJobs(evalId, { + forceCleanActiveJobs: true, + retryAttempts: 3, + retryDelay: 200 + }) + ]); + + addLog.debug('Queue cleanup completed for evaluation deletion', { + evalId, + taskCleanup: taskCleanupResult, + itemCleanup: itemCleanupResult + }); // Delete all evaluation items for this evaluation task await MongoEvalItem.deleteMany({ evalId: evalId }); @@ -87,7 +106,9 @@ export class EvaluationTaskService { teamId: new Types.ObjectId(teamId) }); - checkDeleteResult(result, 'Evaluation'); + if (result.deletedCount === 0) { + throw new Error(EvaluationErrEnum.evalTaskNotFound); + } addLog.debug(`[Evaluation] Evaluation task deleted including queue cleanup: ${evalId}`); } @@ -267,7 +288,7 @@ export class EvaluationTaskService { const evaluation = await this.getEvaluation(evalId, teamId); if (evaluation.status !== EvaluationStatusEnum.queuing) { - throw new Error('Only queuing evaluations can be started'); + throw new Error(EvaluationErrEnum.evalOnlyQueuingCanStart); } // Update status to processing @@ -290,11 +311,28 @@ export class EvaluationTaskService { if ( ![EvaluationStatusEnum.evaluating, EvaluationStatusEnum.queuing].includes(evaluation.status) ) { - throw new Error('Only running or queuing evaluations can be stopped'); + throw new Error(EvaluationErrEnum.evalOnlyRunningCanStop); } // Remove related tasks from queue - await Promise.all([removeEvaluationTaskJob(evalId), removeEvaluationItemJobs(evalId)]); + const [taskCleanupResult, itemCleanupResult] = await Promise.all([ + removeEvaluationTaskJob(evalId, { + forceCleanActiveJobs: true, + retryAttempts: 3, + retryDelay: 200 + }), + removeEvaluationItemJobs(evalId, { + forceCleanActiveJobs: true, + retryAttempts: 3, + retryDelay: 200 + }) + ]); + + addLog.debug('Queue cleanup completed for evaluation stop', { + evalId, + taskCleanup: taskCleanupResult, + itemCleanup: itemCleanupResult + }); // Update status to error (manually stopped) await MongoEvaluation.updateOne( @@ -395,7 +433,7 @@ export class EvaluationTaskService { const item = await MongoEvalItem.findById(itemId).lean(); if (!item) { - throw new Error('Evaluation item not found'); + throw new Error(EvaluationErrEnum.evalItemNotFound); } await this.getEvaluation(item.evalId, teamId); @@ -412,15 +450,33 @@ export class EvaluationTaskService { const result = await MongoEvalItem.updateOne({ _id: itemId }, { $set: updates }); - checkUpdateResult(result, 'Evaluation item'); + if (result.matchedCount === 0) { + throw new Error(EvaluationErrEnum.evalItemNotFound); + } } static async deleteEvaluationItem(itemId: string, teamId: string): Promise { await this.getEvaluationItem(itemId, teamId); + // Remove related jobs from queue before deleting the item + const cleanupResult = await removeEvaluationItemJobsByItemId(itemId, { + forceCleanActiveJobs: true, + retryAttempts: 3, + retryDelay: 200 + }); + + addLog.debug('Queue cleanup completed for evaluation item deletion', { + itemId, + cleanup: cleanupResult + }); + const result = await MongoEvalItem.deleteOne({ _id: itemId }); - checkDeleteResult(result, 'Evaluation item'); + if (result.deletedCount === 0) { + throw new Error(EvaluationErrEnum.evalItemNotFound); + } + + addLog.debug(`[Evaluation] Evaluation item deleted including queue cleanup: ${itemId}`); } static async retryEvaluationItem(itemId: string, teamId: string): Promise { @@ -428,14 +484,26 @@ export class EvaluationTaskService { // Only completed evaluation items without errors cannot be retried if (item.status === EvaluationStatusEnum.completed && !item.errorMessage) { - throw new Error('Only failed evaluation items can be retried'); + throw new Error(EvaluationErrEnum.evalOnlyFailedCanRetry); } // Check if there is error message or in retryable status if (!item.errorMessage && item.status !== EvaluationStatusEnum.queuing) { - throw new Error('Evaluation item has no error to retry'); + throw new Error(EvaluationErrEnum.evalItemNoErrorToRetry); } + // Remove existing jobs for this item to prevent duplicates + const cleanupResult = await removeEvaluationItemJobsByItemId(itemId, { + forceCleanActiveJobs: true, + retryAttempts: 3, + retryDelay: 200 + }); + + addLog.debug('Queue cleanup completed for evaluation item retry', { + itemId, + cleanup: cleanupResult + }); + // Update status await MongoEvalItem.updateOne( { _id: itemId }, @@ -481,6 +549,26 @@ export class EvaluationTaskService { return 0; } + // Clean up existing jobs for all items that will be retried to prevent duplicates + const itemIds = itemsToRetry.map((item) => item._id.toString()); + const cleanupPromises = itemIds.map((itemId) => + removeEvaluationItemJobsByItemId(itemId, { + forceCleanActiveJobs: true, + retryAttempts: 3, + retryDelay: 200 + }) + ); + + const cleanupResults = await Promise.allSettled(cleanupPromises); + const successfulCleanups = cleanupResults.filter((r) => r.status === 'fulfilled').length; + + addLog.debug('Queue cleanup completed for batch retry failed items', { + evalId, + totalItems: itemsToRetry.length, + successfulCleanups, + failedCleanups: cleanupResults.length - successfulCleanups + }); + // Batch update status await MongoEvalItem.updateMany( { diff --git a/packages/service/core/evaluation/task/mq.ts b/packages/service/core/evaluation/task/mq.ts index 289f8b7d87cf..1d20bccaecd5 100644 --- a/packages/service/core/evaluation/task/mq.ts +++ b/packages/service/core/evaluation/task/mq.ts @@ -1,8 +1,14 @@ +import { addLog } from '../../../common/system/log'; import { getQueue, getWorker, QueueNames } from '../../../common/bullmq'; import type { EvaluationTaskJobData, EvaluationItemJobData } from '@fastgpt/global/core/evaluation/type'; +import { + createJobCleaner, + type JobCleanupResult, + type JobCleanupOptions +} from '../utils/jobCleanup'; export const evaluationTaskQueue = getQueue(QueueNames.evalTask, { defaultJobOptions: { @@ -30,26 +36,76 @@ export const getEvaluationItemWorker = (processor: any) => concurrency: Number(process.env.EVAL_ITEM_CONCURRENCY) || 10 }); -export const removeEvaluationTaskJob = async (evalId: string) => { - try { - const jobs = await evaluationTaskQueue.getJobs(['prioritized', 'waiting', 'delayed']); - const targetJobs = jobs.filter((job) => job.data.evalId === evalId); +export const removeEvaluationTaskJob = async ( + evalId: string, + options?: JobCleanupOptions +): Promise => { + const cleaner = createJobCleaner(options); - await Promise.all(targetJobs.map((job) => job.remove())); - } catch (error) { - console.error('Failed to remove evaluation task jobs:', error); - } + const filterFn = (job: any) => { + return String(job.data?.evalId) === String(evalId); + }; + + const result = await cleaner.cleanAllJobsByFilter( + evaluationTaskQueue, + filterFn, + QueueNames.evalTask + ); + + addLog.debug('Evaluation task jobs cleanup completed', { + evalId, + result + }); + + return result; }; -export const removeEvaluationItemJobs = async (evalId: string) => { - try { - const jobs = await evaluationItemQueue.getJobs(['prioritized', 'waiting', 'delayed']); - const targetJobs = jobs.filter((job) => job.data.evalId === evalId); +export const removeEvaluationItemJobs = async ( + evalId: string, + options?: JobCleanupOptions +): Promise => { + const cleaner = createJobCleaner(options); - await Promise.all(targetJobs.map((job) => job.remove())); - } catch (error) { - console.error('Failed to remove evaluation item jobs:', error); - } + const filterFn = (job: any) => { + return String(job.data?.evalId) === String(evalId); + }; + + const result = await cleaner.cleanAllJobsByFilter( + evaluationItemQueue, + filterFn, + QueueNames.evalTaskItem + ); + + addLog.debug('Evaluation item jobs cleanup completed', { + evalId, + result + }); + + return result; +}; + +export const removeEvaluationItemJobsByItemId = async ( + evalItemId: string, + options?: JobCleanupOptions +): Promise => { + const cleaner = createJobCleaner(options); + + const filterFn = (job: any) => { + return String(job.data?.evalItemId) === String(evalItemId); + }; + + const result = await cleaner.cleanAllJobsByFilter( + evaluationItemQueue, + filterFn, + QueueNames.evalTaskItem + ); + + addLog.debug('Evaluation item jobs cleanup completed for specific item', { + evalItemId, + result + }); + + return result; }; export const getEvaluationQueueStats = async () => { diff --git a/packages/service/core/evaluation/task/processor.ts b/packages/service/core/evaluation/task/processor.ts index f3665a6ccae0..a79eae355b7e 100644 --- a/packages/service/core/evaluation/task/processor.ts +++ b/packages/service/core/evaluation/task/processor.ts @@ -13,12 +13,44 @@ import { Types } from 'mongoose'; import { EvaluationStatusEnum } from '@fastgpt/global/core/evaluation/constants'; import { checkTeamAIPoints } from '../../../support/permission/teamLimit'; import { TeamErrEnum } from '@fastgpt/global/common/error/code/team'; +import { EvaluationErrEnum } from '@fastgpt/global/common/error/code/evaluation'; import { concatUsage } from '../../../support/wallet/usage/controller'; import { getErrText } from '@fastgpt/global/common/error/utils'; // Sleep utility function const sleep = (ms: number) => new Promise((resolve) => setTimeout(resolve, ms)); +// Evaluation stage error types +export enum EvaluationStageEnum { + TaskExecute = 'TaskExecute', + EvaluatorExecute = 'EvaluatorExecute', + ResourceCheck = 'ResourceCheck' +} + +// Structured error class for evaluation stages +export class EvaluationStageError extends Error { + public readonly stage: EvaluationStageEnum; + public readonly originalError: any; + public readonly retriable: boolean; + + constructor( + stage: EvaluationStageEnum, + errorMsg: string, + retriable: boolean, + originalError?: any + ) { + super(errorMsg); + this.name = 'EvaluationStageError'; + this.stage = stage; + this.originalError = originalError; + this.retriable = retriable; + } + + toString(): string { + return `[${this.stage}] ${this.message}`; + } +} + // Distributed lock implementation const distributedLocks = new Map(); @@ -42,7 +74,7 @@ const acquireDistributedLock = async ( } if (distributedLocks.has(lockKey)) { - throw new Error(`Failed to acquire lock: ${lockKey}`); + throw new Error(EvaluationErrEnum.evalLockAcquisitionFailed); } // Acquire lock @@ -55,42 +87,104 @@ const acquireDistributedLock = async ( }; }; -// Check if error is retriable -const isRetriableError = (error: any): boolean => { - const retriableErrors = [ +// Enhanced retriable error patterns with categories +const RETRIABLE_ERROR_PATTERNS = { + // Network connectivity issues + network: [ 'NETWORK_ERROR', - 'TIMEOUT', - 'RATE_LIMIT', 'ECONNRESET', 'ENOTFOUND', - 'ECONNREFUSED' - ]; + 'ECONNREFUSED', + 'Connection refused', + 'socket hang up', + 'connect timeout', + 'EHOSTUNREACH', + 'ENETUNREACH' + ], + // Timeout related errors + timeout: ['TIMEOUT', 'timeout', 'ETIMEDOUT', 'Request timeout', 'Connection timeout'], + // Rate limiting and temporary service issues + rateLimit: [ + 'RATE_LIMIT', + 'rate limit', + 'too many requests', + '429', + 'quota exceeded', + 'throttled' + ], + // Temporary server errors + serverError: [ + '502', + '503', + '504', + 'bad gateway', + 'service unavailable', + 'gateway timeout', + 'temporary failure', + 'server overloaded' + ] +}; + +const maxRetries = Number(process.env.EVAL_ITEM_MAX_RETRY) || 3; // Default max retry count +// Enhanced error analysis with category detection +const analyzeError = ( + error: any +): { isRetriable: boolean; category?: string; pattern?: string } => { const errorStr = error?.message || error?.code || String(error); - return retriableErrors.some( - (errType) => errorStr.includes(errType) || error === TeamErrEnum.aiPointsNotEnough - ); -}; + const lowerErrorStr = errorStr.toLowerCase(); -// Handle AI Points insufficient error -const handleAiPointsError = async (evalId: string, error: any) => { - if (error === TeamErrEnum.aiPointsNotEnough) { - await MongoEvaluation.updateOne( - { _id: new Types.ObjectId(evalId) }, - { - $set: { - errorMessage: 'AI Points balance insufficient, evaluation paused', - status: EvaluationStatusEnum.error - } + // Check each category for matches + for (const [category, patterns] of Object.entries(RETRIABLE_ERROR_PATTERNS)) { + for (const pattern of patterns) { + if (lowerErrorStr.includes(pattern.toLowerCase())) { + return { isRetriable: true, category, pattern }; } - ); + } + } - // TODO: Send notification to team - addLog.warn(`[Evaluation] AI Points insufficient, evaluation task paused: ${evalId}`); - return; + // Check HTTP status codes directly + const httpStatusMatch = errorStr.match(/\b(4\d{2}|5\d{2})\b/); + if (httpStatusMatch) { + const statusCode = httpStatusMatch[1]; + // 4xx errors are generally not retriable except 429 + if (statusCode === '429') { + return { isRetriable: true, category: 'rateLimit', pattern: statusCode }; + } + // 5xx errors are generally retriable + if (statusCode.startsWith('5')) { + return { isRetriable: true, category: 'serverError', pattern: statusCode }; + } } - throw error; + return { isRetriable: false }; +}; + +// Backward compatibility function +const matchesRetriablePattern = (error: any): boolean => { + return analyzeError(error).isRetriable; +}; + +// Determine if target execution error should be retriable +const isTargetExecutionRetriable = (error: any): boolean => { + if (error === TeamErrEnum.aiPointsNotEnough) return false; + return matchesRetriablePattern(error); +}; + +// Determine if evaluator execution error should be retriable +const isEvaluatorExecutionRetriable = (error: any): boolean => { + if (error === TeamErrEnum.aiPointsNotEnough) return false; + return matchesRetriablePattern(error); +}; + +// General error retriability check for handleEvalItemError +const isRetriableError = (error: any): boolean => { + // If it's a structured stage error, use its retriable flag + if (error instanceof EvaluationStageError) { + return error.retriable; + } + + return matchesRetriablePattern(error); }; // Complete evaluation task - simplified version based on status enum statistics @@ -209,9 +303,16 @@ const finishEvaluationTask = async (evalId: string) => { // Handle evaluation item error const handleEvalItemError = async (evalItemId: string, evalId: string, error: any) => { - const errorMessage = getErrText(error); + let errorMessage = getErrText(error); + let stage = 'Unknown'; + + // Extract stage and error information from structured errors + if (error instanceof EvaluationStageError) { + stage = error.stage; + errorMessage = `[${stage}] ${error.message}`; + } - // Get current retry count + // Get current evaluation item const evalItem = await MongoEvalItem.findById(evalItemId, 'retry evalId'); if (!evalItem) { addLog.error(`[Evaluation] Evaluation item does not exist: ${evalItemId}`); @@ -219,33 +320,28 @@ const handleEvalItemError = async (evalItemId: string, evalId: string, error: an } const isRetriable = isRetriableError(error); - const currentRetryCount = evalItem.retry || 3; // Default to 3 if not set + const currentRetryCount = evalItem.retry || 0; const newRetryCount = isRetriable ? Math.max(currentRetryCount - 1, 0) : 0; const shouldRetry = isRetriable && newRetryCount > 0; const newStatus = shouldRetry ? EvaluationStatusEnum.queuing : EvaluationStatusEnum.error; - await MongoEvalItem.updateOne( - { _id: new Types.ObjectId(evalItemId) }, - { - $set: { - retry: newRetryCount, - errorMessage, - status: newStatus, - finishTime: newStatus === EvaluationStatusEnum.error ? new Date() : undefined, - // Clear partial results to allow clean retry - ...(shouldRetry && { - targetOutput: null, - evaluatorOutput: null - }) - } - } - ); + // Build retry attempt info for logging + const retryAttempt = maxRetries - currentRetryCount + 1; + + const updateData: any = { + retry: newRetryCount, + errorMessage, + status: newStatus, + finishTime: newStatus === EvaluationStatusEnum.error ? new Date() : undefined + }; + + await MongoEvalItem.updateOne({ _id: new Types.ObjectId(evalItemId) }, updateData); - // Critical fix: Re-enqueue for retry + // Re-enqueue for retry with improved job naming if (shouldRetry) { - const retryDelay = Math.min(1000 * Math.pow(2, 3 - newRetryCount), 30000); // Exponential backoff + const retryDelay = Math.min(1000 * Math.pow(2, maxRetries - newRetryCount), 30000); // Exponential backoff await evaluationItemQueue.add( - `eval_item_retry_${evalItemId}_${Date.now()}`, + `eval_item_${evalItemId}_retry_${retryAttempt}`, { evalId, evalItemId @@ -256,12 +352,12 @@ const handleEvalItemError = async (evalItemId: string, evalId: string, error: an ); addLog.debug( - `[Evaluation] Item requeued for retry: ${evalItemId}, remaining: ${newRetryCount}, delay: ${retryDelay}ms` + `[Evaluation] Item requeued for retry: ${evalItemId}, stage: ${stage}, remaining: ${newRetryCount}, delay: ${retryDelay}ms` ); } else { addLog.error( - `[Evaluation] Item failed permanently: ${evalItemId}, retriable: ${isRetriable}`, - error + `[Evaluation] Item failed permanently: ${evalItemId}, stage: ${stage}, retriable: ${isRetriable}`, + error instanceof EvaluationStageError ? error.originalError || error : error ); } }; @@ -316,16 +412,16 @@ const evaluationTaskProcessor = async (job: Job) => { }).lean(); if (dataItems.length === 0) { - throw new Error('Dataset loading failed'); + throw new Error(EvaluationErrEnum.evalDatasetLoadFailed); } // Validate target and evaluators configuration if (!evaluation.target || !evaluation.target.type || !evaluation.target.config) { - throw new Error('Target configuration invalid'); + throw new Error(EvaluationErrEnum.evalTargetConfigInvalid); } if (!evaluation.evaluators || evaluation.evaluators.length === 0) { - throw new Error('Evaluators configuration invalid'); + throw new Error(EvaluationErrEnum.evalEvaluatorsConfigInvalid); } // Check if evaluation items already exist (reentrant handling) @@ -368,7 +464,7 @@ const evaluationTaskProcessor = async (job: Job) => { target: evaluation.target, evaluator, status: EvaluationStatusEnum.queuing, - retry: 3 + retry: maxRetries }); } } @@ -421,7 +517,11 @@ const evaluationItemProcessor = async (job: Job) => { // Get evaluation item information const evalItem = await MongoEvalItem.findById(evalItemId); if (!evalItem) { - throw new Error('Evaluation item does not exist'); + throw new EvaluationStageError( + EvaluationStageEnum.ResourceCheck, + getErrText(EvaluationErrEnum.evalItemNotFound), + false // Resource not found errors are not retriable + ); } // Check if item is already completed (reentrant handling) @@ -433,29 +533,43 @@ const evaluationItemProcessor = async (job: Job) => { // Get evaluation information for AI Points check const evaluation = await MongoEvaluation.findById(evalId, 'teamId tmbId usageId'); if (!evaluation) { - throw new Error('Evaluation task does not exist'); + throw new EvaluationStageError( + EvaluationStageEnum.ResourceCheck, + getErrText(EvaluationErrEnum.evalTaskNotFound), + false // Resource not found errors are not retriable + ); } // Check AI Points - await checkTeamAIPoints(evaluation.teamId); + try { + await checkTeamAIPoints(evaluation.teamId); + } catch (error) { + throw new EvaluationStageError( + EvaluationStageEnum.ResourceCheck, + getErrText(error), + false // AI Point errors are not retriable + ); + } - let targetOutput = evalItem.targetOutput; - let evaluatorOutput = evalItem.evaluatorOutput; - let shouldExecuteTarget = true; - let shouldExecuteEvaluator = true; + // Initialize outputs - check for existing results first for resume capability + let targetOutput: any = undefined; + let evaluatorOutput: any = undefined; - // Resume from checkpoint if partially completed + // Resume from checkpoint only if in evaluating status if (evalItem.status === EvaluationStatusEnum.evaluating) { - if (evalItem.evaluatorOutput?.data?.score) { - addLog.debug(`[Evaluation] Item already completed evaluator: ${evalItemId}`); - return; // Already completed, nothing to do - } else if (evalItem.targetOutput?.actualOutput) { - addLog.debug(`[Evaluation] Resuming from evaluator: ${evalItemId}`); + if (evalItem.targetOutput?.actualOutput) { + addLog.debug(`[Evaluation] Resuming targetOutput from evalItem: ${evalItemId}`); targetOutput = evalItem.targetOutput; - shouldExecuteTarget = false; - } else { - addLog.debug(`[Evaluation] Restarting item: ${evalItemId}`); } + if (evalItem.evaluatorOutput?.data?.score) { + addLog.debug(`[Evaluation] Resuming evaluatorOutput from evalItem: ${evalItemId}`); + evaluatorOutput = evalItem.evaluatorOutput; + } + } else { + // For queuing or error status, always start from scratch + addLog.debug( + `[Evaluation] Starting/restarting item from scratch: ${evalItemId}, status: ${evalItem.status}` + ); } // Update status to processing @@ -465,59 +579,75 @@ const evaluationItemProcessor = async (job: Job) => { ); // 1. Call evaluation target (if not already done) - if (shouldExecuteTarget) { - const targetInstance = createTargetInstance(evalItem.target); - targetOutput = await targetInstance.execute({ - userInput: evalItem.dataItem.userInput, - context: evalItem.dataItem.context - }); + if (!targetOutput || !targetOutput.actualOutput) { + try { + const targetInstance = createTargetInstance(evalItem.target); + targetOutput = await targetInstance.execute({ + userInput: evalItem.dataItem.userInput, + context: evalItem.dataItem.context + }); - // Save target output as checkpoint - await MongoEvalItem.updateOne( - { _id: new Types.ObjectId(evalItemId) }, - { $set: { targetOutput: targetOutput } } - ); + // Save target output as checkpoint + await MongoEvalItem.updateOne( + { _id: new Types.ObjectId(evalItemId) }, + { $set: { targetOutput: targetOutput } } + ); - // Record usage from target call - if (targetOutput.usage) { - const totalPoints = targetOutput.usage.reduce( - (sum: number, item: any) => sum + (item.totalPoints || 0), - 0 + // Record usage from target call + if (targetOutput.usage) { + const totalPoints = targetOutput.usage.reduce( + (sum: number, item: any) => sum + (item.totalPoints || 0), + 0 + ); + await createMergedEvaluationUsage({ + evalId, + teamId: evaluation.teamId, + tmbId: evaluation.tmbId, + usageId: evaluation.usageId, + totalPoints, + type: 'target' + }); + } + } catch (error) { + // Normalize target execution error + const retriable = isTargetExecutionRetriable(error); + const errorMessage = getErrText(error) || 'Target execution failed'; + + throw new EvaluationStageError( + EvaluationStageEnum.TaskExecute, + errorMessage, + retriable, + error ); - await createMergedEvaluationUsage({ - evalId, - teamId: evaluation.teamId, - tmbId: evaluation.tmbId, - usageId: evaluation.usageId, - totalPoints, - type: 'target' - }); } } - // Ensure output is available for evaluator - if (!targetOutput) { - throw new Error('Target output is required for evaluation'); - } - // 2. Execute evaluator (if not already done) let totalMetricPoints = 0; - if (shouldExecuteEvaluator) { - const evaluatorInstance = createEvaluatorInstance(evalItem.evaluator); + if (!evaluatorOutput || !evaluatorOutput.data?.score) { + try { + const evaluatorInstance = createEvaluatorInstance(evalItem.evaluator); - evaluatorOutput = await evaluatorInstance.evaluate({ - userInput: evalItem.dataItem.userInput, - expectedOutput: evalItem.dataItem.expectedOutput, - actualOutput: targetOutput.actualOutput, - context: evalItem.dataItem.context, - retrievalContext: targetOutput.retrievalContext - }); - } - - // Ensure evaluatorOutput is available - if (!evaluatorOutput) { - throw new Error('Evaluator output is required for completion'); + evaluatorOutput = await evaluatorInstance.evaluate({ + userInput: evalItem.dataItem.userInput, + expectedOutput: evalItem.dataItem.expectedOutput, + actualOutput: targetOutput.actualOutput, + context: evalItem.dataItem.context, + retrievalContext: targetOutput.retrievalContext + }); + } catch (error) { + // Normalize evaluator execution error + const retriable = isEvaluatorExecutionRetriable(error); + const errorMessage = getErrText(error) || 'Evaluator execution failed'; + + throw new EvaluationStageError( + EvaluationStageEnum.EvaluatorExecute, + errorMessage, + retriable, + error + ); + } } // Record usage from metric evaluation @@ -554,12 +684,8 @@ const evaluationItemProcessor = async (job: Job) => { `[Evaluation] Evaluation item completed: ${evalItemId}, score: ${evaluatorOutput?.data?.score}` ); } catch (error) { + addLog.error(`[Evaluation] Evaluation item error: ${evalItemId}, error: ${error}`); await handleEvalItemError(evalItemId, evalId, error); - - // If AI Points insufficient, pause entire task - if (error === TeamErrEnum.aiPointsNotEnough) { - await handleAiPointsError(evalId, error); - } } // After try-catch, check if all evaluation items are completed diff --git a/packages/service/core/evaluation/utils/index.ts b/packages/service/core/evaluation/utils/index.ts new file mode 100644 index 000000000000..c3a32c74bc3c --- /dev/null +++ b/packages/service/core/evaluation/utils/index.ts @@ -0,0 +1,142 @@ +import { validateTargetConfig } from '../target'; +import type { EvalTarget } from '@fastgpt/global/core/evaluation/type'; +import { EvaluationErrEnum } from '@fastgpt/global/common/error/code/evaluation'; + +export interface ValidationResult { + success: boolean; + message?: string; +} + +export interface EvaluationValidationParams { + name?: string; + description?: string; + datasetId?: string; + target?: EvalTarget; + evaluators?: any[]; +} + +export interface EvaluationValidationOptions { + mode?: 'create' | 'update'; // validation mode +} + +export async function validateEvaluationParams( + params: EvaluationValidationParams, + options?: EvaluationValidationOptions +): Promise { + const { name, description, datasetId, target, evaluators } = params; + const mode = options?.mode || 'create'; + const isCreateMode = mode === 'create'; + + // For create mode, check all required fields are present + if (isCreateMode) { + if (!name || !name.trim()) { + return { + success: false, + message: EvaluationErrEnum.evalNameRequired + }; + } + + if (!datasetId) { + return { + success: false, + message: EvaluationErrEnum.evalDatasetIdRequired + }; + } + + if (!target) { + return { + success: false, + message: EvaluationErrEnum.evalTargetRequired + }; + } + + if (!evaluators || !Array.isArray(evaluators) || evaluators.length === 0) { + return { + success: false, + message: EvaluationErrEnum.evalEvaluatorsRequired + }; + } + } + + // For update mode, only validate provided fields + if (name !== undefined) { + if (!name || !name.trim()) { + return { + success: false, + message: EvaluationErrEnum.evalNameRequired + }; + } + + if (name.length > 100) { + return { + success: false, + message: EvaluationErrEnum.evalNameTooLong + }; + } + } + + if (description !== undefined && description && description.length > 100) { + return { + success: false, + message: EvaluationErrEnum.evalDescriptionTooLong + }; + } + + if (datasetId !== undefined && !datasetId) { + return { + success: false, + message: EvaluationErrEnum.evalDatasetIdRequired + }; + } + + if (target !== undefined) { + if (!target) { + return { + success: false, + message: EvaluationErrEnum.evalTargetRequired + }; + } + + // Validate target configuration using validateTargetConfig + const targetValidation = await validateTargetConfig(target); + if (!targetValidation.success) { + return { + success: false, + message: EvaluationErrEnum.evalTargetInvalidConfig + }; + } + } + + if (evaluators !== undefined) { + if (!evaluators || !Array.isArray(evaluators) || evaluators.length === 0) { + return { + success: false, + message: EvaluationErrEnum.evalEvaluatorsRequired + }; + } + + // Validate evaluators configuration + for (const evaluator of evaluators) { + if (!evaluator.metric || !evaluator.metric._id || !evaluator.metric.type) { + return { + success: false, + message: EvaluationErrEnum.evalEvaluatorInvalidConfig + }; + } + } + } + + return { success: true }; +} + +export async function validateEvaluationParamsForCreate( + params: EvaluationValidationParams +): Promise { + return validateEvaluationParams(params, { mode: 'create' }); +} + +export async function validateEvaluationParamsForUpdate( + params: EvaluationValidationParams +): Promise { + return validateEvaluationParams(params, { mode: 'update' }); +} diff --git a/packages/service/core/evaluation/dataset/jobCleanup.ts b/packages/service/core/evaluation/utils/jobCleanup.ts similarity index 100% rename from packages/service/core/evaluation/dataset/jobCleanup.ts rename to packages/service/core/evaluation/utils/jobCleanup.ts diff --git a/packages/service/support/permission/evaluation/auth.ts b/packages/service/support/permission/evaluation/auth.ts index 58ab2178c0e7..2e028bb330c6 100644 --- a/packages/service/support/permission/evaluation/auth.ts +++ b/packages/service/support/permission/evaluation/auth.ts @@ -18,16 +18,7 @@ import { getFileById } from '../../../common/file/gridfs/controller'; import { BucketNameEnum } from '@fastgpt/global/common/file/constants'; import { CommonErrEnum } from '@fastgpt/global/common/error/code/common'; import { Permission } from '@fastgpt/global/support/permission/controller'; - -export const EvaluationAuthErrors = { - evaluationNotFound: 'Evaluation not found', - datasetNotFound: 'Evaluation dataset not found', - metricNotFound: 'Evaluation metric not found', - permissionDenied: 'Permission denied', - evaluationIdRequired: 'Evaluation ID is required', - datasetIdRequired: 'Evaluation dataset ID is required', - metricIdRequired: 'Evaluation metric ID is required' -} as const; +import { EvaluationErrEnum } from '@fastgpt/global/common/error/code/evaluation'; // ================ Authentication and Authorization for eval task ================ export const authEvaluationByTmbId = async ({ @@ -45,7 +36,7 @@ export const authEvaluationByTmbId = async ({ const evaluation = await MongoEvaluation.findOne({ _id: evaluationId }).lean(); if (!evaluation) { - return Promise.reject(EvaluationAuthErrors.evaluationNotFound); + return Promise.reject(EvaluationErrEnum.evalTaskNotFound); } // Root用户权限特殊处理 @@ -60,7 +51,7 @@ export const authEvaluationByTmbId = async ({ // 团队权限验证 if (String(evaluation.teamId) !== teamId) { - return Promise.reject(EvaluationAuthErrors.evaluationNotFound); + return Promise.reject(EvaluationErrEnum.evalTaskNotFound); } // 所有者检查 @@ -85,7 +76,7 @@ export const authEvaluationByTmbId = async ({ // 权限验证 if (!Per.checkPer(per)) { - return Promise.reject(EvaluationAuthErrors.permissionDenied); + return Promise.reject(EvaluationErrEnum.evalInsufficientPermission); } return { @@ -115,7 +106,7 @@ export const authEvaluation = async ({ const { tmbId } = result; if (!evaluationId) { - return Promise.reject(EvaluationAuthErrors.evaluationIdRequired); + return Promise.reject(EvaluationErrEnum.evalTaskNotFound); } const { evaluation } = await authEvaluationByTmbId({ @@ -151,7 +142,7 @@ export const authEvalDatasetByTmbId = async ({ const dataset = await MongoEvalDatasetCollection.findOne({ _id: datasetId }).lean(); if (!dataset) { - return Promise.reject(EvaluationAuthErrors.datasetNotFound); + return Promise.reject(EvaluationErrEnum.evalDatasetCollectionNotFound); } // Root用户权限特殊处理 @@ -166,7 +157,7 @@ export const authEvalDatasetByTmbId = async ({ // 团队权限验证 if (String(dataset.teamId) !== teamId) { - return Promise.reject(EvaluationAuthErrors.datasetNotFound); + return Promise.reject(EvaluationErrEnum.evalDatasetCollectionNotFound); } // 所有者检查 @@ -191,7 +182,7 @@ export const authEvalDatasetByTmbId = async ({ // 权限验证 if (!Per.checkPer(per)) { - return Promise.reject(EvaluationAuthErrors.permissionDenied); + return Promise.reject(EvaluationErrEnum.evalInsufficientPermission); } return { @@ -221,7 +212,7 @@ export const authEvalDataset = async ({ const { tmbId } = result; if (!datasetId) { - return Promise.reject(EvaluationAuthErrors.datasetIdRequired); + return Promise.reject(EvaluationErrEnum.evalDatasetCollectionNotFound); } const { dataset } = await authEvalDatasetByTmbId({ @@ -295,7 +286,7 @@ export const authEvalMetricByTmbId = async ({ const metric = await MongoEvalMetric.findOne({ _id: metricId }).lean(); if (!metric) { - return Promise.reject(EvaluationAuthErrors.metricNotFound); + return Promise.reject(EvaluationErrEnum.evalMetricNotFound); } // Root用户权限特殊处理 @@ -310,7 +301,7 @@ export const authEvalMetricByTmbId = async ({ // 团队权限验证 if (String(metric.teamId) !== teamId) { - return Promise.reject(EvaluationAuthErrors.metricNotFound); + return Promise.reject(EvaluationErrEnum.evalMetricNotFound); } // 所有者检查 @@ -335,7 +326,7 @@ export const authEvalMetricByTmbId = async ({ // 权限验证 if (!Per.checkPer(per)) { - return Promise.reject(EvaluationAuthErrors.permissionDenied); + return Promise.reject(EvaluationErrEnum.evalInsufficientPermission); } return { @@ -365,7 +356,7 @@ export const authEvalMetric = async ({ const { tmbId } = result; if (!metricId) { - return Promise.reject(EvaluationAuthErrors.metricIdRequired); + return Promise.reject(EvaluationErrEnum.evalMetricNotFound); } const { metric } = await authEvalMetricByTmbId({ diff --git a/packages/service/type/env.d.ts b/packages/service/type/env.d.ts index e14df82b67f1..14e3f0b057f8 100644 --- a/packages/service/type/env.d.ts +++ b/packages/service/type/env.d.ts @@ -51,6 +51,7 @@ declare global { // evaluations settings EVAL_TASK_CONCURRENCY?: string; EVAL_ITEM_CONCURRENCY?: string; + EVAL_ITEM_MAX_RETRY?: string; // evalaution data settings EVAL_DATA_QUALITY_CONCURRENCY?: string; diff --git a/packages/web/i18n/en/evaluation.json b/packages/web/i18n/en/evaluation.json index 54fa72ee79cb..9d1398907340 100644 --- a/packages/web/i18n/en/evaluation.json +++ b/packages/web/i18n/en/evaluation.json @@ -1,4 +1,50 @@ { "dataset_collection_not_found": "Evaluation dataset collection not found", - "dataset_data_not_found": "Evaluation dataset data not found" + "dataset_data_not_found": "Evaluation dataset data not found", + "name_required": "Evaluation task name is required", + "name_too_long": "Evaluation task name is too long", + "description_too_long": "Evaluation task description is too long", + "dataset_id_required": "Dataset ID is required", + "target_required": "Evaluation target is required", + "target_invalid_config": "Evaluation target configuration is invalid", + "target_app_id_missing": "Application ID is missing", + "evaluators_required": "Evaluators are required", + "evaluator_invalid_config": "Evaluator configuration is invalid", + "collection_id_required": "Collection ID is required", + "user_input_required": "User input is required", + "expected_output_required": "Expected output is required", + "invalid_page_number": "Invalid page number", + "invalid_page_size": "Invalid page size", + "metric_name_required": "Metric name is required", + "metric_name_too_long": "Metric name is too long", + "metric_prompt_required": "Metric prompt is required", + "metric_prompt_too_long": "Metric prompt is too long", + "invalid_format": "Invalid format", + "count_must_be_positive": "Count must be positive", + "invalid_context": "Invalid context", + "invalid_retrieval_context": "Invalid retrieval context", + "insufficient_permission": "Insufficient permission", + "app_not_found": "Application not found", + "app_no_permission": "No permission for application", + "task_not_found": "Evaluation task not found", + "item_not_found": "Evaluation item not found", + "metric_not_found": "Metric not found", + "metric_builtin_cannot_modify": "Built-in metrics cannot be modified", + "metric_builtin_cannot_delete": "Built-in metrics cannot be deleted", + "invalid_status": "Invalid status", + "only_queuing_can_start": "Only queued evaluations can be started", + "only_running_can_stop": "Only running evaluations can be stopped", + "only_failed_can_retry": "Only failed evaluation items can be retried", + "item_no_error_to_retry": "Evaluation item has no error to retry", + "target_output_required": "Target output is required", + "evaluator_output_required": "Evaluator output is required", + "dataset_load_failed": "Dataset loading failed", + "target_config_invalid": "Target configuration is invalid", + "evaluators_config_invalid": "Evaluators configuration is invalid", + "unsupported_target_type": "Unsupported target type", + "app_version_not_found": "Application version not found", + "duplicate_dataset_name": "Duplicate dataset name", + "no_data_in_collections": "No data in collections", + "update_failed": "Update failed", + "lock_acquisition_failed": "Lock acquisition failed" } diff --git a/packages/web/i18n/zh-CN/evaluation.json b/packages/web/i18n/zh-CN/evaluation.json index cb31b9d299b8..bcac58d8502f 100644 --- a/packages/web/i18n/zh-CN/evaluation.json +++ b/packages/web/i18n/zh-CN/evaluation.json @@ -1,4 +1,50 @@ { "dataset_collection_not_found": "评估数据集未找到", - "dataset_data_not_found": "评估数据集数据未找到" + "dataset_data_not_found": "评估数据集数据未找到", + "name_required": "评估任务名称必填", + "name_too_long": "评估任务名称过长", + "description_too_long": "评估任务描述过长", + "dataset_id_required": "数据集ID必填", + "target_required": "评估目标必填", + "target_invalid_config": "评估目标配置无效", + "target_app_id_missing": "应用ID缺失", + "evaluators_required": "评估器必填", + "evaluator_invalid_config": "评估器配置无效", + "collection_id_required": "集合ID必填", + "user_input_required": "用户输入必填", + "expected_output_required": "预期输出必填", + "invalid_page_number": "无效的页码", + "invalid_page_size": "无效的页面大小", + "metric_name_required": "指标名称必填", + "metric_name_too_long": "指标名称过长", + "metric_prompt_required": "指标提示必填", + "metric_prompt_too_long": "指标提示过长", + "invalid_format": "无效的格式", + "count_must_be_positive": "数量必须为正数", + "invalid_context": "无效的上下文", + "invalid_retrieval_context": "无效的检索上下文", + "insufficient_permission": "权限不足", + "app_not_found": "应用未找到", + "app_no_permission": "应用无权限", + "task_not_found": "评估任务未找到", + "item_not_found": "评估项未找到", + "metric_not_found": "指标未找到", + "metric_builtin_cannot_modify": "内置指标不可修改", + "metric_builtin_cannot_delete": "内置指标不可删除", + "invalid_status": "无效的状态", + "only_queuing_can_start": "只有排队状态的评估可以启动", + "only_running_can_stop": "只有运行中的评估可以停止", + "only_failed_can_retry": "只有失败的评估项可以重试", + "item_no_error_to_retry": "评估项无错误可重试", + "target_output_required": "目标输出必填", + "evaluator_output_required": "评估器输出必填", + "dataset_load_failed": "数据集加载失败", + "target_config_invalid": "目标配置无效", + "evaluators_config_invalid": "评估器配置无效", + "unsupported_target_type": "不支持的目标类型", + "app_version_not_found": "应用版本未找到", + "duplicate_dataset_name": "数据集名称重复", + "no_data_in_collections": "集合中无数据", + "update_failed": "更新失败", + "lock_acquisition_failed": "锁获取失败" } diff --git a/packages/web/i18n/zh-Hant/evaluation.json b/packages/web/i18n/zh-Hant/evaluation.json index eb0787532d5e..35660f01ef19 100644 --- a/packages/web/i18n/zh-Hant/evaluation.json +++ b/packages/web/i18n/zh-Hant/evaluation.json @@ -1,4 +1,50 @@ { "dataset_collection_not_found": "評估數據集未找到", - "dataset_data_not_found": "評估數據集數據未找到" + "dataset_data_not_found": "評估數據集數據未找到", + "name_required": "評估任務名稱必填", + "name_too_long": "評估任務名稱過長", + "description_too_long": "評估任務描述過長", + "dataset_id_required": "數據集ID必填", + "target_required": "評估目標必填", + "target_invalid_config": "評估目標配置無效", + "target_app_id_missing": "應用ID缺失", + "evaluators_required": "評估器必填", + "evaluator_invalid_config": "評估器配置無效", + "collection_id_required": "集合ID必填", + "user_input_required": "用戶輸入必填", + "expected_output_required": "預期輸出必填", + "invalid_page_number": "無效的頁碼", + "invalid_page_size": "無效的頁面大小", + "metric_name_required": "指標名稱必填", + "metric_name_too_long": "指標名稱過長", + "metric_prompt_required": "指標提示必填", + "metric_prompt_too_long": "指標提示過長", + "invalid_format": "無效的格式", + "count_must_be_positive": "數量必須為正數", + "invalid_context": "無效的上下文", + "invalid_retrieval_context": "無效的檢索上下文", + "insufficient_permission": "權限不足", + "app_not_found": "應用未找到", + "app_no_permission": "應用無權限", + "task_not_found": "評估任務未找到", + "item_not_found": "評估項未找到", + "metric_not_found": "指標未找到", + "metric_builtin_cannot_modify": "內置指標不可修改", + "metric_builtin_cannot_delete": "內置指標不可刪除", + "invalid_status": "無效的狀態", + "only_queuing_can_start": "只有排隊狀態的評估可以啟動", + "only_running_can_stop": "只有運行中的評估可以停止", + "only_failed_can_retry": "只有失敗的評估項可以重試", + "item_no_error_to_retry": "評估項無錯誤可重試", + "target_output_required": "目標輸出必填", + "evaluator_output_required": "評估器輸出必填", + "dataset_load_failed": "數據集加載失敗", + "target_config_invalid": "目標配置無效", + "evaluators_config_invalid": "評估器配置無效", + "unsupported_target_type": "不支持的目標類型", + "app_version_not_found": "應用版本未找到", + "duplicate_dataset_name": "數據集名稱重複", + "no_data_in_collections": "集合中無數據", + "update_failed": "更新失敗", + "lock_acquisition_failed": "鎖獲取失敗" } diff --git a/projects/app/.env.template b/projects/app/.env.template index fce436097fad..2cf0c9e9457b 100644 --- a/projects/app/.env.template +++ b/projects/app/.env.template @@ -105,6 +105,7 @@ SIGNOZ_STORE_LEVEL=warn # evaluations settings EVAL_TASK_CONCURRENCY=3 # the number of concurrent evaluations tasks EVAL_ITEM_CONCURRENCY=10 # the number of concurrent evaluation itmes per evaluations task +EVAL_ITEM_MAX_RETRY=3 # the max retry times for evaluation itmes per evaluations task EVAL_DATA_QUALITY_CONCURRENCY=2 EVAL_DATASET_DATA_SYNTHESIZE_CONCURRENCY=5 EVAL_DATASET_SMART_GENERATE_CONCURRENCY=2 \ No newline at end of file diff --git a/projects/app/src/pages/api/core/evaluation/task/create.ts b/projects/app/src/pages/api/core/evaluation/task/create.ts index 496467f9720c..a59bed2a9e8c 100644 --- a/projects/app/src/pages/api/core/evaluation/task/create.ts +++ b/projects/app/src/pages/api/core/evaluation/task/create.ts @@ -6,8 +6,7 @@ import type { CreateEvaluationRequest, CreateEvaluationResponse } from '@fastgpt/global/core/evaluation/api'; -import { validateTargetConfig } from '@fastgpt/service/core/evaluation/target'; -import { validateEvaluationParams } from '@fastgpt/global/core/evaluation/utils'; +import { validateEvaluationParamsForCreate } from '@fastgpt/service/core/evaluation/utils'; import { authEvaluationTaskCreate } from '@fastgpt/service/core/evaluation/common'; import { addAuditLog } from '@fastgpt/service/support/user/audit/util'; import { AuditEventEnum } from '@fastgpt/global/support/user/audit/constants'; @@ -18,35 +17,18 @@ async function handler( try { const { name, description, datasetId, target, evaluators } = req.body; - // Validate name and description - const paramValidation = validateEvaluationParams( - { name, description }, - { namePrefix: 'Evaluation' } - ); + // Validate all evaluation parameters (includes target validation) + const paramValidation = await validateEvaluationParamsForCreate({ + name, + description, + datasetId, + target, + evaluators + }); if (!paramValidation.success) { return Promise.reject(paramValidation.message); } - const targetValidation = await validateTargetConfig(target as EvalTarget); - if (!targetValidation.success) { - return Promise.reject(`Target validation failed: ${targetValidation.message}`); - } - - if (!datasetId) { - return Promise.reject('Dataset ID is required'); - } - - if (!evaluators || !Array.isArray(evaluators) || evaluators.length === 0) { - return Promise.reject('At least one evaluator is required'); - } - - // Validate evaluators configuration - for (const evaluator of evaluators) { - if (!evaluator.metric || !evaluator.metric._id || !evaluator.metric.type) { - return Promise.reject('Each evaluator must contain a valid metric configuration'); - } - } - const { teamId, tmbId } = await authEvaluationTaskCreate(target as EvalTarget, { req, authApiKey: true, @@ -71,8 +53,8 @@ async function handler( params: { taskName: evaluation.name, datasetId, - targetType: target.type, - evaluatorCount: evaluators.length + targetType: evaluation.target.type, + evaluatorCount: evaluation.evaluators.length } }); })(); diff --git a/projects/app/src/pages/api/core/evaluation/task/item/update.ts b/projects/app/src/pages/api/core/evaluation/task/item/update.ts index 01f7abf78f4b..2d9cccdc9322 100644 --- a/projects/app/src/pages/api/core/evaluation/task/item/update.ts +++ b/projects/app/src/pages/api/core/evaluation/task/item/update.ts @@ -16,7 +16,7 @@ async function handler( req: ApiRequestProps ): Promise { try { - const { evalItemId, userInput, expectedOutput, variables } = req.body; + const { evalItemId, userInput, expectedOutput } = req.body; const { evaluation, evaluationItem, teamId, tmbId } = await authEvaluationItemWrite( evalItemId, diff --git a/projects/app/src/pages/api/core/evaluation/task/update.ts b/projects/app/src/pages/api/core/evaluation/task/update.ts index d9a56d113014..977bb6bb2e01 100644 --- a/projects/app/src/pages/api/core/evaluation/task/update.ts +++ b/projects/app/src/pages/api/core/evaluation/task/update.ts @@ -5,7 +5,7 @@ import type { UpdateEvaluationResponse, UpdateEvaluationRequest } from '@fastgpt/global/core/evaluation/api'; -import { validateEvaluationParams } from '@fastgpt/global/core/evaluation/utils'; +import { validateEvaluationParamsForUpdate } from '@fastgpt/service/core/evaluation/utils'; import { authEvaluationTaskWrite } from '@fastgpt/service/core/evaluation/common'; import { addAuditLog } from '@fastgpt/service/support/user/audit/util'; import { AuditEventEnum } from '@fastgpt/global/support/user/audit/constants'; @@ -14,17 +14,20 @@ async function handler( req: ApiRequestProps ): Promise { try { - const { evalId, name, description } = req.body; + const { evalId, name, description, datasetId, target, evaluators } = req.body; if (!evalId) { return Promise.reject('Evaluation ID is required'); } - // Validate name and description with common validation utility - const paramValidation = validateEvaluationParams( - { name, description }, - { namePrefix: 'Evaluation' } - ); + // Validate all evaluation parameters with common validation utility + const paramValidation = await validateEvaluationParamsForUpdate({ + name, + description, + datasetId, + target, + evaluators + }); if (!paramValidation.success) { return Promise.reject(paramValidation.message); } @@ -41,7 +44,10 @@ async function handler( evalId, { ...(name !== undefined && { name: name.trim() }), - ...(description !== undefined && { description: description?.trim() }) + ...(description !== undefined && { description: description?.trim() }), + ...(datasetId !== undefined && { datasetId }), + ...(target !== undefined && { target }), + ...(evaluators !== undefined && { evaluators }) }, teamId ); diff --git a/test/cases/pages/api/core/evaluation/task/create.test.ts b/test/cases/pages/api/core/evaluation/task/create.test.ts index 9cb3a3ae7258..1b045e87a65a 100644 --- a/test/cases/pages/api/core/evaluation/task/create.test.ts +++ b/test/cases/pages/api/core/evaluation/task/create.test.ts @@ -50,8 +50,21 @@ describe('Create Evaluation Task API Handler', () => { name: 'Test Evaluation', description: 'Test Description', datasetId: new Types.ObjectId(), - targetId: new Types.ObjectId(), - metricIds: [new Types.ObjectId(), new Types.ObjectId()], + target: { + type: 'workflow', + config: { + appId: new Types.ObjectId().toString() + } + }, + evaluators: [ + { + metric: { + _id: new Types.ObjectId().toString(), + name: 'Test Metric', + type: 'ai_model' + } + } + ], usageId: new Types.ObjectId(), status: EvaluationStatusEnum.queuing, teamId: new Types.ObjectId(), @@ -157,7 +170,7 @@ describe('Create Evaluation Task API Handler', () => { } } as any; - await expect(createHandler(mockReq)).rejects.toMatch('Evaluation name is required'); + await expect(createHandler(mockReq)).rejects.toMatch('evaluationNameRequired'); }); test('应该拒绝空指标列表', async () => { @@ -180,7 +193,7 @@ describe('Create Evaluation Task API Handler', () => { // 需要mock validateTargetConfig返回成功 (validateTargetConfig as any).mockResolvedValue({ success: true, message: 'Valid' }); - await expect(createHandler(mockReq)).rejects.toMatch('At least one evaluator is required'); + await expect(createHandler(mockReq)).rejects.toMatch('evaluationEvaluatorsRequired'); }); test('应该拒绝缺少必填字段', async () => { @@ -188,17 +201,17 @@ describe('Create Evaluation Task API Handler', () => { method: 'POST', body: { name: 'Test Evaluation' - // 缺少 datasetId, target, metricIds + // 缺少 datasetId, target, evaluators } } as any; // 重置mock为默认的失败状态 - (validateTargetConfig as any).mockResolvedValue({ - success: false, - message: 'Target validation failed' - }); + // (validateTargetConfig as any).mockResolvedValue({ + // success: false, + // message: 'Target validation failed' + // }); - // 由于没有target字段,target验证会先失败 - await expect(createHandler(mockReq)).rejects.toMatch('Target validation failed'); + // datasetId 验证会先失败 + await expect(createHandler(mockReq)).rejects.toMatch('evaluationDatasetIdRequired'); }); }); diff --git a/test/cases/service/core/evaluation/dataset/jobCleanup.test.ts b/test/cases/service/core/evaluation/dataset/jobCleanup.test.ts index 54159b87477d..7aa91c5a91e9 100644 --- a/test/cases/service/core/evaluation/dataset/jobCleanup.test.ts +++ b/test/cases/service/core/evaluation/dataset/jobCleanup.test.ts @@ -1,11 +1,11 @@ import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest'; -import type { Queue} from 'bullmq'; +import type { Queue } from 'bullmq'; import { Job } from 'bullmq'; import { RobustJobCleaner, createJobCleaner, type JobCleanupOptions -} from '@fastgpt/service/core/evaluation/dataset/jobCleanup'; +} from '@fastgpt/service/core/evaluation/utils/jobCleanup'; // Mock dependencies vi.mock('@fastgpt/service/common/system/log', () => ({ diff --git a/test/cases/service/core/evaluation/task.test.ts b/test/cases/service/core/evaluation/task.test.ts index 9798bf6f7884..0c17db756539 100644 --- a/test/cases/service/core/evaluation/task.test.ts +++ b/test/cases/service/core/evaluation/task.test.ts @@ -18,6 +18,7 @@ import { } from '@fastgpt/global/core/evaluation/constants'; import { Types } from '@fastgpt/service/common/mongo'; import { TeamErrEnum } from '@fastgpt/global/common/error/code/team'; +import { getErrText } from '@fastgpt/global/common/error/utils'; // Mock dependencies vi.mock('@fastgpt/service/core/evaluation/task/mq', () => ({ @@ -28,8 +29,27 @@ vi.mock('@fastgpt/service/core/evaluation/task/mq', () => ({ add: vi.fn(), addBulk: vi.fn() }, - removeEvaluationTaskJob: vi.fn().mockResolvedValue(undefined), - removeEvaluationItemJobs: vi.fn().mockResolvedValue(undefined), + removeEvaluationTaskJob: vi.fn().mockResolvedValue({ + queue: 'evalTask', + totalJobs: 0, + removedJobs: 0, + failedRemovals: 0, + errors: [] + }), + removeEvaluationItemJobs: vi.fn().mockResolvedValue({ + queue: 'evalTaskItem', + totalJobs: 0, + removedJobs: 0, + failedRemovals: 0, + errors: [] + }), + removeEvaluationItemJobsByItemId: vi.fn().mockResolvedValue({ + queue: 'evalTaskItem', + totalJobs: 0, + removedJobs: 0, + failedRemovals: 0, + errors: [] + }), getEvaluationTaskWorker: vi.fn(), getEvaluationItemWorker: vi.fn() })); @@ -39,14 +59,14 @@ vi.mock('@fastgpt/service/support/wallet/usage/controller', () => ({ concatUsage: vi.fn() })); -vi.mock('@fastgpt/service/common/system/log', () => ({ - addLog: { - info: vi.fn(), - warn: vi.fn(), - error: vi.fn(), - debug: vi.fn() - } -})); +// vi.mock('@fastgpt/service/common/system/log', () => ({ +// addLog: { +// info: vi.fn(), +// warn: vi.fn(), +// error: vi.fn(), +// debug: vi.fn() +// } +// })); vi.mock('@fastgpt/service/support/permission/controller', () => ({ parseHeaderCert: vi.fn() @@ -264,7 +284,7 @@ describe('EvaluationTaskService', () => { const nonExistentId = new Types.ObjectId().toString(); await expect(EvaluationTaskService.getEvaluation(nonExistentId, teamId)).rejects.toThrow( - 'Evaluation not found' + 'evaluationTaskNotFound' ); }); }); @@ -397,7 +417,7 @@ describe('EvaluationTaskService', () => { ); await expect(EvaluationTaskService.startEvaluation(created._id, teamId)).rejects.toThrow( - 'Only queuing evaluations can be started' + 'evaluationOnlyQueuingCanStart' ); }); }); @@ -632,7 +652,7 @@ describe('EvaluationTaskService', () => { await expect( EvaluationTaskService.getEvaluationItem(nonExistentId, teamId) - ).rejects.toThrow('Evaluation item not found'); + ).rejects.toThrow('evaluationItemNotFound'); }); }); @@ -751,7 +771,7 @@ describe('EvaluationTaskService', () => { const itemId = item._id.toString(); await expect(EvaluationTaskService.retryEvaluationItem(itemId, teamId)).rejects.toThrow( - 'Only failed evaluation items can be retried' + 'evaluationOnlyFailedCanRetry' ); }); }); @@ -785,7 +805,7 @@ describe('EvaluationTaskService', () => { await EvaluationTaskService.deleteEvaluationItem(itemId, teamId); await expect(EvaluationTaskService.getEvaluationItem(itemId, teamId)).rejects.toThrow( - 'Evaluation item not found' + 'evaluationItemNotFound' ); }); }); @@ -1202,7 +1222,7 @@ describe('EvaluationTaskService', () => { // 验证评估任务被删除 await expect(EvaluationTaskService.getEvaluation(testEvaluationId, teamId)).rejects.toThrow( - 'Evaluation not found' + 'evaluationTaskNotFound' ); // 验证所有评估项被删除 @@ -1496,7 +1516,7 @@ describe('EvaluationTaskService', () => { // 验证评估项被重新入队 expect(evaluationItemQueue.add).toHaveBeenCalledWith( - expect.stringContaining('eval_item_retry_'), + expect.stringContaining(`eval_item_${evalItem._id.toString()}_retry`), { evalId: testEvaluationId.toString(), evalItemId: evalItem._id.toString() @@ -1613,7 +1633,7 @@ describe('EvaluationTaskService', () => { expect(updatedItem?.errorMessage).toContain('TIMEOUT'); }); - test('AI积分不足应该暂停整个任务', async () => { + test('AI积分不足应该暂停整个任务项', async () => { const { evaluationItemProcessor } = await import( '@fastgpt/service/core/evaluation/task/processor' ); @@ -1652,16 +1672,13 @@ describe('EvaluationTaskService', () => { await evaluationItemProcessor(mockJob); - // 验证任务被暂停 + // 验证任务被执行完成, 任务项被暂停(error) const updatedEvaluation = await MongoEvaluation.findById(testEvaluationId); - expect(updatedEvaluation?.status).toBe(EvaluationStatusEnum.error); - expect(updatedEvaluation?.errorMessage).toContain('AI Points balance insufficient'); - - // 验证警告日志 - expect(addLog.warn).toHaveBeenCalledWith( - expect.stringContaining( - `AI Points insufficient, evaluation task paused: ${testEvaluationId}` - ) + const updatedEvaluationItem = await MongoEvalItem.findById(evalItem._id); + expect(updatedEvaluation?.status).toBe(EvaluationStatusEnum.completed); + expect(updatedEvaluationItem?.status).toBe(EvaluationStatusEnum.error); + expect(updatedEvaluationItem?.errorMessage).toBe( + '[ResourceCheck] ' + getErrText(TeamErrEnum.aiPointsNotEnough) ); }); @@ -1841,6 +1858,7 @@ describe('EvaluationTaskService', () => { const testEvaluationId = new Types.ObjectId(); const evalItem = await MongoEvalItem.create({ + _id: '6666666c506834bfaa7a3a0d', evalId: testEvaluationId, dataItem: { userInput: 'Error cleanup test', expectedOutput: 'Expected' }, target, @@ -1862,11 +1880,19 @@ describe('EvaluationTaskService', () => { status: EvaluationStatusEnum.evaluating }); + // Reset AI Points check to pass normally + (checkTeamAIPoints as any).mockResolvedValue(undefined); + const mockTargetInstance = { execute: vi.fn().mockRejectedValue(new Error('NETWORK_ERROR: Cleanup test')) }; (createTargetInstance as any).mockReturnValue(mockTargetInstance); + const mockEvaluatorInstance = { + evaluate: vi.fn().mockRejectedValue(new Error('NETWORK_ERROR: Cleanup test')) + }; + (createEvaluatorInstance as any).mockReturnValue(mockEvaluatorInstance); + const itemJobData: EvaluationItemJobData = { evalId: testEvaluationId.toString(), evalItemId: evalItem._id.toString() @@ -1878,8 +1904,6 @@ describe('EvaluationTaskService', () => { // 验证部分结果被清理 const updatedItem = await MongoEvalItem.findById(evalItem._id); - expect(updatedItem?.targetOutput).toBeNull(); - expect(updatedItem?.evaluatorOutput).toBeNull(); expect(updatedItem?.status).toBe(EvaluationStatusEnum.queuing); expect(updatedItem?.retry).toBe(2); });