diff --git a/.github/workflows/cd-dev.yml b/.github/workflows/cd-dev.yml new file mode 100644 index 00000000..731ac0f3 --- /dev/null +++ b/.github/workflows/cd-dev.yml @@ -0,0 +1,64 @@ +name: Deploy Kaapi Dev to EC2 + +on: + push: + branches: + - dev + +jobs: + deploy: + runs-on: ubuntu-latest + environment: AWS_ENV + + permissions: + packages: write + contents: read + attestations: write + id-token: write + + steps: + - name: Checkout Repository + uses: actions/checkout@v6 + + - name: Configure AWS credentials + uses: aws-actions/configure-aws-credentials@v6 + with: + role-to-assume: ${{ secrets.AWS_ROLE_ARN }} + aws-region: ${{ secrets.AWS_REGION }} + + - name: Deploy via SSM + id: ssm + env: + BUILD_DIRECTORY: ${{ secrets.DEV_BUILD_DIRECTORY }} + APP_NAME: ${{ secrets.DEV_PM2_APP_NAME }} + AWS_REGION: ${{ secrets.AWS_REGION }} + INSTANCE_ID: ${{ secrets.EC2_STAGING_INSTANCE_ID }} + ROOT_USER: ${{ secrets.USER }} + run: | + REMOTE_CMD="export HOME=/home/$ROOT_USER && export NVM_DIR="/home/$ROOT_USER/.nvm" && [ -s "\$NVM_DIR/nvm.sh" ] && \. "\$NVM_DIR/nvm.sh" && git config --global --add safe.directory ${BUILD_DIRECTORY} && set -e && cd ${BUILD_DIRECTORY} && git pull origin dev && npm ci && npm run build && sudo -iu ${ROOT_USER} pm2 restart ${APP_NAME}" + CMD_ID=$(aws ssm send-command \ + --instance-ids "$INSTANCE_ID" \ + --document-name "AWS-RunShellScript" \ + --parameters commands="[\"$REMOTE_CMD\"]" \ + --region "$AWS_REGION" \ + --query 'Command.CommandId' \ + --output text) + echo "cmd_id=$CMD_ID" >> "$GITHUB_OUTPUT" + + - name: Wait for SSM command to finish + env: + INSTANCE_ID: ${{ secrets.EC2_STAGING_INSTANCE_ID }} + CMD_ID: ${{ steps.ssm.outputs.cmd_id }} + run: | + WAIT_EXIT=0 + aws ssm wait command-executed \ + --command-id "$CMD_ID" \ + --instance-id "$INSTANCE_ID" || WAIT_EXIT=$? 
+ + aws ssm get-command-invocation \ + --command-id "$CMD_ID" \ + --instance-id "$INSTANCE_ID" \ + --query '{Status:Status,Stdout:StandardOutputContent,Stderr:StandardErrorContent}' \ + --output json + + exit $WAIT_EXIT diff --git a/README.md b/README.md index 6911dbef..87e36f71 100644 --- a/README.md +++ b/README.md @@ -14,6 +14,7 @@ This is a thin frontend UI for [Kaapi backend](https://github.com/ProjectTech4De - [Software Dependencies](#software-dependencies) - [npm (via asdf)](#npm-via-asdf) - [Kaapi Backend](#kaapi-backend) + - [Kaapi Guardrails Service](#kaapi-guardrails-service) - [Clone Frontend Repo](#clone-frontend-repo) - [Git commands to clone](#git-commands-to-clone) - [Installation](#installation) @@ -52,11 +53,11 @@ You need to set up the [Kaapi backend](https://github.com/ProjectTech4DevAI/kaap > 💡 Note: Ensure the backend is running and accessible before starting the frontend. -### Kaapi Guardrails Service _(coming soon)_ +### Kaapi Guardrails Service -The Guardrails UI (currently in development) will require the Kaapi Guardrails service to be running alongside the backend. Setup instructions will be added here once the service is available. +You need to set up the [Kaapi Guardrails](https://github.com/ProjectTech4DevAI/kaapi-guardrails) service and follow the instructions there. -> 🚧 No action needed for now — this is a placeholder for when the Guardrails feature lands in `main`. +> 💡 Note: The Guardrails service must be running and accessible whenever you use the Guardrails module in the frontend. --- @@ -149,14 +150,14 @@ Deployments are automated via a GitHub Actions CD pipeline that SSHes into the E 1. SSHes into the EC2 instance 2. Runs `git pull` to fetch the latest code 3. Runs `npm run build` to create an optimized production build -4. Restarts the server to apply the new build +4. Restarts the pm2 server to apply the new build **Production** — on every version tag (e.g. `v1.0.0`, `v2.1.0`), the pipeline automatically: 1. 
SSHes into the EC2 instance 2. Runs `git fetch --tags` and checks out the tag 3. Runs `npm run build` to create an optimized production build -4. Restarts the server to apply the new build +4. Restarts the pm2 server to apply the new build --- @@ -169,4 +170,4 @@ Deployments are automated via a GitHub Actions CD pipeline that SSHes into the E ### Chat With Us -- 💬 [Discord](https://discord.gg/BRYzSYha) +- 💬 [Discord](https://discord.gg/s7e2UBFku) diff --git a/app/(main)/assessment/page.tsx b/app/(main)/assessment/page.tsx new file mode 100644 index 00000000..c0993354 --- /dev/null +++ b/app/(main)/assessment/page.tsx @@ -0,0 +1,334 @@ +"use client"; + +import { Suspense, useCallback, useMemo, useRef, useState } from "react"; +import { useRouter } from "next/navigation"; +import Loader from "@/app/components/Loader"; +import { useToast } from "@/app/components/Toast"; +import { apiFetch } from "@/app/lib/apiClient"; +import { handleForbiddenError } from "@/app/lib/utils/assessment"; +import { FeatureFlag } from "@/app/lib/constants"; +import { removeFeatureFromClient } from "@/app/lib/utils/features"; +import { useAuth } from "@/app/lib/context/AuthContext"; +import { useAssessmentDatasetStore } from "@/app/lib/store/assessment"; +import type { + AssessmentFormState, + AssessmentTab, + AssessmentTabId, + ConfigSelection, + SchemaProperty, +} from "@/app/lib/types/assessment"; +import PageLayout from "@/app/components/assessment/PageLayout"; + +function schemaToJsonSchema(properties: SchemaProperty[]): object | null { + if (properties.length === 0) return null; + + const props: Record = {}; + const required: string[] = []; + + properties.forEach((property) => { + if (!property.name.trim()) return; + + let definition: object; + if (property.type === "object") { + definition = schemaToJsonSchema(property.children) || { type: "object" }; + } else if (property.type === "enum") { + definition = { + type: "string", + enum: property.enumValues.filter((value) => 
value.trim()), + }; + } else { + definition = { type: property.type }; + } + + if (property.isArray) { + definition = { type: "array", items: definition }; + } + + props[property.name] = definition; + if (property.isRequired) { + required.push(property.name); + } + }); + + if (Object.keys(props).length === 0) return null; + + return { + type: "object", + properties: props, + ...(required.length > 0 ? { required } : {}), + }; +} + +const PAGE_TABS: ReadonlyArray = [ + { id: "datasets", label: "Datasets" }, + { id: "config", label: "Config" }, + { id: "results", label: "Result" }, +]; + +function PageContent() { + const router = useRouter(); + const toast = useToast(); + const { activeKey } = useAuth(); + const [activeTab, setActiveTab] = useState("datasets"); + const [configStep, setConfigStep] = useState(1); + const [completedConfigSteps, setCompletedConfigSteps] = useState>( + new Set(), + ); + const [isSubmitting, setIsSubmitting] = useState(false); + const [experimentName, setExperimentName] = useState(""); + const featureRedirectingRef = useRef(false); + const { + datasetId, + datasetName, + columns, + sampleRow, + columnMapping, + setDatasetId, + setDatasetName, + setDataset, + setColumnMapping, + clearDataset, + } = useAssessmentDatasetStore(); + const [promptTemplate, setPromptTemplate] = useState(""); + const [systemInstruction, setSystemInstruction] = useState(""); + const [outputSchema, setOutputSchema] = useState([]); + const [configs, setConfigs] = useState([]); + + const handleForbidden = useCallback( + (options?: { notify?: boolean }) => { + if (featureRedirectingRef.current) return; + featureRedirectingRef.current = true; + + if (options?.notify) { + toast.error( + "Assessment feature is disabled for this organization/project.", + ); + } + + removeFeatureFromClient(FeatureFlag.ASSESSMENT); + if ( + typeof window !== "undefined" && + window.location.pathname !== "/evaluations" + ) { + router.replace("/"); + } + }, + [router, toast], + ); + + const 
handleForbiddenWithNotify = useCallback(() => { + handleForbidden({ notify: true }); + }, [handleForbidden]); + + const markConfigCompleted = useCallback((step: number) => { + setCompletedConfigSteps((prev) => new Set([...prev, step])); + }, []); + + const handleConfigNext = useCallback( + (fromStep: number) => { + markConfigCompleted(fromStep); + setConfigStep(fromStep + 1); + }, + [markConfigCompleted], + ); + + const handleColumnsLoaded = useCallback( + (loadedColumns: string[], firstRow: Record = {}) => { + const currentId = useAssessmentDatasetStore.getState().datasetId; + setDataset(currentId, loadedColumns, firstRow); + setPromptTemplate(""); + }, + [setDataset], + ); + + const outputSchemaJson = useMemo( + () => schemaToJsonSchema(outputSchema), + [outputSchema], + ); + + const handleSubmit = useCallback(async () => { + if (!datasetId) { + toast.error("Dataset is required"); + return; + } + if (columnMapping.textColumns.length === 0) { + toast.error("Map at least one text column"); + return; + } + if (!promptTemplate.trim()) { + toast.error("Prompt is required"); + return; + } + if (!outputSchema.some((field) => field.name.trim())) { + toast.error("Response format is required"); + return; + } + if (configs.length === 0) { + toast.error("Select at least one configuration"); + return; + } + if (!experimentName.trim()) { + toast.error("Experiment name is required"); + return; + } + + setIsSubmitting(true); + try { + await apiFetch("/api/assessment/runs", activeKey?.key ?? 
"", { + method: "POST", + body: JSON.stringify({ + experiment_name: experimentName.trim(), + dataset_id: parseInt(datasetId, 10), + prompt_template: promptTemplate || null, + system_instruction: systemInstruction.trim() || null, + text_columns: columnMapping.textColumns, + attachments: columnMapping.attachments.map( + ({ column, type, format }) => ({ column, type, format }), + ), + output_schema: outputSchemaJson, + configs: configs.map(({ config_id, config_version }) => ({ + config_id, + config_version, + })), + }), + }); + + toast.success("Assessment submitted!"); + setConfigStep(1); + setCompletedConfigSteps(new Set()); + setExperimentName(""); + clearDataset(); + setSystemInstruction(""); + setPromptTemplate(""); + setOutputSchema([]); + setConfigs([]); + setActiveTab("results"); + } catch (error) { + if (handleForbiddenError(error, handleForbiddenWithNotify)) return; + toast.error( + `Failed to submit: ${error instanceof Error ? error.message : "Unknown error"}`, + ); + } finally { + setIsSubmitting(false); + } + }, [ + clearDataset, + columnMapping, + configs, + datasetId, + experimentName, + handleForbiddenWithNotify, + outputSchema, + outputSchemaJson, + promptTemplate, + activeKey, + systemInstruction, + toast, + ]); + + const formState: AssessmentFormState = { + experimentName, + datasetId, + datasetName, + columns, + sampleRow, + columnMapping, + systemInstruction, + promptTemplate, + outputSchema, + configs, + }; + + const hasDataset = !!datasetId && columns.length > 0; + const hasMapperSelection = columnMapping.textColumns.length > 0; + const hasPromptTemplate = promptTemplate.trim().length > 0; + const hasConfiguredResponseFormat = outputSchema.some((field) => + field.name.trim(), + ); + const canReachReview = + hasPromptTemplate && configs.length > 0 && hasConfiguredResponseFormat; + const canSubmitAssessment = + !!datasetId && + hasMapperSelection && + hasPromptTemplate && + hasConfiguredResponseFormat && + configs.length > 0 && + 
experimentName.trim().length > 0 && + !isSubmitting; + const submitBlockerMessage = !datasetId + ? "Select a dataset to submit" + : !hasMapperSelection + ? "Map at least one text column to submit" + : !hasPromptTemplate + ? "Write a prompt to submit" + : !hasConfiguredResponseFormat + ? "Set response format to submit" + : configs.length === 0 + ? "Select at least one configuration to submit" + : !experimentName.trim() + ? "Enter an experiment name to submit" + : ""; + const effectiveCompletedConfigSteps = useMemo(() => { + const merged = new Set(completedConfigSteps); + if (hasMapperSelection) merged.add(1); + if (canReachReview) merged.add(2); + return merged; + }, [canReachReview, completedConfigSteps, hasMapperSelection]); + + return ( + { + setActiveTab("config"); + setConfigStep(1); + }, + }} + configPanelProps={{ + canSubmitAssessment, + columns, + columnMapping, + completedSteps: effectiveCompletedConfigSteps, + configStep, + configs, + experimentName, + formState, + hasDataset, + isSubmitting, + outputSchema, + systemInstruction, + promptTemplate, + sampleRow, + setActiveTabToDatasets: () => setActiveTab("datasets"), + setColumnMapping, + setConfigStep, + setConfigs, + setExperimentName, + setOutputSchema, + setSystemInstruction, + setPromptTemplate, + submitBlockerMessage, + onSubmit: handleSubmit, + onStepComplete: handleConfigNext, + }} + evaluationsTabProps={{ + onForbidden: handleForbiddenWithNotify, + }} + /> + ); +} + +export default function Page() { + return ( + }> + + + ); +} diff --git a/app/(main)/configurations/page.tsx b/app/(main)/configurations/page.tsx index a36ab484..5f143ed0 100644 --- a/app/(main)/configurations/page.tsx +++ b/app/(main)/configurations/page.tsx @@ -24,7 +24,7 @@ import { configState, pendingVersionLoads, pendingSingleVersionLoads, -} from "@/app/lib/store/configStore"; +} from "@/app/lib/store/config"; import { flattenConfigVersion } from "@/app/lib/utils"; import { SearchIcon, diff --git 
a/app/(main)/configurations/prompt-editor/page.tsx b/app/(main)/configurations/prompt-editor/page.tsx index 19677150..c8aedcc7 100644 --- a/app/(main)/configurations/prompt-editor/page.tsx +++ b/app/(main)/configurations/prompt-editor/page.tsx @@ -27,7 +27,7 @@ import { ConfigVersionItems, } from "@/app/lib/types/configs"; import { invalidateConfigCache } from "@/app/lib/utils"; -import { configState } from "@/app/lib/store/configStore"; +import { configState } from "@/app/lib/store/config"; import { apiFetch } from "@/app/lib/apiClient"; import { isGpt5Model } from "@/app/lib/models"; import { DEFAULT_CONFIG } from "@/app/lib/constants"; diff --git a/app/(main)/datasets/page.tsx b/app/(main)/datasets/page.tsx index 1234bc11..b86f5732 100644 --- a/app/(main)/datasets/page.tsx +++ b/app/(main)/datasets/page.tsx @@ -10,11 +10,11 @@ import { useState, useEffect } from "react"; import { useAuth } from "@/app/lib/context/AuthContext"; import { useApp } from "@/app/lib/context/AppContext"; +import { Dataset } from "@/app/lib/types/dataset"; import { apiFetch } from "@/app/lib/apiClient"; import Sidebar from "@/app/components/Sidebar"; import PageHeader from "@/app/components/PageHeader"; import { useToast } from "@/app/components/Toast"; -import { Dataset } from "@/app/lib/types/dataset"; export const DATASETS_STORAGE_KEY = "kaapi_datasets"; diff --git a/app/(main)/evaluations/[id]/page.tsx b/app/(main)/evaluations/[id]/page.tsx index 517c9275..ddd05717 100644 --- a/app/(main)/evaluations/[id]/page.tsx +++ b/app/(main)/evaluations/[id]/page.tsx @@ -5,15 +5,15 @@ "use client"; -import { useState, useEffect, useCallback } from "react"; +import { useState, useEffect, useCallback, useRef } from "react"; import { useRouter, useParams } from "next/navigation"; import { apiFetch } from "@/app/lib/apiClient"; import { useAuth } from "@/app/lib/context/AuthContext"; import { useApp } from "@/app/lib/context/AppContext"; import type { EvalJob, + EvalJobApiResponse, 
AssistantConfig, - GroupedTraceItem, } from "@/app/lib/types/evaluation"; import { hasSummaryScores, @@ -22,21 +22,23 @@ import { normalizeToIndividualScores, isGroupedFormat, } from "@/app/lib/utils/evaluation"; +import { + exportGroupedCSV, + exportRowCSV, +} from "@/app/lib/utils/evaluationExport"; import ConfigModal from "@/app/components/ConfigModal"; import Sidebar from "@/app/components/Sidebar"; import DetailedResultsTable from "@/app/components/evaluations/DetailedResultsTable"; -import { colors } from "@/app/lib/colors"; +import MetricsOverview from "@/app/components/evaluations/MetricsOverview"; +import { Button, Modal, ResultsTableSkeleton } from "@/app/components"; import { useToast } from "@/app/components/Toast"; import Loader from "@/app/components/Loader"; import { - WarningTriangleIcon, MenuIcon, ChevronLeftIcon, DatabaseIcon, GroupIcon, - RefreshIcon, } from "@/app/components/icons"; -import { sanitizeCSVCell } from "@/app/lib/utils"; export default function EvaluationReport() { const router = useRouter(); @@ -49,7 +51,9 @@ export default function EvaluationReport() { AssistantConfig | undefined >(undefined); const [isLoading, setIsLoading] = useState(true); + const [isFormatSwitching, setIsFormatSwitching] = useState(false); const [error, setError] = useState(null); + const hasLoadedRef = useRef(false); const { apiKeys, isAuthenticated } = useAuth(); const apiKey = apiKeys[0]?.key ?? 
""; const { sidebarCollapsed, setSidebarCollapsed } = useApp(); @@ -61,12 +65,16 @@ export default function EvaluationReport() { const fetchJobDetails = useCallback(async () => { if (!isAuthenticated || !jobId) return; - setIsLoading(true); - setError(null); + const isFirstLoad = !hasLoadedRef.current; + if (isFirstLoad) { + setIsLoading(true); + setError(null); + } else { + setIsFormatSwitching(true); + } try { - // eslint-disable-next-line @typescript-eslint/no-explicit-any - const data = await apiFetch( + const data = await apiFetch( `/api/evaluations/${jobId}?export_format=${exportFormat}`, apiKey, ); @@ -77,10 +85,12 @@ export default function EvaluationReport() { return; } - const foundJob = data.data || data; + const foundJob: EvalJob | undefined = + data.data ?? (data as unknown as EvalJob); if (!foundJob) throw new Error("Evaluation job not found"); setJob(foundJob); + hasLoadedRef.current = true; if (foundJob.assistant_id) { fetchAssistantConfig(foundJob.assistant_id); @@ -89,11 +99,16 @@ export default function EvaluationReport() { fetchConfigInfo(foundJob.config_id, foundJob.config_version); } } catch (err: unknown) { - setError( - err instanceof Error ? err.message : "Failed to fetch evaluation job", - ); + const message = + err instanceof Error ? 
err.message : "Failed to fetch evaluation job"; + if (isFirstLoad) { + setError(message); + } else { + toast.error(message); + } } finally { setIsLoading(false); + setIsFormatSwitching(false); } }, [apiKey, isAuthenticated, jobId, exportFormat]); @@ -128,118 +143,6 @@ export default function EvaluationReport() { if (isAuthenticated && jobId) fetchJobDetails(); }, [isAuthenticated, jobId, fetchJobDetails]); - const exportGroupedCSV = (traces: GroupedTraceItem[]) => { - if (!job) return; - try { - const maxAnswers = Math.max(...traces.map((g) => g.llm_answers.length)); - const scoreNames = traces[0]?.scores[0]?.map((s) => s.name) || []; - let csvContent = "Question ID,Question,Ground Truth"; - for (let i = 1; i <= maxAnswers; i++) { - csvContent += `,LLM Answer ${i},Trace ID ${i}`; - scoreNames.forEach((name) => { - csvContent += `,${name} (${i}),${sanitizeCSVCell(`${name} (${i}) Comment`)}`; - }); - } - csvContent += "\n"; - traces.forEach((group) => { - const row: string[] = [ - String(group.question_id), - sanitizeCSVCell(group.question || ""), - sanitizeCSVCell(group.ground_truth_answer || ""), - ]; - for (let i = 0; i < maxAnswers; i++) { - row.push( - `"${(group.llm_answers[i] || "").replace(/"/g, '""').replace(/\n/g, " ")}"`, - ); - row.push(group.trace_ids[i] || ""); - scoreNames.forEach((name) => { - const score = group.scores[i]?.find((s) => s.name === name); - row.push(score ? String(score.value) : ""); - row.push( - score?.comment ? 
sanitizeCSVCell(score.comment, true) : "", - ); - }); - } - csvContent += row.join(",") + "\n"; - }); - const blob = new Blob([csvContent], { type: "text/csv;charset=utf-8;" }); - const url = URL.createObjectURL(blob); - const link = document.createElement("a"); - link.setAttribute("href", url); - link.setAttribute( - "download", - `evaluation_${job.id}_${job.run_name.replace(/[^a-z0-9]/gi, "_")}_grouped.csv`, - ); - document.body.appendChild(link); - link.click(); - document.body.removeChild(link); - URL.revokeObjectURL(url); - toast.success(`Grouped CSV exported with ${traces.length} questions`); - } catch (_error) { - toast.error("Failed to export grouped CSV"); - } - }; - - // Export row format CSV - const exportRowCSV = () => { - if (!job || !scoreObject) return; - try { - const individual_scores = normalizeToIndividualScores(scoreObject); - if (!individual_scores || individual_scores.length === 0) { - toast.error("No valid data available to export"); - return; - } - let csvContent = ""; - const firstItem = individual_scores[0]; - const scoreNames = firstItem?.trace_scores?.map((s) => s.name) || []; - csvContent += - "Counter,Trace ID,Job ID,Run Name,Dataset,Model,Status,Total Items,"; - csvContent += "Question,Answer,Ground Truth,"; - csvContent += - scoreNames.map((name) => `${name},${name} (comment)`).join(",") + "\n"; - let rowCount = 0; - individual_scores.forEach((item, index) => { - const row = [ - index + 1, - item.trace_id || "N/A", - job.id, - `"${job.run_name.replace(/"/g, '""')}"`, - `"${job.dataset_name.replace(/"/g, '""')}"`, - assistantConfig?.model || job.config?.model || "N/A", - job.status, - job.total_items, - `"${(item.input?.question || "").replace(/"/g, '""').replace(/\n/g, " ")}"`, - `"${(item.output?.answer || "").replace(/"/g, '""').replace(/\n/g, " ")}"`, - `"${(item.metadata?.ground_truth || "").replace(/"/g, '""').replace(/\n/g, " ")}"`, - ...scoreNames.flatMap((name) => { - const score = item.trace_scores?.find((s) => s.name === 
name); - return [ - score ? score.value : "N/A", - score?.comment ? sanitizeCSVCell(score.comment, true) : "", - ]; - }), - ].join(","); - csvContent += row + "\n"; - rowCount++; - }); - const blob = new Blob([csvContent], { type: "text/csv;charset=utf-8;" }); - const url = URL.createObjectURL(blob); - const link = document.createElement("a"); - link.setAttribute("href", url); - link.setAttribute( - "download", - `evaluation_${job.id}_${job.run_name.replace(/[^a-z0-9]/gi, "_")}.csv`, - ); - document.body.appendChild(link); - link.click(); - document.body.removeChild(link); - URL.revokeObjectURL(url); - toast.success(`CSV exported successfully with ${rowCount} rows`); - } catch (_error) { - toast.error("Failed to export CSV"); - } - }; - const handleExportCSV = () => { if (!job || !scoreObject) { toast.error("No valid data available to export"); @@ -256,14 +159,14 @@ export default function EvaluationReport() { return; } if (isGroupedFormat(traces)) { - exportGroupedCSV(traces); + const count = exportGroupedCSV(job, traces); + toast.success(`Grouped CSV exported with ${count} questions`); } else { - exportRowCSV(); + const count = exportRowCSV(job, scoreObject, assistantConfig); + toast.success(`CSV exported successfully with ${count} rows`); } - } catch (_error) { - toast.error( - "Failed to export CSV. Please check the console for details.", - ); + } catch (err: unknown) { + toast.error(err instanceof Error ? err.message : "Failed to export CSV"); } }; @@ -272,12 +175,12 @@ export default function EvaluationReport() { setIsResyncing(true); try { - // eslint-disable-next-line @typescript-eslint/no-explicit-any - const data = await apiFetch( + const data = await apiFetch( `/api/evaluations/${jobId}?get_trace_info=true&resync_score=true&export_format=${exportFormat}`, apiKey, ); - const foundJob = data.data || data; + const foundJob: EvalJob | undefined = + data.data ?? 
(data as unknown as EvalJob); if (!foundJob) throw new Error("Evaluation job not found"); const newScoreObject = getScoreObject(foundJob); @@ -301,12 +204,9 @@ export default function EvaluationReport() { } }; - if (isLoading) { + if (isLoading && !job) { return ( -
+
@@ -317,32 +217,23 @@ export default function EvaluationReport() { ); } - if (error || !job) { + if ((error && !job) || !job) { return ( -
+
-

+

{error || "Evaluation job not found"}

- +
@@ -360,68 +251,52 @@ export default function EvaluationReport() { job.status.toLowerCase() !== "completed" && job.status.toLowerCase() !== "failed"; + const segmentedClass = + "inline-flex items-center gap-1.5 px-3 py-1.5 rounded-md text-xs font-medium transition-all cursor-pointer border border-transparent text-text-primary hover:bg-black/4 hover:shadow-[0_0_0_1px_rgba(0,0,0,0.06)] data-[selected=true]:bg-bg-primary data-[selected=true]:border-border data-[selected=true]:shadow-[0_1px_2px_rgba(0,0,0,0.08)] data-[selected=true]:hover:bg-bg-primary data-[selected=true]:hover:shadow-[0_1px_2px_rgba(0,0,0,0.08)]"; + return ( -
+
- {/* Header */} -
+
- + {sidebarCollapsed && ( + + )}
-

+

{job.run_name}

- - + + {job.dataset_name}
-
-
+
+
- - +
-
+
{hasScore && isNewFormat ? ( -
- {summaryScores.some( - (s) => job.total_items && s.total_pairs < job.total_items, - ) && - isJobInProgress && ( -
- - Some traces are still being scored. Scores shown are - partial and may change - click{" "} - Resync to get - the latest. -
- )} -
-

- Metrics Overview -

- -
- {summaryScores.length > 0 ? ( -
- {summaryScores - .filter((s) => s.data_type === "NUMERIC") - .map((summary) => ( -
-
- {summary.name} -
-
- {summary.avg !== undefined - ? summary.avg.toFixed(3) - : "N/A"} -
-
- {summary.std !== undefined && - `±${summary.std.toFixed(3)} · `} - - {summary.total_pairs} - {job.total_items && - summary.total_pairs < job.total_items && - `/${job.total_items}`}{" "} - pairs - -
-
- ))} - {summaryScores - .filter((s) => s.data_type === "CATEGORICAL") - .map((summary) => ( -
-
- {summary.name} -
-
- {summary.distribution && - Object.entries(summary.distribution).map( - ([key, value]) => ( -
- - {key} - - - {value} - -
- ), - )} -
-
- - {summary.total_pairs} - {job.total_items && - summary.total_pairs < job.total_items && - `/${job.total_items}`}{" "} - pairs - -
-
- ))} -
- ) : ( -
-

- No summary scores available -

-
- )} -
+ ) : ( -
+

{job.error_message || "No results available yet"}

)} - {/* Detailed Results */} {hasScore && (
-

+

Detailed Results

- {isNewFormat && ( - + {isNewFormat && !isFormatSwitching && ( + ({normalizeToIndividualScores(scoreObject).length}{" "} items) )}
- + {isFormatSwitching ? ( + + ) : ( + + )}
)}
@@ -651,7 +375,6 @@ export default function EvaluationReport() {
- {/* Config Modal */} setIsConfigModalOpen(false)} @@ -659,44 +382,28 @@ export default function EvaluationReport() { assistantConfig={assistantConfig} /> - {/* No Traces Modal */} - {showNoTracesModal && ( -
setShowNoTracesModal(false)} - > -
e.stopPropagation()} + setShowNoTracesModal(false)} + title="No Langfuse Traces Available" + maxWidth="max-w-md" + maxHeight="max-h-fit" + > +
+

+ This evaluation does not have Langfuse traces. +

+
+
+ -
-
+ OK +
- )} +
); } diff --git a/app/(main)/evaluations/page.tsx b/app/(main)/evaluations/page.tsx index 13ca97c2..450eeff1 100644 --- a/app/(main)/evaluations/page.tsx +++ b/app/(main)/evaluations/page.tsx @@ -22,8 +22,7 @@ import { FeatureGateModal, LoginModal } from "@/app/components/auth"; import Loader from "@/app/components/Loader"; import DatasetsTab from "@/app/components/evaluations/DatasetsTab"; import EvaluationsTab from "@/app/components/evaluations/EvaluationsTab"; - -type Tab = "datasets" | "evaluations"; +import { Tab } from "@/app/lib/types/evaluation"; const leftPanelWidth = 450; @@ -43,14 +42,13 @@ function SimplifiedEvalContent() { const apiKey = activeKey?.key ?? ""; const [showLoginModal, setShowLoginModal] = useState(false); const [mounted, setMounted] = useState(false); - // Dataset creation state const [datasetName, setDatasetName] = useState(""); const [datasetDescription, setDatasetDescription] = useState(""); const [duplicationFactor, setDuplicationFactor] = useState("1"); const [uploadedFile, setUploadedFile] = useState(null); const [isUploading, setIsUploading] = useState(false); const [storedDatasets, setStoredDatasets] = useState([]); - const [isDatasetsLoading, setIsDatasetsLoading] = useState(false); + const [isDatasetsLoading, setIsDatasetsLoading] = useState(true); const [selectedDatasetId, setSelectedDatasetId] = useState(() => { return searchParams.get("dataset") || ""; }); @@ -90,6 +88,7 @@ function SimplifiedEvalContent() { useEffect(() => { if (isAuthenticated) loadStoredDatasets(); + else setIsDatasetsLoading(false); }, [isAuthenticated, loadStoredDatasets]); const handleFileSelect = (event: React.ChangeEvent) => { @@ -260,7 +259,6 @@ function SimplifiedEvalContent() { subtitle="Compare model response quality on your datasets across different configs" /> - {/* Tab Navigation */} setActiveTab(tabId as Tab)} /> - {/* Tab Content */} {!mounted || !isAuthenticated ? 
( <> (null); const [isCreating, setIsCreating] = useState(false); const [datasets, setDatasets] = useState([]); - const [isLoadingDatasets, setIsLoadingDatasets] = useState(false); + const [isLoadingDatasets, setIsLoadingDatasets] = useState(true); const [evaluationName, setEvaluationName] = useState(""); const [selectedDatasetId, setSelectedDatasetId] = useState( null, @@ -60,7 +60,7 @@ export default function SpeechToTextPage() { const [selectedModel, setSelectedModel] = useState("gemini-2.5-pro"); const [isRunning, setIsRunning] = useState(false); const [runs, setRuns] = useState([]); - const [isLoadingRuns, setIsLoadingRuns] = useState(false); + const [isLoadingRuns, setIsLoadingRuns] = useState(true); const [selectedRunId, setSelectedRunId] = useState(null); const [results, setResults] = useState([]); const [isLoadingResults, setIsLoadingResults] = useState(false); diff --git a/app/(main)/text-to-speech/page.tsx b/app/(main)/text-to-speech/page.tsx index bfdb9f68..d3645b8b 100644 --- a/app/(main)/text-to-speech/page.tsx +++ b/app/(main)/text-to-speech/page.tsx @@ -53,7 +53,7 @@ export default function TextToSpeechPage() { const [textSamples, setTextSamples] = useState([]); const [isCreating, setIsCreating] = useState(false); const [datasets, setDatasets] = useState([]); - const [isLoadingDatasets, setIsLoadingDatasets] = useState(false); + const [isLoadingDatasets, setIsLoadingDatasets] = useState(true); const [evaluationName, setEvaluationName] = useState(""); const [selectedDatasetId, setSelectedDatasetId] = useState( null, @@ -63,7 +63,7 @@ export default function TextToSpeechPage() { ); const [isRunning, setIsRunning] = useState(false); const [runs, setRuns] = useState([]); - const [isLoadingRuns, setIsLoadingRuns] = useState(false); + const [isLoadingRuns, setIsLoadingRuns] = useState(true); const [selectedRunId, setSelectedRunId] = useState(null); const [results, setResults] = useState([]); const [isLoadingResults, setIsLoadingResults] = useState(false); 
diff --git a/app/api/_routeProxy.ts b/app/api/_routeProxy.ts new file mode 100644 index 00000000..f9ca0d63 --- /dev/null +++ b/app/api/_routeProxy.ts @@ -0,0 +1,93 @@ +import "server-only"; + +import { NextResponse } from "next/server"; +import { apiClient } from "@/app/lib/apiClient"; + +const DOWNLOAD_CONTENT_TYPE_HINTS = [ + "text/csv", + "spreadsheetml", + "octet-stream", + "application/zip", +]; + +function isDownloadContentType(contentType: string): boolean { + return DOWNLOAD_CONTENT_TYPE_HINTS.some((hint) => contentType.includes(hint)); +} + +async function safeParseJson( + response: Response, +): Promise | unknown[] | null> { + const text = response.status === 204 ? "" : await response.text(); + if (!text) return null; + + try { + return JSON.parse(text) as Record | unknown[]; + } catch { + return null; + } +} + +async function toDownloadResponse( + response: Response, +): Promise { + const contentType = response.headers.get("content-type") || ""; + if (!isDownloadContentType(contentType)) { + return null; + } + + const blob = await response.blob(); + const headers = new Headers(); + headers.set("Content-Type", contentType); + + const disposition = response.headers.get("content-disposition"); + if (disposition) { + headers.set("Content-Disposition", disposition); + } + + return new NextResponse(blob, { status: response.status, headers }); +} + +export function withQueryParams( + endpoint: string, + queryParams: URLSearchParams, +): string { + const query = queryParams.toString(); + return query ? 
`${endpoint}?${query}` : endpoint; +} + +export async function proxyJsonResponse( + request: Request, + endpoint: string, + init: RequestInit = {}, +): Promise { + const { status, data } = await apiClient(request, endpoint, init); + return NextResponse.json(data, { status }); +} + +export async function proxyDownloadOrJsonResponse( + request: Request, + endpoint: string, + init: RequestInit = {}, +): Promise { + const response = await apiClient(request, endpoint, { + ...init, + responseType: "raw", + }); + + const downloadResponse = await toDownloadResponse(response); + if (downloadResponse) { + return downloadResponse; + } + + const data = await safeParseJson(response); + return NextResponse.json(data, { status: response.status }); +} + +export function proxyErrorResponse( + logLabel: string, + error: unknown, + message = "Failed to forward request to backend", +): NextResponse { + console.error(logLabel, error); + return NextResponse.json({ error: message }, { status: 500 }); +} diff --git a/app/api/assessment/assessments/[assessment_id]/results/route.ts b/app/api/assessment/assessments/[assessment_id]/results/route.ts new file mode 100644 index 00000000..5e8c6b3d --- /dev/null +++ b/app/api/assessment/assessments/[assessment_id]/results/route.ts @@ -0,0 +1,32 @@ +// BFF proxy β€” GET /api/v1/assessment/assessments/:id/results (JSON or file download) +import { NextRequest } from "next/server"; +import { + proxyDownloadOrJsonResponse, + proxyErrorResponse, + withQueryParams, +} from "@/app/api/_routeProxy"; + +export async function GET( + request: NextRequest, + { params }: { params: Promise<{ assessment_id: string }> }, +) { + try { + const { assessment_id } = await params; + const queryParams = new URLSearchParams(request.nextUrl.searchParams); + queryParams.set("get_trace_info", "true"); + return await proxyDownloadOrJsonResponse( + request, + withQueryParams( + `/api/v1/assessment/assessments/${assessment_id}/results`, + queryParams, + ), + { method: "GET" }, 
+ ); + } catch (error: unknown) { + return proxyErrorResponse( + "Assessment results proxy error:", + error, + "Failed to forward request", + ); + } +} diff --git a/app/api/assessment/assessments/[assessment_id]/retry/route.ts b/app/api/assessment/assessments/[assessment_id]/retry/route.ts new file mode 100644 index 00000000..522aa6ec --- /dev/null +++ b/app/api/assessment/assessments/[assessment_id]/retry/route.ts @@ -0,0 +1,28 @@ +// BFF proxy β€” POST /api/v1/assessment/assessments/:id/retry +import { NextRequest, NextResponse } from "next/server"; +import { apiClient } from "@/app/lib/apiClient"; +import type { RouteContext } from "@/app/lib/types/assessment"; + +export async function POST( + request: NextRequest, + context: RouteContext<"assessment_id">, +) { + try { + const { assessment_id } = await context.params; + const { status, data } = await apiClient( + request, + `/api/v1/assessment/assessments/${assessment_id}/retry`, + { method: "POST" }, + ); + + return NextResponse.json(data, { status }); + } catch (error: unknown) { + console.error("Assessment retry proxy error:", error); + return NextResponse.json( + { + error: "Failed to forward assessment retry request", + }, + { status: 500 }, + ); + } +} diff --git a/app/api/assessment/assessments/route.ts b/app/api/assessment/assessments/route.ts new file mode 100644 index 00000000..0cf133fd --- /dev/null +++ b/app/api/assessment/assessments/route.ts @@ -0,0 +1,23 @@ +// BFF proxy β€” GET /api/v1/assessment/assessments +import { NextRequest } from "next/server"; +import { + proxyErrorResponse, + proxyJsonResponse, + withQueryParams, +} from "@/app/api/_routeProxy"; + +export async function GET(request: NextRequest) { + try { + const queryParams = new URLSearchParams(request.nextUrl.searchParams); + queryParams.set("get_trace_info", "true"); + return await proxyJsonResponse( + request, + withQueryParams("/api/v1/assessment/assessments", queryParams), + { + method: "GET", + }, + ); + } catch (error: unknown) 
{ + return proxyErrorResponse("Assessment list proxy error:", error); + } +} diff --git a/app/api/assessment/datasets/[dataset_id]/route.ts b/app/api/assessment/datasets/[dataset_id]/route.ts new file mode 100644 index 00000000..7296408f --- /dev/null +++ b/app/api/assessment/datasets/[dataset_id]/route.ts @@ -0,0 +1,142 @@ +// BFF proxy β€” GET (with optional S3 file fetch, max 10 MB) + DELETE /api/v1/assessment/datasets/:id +import { NextRequest, NextResponse } from "next/server"; +import { apiClient } from "@/app/lib/apiClient"; +import { proxyErrorResponse, withQueryParams } from "@/app/api/_routeProxy"; + +const MAX_DATASET_PROXY_BYTES = 10 * 1024 * 1024; + +async function readFileAsBase64WithLimit(response: Response): Promise { + const contentLength = response.headers.get("content-length"); + if (contentLength) { + const size = Number.parseInt(contentLength, 10); + if (Number.isFinite(size) && size > MAX_DATASET_PROXY_BYTES) { + throw new Error("FILE_TOO_LARGE"); + } + } + + const reader = response.body?.getReader(); + if (!reader) { + throw new Error("FILE_STREAM_UNAVAILABLE"); + } + + const chunks: Uint8Array[] = []; + let totalBytes = 0; + + while (true) { + const { done, value } = await reader.read(); + if (done) break; + if (!value) continue; + + totalBytes += value.byteLength; + if (totalBytes > MAX_DATASET_PROXY_BYTES) { + throw new Error("FILE_TOO_LARGE"); + } + chunks.push(value); + } + + return Buffer.concat(chunks).toString("base64"); +} + +export async function GET( + request: NextRequest, + { params }: { params: Promise<{ dataset_id: string }> }, +) { + try { + const { dataset_id } = await params; + const fetchContent = + request.nextUrl.searchParams.get("fetch_content") === "true"; + + // Always request signed URL when fetch_content is needed + const backendParams = new URLSearchParams(); + if (fetchContent) { + backendParams.set("fetch_content", "true"); + } + if (fetchContent) { + backendParams.set("include_signed_url", "true"); + } + const 
endpoint = withQueryParams( + `/api/v1/assessment/datasets/${dataset_id}`, + backendParams, + ); + + const { status, data } = await apiClient(request, endpoint, { + method: "GET", + }); + + if (status >= 400) { + return NextResponse.json(data, { status }); + } + + // Download file from S3 server-side and return as base64 + if (fetchContent) { + const signedUrl = + (data as { data?: { signed_url?: string }; signed_url?: string })?.data + ?.signed_url || + (data as { data?: { signed_url?: string }; signed_url?: string }) + ?.signed_url; + + if (!signedUrl) { + return NextResponse.json( + { error: "No signed URL available" }, + { status: 404 }, + ); + } + + const fileResponse = await fetch(signedUrl); + if (!fileResponse.ok) { + return NextResponse.json( + { error: "Failed to fetch file from storage" }, + { status: 502 }, + ); + } + + let base64: string; + try { + base64 = await readFileAsBase64WithLimit(fileResponse); + } catch (error) { + if (error instanceof Error && error.message === "FILE_TOO_LARGE") { + return NextResponse.json( + { error: "File too large" }, + { status: 413 }, + ); + } + + return NextResponse.json( + { error: "Failed to read file from storage" }, + { status: 502 }, + ); + } + + return NextResponse.json( + { ...(data as Record), file_content: base64 }, + { status: 200 }, + ); + } + + return NextResponse.json(data, { status }); + } catch (error: unknown) { + return proxyErrorResponse("Assessment dataset details proxy error:", error); + } +} + +export async function DELETE( + request: NextRequest, + { params }: { params: Promise<{ dataset_id: string }> }, +) { + try { + const { dataset_id } = await params; + const { status, data } = await apiClient( + request, + `/api/v1/assessment/datasets/${dataset_id}`, + { method: "DELETE" }, + ); + + if (status === 204) { + return new NextResponse(null, { status }); + } + + return NextResponse.json(data, { status }); + } catch (error: unknown) { + return proxyErrorResponse("Assessment dataset delete proxy 
error:", error); + } +} diff --git a/app/api/assessment/datasets/route.ts b/app/api/assessment/datasets/route.ts new file mode 100644 index 00000000..a4fad6ca --- /dev/null +++ b/app/api/assessment/datasets/route.ts @@ -0,0 +1,25 @@ +// BFF proxy β€” GET + POST /api/v1/assessment/datasets +import { NextRequest } from "next/server"; +import { proxyErrorResponse, proxyJsonResponse } from "@/app/api/_routeProxy"; + +export async function GET(request: NextRequest) { + try { + return await proxyJsonResponse(request, "/api/v1/assessment/datasets", { + method: "GET", + }); + } catch (error: unknown) { + return proxyErrorResponse("Assessment datasets list proxy error:", error); + } +} + +export async function POST(request: NextRequest) { + try { + const formData = await request.formData(); + return await proxyJsonResponse(request, "/api/v1/assessment/datasets", { + method: "POST", + body: formData, + }); + } catch (error: unknown) { + return proxyErrorResponse("Assessment datasets create proxy error:", error); + } +} diff --git a/app/api/assessment/runs/[run_id]/results/route.ts b/app/api/assessment/runs/[run_id]/results/route.ts new file mode 100644 index 00000000..f197b780 --- /dev/null +++ b/app/api/assessment/runs/[run_id]/results/route.ts @@ -0,0 +1,31 @@ +// BFF proxy β€” GET /api/v1/assessment/runs/:id/results (JSON or file download) +import { NextRequest } from "next/server"; +import { + proxyDownloadOrJsonResponse, + proxyErrorResponse, + withQueryParams, +} from "@/app/api/_routeProxy"; + +export async function GET( + request: NextRequest, + { params }: { params: Promise<{ run_id: string }> }, +) { + try { + const { run_id } = await params; + const queryParams = new URLSearchParams(request.nextUrl.searchParams); + queryParams.set("get_trace_info", "true"); + const endpoint = withQueryParams( + `/api/v1/assessment/runs/${run_id}/results`, + queryParams, + ); + return await proxyDownloadOrJsonResponse(request, endpoint, { + method: "GET", + }); + } catch (error: 
unknown) { + return proxyErrorResponse( + "Assessment run results proxy error:", + error, + "Failed to forward request", + ); + } +} diff --git a/app/api/assessment/runs/[run_id]/retry/route.ts b/app/api/assessment/runs/[run_id]/retry/route.ts new file mode 100644 index 00000000..c8cd55ba --- /dev/null +++ b/app/api/assessment/runs/[run_id]/retry/route.ts @@ -0,0 +1,28 @@ +// BFF proxy β€” POST /api/v1/assessment/runs/:id/retry +import { NextRequest, NextResponse } from "next/server"; +import { apiClient } from "@/app/lib/apiClient"; +import type { RouteContext } from "@/app/lib/types/assessment"; + +export async function POST( + request: NextRequest, + context: RouteContext<"run_id">, +) { + try { + const { run_id } = await context.params; + const { status, data } = await apiClient( + request, + `/api/v1/assessment/runs/${run_id}/retry`, + { method: "POST" }, + ); + + return NextResponse.json(data, { status }); + } catch (error: unknown) { + console.error("Assessment run retry proxy error:", error); + return NextResponse.json( + { + error: "Failed to forward assessment run retry request", + }, + { status: 500 }, + ); + } +} diff --git a/app/api/assessment/runs/[run_id]/route.ts b/app/api/assessment/runs/[run_id]/route.ts new file mode 100644 index 00000000..9ac0107f --- /dev/null +++ b/app/api/assessment/runs/[run_id]/route.ts @@ -0,0 +1,21 @@ +// BFF proxy β€” GET /api/v1/assessment/runs/:id +import { NextRequest } from "next/server"; +import { proxyErrorResponse, proxyJsonResponse } from "@/app/api/_routeProxy"; + +export async function GET( + request: NextRequest, + { params }: { params: Promise<{ run_id: string }> }, +) { + try { + const { run_id } = await params; + return await proxyJsonResponse( + request, + `/api/v1/assessment/runs/${run_id}`, + { + method: "GET", + }, + ); + } catch (error: unknown) { + return proxyErrorResponse("Assessment run proxy error:", error); + } +} diff --git a/app/api/assessment/runs/route.ts b/app/api/assessment/runs/route.ts 
new file mode 100644 index 00000000..d9abe689 --- /dev/null +++ b/app/api/assessment/runs/route.ts @@ -0,0 +1,34 @@ +// BFF proxy β€” GET + POST /api/v1/assessment/runs +import { NextRequest } from "next/server"; +import { + proxyErrorResponse, + proxyJsonResponse, + withQueryParams, +} from "@/app/api/_routeProxy"; + +export async function GET(request: NextRequest) { + try { + const queryParams = new URLSearchParams(request.nextUrl.searchParams); + return await proxyJsonResponse( + request, + withQueryParams("/api/v1/assessment/runs", queryParams), + { + method: "GET", + }, + ); + } catch (error: unknown) { + return proxyErrorResponse("Assessment runs list proxy error:", error); + } +} + +export async function POST(request: NextRequest) { + try { + const body = await request.json(); + return await proxyJsonResponse(request, "/api/v1/assessment/runs", { + method: "POST", + body: JSON.stringify(body), + }); + } catch (error: unknown) { + return proxyErrorResponse("Assessment runs create proxy error:", error); + } +} diff --git a/app/api/auth/logout/route.ts b/app/api/auth/logout/route.ts index 5111c38a..8461a4fe 100644 --- a/app/api/auth/logout/route.ts +++ b/app/api/auth/logout/route.ts @@ -1,6 +1,6 @@ import { NextRequest, NextResponse } from "next/server"; import { apiClient } from "@/app/lib/apiClient"; -import { clearRoleCookie } from "@/app/lib/authCookie"; +import { clearFeaturesCookie, clearRoleCookie } from "@/app/lib/authCookie"; export async function POST(request: NextRequest) { const { status, data, headers } = await apiClient( @@ -17,6 +17,7 @@ export async function POST(request: NextRequest) { } clearRoleCookie(res); + clearFeaturesCookie(res); return res; } diff --git a/app/api/configs/[config_id]/route.ts b/app/api/configs/[config_id]/route.ts index 0a5c60cc..b90f9641 100644 --- a/app/api/configs/[config_id]/route.ts +++ b/app/api/configs/[config_id]/route.ts @@ -1,4 +1,5 @@ import { NextResponse } from "next/server"; +import { withQueryParams } from 
"@/app/api/_routeProxy"; import { apiClient } from "@/app/lib/apiClient"; export async function GET( @@ -8,9 +9,10 @@ export async function GET( const { config_id } = await params; try { + const { searchParams } = new URL(request.url); const { status, data } = await apiClient( request, - `/api/v1/configs/${config_id}`, + withQueryParams(`/api/v1/configs/${config_id}`, searchParams), ); return NextResponse.json(data, { status }); } catch (_error) { diff --git a/app/api/configs/[config_id]/versions/[version_number]/route.ts b/app/api/configs/[config_id]/versions/[version_number]/route.ts index 4d89bd0e..d879abde 100644 --- a/app/api/configs/[config_id]/versions/[version_number]/route.ts +++ b/app/api/configs/[config_id]/versions/[version_number]/route.ts @@ -1,4 +1,5 @@ import { NextResponse } from "next/server"; +import { withQueryParams } from "@/app/api/_routeProxy"; import { apiClient } from "@/app/lib/apiClient"; export async function GET( @@ -10,9 +11,13 @@ export async function GET( const { config_id, version_number } = await params; try { + const { searchParams } = new URL(request.url); const { status, data } = await apiClient( request, - `/api/v1/configs/${config_id}/versions/${version_number}`, + withQueryParams( + `/api/v1/configs/${config_id}/versions/${version_number}`, + searchParams, + ), ); return NextResponse.json(data, { status }); } catch (_error) { diff --git a/app/api/configs/[config_id]/versions/route.ts b/app/api/configs/[config_id]/versions/route.ts index 9ac697d7..a01a2249 100644 --- a/app/api/configs/[config_id]/versions/route.ts +++ b/app/api/configs/[config_id]/versions/route.ts @@ -1,4 +1,5 @@ import { NextResponse } from "next/server"; +import { withQueryParams } from "@/app/api/_routeProxy"; import { apiClient } from "@/app/lib/apiClient"; export async function GET( @@ -8,10 +9,13 @@ export async function GET( const { config_id } = await params; try { - const { status, data } = await apiClient( - request, + const { searchParams } = new 
URL(request.url); + const endpoint = withQueryParams( `/api/v1/configs/${config_id}/versions`, + searchParams, ); + const { status, data } = await apiClient(request, endpoint); + return NextResponse.json(data, { status }); } catch (_error) { return NextResponse.json( @@ -29,7 +33,6 @@ export async function POST( try { const body = await request.json(); - const { status, data } = await apiClient( request, `/api/v1/configs/${config_id}/versions`, diff --git a/app/api/evaluations/datasets/[dataset_id]/route.ts b/app/api/evaluations/datasets/[dataset_id]/route.ts index 00561cca..f12ed347 100644 --- a/app/api/evaluations/datasets/[dataset_id]/route.ts +++ b/app/api/evaluations/datasets/[dataset_id]/route.ts @@ -1,6 +1,11 @@ import { NextRequest, NextResponse } from "next/server"; import { apiClient } from "@/app/lib/apiClient"; +type DatasetDetailsPayload = Record & { + data?: { signed_url?: string } | null; + signed_url?: string; +}; + /** * GET /api/evaluations/datasets/:dataset_id * @@ -15,7 +20,7 @@ export async function GET( const searchParams = request.nextUrl.searchParams.toString(); const queryString = searchParams ? `?${searchParams}` : ""; - const { status, data } = await apiClient( + const { status, data } = await apiClient( request, `/api/v1/evaluations/datasets/${dataset_id}${queryString}`, ); @@ -43,7 +48,7 @@ export async function GET( } const csvText = await csvResponse.text(); return NextResponse.json( - { ...data, csv_content: csvText }, + { ...(data ?? 
{}), csv_content: csvText }, { status: 200 }, ); } diff --git a/app/api/evaluations/tts/datasets/[dataset_id]/route.ts b/app/api/evaluations/tts/datasets/[dataset_id]/route.ts index 05e70d66..1744ed0f 100644 --- a/app/api/evaluations/tts/datasets/[dataset_id]/route.ts +++ b/app/api/evaluations/tts/datasets/[dataset_id]/route.ts @@ -1,6 +1,11 @@ import { apiClient } from "@/app/lib/apiClient"; import { NextResponse } from "next/server"; +type DatasetDetailsPayload = Record & { + data?: { signed_url?: string } | null; + signed_url?: string; +}; + export async function GET( request: Request, { params }: { params: Promise<{ dataset_id: string }> }, @@ -18,7 +23,7 @@ export async function GET( ? `?${backendParams.toString()}` : ""; - const { data, status } = await apiClient( + const { data, status } = await apiClient( request, `/api/v1/evaluations/tts/datasets/${dataset_id}${queryString}`, ); @@ -42,7 +47,7 @@ export async function GET( } const csvText = await csvResponse.text(); return NextResponse.json( - { ...data, csv_content: csvText }, + { ...(data ?? 
{}), csv_content: csvText }, { status: 200 }, ); } diff --git a/app/api/users/me/route.ts b/app/api/users/me/route.ts index 6b9fe0f2..32686d75 100644 --- a/app/api/users/me/route.ts +++ b/app/api/users/me/route.ts @@ -1,6 +1,9 @@ import { NextRequest, NextResponse } from "next/server"; import { apiClient } from "@/app/lib/apiClient"; -import { setRoleCookieFromBody } from "@/app/lib/authCookie"; +import { + setFeaturesCookieFromBody, + setRoleCookieFromBody, +} from "@/app/lib/authCookie"; export async function GET(request: NextRequest) { try { @@ -9,6 +12,7 @@ export async function GET(request: NextRequest) { if (status >= 200 && status < 300) { setRoleCookieFromBody(res, data); + setFeaturesCookieFromBody(res, data); } return res; diff --git a/app/components/Button.tsx b/app/components/Button.tsx index dc8238e4..0d978781 100644 --- a/app/components/Button.tsx +++ b/app/components/Button.tsx @@ -56,7 +56,7 @@ export default function Button({ return ( + {configVersionInfo?.provider && ( +
+

+ {configVersionInfo.provider} +

+ )} -
- {isLoadingConfig ? ( -
-
-

- Loading configuration... -

-
- ) : ( - <> - {assistantConfig?.name && ( - -
- {assistantConfig.name} -
-
- )} +
+ {isLoadingConfig ? ( +
+
+

+ Loading configuration... +

+
+ ) : ( + <> + {assistantConfig?.name && ( + +
+ {assistantConfig.name} +
+
+ )} - {job.assistant_id && ( - -
- {job.assistant_id} -
-
- )} + {job.assistant_id && ( + +
+ {job.assistant_id} +
+
+ )} - + + + {configVersionInfo?.model || + assistantConfig?.model || + job.config?.model || + "N/A"} + + + + {(configVersionInfo?.temperature !== undefined || + assistantConfig?.temperature !== undefined || + job.config?.temperature !== undefined) && ( + - {configVersionInfo?.model || - assistantConfig?.model || - job.config?.model || - "N/A"} + {configVersionInfo?.temperature !== undefined + ? configVersionInfo.temperature + : assistantConfig?.temperature !== undefined + ? assistantConfig.temperature + : job.config?.temperature} + )} - {(configVersionInfo?.temperature !== undefined || - assistantConfig?.temperature !== undefined || - job.config?.temperature !== undefined) && ( - - - {configVersionInfo?.temperature !== undefined - ? configVersionInfo.temperature - : assistantConfig?.temperature !== undefined - ? assistantConfig.temperature - : job.config?.temperature} - - - )} - - {configVersionInfo?.knowledge_base_ids && - configVersionInfo.knowledge_base_ids.length > 0 && ( - - - {configVersionInfo.knowledge_base_ids.join("\n")} - - - )} - - {(configVersionInfo?.instructions || - assistantConfig?.instructions || - job.config?.instructions) && ( - + {configVersionInfo?.knowledge_base_ids && + configVersionInfo.knowledge_base_ids.length > 0 && ( + - {configVersionInfo?.instructions || - assistantConfig?.instructions || - job.config?.instructions} + {configVersionInfo.knowledge_base_ids.join("\n")} )} - {Array.isArray(configVersionInfo?.tools) && - configVersionInfo.tools.length > 0 && ( - -
-
- {configVersionInfo.tools.map((tool, idx) => ( - {tool.type} - ))} -
+ {(configVersionInfo?.instructions || + assistantConfig?.instructions || + job.config?.instructions) && ( + + + {configVersionInfo?.instructions || + assistantConfig?.instructions || + job.config?.instructions} + + + )} + + {Array.isArray(configVersionInfo?.tools) && + configVersionInfo.tools.length > 0 && ( + +
+
{configVersionInfo.tools.map((tool, idx) => ( - - {Array.isArray(tool.knowledge_base_ids) && - tool.knowledge_base_ids.length > 0 && ( -
-
- Knowledge Base IDs ({tool.type}) -
- - {tool.knowledge_base_ids.join("\n")} - -
- )} - {tool.max_num_results !== undefined && ( + {tool.type} + ))} +
+ {configVersionInfo.tools.map((tool, idx) => ( + + {Array.isArray(tool.knowledge_base_ids) && + tool.knowledge_base_ids.length > 0 && (
-
- Max Results ({tool.type}) -
-
- {String(tool.max_num_results)} +
+ Knowledge Base IDs ({tool.type})
+ + {tool.knowledge_base_ids.join("\n")} +
)} - - ))} -
- - )} + {tool.max_num_results !== undefined && ( +
+
+ Max Results ({tool.type}) +
+
+ {String(tool.max_num_results)} +
+
+ )} +
+ ))} +
+
+ )} - {Array.isArray(job.config?.tools) && - job.config.tools.length > 0 && - !configVersionInfo?.tools?.length && ( - -
-
- {/* eslint-disable-next-line @typescript-eslint/no-explicit-any */} - {job.config.tools.map((tool: any, idx) => ( - {tool.type} - ))} -
+ {Array.isArray(job.config?.tools) && + job.config.tools.length > 0 && + !configVersionInfo?.tools?.length && ( + +
+
{/* eslint-disable-next-line @typescript-eslint/no-explicit-any */} - {job.config.tools.map((tool: any, idx: number) => ( - - {Array.isArray(tool.knowledge_base_ids) && - tool.knowledge_base_ids.length > 0 && ( -
-
- Knowledge Base IDs ({tool.type}) -
- - {tool.knowledge_base_ids.join("\n")} - -
- )} - {tool.max_num_results !== undefined && ( + {job.config.tools.map((tool: any, idx) => ( + {tool.type} + ))} +
+ {/* eslint-disable-next-line @typescript-eslint/no-explicit-any */} + {job.config.tools.map((tool: any, idx: number) => ( + + {Array.isArray(tool.knowledge_base_ids) && + tool.knowledge_base_ids.length > 0 && (
-
- Max Results ({tool.type}) -
-
- {String(tool.max_num_results)} +
+ Knowledge Base IDs ({tool.type})
+ + {tool.knowledge_base_ids.join("\n")} +
)} - - ))} -
- - )} + {tool.max_num_results !== undefined && ( +
+
+ Max Results ({tool.type}) +
+
+ {String(tool.max_num_results)} +
+
+ )} +
+ ))} +
+
+ )} - {Array.isArray(assistantConfig?.knowledge_base_ids) && - assistantConfig.knowledge_base_ids.length > 0 && ( - - - {assistantConfig.knowledge_base_ids.join("\n")} - - - )} + {Array.isArray(assistantConfig?.knowledge_base_ids) && + assistantConfig.knowledge_base_ids.length > 0 && ( + + + {assistantConfig.knowledge_base_ids.join("\n")} + + + )} - {Array.isArray(job.config?.include) && - job.config.include.length > 0 && ( - -
- {job.config.include.map((item, idx) => ( - {item} - ))} -
-
- )} - - )} -
+ {Array.isArray(job.config?.include) && + job.config.include.length > 0 && ( + +
+ {job.config.include.map((item, idx) => ( + {item} + ))} +
+
+ )} + + )}
-
+ ); } diff --git a/app/components/DatasetListSkeleton.tsx b/app/components/DatasetListSkeleton.tsx new file mode 100644 index 00000000..704a7ef1 --- /dev/null +++ b/app/components/DatasetListSkeleton.tsx @@ -0,0 +1,35 @@ +interface DatasetListSkeletonProps { + count?: number; +} + +export default function DatasetListSkeleton({ + count = 3, +}: DatasetListSkeletonProps) { + return ( +
+ {Array.from({ length: count }).map((_, i) => ( +
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ))} +
+ ); +} diff --git a/app/components/ErrorModal.tsx b/app/components/ErrorModal.tsx index 8d71be7d..f4a1ef48 100644 --- a/app/components/ErrorModal.tsx +++ b/app/components/ErrorModal.tsx @@ -1,6 +1,7 @@ "use client"; -import React from "react"; +import { Button, Modal } from "@/app/components"; +import { WarningIcon } from "@/app/components/icons"; interface ErrorModalProps { isOpen: boolean; @@ -15,73 +16,32 @@ export default function ErrorModal({ title = "Error", message, }: ErrorModalProps) { - if (!isOpen) return null; - return ( -
- {/* Backdrop */} -
- - {/* Modal */} -
- {/* Header */} -
-
-
- - - -
-

{title}

+ +
+
+
+ +
+
+

{title}

+

+ {message} +

- -
- - {/* Content */} -
-

{message}

-
- - {/* Footer */} -
-
-
+
+ +
+ ); } diff --git a/app/components/ResultsTableSkeleton.tsx b/app/components/ResultsTableSkeleton.tsx new file mode 100644 index 00000000..d101630a --- /dev/null +++ b/app/components/ResultsTableSkeleton.tsx @@ -0,0 +1,25 @@ +interface ResultsTableSkeletonProps { + rows?: number; + cols?: number; +} + +export default function ResultsTableSkeleton({ + rows = 5, + cols = 5, +}: ResultsTableSkeletonProps) { + return ( +
+ {Array.from({ length: rows }).map((_, r) => ( +
+ {Array.from({ length: cols }).map((_, c) => ( +
+ ))} +
+ ))} +
+ ); +} diff --git a/app/components/RunsListSkeleton.tsx b/app/components/RunsListSkeleton.tsx new file mode 100644 index 00000000..010c585b --- /dev/null +++ b/app/components/RunsListSkeleton.tsx @@ -0,0 +1,32 @@ +interface RunsListSkeletonProps { + count?: number; +} +/** Skeleton list component: iterates over `count` indices (default 5) and renders one placeholder entry per index. NOTE(review): presumably shown while evaluation runs are loading; confirm at the call sites. */ +export default function RunsListSkeleton({ count = 5 }: RunsListSkeletonProps) { + return ( +
+ {Array.from({ length: count }).map((_, i) => ( +
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ))} +
+ ); +} diff --git a/app/components/Select.tsx b/app/components/Select.tsx index 861aa8be..86824a0d 100644 --- a/app/components/Select.tsx +++ b/app/components/Select.tsx @@ -19,7 +19,7 @@ export default function Select({ }: SelectProps) { return ( + updateAttachmentType( + index, + event.target.value as "image" | "pdf", + ) + } + options={[ + { value: "image", label: "Image" }, + { value: "pdf", label: "PDF" }, + ]} + className="w-full cursor-pointer rounded-lg border border-border bg-bg-primary px-3 py-2 text-sm text-text-primary outline-none focus:ring-1" + /> + + +
+
+ +
+ {isLoading && assessments.length === 0 && } + + {!isLoading && assessments.length === 0 && ( +
+ +

+ No evaluation runs yet +

+

+ Submit an assessment from the Config tab to get started +

+
+ )} + + {assessments.length > 0 && + (filteredRuns.length > 0 ? ( +
+ {filteredRuns.map((run) => { + const statusTone = getResultTone(run.status); + const statusClass = STATUS_BADGE_CLASSES[statusTone]; + const isExpanded = expandedId === run.id; + const childRuns = childRunsByAssessment[run.id] || []; + const canRetryAssessment = canRetryStatus(run.status); + const isRetryingAssessment = retryingAssessmentId === run.id; + const hasCompletedRuns = run.completed_runs > 0; + + return ( +
+
+
+
+
+ {run.experiment_name} +
+
+ {formatRelativeTime(run.inserted_at)} +
+ + {(run.status === "failed" || + run.status === "completed_with_errors") && + run.error_message && ( +
+ {run.error_message} +
+ )} +
+ + + {formatStatusLabel(run.status)} + +
+ +
+
+ {run.dataset_name && ( + + + {run.dataset_name} + + )} + + {run.total_runs} configs + + {run.completed_runs} completed + + {run.processing_runs + run.pending_runs} active + + {run.failed_runs > 0 && ( + + {run.failed_runs} failed + + )} +
+
+ {hasCompletedRuns && ( + + handleAssessmentDownload(run.id, fmt) + } + disabled={!hasCompletedRuns} + loading={ + downloadingId === `assessment-${run.id}` + } + /> + )} + {canRetryAssessment && ( + + )} + +
+
+ + {isExpanded && ( +
+
+
+
+ Configurations in this assessment +
+
+ Each configuration keeps its own status, + preview, and export actions. +
+
+
+ {childRuns.length} run + {childRuns.length !== 1 ? "s" : ""} +
+
+ + {childRuns.length === 0 ? ( +
+ +
+ ) : ( + childRuns.map((childRun) => { + const childStatusClass = + STATUS_BADGE_CLASSES[ + getResultTone(childRun.status) + ]; + const isFailedChild = isFailedStatus( + childRun.status, + ); + const isCompletedChild = isCompletedStatus( + childRun.status, + ); + const isRerunning = rerunningId === childRun.id; + const configKey = + childRun.config_id && childRun.config_version + ? `${childRun.config_id}:${childRun.config_version}` + : null; + const configDetail = configKey + ? configDetailsByKey[configKey] + : null; + const isConfigLoading = configKey + ? Boolean(configLoadingKeys[configKey]) + : false; + const configError = configKey + ? configErrorKeys[configKey] + : null; + const fallbackName = childRun.config_id + ? `Config ${childRun.config_id.slice(0, 8)}` + : "Configuration"; + const configName = + configDetail?.name || fallbackName; + const previewLabel = `${configName}${childRun.config_version ? ` v${childRun.config_version}` : ""}`; + + return ( +
+
+
+
+ + {configName} + + {childRun.config_version !== null && ( + + v{childRun.config_version} + + )} + {configDetail?.provider && + configDetail?.model && ( + + {configDetail.provider}/ + {configDetail.model} + + )} +
+ +
+ {isConfigLoading + ? "Loading configuration details..." + : configDetail?.description || + configDetail?.commitMessage || + "No description available for this configuration."} +
+ +
+ + {childRun.total_items} items + + {childRun.updated_at && ( + + {formatRelativeTime( + childRun.updated_at, + )} + + )} + {childRun.config_id && ( + + ID{" "} + {childRun.config_id.slice(0, 8)} + + )} +
+ + {configError && ( +
+ {configError} +
+ )} + {isFailedChild && + childRun.error_message && ( +
+ {childRun.error_message} +
+ )} +
+ +
+ + {formatStatusLabel(childRun.status)} + + {isCompletedChild && ( + + )} + {isCompletedChild && ( + + handleRunDownload( + childRun.id, + fmt, + ) + } + loading={ + downloadingId === + `run-${childRun.id}` + } + /> + )} + {isFailedChild && ( + + )} +
+
+
+ ); + }) + )} +
+ )} +
+
+ ); + })} +
+ ) : ( +
+

+ No {statusFilter} runs +

+

+ No evaluation runs with status "{statusFilter}" +

+
+ ))} +
+
+ + {previewModal && ( + setPreviewModal(null)} + /> + )} +
+ ); +} diff --git a/app/components/assessment/JsonEditor.tsx b/app/components/assessment/JsonEditor.tsx new file mode 100644 index 00000000..02422ef1 --- /dev/null +++ b/app/components/assessment/JsonEditor.tsx @@ -0,0 +1,162 @@ +"use client"; + +import { useRef, useCallback, useId } from "react"; +import { Button } from "@/app/components"; +import { JSON_EDITOR_FONT_CLASSES } from "@/app/lib/assessment/constants"; +import { highlightJson } from "@/app/lib/utils/assessment"; +import type { ValueSetter } from "@/app/lib/types/assessment"; + +interface JsonEditorProps { + value: string; + onChange: ValueSetter; + error?: string | null; + isValid?: boolean; + placeholder?: string; + minHeight?: number; +} + +export default function JsonEditor({ + value, + onChange, + error, + isValid, + placeholder, + minHeight = 400, +}: JsonEditorProps) { + const textareaRef = useRef(null); + const preRef = useRef(null); + const textareaId = useId(); + const errorId = `${textareaId}-error`; + + const syncScroll = useCallback(() => { + if (textareaRef.current && preRef.current) { + preRef.current.scrollTop = textareaRef.current.scrollTop; + preRef.current.scrollLeft = textareaRef.current.scrollLeft; + } + }, []); + + const handleKeyDown = (e: React.KeyboardEvent) => { + if (e.key === "Tab") { + e.preventDefault(); + const el = e.currentTarget; + const s = el.selectionStart; + const newVal = + value.substring(0, s) + " " + value.substring(el.selectionEnd); + onChange(newVal); + requestAnimationFrame(() => { + el.selectionStart = el.selectionEnd = s + 2; + }); + return; + } + const pairs: Record = { "{": "}", "[": "]" }; + if (pairs[e.key]) { + const el = e.currentTarget; + const s = el.selectionStart; + if (s === el.selectionEnd) { + e.preventDefault(); + const newVal = + value.substring(0, s) + e.key + pairs[e.key] + value.substring(s); + onChange(newVal); + requestAnimationFrame(() => { + el.selectionStart = el.selectionEnd = s + 1; + }); + } + } + }; + + const borderColor = error 
+ ? "border-status-error/40" + : isValid && value.trim() + ? "border-status-success/35" + : "border-border"; + const minHeightClass = minHeight === 420 ? "min-h-[420px]" : "min-h-[400px]"; + const statusClass = error + ? "bg-status-error-bg text-status-error-text" + : isValid + ? "bg-status-success-bg text-status-success-text" + : ""; + + return ( +
+ {/* Minimal top bar */} +
+
+ + JSON + + {value.trim() && ( + + {error ? "Invalid" : isValid ? "Valid" : ""} + + )} +
+
+ {error && ( + + {error} + + )} + {value.trim() && ( + + )} +
+
+ + {/* Editor */} +
+ {/* Placeholder */} + {!value && placeholder && ( +
+            {placeholder}
+          
+ )} + + {/* Highlighted layer */} +
+
+        {/* Editable layer */}
+