From 030e9c977cb7eb9bf0f87081971075f7c0b695fe Mon Sep 17 00:00:00 2001 From: Ayush8923 <80516839+Ayush8923@users.noreply.github.com> Date: Sun, 3 May 2026 20:03:28 +0530 Subject: [PATCH 01/13] feat(*): evaluation revamp phase:2 --- .../speech-to-text/EvaluationsTab.tsx | 1252 +---------------- .../speech-to-text/RunSTTEvaluationForm.tsx | 139 ++ .../speech-to-text/STTDatasetCard.tsx | 49 + .../speech-to-text/STTResultRow.tsx | 264 ++++ .../speech-to-text/STTResultsTable.tsx | 161 +++ app/components/speech-to-text/STTRunCard.tsx | 74 + app/components/speech-to-text/STTRunsList.tsx | 159 +++ .../speech-to-text/STTScoreInfoTooltip.tsx | 120 ++ .../speech-to-text/STTViewDatasetModal.tsx | 148 ++ .../text-to-speech/CreateTTSDatasetForm.tsx | 174 +++ app/components/text-to-speech/DatasetsTab.tsx | 675 ++------- .../text-to-speech/EvaluationsTab.tsx | 1164 +-------------- .../text-to-speech/RunTTSEvaluationForm.tsx | 144 ++ .../text-to-speech/TTSDatasetCard.tsx | 49 + .../text-to-speech/TTSResultRow.tsx | 222 +++ .../text-to-speech/TTSResultsTable.tsx | 172 +++ app/components/text-to-speech/TTSRunCard.tsx | 77 + app/components/text-to-speech/TTSRunsList.tsx | 160 +++ .../text-to-speech/TTSScoreInfoTooltip.tsx | 81 ++ .../text-to-speech/TTSViewDatasetModal.tsx | 91 ++ app/lib/types/dataset.ts | 7 + app/lib/types/nav.ts | 11 + app/lib/types/speechToText.ts | 6 + app/lib/types/textToSpeech.ts | 6 + 24 files changed, 2478 insertions(+), 2927 deletions(-) create mode 100644 app/components/speech-to-text/RunSTTEvaluationForm.tsx create mode 100644 app/components/speech-to-text/STTDatasetCard.tsx create mode 100644 app/components/speech-to-text/STTResultRow.tsx create mode 100644 app/components/speech-to-text/STTResultsTable.tsx create mode 100644 app/components/speech-to-text/STTRunCard.tsx create mode 100644 app/components/speech-to-text/STTRunsList.tsx create mode 100644 app/components/speech-to-text/STTScoreInfoTooltip.tsx create mode 100644 app/components/speech-to-text/STTViewDatasetModal.tsx create mode 100644 app/components/text-to-speech/CreateTTSDatasetForm.tsx create mode 100644 app/components/text-to-speech/RunTTSEvaluationForm.tsx create mode 100644 app/components/text-to-speech/TTSDatasetCard.tsx create mode 100644 app/components/text-to-speech/TTSResultRow.tsx create mode 100644 app/components/text-to-speech/TTSResultsTable.tsx create mode 100644 app/components/text-to-speech/TTSRunCard.tsx create mode 100644 app/components/text-to-speech/TTSRunsList.tsx create mode 100644 app/components/text-to-speech/TTSScoreInfoTooltip.tsx create mode 100644 app/components/text-to-speech/TTSViewDatasetModal.tsx diff --git a/app/components/speech-to-text/EvaluationsTab.tsx b/app/components/speech-to-text/EvaluationsTab.tsx index 119e955d..d0188d4a 100644 --- a/app/components/speech-to-text/EvaluationsTab.tsx +++ b/app/components/speech-to-text/EvaluationsTab.tsx @@ -1,17 +1,11 @@ "use client"; -import { useState, useEffect } from "react"; -import { colors } from "@/app/lib/colors"; import { Tab, Dataset, STTRun, STTResult } from "@/app/lib/types/speechToText"; import { APIKey } from "@/app/lib/types/credentials"; import { useAuth } from "@/app/lib/context/AuthContext"; import { apiFetch } from "@/app/lib/apiClient"; -import Loader, { LoaderBox } from "@/app/components/Loader"; -import StatusBadge from "@/app/components/StatusBadge"; -import { computeWordDiff } from "./TranscriptionDiffViewer"; -import { getStatusColor } from "@/app/components/utils"; -import AudioPlayerFromUrl from "@/app/components/speech-to-text/AudioPlayerFromUrl"; -import { RefreshIcon } from "@/app/components/icons"; +import STTRunsList from "./STTRunsList"; +import RunSTTEvaluationForm from "./RunSTTEvaluationForm"; export interface EvaluationsTabProps { leftPanelWidth: number; @@ -64,50 +58,12 @@ export default function EvaluationsTab({ loadResults, apiKeys, toast, - // eslint-disable-next-line @typescript-eslint/no-unused-vars - setActiveTab, }: EvaluationsTabProps) { const { isAuthenticated } = useAuth(); - const [statusFilter, setStatusFilter] = useState("all"); - const [expandedTranscriptions, setExpandedTranscriptions] = useState< - Set - >(new Set()); - const [openScoreInfo, setOpenScoreInfo] = useState(null); - const [scoreInfoPos, setScoreInfoPos] = useState({ top: 0, left: 0 }); - const [playingResultId, setPlayingResultId] = useState(null); - const [loadingRunId, setLoadingRunId] = useState(null); - - useEffect(() => { - setLoadingRunId(null); - }, [selectedRunId]); - - useEffect(() => { - if (!openScoreInfo) return; - const handleClick = () => setOpenScoreInfo(null); - const handleScroll = () => setOpenScoreInfo(null); - document.addEventListener("click", handleClick); - window.addEventListener("scroll", handleScroll, true); - return () => { - document.removeEventListener("click", handleClick); - window.removeEventListener("scroll", handleScroll, true); - }; - }, [openScoreInfo]); - - const toggleTranscription = (resultId: number) => { - setExpandedTranscriptions((prev) => { - const newSet = new Set(prev); - if (newSet.has(resultId)) { - newSet.delete(resultId); - } else { - newSet.add(resultId); - } - return newSet; - }); - }; const updateFeedback = async ( resultId: number, - isCorrect: boolean | null, + isCorrect: boolean | null | undefined, comment?: string, ) => { if (!isAuthenticated) return; @@ -126,7 +82,6 @@ export default function EvaluationsTab({ }, ); - // Update local state setResults((prev) => prev.map((r) => r.id === resultId @@ -146,1182 +101,41 @@ export default function EvaluationsTab({ return (
- {/* Left Panel - Evaluation Configuration */} + {/* Left Panel - Evaluation Runs List or Results */} + + + {/* Right Panel - Evaluation Configuration */} {selectedRunId === null && (
-
- {/* Page Title */} -
-

- Run New Evaluation -

-

- Compare transcription quality across STT models -

-
- - {/* Evaluation Name */} -
- - setEvaluationName(e.target.value)} - placeholder="e.g., English Podcast Evaluation v1" - className="w-full px-3 py-2 border rounded-md text-sm" - style={{ - backgroundColor: colors.bg.primary, - borderColor: colors.border, - color: colors.text.primary, - }} - /> -
- - {/* Model Selection */} -
- - -
- - {/* Dataset Selection */} -
- - {isLoadingDatasets ? ( - - ) : datasets.length === 0 ? ( -
-

- No datasets available -

-

- Create a dataset first in the Datasets tab -

-
- ) : ( - - )} -
- - {/* Selected Dataset Info */} - {selectedDataset && ( -
-
- - - -
-
- {selectedDataset.name} -
-
-
- {selectedDataset.dataset_metadata?.sample_count || 0}{" "} - samples -
-
-
-
-
- )} -
- - {/* Run Evaluation Button */} -
- -
+
)} - - {/* Right Panel - Evaluation Runs List or Results */} -
-
-
-
- {selectedRunId !== null ? ( -
- -

- {runs.find((r) => r.id === selectedRunId)?.run_name} -

-
- ) : ( -

- Evaluation Runs -

- )} -
- {selectedRunId === null && ( -
- - -
- )} -
- -
- {selectedRunId !== null ? ( - // Results View - isLoadingResults ? ( -
- -
- ) : results.length === 0 ? ( -
-

- No results found -

-

- This evaluation has no results yet -

-
- ) : ( - - - - - - - - - - - - {results.map((result) => ( - - - - - -
- Sample - -
-
Ground Truth vs Transcription
-
- - - - Deletion - - - - - - Insertion - - - - - - Substitution - - -
-
-
- - Score - { - e.stopPropagation(); - const rect = - e.currentTarget.getBoundingClientRect(); - setScoreInfoPos({ - top: rect.bottom + 4, - left: rect.left, - }); - setOpenScoreInfo( - openScoreInfo ? null : "accuracy", - ); - }} - > - i - - {openScoreInfo && - (() => { - const metrics = [ - { - key: "accuracy", - title: - "Accuracy (Word Information Preserved)", - desc: "Measures how much of the original information was correctly captured.", - formula: "WIP = (C / N) × (C / H)", - formulaDesc: - "C = correct words\nN = total words in reference\nH = total words in hypothesis", - example: `Reference: "the cat sat on the mat" (N=6)\nHypothesis: "a cat sit on mat" (H=5)\nC = 3 (cat, on, mat)\n\nWIP = (3/6) × (3/5)\n = 0.5 × 0.6 = 0.30 = 30%`, - direction: "Higher is better.", - directionColor: colors.status.success, - }, - { - key: "wer", - title: "WER (Word Error Rate)", - desc: "The most widely used metric in STT evaluation.", - formula: "WER = (S + D + I) / N", - formulaDesc: - "S = substitutions, D = deletions\nI = insertions, N = total words in reference", - example: `Reference: "the cat sat on the mat" (N=6)\nHypothesis: "a cat sit on mat"\n\nthe → a (Substitution)\ncat → cat (Correct)\nsat → sit (Substitution)\non → on (Correct)\nthe → ∅ (Deletion)\nmat → mat (Correct)\n\nS=2, D=1, I=0\nWER = (2+1+0) / 6 = 0.50 = 50%`, - direction: "Lower is better.", - directionColor: colors.status.error, - }, - { - key: "cer", - title: "CER (Character Error Rate)", - desc: "Same concept as WER but at the character level — more granular, catches partial word errors.", - formula: "CER = (S + D + I) / N", - formulaDesc: - "S, D, I = character-level errors\nN = total characters in reference", - example: `Reference: "the cat sat" (N=11 chars)\nHypothesis: "the bat set"\n\nt → t (Correct)\nh → h (Correct)\ne → e (Correct)\n· → · (Correct)\nc → b (Substitution)\na → a (Correct)\nt → t (Correct)\n· → · (Correct)\ns → s (Correct)\na → e (Substitution)\nt → t (Correct)\n\nS=2, D=0, I=0\nCER = 2/11 = 0.18 = 18%`, - direction: "Lower is better.", - directionColor: colors.status.error, - }, - { - key: "lenient_wer", - title: "Lenient WER", - desc: "Same as WER but ignores differences in casing and punctuation — useful when exact formatting doesn't matter.", - formula: "Same as WER after normalizing text", - formulaDesc: - "Normalization: lowercase + remove punctuation", - example: `Reference: "Hello, World!"\nHypothesis: "hello world"\n\nAfter normalization:\n"hello world" vs "hello world"\n→ exact match\n\nLenient WER = 0%\n(strict WER would be higher)`, - direction: "Lower is better.", - directionColor: colors.status.error, - }, - ]; - const currentIdx = metrics.findIndex( - (m) => m.key === openScoreInfo, - ); - const current = - metrics[currentIdx >= 0 ? currentIdx : 0]; - return ( -
e.stopPropagation()} - > - {/* Tab navigation */} -
- {metrics.map((m, _idx) => ( - - ))} -
- {/* Content */} -
-
- {current.title} -
-

- {current.desc} -

-
- Formula -
-
- {current.formula} - {"\n"} - - {current.formulaDesc} - -
-
- Example -
-
- {current.example} -
-
- {current.direction} -
-
-
- ); - })()} -
-
- Is Correct - - Comment -
- {result.signedUrl ? ( - - setPlayingResultId( - playingResultId === result.id - ? null - : result.id, - ) - } - /> - ) : ( -
- {result.sampleName || "-"} -
- )} -
- {(() => { - const hasBoth = - result.groundTruth && result.transcription; - const segments = hasBoth - ? computeWordDiff( - result.groundTruth, - result.transcription, - ) - : []; - const isExpanded = expandedTranscriptions.has( - result.id, - ); - return ( -
-
- {/* Left Panel - Ground Truth */} -
-
- Ground Truth -
-
- {hasBoth ? ( - segments.map((seg, idx) => { - if (seg.type === "insertion") - return null; - const word = seg.reference || ""; - return ( - - - {seg.type === "deletion" && - "- "} - {word} - {" "} - - ); - }) - ) : ( - - {result.groundTruth || "-"} - - )} -
-
- {/* Right Panel - Transcription */} -
-
- Transcription -
-
- {hasBoth ? ( - segments.map((seg, idx) => { - if (seg.type === "deletion") { - return ( - - - ___ - {" "} - - ); - } - const word = - seg.hypothesis || - seg.reference || - ""; - return ( - - - {seg.type === "insertion" && - "+ "} - {word} - {" "} - - ); - }) - ) : ( - - {result.transcription || "-"} - - )} -
-
-
- {hasBoth && - (result.groundTruth!.length > 100 || - result.transcription!.length > 100) && ( - - )} -
- ); - })()} -
- {result.score ? ( -
-
- - Accuracy - - = 0.9 - ? colors.status.success - : result.score.wip >= 0.7 - ? "#ca8a04" - : colors.status.error, - }} - > - {(result.score.wip * 100).toFixed(1)}% - -
-
-
- Errors -
-
- {[ - { label: "WER", value: result.score.wer }, - { label: "CER", value: result.score.cer }, - { - label: "Lenient WER", - value: result.score.lenient_wer, - }, - ].map(({ label, value }) => ( -
- - {label} - - = 0.8 - ? colors.status.error - : value >= 0.4 - ? "#ca8a04" - : colors.status.success, - }} - > - {(value * 100).toFixed(1)}% - -
- ))} -
-
-
- ) : ( - - - - - )} -
- - -
-