diff --git a/web/apps/web/app/leaderboard/client.tsx b/web/apps/web/app/leaderboard/client.tsx index 6a8c98d..5f42c2d 100644 --- a/web/apps/web/app/leaderboard/client.tsx +++ b/web/apps/web/app/leaderboard/client.tsx @@ -24,6 +24,7 @@ type DefinitionAuthorDetail = { totalComparisons: number totalWorkloads: number coverage: Record + speedupSums: Record solutionNamesByAuthor: Record } @@ -36,6 +37,14 @@ type DefinitionDetail = { solutionNames: string[] } +type OverallAuthorRanking = { + author: string + totalSpeedup: number + comparisons: number + coverage?: CoverageStats + correctness?: CorrectnessSummary +} + const DEFAULT_PIN = 0.95 const DEFAULT_VISIBLE = 5 const LIST_MAX_HEIGHT = 288 // 18rem @@ -86,7 +95,7 @@ export function LeaderboardClient({ }: LeaderboardClientProps) { const [pinnedP, setPinnedP] = useState(initialPinnedP) const [isListExpanded, setIsListExpanded] = useState(false) - const [activeTab, setActiveTab] = useState<"fast" | "correctness">("fast") + const [activeTab, setActiveTab] = useState<"fast" | "rankings">("fast") const [selectedAuthor, setSelectedAuthor] = useState(null) const [isDrawerOpen, setIsDrawerOpen] = useState(false) const [highlightedAuthor, setHighlightedAuthor] = useState(null) @@ -292,7 +301,43 @@ export function LeaderboardClient({ openAuthorDetail(highlightedAuthor) }, [highlightedAuthor, openAuthorDetail]) - const pinnedLabel = pinnedTarget.toFixed(2) + const correctnessByAuthor = useMemo(() => { + const map: Record = {} + for (const entry of correctness.stats) { + map[entry.author] = { + total: entry.total, + passed: entry.passed, + incorrect: entry.incorrect, + runtime_error: entry.runtime_error, + other: entry.other, + } + } + return map + }, [correctness.stats]) + + const overallRanking = useMemo(() => { + const entries: OverallAuthorRanking[] = [] + const speedupEntries = Object.entries(fast.speedupSums ?? {}) + for (const [author, totalSpeedup] of speedupEntries) { + if (excludedSet.has(author)) continue + const comparisons = fast.comparisonCounts[author] ?? 0 + if (comparisons <= 0) continue + entries.push({ + author, + totalSpeedup, + comparisons, + coverage: fast.coverage?.[author], + correctness: correctnessByAuthor[author], + }) + } + return entries.sort((a, b) => { + if (b.totalSpeedup !== a.totalSpeedup) return b.totalSpeedup - a.totalSpeedup + if (b.comparisons !== a.comparisons) return b.comparisons - a.comparisons + return a.author.localeCompare(b.author) + }) + }, [correctnessByAuthor, excludedSet, fast.comparisonCounts, fast.coverage, fast.speedupSums]) + + const maxTotalSpeedup = overallRanking.length > 0 ? overallRanking[0].totalSpeedup : 0 const correctnessRanking = useMemo(() => { return correctness.stats @@ -313,20 +358,6 @@ export function LeaderboardClient({ const maxPassRate = correctnessRanking.length > 0 ? correctnessRanking[0].passRate : 0 - const correctnessByAuthor = useMemo(() => { - const map: Record = {} - for (const entry of correctness.stats) { - map[entry.author] = { - total: entry.total, - passed: entry.passed, - incorrect: entry.incorrect, - runtime_error: entry.runtime_error, - other: entry.other, - } - } - return map - }, [correctness.stats]) - return (
@@ -337,14 +368,14 @@ export function LeaderboardClient({

- setActiveTab(value as "fast" | "correctness")} + setActiveTab(value as "fast" | "rankings")} className="space-y-6" > - Correctness + Author rankings @@ -497,10 +528,30 @@ export function LeaderboardClient({ )} + + - -
+ + + +
+
+

Correctness author rankings

+

Pass rate across all workloads for each author.

+
{correctnessRanking.length === 0 ? (

No correctness data available.

) : ( @@ -520,10 +571,7 @@ export function LeaderboardClient({
-
+
) @@ -560,6 +608,60 @@ type AuthorDetailDrawerProps = { onOpenChange: (open: boolean) => void } +type OverallRankingCardProps = { + rankings: OverallAuthorRanking[] + maxTotalSpeedup: number + formatCoverage: (stats?: CoverageStats | null) => { percentText: string } | null + title: string + description: string +} + +function OverallRankingCard({ rankings, maxTotalSpeedup, formatCoverage, title, description }: OverallRankingCardProps) { + const hasRankings = rankings.length > 0 + return ( +
+
+

{title}

+

{description}

+
+ {!hasRankings ? ( +

No speedup data available.

+ ) : ( +
+ {rankings.map((entry, index) => { + const widthPercent = maxTotalSpeedup > 0 ? Math.max(0, (entry.totalSpeedup / maxTotalSpeedup) * 100) : 0 + const width = `${Math.min(widthPercent, 100)}%` + const totalSpeedupText = entry.totalSpeedup.toFixed(2) + const passRatePercent = entry.correctness && entry.correctness.total > 0 + ? ((entry.correctness.passed / entry.correctness.total) * 100).toFixed(1) + : null + const coverageInfo = formatCoverage(entry.coverage) + return ( +
+
+
+ {index + 1}. + {entry.author} +
+
Area under Fast-p curve {totalSpeedupText}
+
+
+
+
+
+ {entry.comparisons} comparisons + {coverageInfo && {coverageInfo.percentText}% evaluated} + {passRatePercent && {passRatePercent}% pass} +
+
+ ) + })} +
+ )} +
+ ) +} + function AuthorDetailDrawer({ open, author, definitions, pinnedTarget, onOpenChange }: AuthorDetailDrawerProps) { return ( diff --git a/web/apps/web/app/leaderboard/section.tsx b/web/apps/web/app/leaderboard/section.tsx index c53aca1..fac411b 100644 --- a/web/apps/web/app/leaderboard/section.tsx +++ b/web/apps/web/app/leaderboard/section.tsx @@ -29,6 +29,7 @@ type DefinitionAuthorDetail = { totalComparisons: number totalWorkloads: number coverage: Record + speedupSums: Record solutionNamesByAuthor: Record } @@ -63,7 +64,7 @@ export function LeaderboardSection({ entries, baselineLabel, initialPinnedP }: L }) const definitionAuthorDetails: DefinitionAuthorDetail[] = filteredEntries.map((entry) => { - const { curves, comparisonCounts, totalComparisons, totalWorkloads, coverage } = computeFastPCurvesForAuthors({ + const { curves, comparisonCounts, totalComparisons, totalWorkloads, coverage, speedupSums } = computeFastPCurvesForAuthors({ datasets: [ { solutions: entry.solutions, @@ -89,6 +90,7 @@ export function LeaderboardSection({ entries, baselineLabel, initialPinnedP }: L totalComparisons, totalWorkloads, coverage, + speedupSums, solutionNamesByAuthor, } }) diff --git a/web/apps/web/lib/analytics.ts b/web/apps/web/lib/analytics.ts index 767497d..a82c662 100644 --- a/web/apps/web/lib/analytics.ts +++ b/web/apps/web/lib/analytics.ts @@ -24,6 +24,7 @@ export type AuthorCurvesResponse = { totalComparisons: number totalWorkloads: number coverage: Record + speedupSums: Record } export type AuthorCorrectnessResponse = { @@ -294,6 +295,7 @@ export function computeFastPCurvesForAuthors(params: { const curves: Record = {} const comparisonCounts: Record = {} + const speedupSums: Record = {} let totalComparisons = 0 for (const [author, ratios] of authorRatios.entries()) { @@ -315,6 +317,17 @@ export function computeFastPCurvesForAuthors(params: { points.push({ p, percent }) } curves[author] = points + + let area = 0 + for (let index = 1; index < points.length; index++) { + const prev = points[index - 1] + const current = points[index] + const width = current.p - prev.p + if (width <= 0) continue + const height = (prev.percent + current.percent) / 200 + area += width * height + } + speedupSums[author] = area } const coverage: Record = {} @@ -326,7 +339,7 @@ export function computeFastPCurvesForAuthors(params: { coverage[author] = { attempted, total, percent } } - return { curves, comparisonCounts, totalComparisons, totalWorkloads, coverage } + return { curves, comparisonCounts, totalComparisons, totalWorkloads, coverage, speedupSums } } export function computeAuthorCorrectnessSummary(params: {