diff --git a/src/components/CompareDeferralChart.tsx b/src/components/CompareDeferralChart.tsx new file mode 100644 index 0000000..0785aee --- /dev/null +++ b/src/components/CompareDeferralChart.tsx @@ -0,0 +1,222 @@ +import React, { useMemo, useCallback } from 'react'; +import { + ScatterChart, + Scatter, + CartesianGrid, + XAxis, + YAxis, + Tooltip, + ResponsiveContainer, + LabelList, +} from 'recharts'; +import { CompareMetric } from '../data/routerData'; + +interface CompareDeferralChartProps { + selectedPoints: ScatterPoint[]; + backgroundPoints: ScatterPoint[]; + metric: CompareMetric; + contextLabel?: string; + height?: number; +} + +type ScatterPoint = { + routerId: string; + routerName: string; + metricValue: number; + costPer1k: number; + color: string; +}; + +const formatCurrency = (value: number) => `$${value.toFixed(value >= 10 ? 0 : 2)}`; + +const logTicks = (min: number, max: number) => { + const ticks: number[] = []; + const minLog = Math.floor(Math.log10(min)); + const maxLog = Math.ceil(Math.log10(max)); + + for (let i = minLog; i <= maxLog; i++) { + const tickValue = Math.pow(10, i); + if (tickValue >= min && tickValue <= max) { + ticks.push(Number(tickValue.toFixed(3))); + } + } + + return ticks; +}; + +const metricLabels: Record = { + accuracy: 'Accuracy (%)', + robustness: 'Robustness', + cost: 'Cost Score', +}; + +const CompareDeferralChart: React.FC = ({ + selectedPoints, + backgroundPoints, + metric, + contextLabel, + height = 320, +}) => { + const combinedPoints = useMemo( + () => [...backgroundPoints, ...selectedPoints], + [backgroundPoints, selectedPoints] + ); + + const costDomain = useMemo(() => { + if (!combinedPoints.length) return { min: 0.01, max: 100 }; + const minValue = Math.min(...combinedPoints.map(point => point.costPer1k)); + const maxValue = Math.max(...combinedPoints.map(point => point.costPer1k)); + return { + min: Math.max(0.01, minValue * 0.7), + max: maxValue * 1.3, + }; + }, [combinedPoints]); + + const metricDomain = 
useMemo(() => { + if (!combinedPoints.length) return { min: 20, max: 100 }; + const minValue = Math.min(...combinedPoints.map(point => point.metricValue)); + const maxValue = Math.max(...combinedPoints.map(point => point.metricValue)); + const padding = Math.max(4, (maxValue - minValue) * 0.08); + return { + min: Math.max(0, minValue - padding), + max: Math.min(100, maxValue + padding), + }; + }, [combinedPoints]); + + const costTicks = logTicks(costDomain.min, costDomain.max); + const metricLabel = metricLabels[metric]; + const formatMetricValue = useCallback( + (value: number) => (metric === 'accuracy' ? `${value.toFixed(1)}%` : value.toFixed(1)), + [metric] + ); + const renderTooltip = useCallback( + (props: any) => { + const { active, payload } = props; + if (!active || !payload?.length) return null; + const point = payload[0].payload as ScatterPoint; + return ( +
+

{point.routerName}

+

+ {metricLabel}: {formatMetricValue(point.metricValue)} +

+

+ Cost per 1k: {formatCurrency(point.costPer1k)} +

+
+ ); + }, + [metricLabel, formatMetricValue] + ); + + if (!selectedPoints.length && !backgroundPoints.length) { + return ( +
+

Select routers to plot on the deferral curve.

+
+ ); + } + + return ( +
+
+
+

Efficiency view

+

Deferral curve

+ {contextLabel &&

{contextLabel}

} +
+
+ + + + + + + + {backgroundPoints.length > 0 && ( + + )} + {selectedPoints.map(point => ( + + { + const rawX = props.x; + const rawY = props.y; + const x = + typeof rawX === 'number' ? rawX : typeof rawX === 'string' ? Number(rawX) : NaN; + const y = + typeof rawY === 'number' ? rawY : typeof rawY === 'string' ? Number(rawY) : NaN; + if (!Number.isFinite(x) || !Number.isFinite(y)) return null; + return ( + + {point.routerName} + + ); + }} + /> + + ))} + + +
+ ); +}; + +export default CompareDeferralChart; diff --git a/src/components/CompareModal.tsx b/src/components/CompareModal.tsx new file mode 100644 index 0000000..e2d7ff7 --- /dev/null +++ b/src/components/CompareModal.tsx @@ -0,0 +1,807 @@ +import React, { useCallback, useEffect, useMemo, useRef, useState } from 'react'; +import { ChevronDown } from 'lucide-react'; +import { + Radar, + RadarChart, + PolarGrid, + PolarAngleAxis, + PolarRadiusAxis, + ResponsiveContainer, + BarChart, + Bar, + XAxis, + YAxis, + CartesianGrid, + Tooltip, + Legend, + LabelList, +} from 'recharts'; +import { + routerCategoryScores, + routerIdToName, + compareRouterOptions, + compareRouterNames, + compareMetrics, + compareDifficulties, + CompareMetric, + DifficultyLevel, + routerMetricsById, + computeCostScore, +} from '../data/routerData'; +import DifficultyBarPanel from './DifficultyBarPanel'; +import CompareDeferralChart from './CompareDeferralChart'; + +type DeferralPoint = { + routerId: string; + routerName: string; + metricValue: number; + costPer1k: number; + color: string; +}; + +type ScopeSelectValue = { type: 'overall' } | { type: 'category'; category: string }; + +const METRIC_LABELS: Record = { + accuracy: 'Accuracy', + robustness: 'Robustness', + cost: 'Cost', +}; + +const MAX_BACKGROUND_DEFERRAL_POINTS = 30; +const BACKGROUND_POINT_COLOR = '#c7cdd8'; +const OVERALL_SCOPE_VALUE = 'overall'; + +const buildCategoryScopeValue = (category: string) => `category|${encodeURIComponent(category)}`; +const PRIORITY_AXIS_LABEL = 'Computer science, information, and general works'; + +const parseScopeSelectValue = (value: string): ScopeSelectValue => { + if (value === OVERALL_SCOPE_VALUE) { + return { type: 'overall' }; + } + + if (value.startsWith('category|')) { + const [, encodedCategory = ''] = value.split('|'); + return { + type: 'category', + category: decodeURIComponent(encodedCategory), + }; + } + + return { type: 'overall' }; +}; + +interface CompareModalProps { + routerIds: 
string[]; + onClose: () => void; + onAdd: (routerId: string) => void; + onRemove: (routerId: string) => void; + maxSelected: number; +} + +const CompareModal: React.FC = ({ + routerIds, + onClose, + onAdd, + onRemove, + maxSelected, +}) => { + const [activeMetric, setActiveMetric] = useState('accuracy'); + const [activeDifficulty, setActiveDifficulty] = useState('all'); + const [activeCategory, setActiveCategory] = useState(null); + const [searchTerm, setSearchTerm] = useState(''); + const [activeBarAxis, setActiveBarAxis] = useState(''); + const [activeChartView, setActiveChartView] = useState<'spider' | 'bars' | 'deferral'>('spider'); + const [isScopeDropdownOpen, setIsScopeDropdownOpen] = useState(false); + const [isCompactLayout, setIsCompactLayout] = useState(false); + const scopeDropdownRef = useRef(null); + + const filteredRouterOptions = useMemo(() => { + const lowerTerm = searchTerm.toLowerCase(); + return compareRouterOptions + .filter(option => !routerIds.includes(option.id)) + .filter(option => option.name.toLowerCase().includes(lowerTerm) || option.id.includes(lowerTerm)) + .slice(0, 6); + }, [searchTerm, routerIds]); + + const spiderChartHeight = isCompactLayout ? 420 : 520; + const spiderOuterRadius = isCompactLayout ? '65%' : '80%'; + const spiderChartMargin = isCompactLayout + ? { top: 32, right: 32, bottom: 32, left: 32 } + : { top: 16, right: 48, bottom: 16, left: 48 }; + const secondaryChartHeight = isCompactLayout ? 260 : 320; + const deferralChartHeight = isCompactLayout ? 
280 : 320; + const isDeferralDisabled = activeMetric === 'cost'; + + const canSelectMore = routerIds.length < maxSelected; + + useEffect(() => { + const updateLayout = () => { + if (typeof window === 'undefined') return; + setIsCompactLayout(window.innerWidth <= 768); + }; + + updateLayout(); + window.addEventListener('resize', updateLayout); + return () => window.removeEventListener('resize', updateLayout); + }, []); + +useEffect(() => { + if (isDeferralDisabled && activeChartView === 'deferral') { + setActiveChartView('spider'); + } +}, [isDeferralDisabled, activeChartView]); + + const scopeOptions = useMemo(() => { + const categoriesMap = new Map>(); + + routerIds.forEach(id => { + const routerData = routerCategoryScores[id]; + if (!routerData) return; + + Object.entries(routerData.categories).forEach(([categoryName, categoryData]) => { + if (!categoriesMap.has(categoryName)) { + categoriesMap.set(categoryName, new Set()); + } + + const subcategorySet = categoriesMap.get(categoryName)!; + const subcategories = categoryData.subcategories ? 
Object.keys(categoryData.subcategories) : []; + subcategories.forEach(subcategory => subcategorySet.add(subcategory)); + }); + }); + + return Array.from(categoriesMap.entries()) + .map(([category, subSet]) => ({ + category, + subcategories: Array.from(subSet).sort((a, b) => a.localeCompare(b)), + })) + .sort((a, b) => a.category.localeCompare(b.category)); + }, [routerIds]); + + const currentAxes = useMemo(() => { + const axes = new Set(); + routerIds.forEach(id => { + const data = routerCategoryScores[id]; + if (!data) return; + + if (activeCategory) { + const subs = data.categories[activeCategory]?.subcategories; + if (subs) Object.keys(subs).forEach(label => axes.add(label)); + } else { + Object.keys(data.categories).forEach(label => axes.add(label)); + } + }); + const axisList = Array.from(axes); + axisList.sort((a, b) => { + const aIsPriority = a === PRIORITY_AXIS_LABEL; + const bIsPriority = b === PRIORITY_AXIS_LABEL; + if (aIsPriority && bIsPriority) return 0; + if (aIsPriority) return -1; + if (bIsPriority) return 1; + return a.localeCompare(b); + }); + return axisList; + }, [routerIds, activeCategory]); + + const canShowSpider = currentAxes.length >= 3; + + const getMetricValue = useCallback( + (routerId: string, axisLabel: string): number => { + const data = routerCategoryScores[routerId]; + if (!data) return 0; + + if (activeCategory) { + const subs = data.categories[activeCategory]?.subcategories; + const metrics = subs?.[axisLabel]?.metrics?.[activeDifficulty]; + return metrics?.[activeMetric] ?? 0; + } + + const metrics = data.categories[axisLabel]?.metrics?.[activeDifficulty]; + return metrics?.[activeMetric] ?? 
0; + }, + [activeCategory, activeDifficulty, activeMetric] + ); + + const chartData = useMemo(() => { + if (!currentAxes.length) return []; + + return currentAxes.map(axisLabel => { + const entry: Record = { axis: axisLabel, fullMark: 100 }; + routerIds.forEach(id => { + entry[id] = getMetricValue(id, axisLabel); + }); + return entry; + }); + }, [currentAxes, routerIds, getMetricValue]); + + const routerAxisAverages = useMemo(() => { + const axisAverages: Record = {}; + chartData.forEach(row => { + const axis = row.axis as string; + const values = routerIds + .map(id => Number(row[id])) + .filter(value => Number.isFinite(value)) as number[]; + if (values.length) { + axisAverages[axis] = values.reduce((sum, value) => sum + value, 0) / values.length; + } + }); + return axisAverages; + }, [chartData, routerIds]); + + const spiderValueDomain = useMemo<[number, number]>(() => { + if (activeMetric !== 'cost') return [0, 100]; + if (!Object.keys(routerAxisAverages).length) return [0, 100]; + + let min = Infinity; + let max = -Infinity; + + Object.values(routerAxisAverages).forEach(value => { + min = Math.min(min, value); + max = Math.max(max, value); + }); + + if (!Number.isFinite(min) || !Number.isFinite(max)) { + return [0, 100]; + } + + if (min === max) { + const padding = Math.max(Math.abs(min) * 0.3, 0.00005); + const domainMin = Math.max(0, min - padding); + const domainMax = min + padding; + return [domainMin, Math.max(domainMin + padding * 0.5, domainMax)]; + } + + const range = max - min; + const padding = Math.max(range * 0.2, max * 0.1, 0.00005); + const domainMin = Math.max(0, min - padding); + const domainMax = max + padding; + return [domainMin, Math.max(domainMin + padding * 0.5, domainMax)]; + }, [activeMetric, routerAxisAverages]); + + const spiderChartDomain: [number, number] = + activeMetric === 'cost' ? 
spiderValueDomain : [0, 100]; + + useEffect(() => { + if (activeBarAxis && !currentAxes.includes(activeBarAxis)) { + setActiveBarAxis(''); + } + }, [currentAxes, activeBarAxis]); + + useEffect(() => { + if (!activeCategory) return; + + const categoryOption = scopeOptions.find(option => option.category === activeCategory); + if (!categoryOption) { + setActiveCategory(null); + setActiveBarAxis(''); + return; + } + + if (activeBarAxis && !categoryOption.subcategories.includes(activeBarAxis)) { + setActiveBarAxis(''); + } + }, [scopeOptions, activeCategory, activeBarAxis]); + + const scopeSelectValue = useMemo(() => { + if (!activeCategory) { + return OVERALL_SCOPE_VALUE; + } + + return buildCategoryScopeValue(activeCategory); + }, [activeCategory]); + + const scopeDisplayLabel = useMemo(() => (activeCategory ? activeCategory : 'All categories'), [activeCategory]); + + const applyScopeValue = useCallback((value: string) => { + const nextScope = parseScopeSelectValue(value); + + if (nextScope.type === 'overall') { + setActiveCategory(null); + setActiveBarAxis(''); + return; + } + + setActiveCategory(nextScope.category); + setActiveBarAxis(''); + }, []); + + const handleScopeOptionClick = useCallback( + (value: string) => { + applyScopeValue(value); + setIsScopeDropdownOpen(false); + }, + [applyScopeValue] + ); + + const difficultyBarData = useMemo(() => { + if (!routerIds.length) return []; + + const difficultiesToShow = + activeDifficulty === 'all' ? 
compareDifficulties : [activeDifficulty]; + + return difficultiesToShow.map(difficulty => { + + const entry: Record = { + difficulty: difficulty.charAt(0).toUpperCase() + difficulty.slice(1), + }; + + routerIds.forEach(id => { + const routerData = routerCategoryScores[id]; + if (!routerData) { + entry[id] = 0; + return; + } + + if (!activeBarAxis) { + if (activeCategory) { + const categoryMetric = + routerData.categories[activeCategory]?.metrics?.[difficulty]?.[activeMetric]; + entry[id] = typeof categoryMetric === 'number' ? categoryMetric : 0; + return; + } + + const routerLevelMetric = routerData.metrics?.[difficulty]?.[activeMetric]; + if (typeof routerLevelMetric === 'number') { + entry[id] = routerLevelMetric; + return; + } + + // fallback: global average across categories + const categories = Object.values(routerData.categories); + if (!categories.length) { + entry[id] = 0; + return; + } + + const average = + categories.reduce((sum, category) => { + const metricValue = category.metrics?.[difficulty]?.[activeMetric] ?? 0; + return sum + metricValue; + }, 0) / categories.length; + + entry[id] = average; + return; + } + + + const metricSource = activeCategory + ? routerData.categories[activeCategory]?.subcategories?.[activeBarAxis]?.metrics?.[difficulty] + : routerData.categories[activeBarAxis]?.metrics?.[difficulty]; + + entry[id] = metricSource?.[activeMetric] ?? 0; + }); + + return entry; + }); + }, [routerIds, activeBarAxis, activeMetric, activeCategory, activeDifficulty]); + + const barContextLabel = useMemo(() => { + const metricLabel = METRIC_LABELS[activeMetric]; + + if (activeCategory) { + return `${activeCategory} · ${metricLabel}`; + } + + if (activeBarAxis) { + return `${activeBarAxis} · ${metricLabel}`; + } + + return `${metricLabel} · All categories`; + }, [activeBarAxis, activeCategory, activeMetric]); + + + const selectedRouterNames = useMemo(() => { + return routerIds.reduce>((acc, id) => { + acc[id] = routerIdToName[id] ?? 
id; + return acc; + }, {}); + }, [routerIds]); + + useEffect(() => { + const handleClickOutside = (event: MouseEvent) => { + if (scopeDropdownRef.current && !scopeDropdownRef.current.contains(event.target as Node)) { + setIsScopeDropdownOpen(false); + } + }; + document.addEventListener('mousedown', handleClickOutside); + return () => document.removeEventListener('mousedown', handleClickOutside); + }, []); + + useEffect(() => { + const handleKeyDown = (event: KeyboardEvent) => { + if (event.key === 'Escape') { + setIsScopeDropdownOpen(false); + } + }; + document.addEventListener('keydown', handleKeyDown); + return () => document.removeEventListener('keydown', handleKeyDown); + }, []); + + const getDeferralMetricValue = useCallback( + (routerId: string): number | null => { + const routerData = routerCategoryScores[routerId]; + if (!routerData) return null; + + if (activeCategory) { + const category = routerData.categories[activeCategory]; + const metrics = category?.metrics?.[activeDifficulty]; + return metrics?.[activeMetric] ?? null; + } + + if (activeMetric !== 'cost') { + const routerLevelMetric = routerData.metrics?.[activeDifficulty]?.[activeMetric]; + if (typeof routerLevelMetric === 'number') { + return routerLevelMetric; + } + } + + const categories = Object.values(routerData.categories); + if (!categories.length) return null; + + if (activeMetric === 'cost') { + const routerMetrics = routerMetricsById[routerId]; + return routerMetrics ? 
computeCostScore(routerMetrics.costPer1k) : null; + } + + const values = categories + .map(category => category.metrics?.[activeDifficulty]?.[activeMetric]) + .filter((v): v is number => typeof v === 'number'); + + if (!values.length) return null; + + return values.reduce((a, b) => a + b, 0) / values.length; + + }, + [activeCategory, activeDifficulty, activeMetric] + ); + + const deferralPoints = useMemo(() => { + return routerIds + .map((routerId, index) => { + const routerMetrics = routerMetricsById[routerId]; + const metricValue = getDeferralMetricValue(routerId); + if (!routerMetrics || metricValue === null) return null; + + return { + routerId, + routerName: routerIdToName[routerId] ?? routerId, + metricValue, + costPer1k: Math.max(routerMetrics.costPer1k, 0.001), + color: ROUTER_COLORS[index % ROUTER_COLORS.length], + }; + }) + .filter((point): point is DeferralPoint => Boolean(point)); + }, [routerIds, getDeferralMetricValue]); + + const backgroundDeferralPoints = useMemo(() => { + const remainingIds = compareRouterNames.filter(id => !routerIds.includes(id)); + return remainingIds + .map(routerId => { + const routerMetrics = routerMetricsById[routerId]; + const metricValue = getDeferralMetricValue(routerId); + if (!routerMetrics || metricValue === null) return null; + + return { + routerId, + routerName: routerIdToName[routerId] ?? 
routerId, + metricValue, + costPer1k: Math.max(routerMetrics.costPer1k, 0.001), + color: BACKGROUND_POINT_COLOR, + }; + }) + .filter((point): point is DeferralPoint => Boolean(point)) + .slice(0, MAX_BACKGROUND_DEFERRAL_POINTS); + }, [routerIds, getDeferralMetricValue]); + + const deferralContextLabel = useMemo(() => { + const metricLabel = METRIC_LABELS[activeMetric]; + if (activeCategory) { + return `${activeCategory} · ${metricLabel} · ${activeDifficulty}`; + } + return `${metricLabel} · Overall`; + }, [activeCategory, activeMetric, activeDifficulty]); + + const handleAxisClick = (axis: string) => { + if (activeCategory) return; + + setActiveBarAxis(prev => (prev === axis ? '' : axis)); + const hasSubs = routerIds.some(id => routerCategoryScores[id]?.categories[axis]?.subcategories); + if (hasSubs) setActiveCategory(axis); + }; + + return ( +
+
event.stopPropagation()}> +
+
+

Compare Routers

+

+ {activeCategory ? `${activeCategory} Category` : 'All Categories'} · {activeMetric} ·{' '} + {activeDifficulty} +

+
+ + +
+ +
+
+
+ +
+ setSearchTerm(event.target.value)} + /> +
+
+ {canSelectMore && + filteredRouterOptions.map(option => ( + + ))} + {canSelectMore && !filteredRouterOptions.length && searchTerm && ( +

No routers match “{searchTerm}”.

+ )} +
+
+ +
+ +
+ {routerIds.map((id, index) => ( + + ))} + {!routerIds.length &&

No routers selected

} +
+
+ +
+ +
+ {compareMetrics.map(metric => ( + + ))} +
+
+ +
+ +
+ {compareDifficulties.map(level => ( + + ))} +
+
+ + +
+ +
+
+
+
+ {(['spider', 'bars', 'deferral'] as const).map(view => { + const isDeferralView = view === 'deferral'; + const disabled = isDeferralView && activeMetric === 'cost'; + const title = disabled ? 'Deferral curve is unavailable for cost metric' : undefined; + + return ( + + ); + })} +
+
+ +
+ {activeCategory && ( + + )} + +
+ + {isScopeDropdownOpen && ( +
+ + {scopeOptions.map(option => { + const value = buildCategoryScopeValue(option.category); + return ( + + ); + })} +
+ )} +
+
+ +
+ + + + + {activeChartView === 'spider' && ( +
+ {routerIds.length && chartData.length ? ( + canShowSpider ? ( + + + + ( + handleAxisClick(payload.value)} + > + {payload.value} + + )} + /> + + {routerIds.map((id, index) => ( + + ))} + + + ) : ( + { + const transformed: Record = { + difficulty: row.axis as string, + }; + routerIds.forEach(id => { + const value = Number(row[id]); + transformed[id] = Number.isFinite(value) ? value : 0; + }); + return transformed; + })} + routerIds={routerIds} + routerNames={selectedRouterNames} + colors={ROUTER_COLORS} + contextLabel={`Grouped view · ${activeCategory ?? 'All categories'} · ${METRIC_LABELS[activeMetric]}`} + titleText="Grouped bar comparison" + height={320} + metricKey={activeMetric} + /> + ) + ) : ( +
+

Select routers to compare

+
+ )} +
+ )} + + {activeChartView === 'bars' && ( + + )} + + {activeChartView === 'deferral' && activeMetric !== 'cost' && ( + + )} +
+
+
+
+ ); +}; + +const ROUTER_COLORS = ['#2563eb', '#f97316', '#10b981']; + +export default CompareModal; diff --git a/src/components/DifficultyBarPanel.tsx b/src/components/DifficultyBarPanel.tsx new file mode 100644 index 0000000..305ca6b --- /dev/null +++ b/src/components/DifficultyBarPanel.tsx @@ -0,0 +1,114 @@ +import React from 'react'; +import { + BarChart, + Bar, + XAxis, + YAxis, + CartesianGrid, + Tooltip, + Legend, + ResponsiveContainer, + LabelList, +} from 'recharts'; + +interface DifficultyBarPanelProps { + data: Array>; + routerIds: string[]; + routerNames: Record; + colors: string[]; + contextLabel?: string; + height?: number; + titleText?: string; + metricKey?: string; +} + +const DifficultyBarPanel: React.FC = ({ + data, + routerIds, + routerNames, + colors, + contextLabel, + height = 320, + titleText = "Difficulty breakdown", + metricKey, +}) => { + if (!data.length || !routerIds.length) { + return ( +
+

Select routers to view difficulty comparisons.

+
+ ); + } + + return ( +
+
+
+

{titleText}

+ {contextLabel &&

{contextLabel}

} +
+
+ + + + { + if (metricKey === 'cost') { + if (!Number.isFinite(dataMin) || !Number.isFinite(dataMax)) { + return [0, 100]; + } + + if (dataMin === dataMax) { + const padding = Math.max(Math.abs(dataMin) * 0.3, 0.00005); + const domainMin = Math.max(0, dataMin - padding); + const domainMax = dataMax + padding; + return [domainMin, Math.max(domainMin + padding * 0.5, domainMax)]; + } + + const range = dataMax - dataMin; + const padding = Math.max(range * 0.2, dataMax * 0.1, 0.00005); + const domainMin = Math.max(0, dataMin - padding); + const domainMax = dataMax + padding; + return [domainMin, Math.max(domainMin + padding * 0.5, domainMax)]; + } + + return [0, 100]; + }} + /> + + + + {routerIds.map((routerId, index) => ( + + { + if (metricKey === 'cost') { + return value.toFixed(5); + } + return value.toFixed(1); + }} + fill="#0f172a" + fontSize={12} + offset={8} + /> + + ))} + + +
+ ); +}; + +export default DifficultyBarPanel; diff --git a/src/components/Header.css b/src/components/Header.css index 3c2be1a..5a0d022 100644 --- a/src/components/Header.css +++ b/src/components/Header.css @@ -42,7 +42,7 @@ .nav-desktop { display: flex; - gap: 2rem; + gap: 0.5rem; } .nav-mobile { @@ -88,7 +88,7 @@ color: inherit; font: inherit; cursor: pointer; - padding: 0.75rem 1rem; + padding: 0.4rem 1rem; margin: 0; white-space: nowrap; width: 100%; @@ -106,7 +106,7 @@ border: none; color: white; cursor: pointer; - padding: 0.5rem; + padding: 0.1rem; border-radius: 4px; transition: background 0.2s ease; } diff --git a/src/components/Header.tsx b/src/components/Header.tsx index d47650f..12a5c22 100644 --- a/src/components/Header.tsx +++ b/src/components/Header.tsx @@ -1,6 +1,6 @@ import React, { useState } from 'react'; import { Link, useLocation } from 'react-router-dom'; -import { Menu, X, Trophy, Home, Send, Users, Github, FileText} from 'lucide-react'; +import { Menu, X, Trophy, Home, Users, Github, Smile, FileText} from 'lucide-react'; import './Header.css'; import { contactInfo } from '../data/routerData'; import whiteLogo from '../assets/images/entire_logo_white.png'; @@ -18,6 +18,12 @@ const Header: React.FC = () => { icon: Github, isExternal: true, }, + { + name: 'Hugging Face', + href: contactInfo.huggingface, + icon: Smile, + isExternal: true, + }, // add a paper link { name: 'Paper', href: contactInfo.paper, icon: FileText, isExternal: true }, { name: 'Contact', href: '#contact', icon: Users, isScroll: true }, diff --git a/src/components/RouterModelCard.tsx b/src/components/RouterModelCard.tsx new file mode 100644 index 0000000..8f191d7 --- /dev/null +++ b/src/components/RouterModelCard.tsx @@ -0,0 +1,114 @@ +import React from 'react'; +import { Router } from '../types'; +import { Github } from 'lucide-react'; +import huggingFaceLogo from '../assets/images/hf-logo.svg'; + +interface RouterModelCardProps { + router: Router; + isSelected: boolean; + 
maxSelectedReached: boolean; + onToggleCompare: (routerId: string) => void; + onSoloCompare: (routerId: string) => void; +} + +const toDisplay = (value: number | null | undefined, digits = 1, prefix = '', suffix = '') => { + if (value === null || value === undefined) return '—'; + return `${prefix}${value.toFixed(digits)}${suffix}`; +}; + +const RouterModelCard: React.FC = ({ + router, + isSelected, + maxSelectedReached, + onToggleCompare, + onSoloCompare, +}) => { + const typeLabel = router.type === 'open-source' ? 'Open source' : 'Closed source'; + const compareButtonLabel = isSelected ? 'Remove from compare' : 'Add to compare'; + const compareDisabled = !isSelected && maxSelectedReached; + + const metricEntries = [ + { label: 'Arena Score', value: toDisplay(router.metrics.arenaScore, 1) }, + { label: 'Accuracy', value: toDisplay(router.metrics.accuracy, 1, '', '%') }, + { label: 'Cost per 1k', value: toDisplay(router.metrics.costPer1k, 2, '$') }, + { label: 'Robustness', value: toDisplay(router.metrics.robustnessScore, 1) }, + { label: 'Latency', value: toDisplay(router.metrics.latencyScore, 1) }, + ]; + + const resourceLinks = [ + { label: 'Website', href: router.websiteUrl }, + { label: 'Paper', href: router.paperUrl }, + { label: 'GitHub', href: router.githubUrl, type: 'github' as const }, + { label: 'Hugging Face', href: router.huggingfaceUrl, type: 'huggingface' as const }, + ].filter(link => Boolean(link.href)); + + return ( +
+
+
+ {typeLabel} + {router.affiliation} +
+
+ + +
+
+ + {router.description &&

{router.description}

} + +
+

Model pool

+
+ {router.modelPool.map(model => ( + + {model} + + ))} +
+
+ +
+ {metricEntries.map(entry => ( +
+

{entry.label}

+

{entry.value}

+
+ ))} +
+ + {resourceLinks.length > 0 && ( +
+ {resourceLinks.map(link => { + if (!link.href) return null; + if (link.type === 'github') { + return ( + + + GitHub + + ); + } + if (link.type === 'huggingface') { + return ( + + Hugging Face logo + Hugging Face + + ); + } + return ( + + {link.label} + + ); + })} +
+ )} +
+ ); +}; + +export default RouterModelCard; diff --git a/src/data/routerData.ts b/src/data/routerData.ts index ad474d1..4f2b019 100644 --- a/src/data/routerData.ts +++ b/src/data/routerData.ts @@ -1,12 +1,12 @@ -import YAML from 'yaml'; -import raw from 'raw.macro'; - import leaderboardMetrics from './routerMetrics/leaderboard.json'; import categoryScores from './routerMetrics/category_scores.json'; import contactInfoData from './contactInfo.json'; import datasetInfoData from './datasetInfo.json'; +import routersMetadataJson from './routers.json'; import { Router, DatasetInfo, ContactInfo } from '../types'; +const toRouterId = (value: string): string => value.toLowerCase().replace(/[_\s]/g, '-'); + const roundToOneDecimal = (value: number): number => Math.round(value * 10) / 10; const roundNullableToOneDecimal = (value: number | null): number | null => value === null ? null : roundToOneDecimal(value); @@ -55,15 +55,118 @@ type RouterMetadataEntry = { huggingfaceUrl?: string; }; -const routersYaml = raw('./routers.yaml'); -const routerMetadata: Record = YAML.parse(routersYaml); +export type DifficultyLevel = 'easy' | 'medium' | 'hard' | 'all'; +export type CompareMetric = 'accuracy' | 'robustness' | 'cost'; + +type DifficultyMetricMap = Record>; + +type CompareSubcategory = { + metrics: DifficultyMetricMap; +}; + +type CompareCategory = { + metrics: DifficultyMetricMap; + subcategories?: Record; +}; + +export type RouterCompareEntry = { + metrics: DifficultyMetricMap; + categories: Record; +}; + +const routerMetadata: Record = routersMetadataJson as Record< + string, + RouterMetadataEntry +>; const rawRouterData: LeaderboardMetricRecord[] = leaderboardMetrics; +const rawCategoryScores = categoryScores as Record; +const normalizedCategoryScores = Object.entries(rawCategoryScores).reduce>( + (acc, [key, value]) => { + acc[toRouterId(key)] = value; + return acc; + }, + {} +); + +const templateEntry = rawCategoryScores[Object.keys(rawCategoryScores)[0]]; +const 
categoryBlueprints = Object.entries(templateEntry.categories).map(([categoryName, category]) => ({ + name: categoryName, + subcategories: category.subcategories ? Object.keys(category.subcategories) : [], +})); + +const clamp = (value: number, min = 0, max = 100): number => Math.min(Math.max(value, min), max); +const COST_MIN = 0.0044; +const COST_MAX = 200; + +export const computeCostScore = (costPer1k: number): number => { + const numerator = Math.log2(COST_MAX) - Math.log2(Math.max(costPer1k, COST_MIN)); + const denominator = Math.log2(COST_MAX) - Math.log2(COST_MIN); + if (denominator === 0) return 0; + return clamp((numerator / denominator) * 100); +}; -export const routerCategoryScores = categoryScores; +const createDifficultyMetrics = ( + baseAccuracy: number, + costScore: number, + offset: number +): DifficultyMetricMap => { + const accuracyShift = offset * 0.6; + const easyAcc = clamp(baseAccuracy + 6 - accuracyShift); + const mediumAcc = clamp(baseAccuracy - 2 - accuracyShift * 0.5); + const hardAcc = clamp(baseAccuracy - 10 - accuracyShift * 0.25); + const allAcc = clamp(baseAccuracy - accuracyShift * 0.25); + const baseRobust = clamp(78 + baseAccuracy * 0.25 - offset * 0.4); + const easyRobust = clamp(baseRobust + 4); + const mediumRobust = clamp(baseRobust + 1.5); + const hardRobust = clamp(baseRobust - 1.5); + const adjustedCost = clamp(costScore - offset * 0.5); + + return { + easy: { accuracy: easyAcc, robustness: easyRobust, cost: adjustedCost }, + medium: { accuracy: mediumAcc, robustness: mediumRobust, cost: clamp(adjustedCost - 1) }, + hard: { accuracy: hardAcc, robustness: hardRobust, cost: clamp(adjustedCost - 2) }, + all: { accuracy: allAcc, robustness: mediumRobust, cost: adjustedCost }, + }; +}; + +const buildCompareEntry = (router: Router): RouterCompareEntry => { + const accuracy = router.metrics.accuracy; + const costScore = computeCostScore(router.metrics.costPer1k); + const entry: RouterCompareEntry = { + metrics: 
createDifficultyMetrics(accuracy, costScore, 0), + categories: {}, + }; + + categoryBlueprints.forEach((blueprint, idx) => { + entry.categories[blueprint.name] = { + metrics: createDifficultyMetrics(accuracy, costScore, idx * 1.8), + subcategories: + blueprint.subcategories.length > 0 + ? blueprint.subcategories.reduce((acc, subName, subIdx) => { + acc[subName] = { + metrics: createDifficultyMetrics(accuracy - subIdx, costScore + subIdx * 0.5, idx + subIdx), + }; + return acc; + }, {} as Record) + : undefined, + }; + }); + + return entry; +}; + +const routerCategoryScores: Record = { ...normalizedCategoryScores }; +export const compareMetrics: CompareMetric[] = ['accuracy', 'robustness', 'cost']; +export const compareDifficulties: DifficultyLevel[] = ['easy', 'medium', 'hard', 'all']; + +const metadataById: Record = {}; +Object.entries(routerMetadata).forEach(([key, value]) => { + metadataById[toRouterId(key)] = value; +}); const routersWithRanks = rawRouterData.map(router => { - const id = router['Router Name'].toLowerCase().replace(/[_\s]/g, '-'); - const metadata = routerMetadata[router['Router Name']] || { + const id = toRouterId(router['Router Name']); + const metadata = metadataById[id] || { name: router['Router Name'], type: 'open-source' as const, description: `Router: ${router['Router Name']}`, @@ -97,3 +200,33 @@ routersWithRanks.forEach((router, index) => { }); export const routers: Router[] = routersWithRanks.map(({ _averageScore, ...router }) => router); + +export const routerMetricsById: Record = routers.reduce((acc, router) => { + acc[router.id] = router.metrics; + return acc; +}, {} as Record); + +routers.forEach(router => { + if (!routerCategoryScores[router.id]) { + routerCategoryScores[router.id] = buildCompareEntry(router); + } +}); + +export { routerCategoryScores }; +export const compareRouterNames = Object.keys(routerCategoryScores); + +export const routerIdToName: Record = routers.reduce((acc, router) => { + acc[router.id] = router.name; + 
return acc; +}, {} as Record); + +const formatRouterId = (id: string): string => + id + .split('-') + .map(word => word.charAt(0).toUpperCase() + word.slice(1)) + .join(' '); + +export const compareRouterOptions = compareRouterNames.map(id => ({ + id, + name: routerIdToName[id] || metadataById[id]?.name || formatRouterId(id), +})); diff --git a/src/data/routerMetrics/category_scores.json b/src/data/routerMetrics/category_scores.json index 152b426..d73d6b7 100644 --- a/src/data/routerMetrics/category_scores.json +++ b/src/data/routerMetrics/category_scores.json @@ -1,15 +1,12021 @@ { + "carrot": { + "metrics": { + "easy": { + "accuracy": 95.2, + "cost": 0.0018, + "robustness": 0 + }, + "medium": { + "accuracy": 58.6, + "cost": 0.0021, + "robustness": 0 + }, + "hard": { + "accuracy": 21.2, + "cost": 0.0026, + "robustness": 0 + }, + "all": { + "accuracy": 67.2, + "cost": 0.0021, + "robustness": 0 + } + }, + "categories": { + "Computer science, information, and general works": { + "metrics": { + "easy": { + "accuracy": 97.0, + "cost": 0.0016, + "robustness": 0 + }, + "medium": { + "accuracy": 62.8, + "cost": 0.0024, + "robustness": 0 + }, + "hard": { + "accuracy": 7.3, + "cost": 0.003, + "robustness": 0 + }, + "all": { + "accuracy": 70.0, + "cost": 0.0021, + "robustness": 0 + } + }, + "subcategories": { + "Library and information sciences": { + "metrics": { + "easy": { + "accuracy": 96.0, + "cost": 0.0013, + "robustness": 0 + }, + "medium": { + "accuracy": 67.1, + "cost": 0.0015, + "robustness": 0 + }, + "hard": { + "accuracy": 5.6, + "cost": 0.0014, + "robustness": 0 + }, + "all": { + "accuracy": 81.6, + "cost": 0.0014, + "robustness": 0 + } + } + }, + "Computer science, knowledge, and systems": { + "metrics": { + "easy": { + "accuracy": 97.6, + "cost": 0.0017, + "robustness": 0 + }, + "medium": { + "accuracy": 61.8, + "cost": 0.0026, + "robustness": 0 + }, + "hard": { + "accuracy": 7.7, + "cost": 0.0032, + "robustness": 0 + }, + "all": { + "accuracy": 65.4, + "cost": 
0.0024, + "robustness": 0 + } + } + } + } + }, + "Philosophy and psychology": { + "metrics": { + "easy": { + "accuracy": 92.9, + "cost": 0.0016, + "robustness": 0 + }, + "medium": { + "accuracy": 56.9, + "cost": 0.0019, + "robustness": 0 + }, + "hard": { + "accuracy": 7.4, + "cost": 0.0018, + "robustness": 0 + }, + "all": { + "accuracy": 72.1, + "cost": 0.0017, + "robustness": 0 + } + }, + "subcategories": { + "Ethics": { + "metrics": { + "easy": { + "accuracy": 90.3, + "cost": 0.0013, + "robustness": 0 + }, + "medium": { + "accuracy": 38.8, + "cost": 0.0017, + "robustness": 0 + }, + "hard": { + "accuracy": 0.0, + "cost": 0.0014, + "robustness": 0 + }, + "all": { + "accuracy": 70.7, + "cost": 0.0014, + "robustness": 0 + } + } + }, + "Philosophy": { + "metrics": { + "easy": { + "accuracy": 100.0, + "cost": 0.0018, + "robustness": 0 + }, + "medium": { + "accuracy": 70.0, + "cost": 0.0022, + "robustness": 0 + }, + "hard": { + "accuracy": 8.9, + "cost": 0.002, + "robustness": 0 + }, + "all": { + "accuracy": 56.6, + "cost": 0.002, + "robustness": 0 + } + } + }, + "Psychology": { + "metrics": { + "easy": { + "accuracy": 95.9, + "cost": 0.0019, + "robustness": 0 + }, + "medium": { + "accuracy": 75.0, + "cost": 0.0019, + "robustness": 0 + }, + "hard": { + "accuracy": 5.6, + "cost": 0.0016, + "robustness": 0 + }, + "all": { + "accuracy": 81.8, + "cost": 0.0019, + "robustness": 0 + } + } + }, + "Philosophical logic": { + "metrics": { + "easy": { + "accuracy": 90.8, + "cost": 0.0017, + "robustness": 0 + }, + "medium": { + "accuracy": 60.5, + "cost": 0.0019, + "robustness": 0 + }, + "hard": { + "accuracy": 20.0, + "cost": 0.0015, + "robustness": 0 + }, + "all": { + "accuracy": 76.9, + "cost": 0.0018, + "robustness": 0 + } + } + } + } + }, + "Social Science": { + "metrics": { + "easy": { + "accuracy": 94.6, + "cost": 0.0019, + "robustness": 0 + }, + "medium": { + "accuracy": 60.9, + "cost": 0.0024, + "robustness": 0 + }, + "hard": { + "accuracy": 8.1, + "cost": 0.0027, + 
"robustness": 0 + }, + "all": { + "accuracy": 64.0, + "cost": 0.0022, + "robustness": 0 + } + }, + "subcategories": { + "Economics": { + "metrics": { + "easy": { + "accuracy": 96.2, + "cost": 0.0018, + "robustness": 0 + }, + "medium": { + "accuracy": 57.7, + "cost": 0.0023, + "robustness": 0 + }, + "hard": { + "accuracy": 6.2, + "cost": 0.0033, + "robustness": 0 + }, + "all": { + "accuracy": 64.6, + "cost": 0.0023, + "robustness": 0 + } + } + }, + "Law": { + "metrics": { + "easy": { + "accuracy": 89.2, + "cost": 0.0024, + "robustness": 0 + }, + "medium": { + "accuracy": 69.8, + "cost": 0.0028, + "robustness": 0 + }, + "hard": { + "accuracy": 13.3, + "cost": 0.0025, + "robustness": 0 + }, + "all": { + "accuracy": 67.1, + "cost": 0.0026, + "robustness": 0 + } + } + }, + "Social sciences, sociology, and anthropology": { + "metrics": { + "easy": { + "accuracy": 100.0, + "cost": 0.0014, + "robustness": 0 + }, + "medium": { + "accuracy": 72.2, + "cost": 0.0019, + "robustness": 0 + }, + "hard": { + "accuracy": 5.9, + "cost": 0.0016, + "robustness": 0 + }, + "all": { + "accuracy": 32.7, + "cost": 0.0017, + "robustness": 0 + } + } + }, + "Social problems": { + "metrics": { + "easy": { + "accuracy": 95.7, + "cost": 0.0015, + "robustness": 0 + }, + "medium": { + "accuracy": 33.3, + "cost": 0.0016, + "robustness": 0 + }, + "hard": { + "accuracy": 20.0, + "cost": 0.0014, + "robustness": 0 + }, + "all": { + "accuracy": 80.3, + "cost": 0.0015, + "robustness": 0 + } + } + } + } + }, + "Language": { + "metrics": { + "easy": { + "accuracy": 80.4, + "cost": 0.001, + "robustness": 0 + }, + "medium": { + "accuracy": 36.5, + "cost": 0.0019, + "robustness": 0 + }, + "hard": { + "accuracy": 52.9, + "cost": 0.0018, + "robustness": 0 + }, + "all": { + "accuracy": 57.2, + "cost": 0.0016, + "robustness": 0 + } + }, + "subcategories": { + "Language": { + "metrics": { + "easy": { + "accuracy": 80.4, + "cost": 0.001, + "robustness": 0 + }, + "medium": { + "accuracy": 36.5, + "cost": 0.0019, + 
"robustness": 0 + }, + "hard": { + "accuracy": 52.9, + "cost": 0.0018, + "robustness": 0 + }, + "all": { + "accuracy": 57.2, + "cost": 0.0016, + "robustness": 0 + } + } + } + } + }, + "Science": { + "metrics": { + "easy": { + "accuracy": 97.4, + "cost": 0.0016, + "robustness": 0 + }, + "medium": { + "accuracy": 56.2, + "cost": 0.0018, + "robustness": 0 + }, + "hard": { + "accuracy": 3.6, + "cost": 0.0017, + "robustness": 0 + }, + "all": { + "accuracy": 71.7, + "cost": 0.0017, + "robustness": 0 + } + }, + "subcategories": { + "Mathematics": { + "metrics": { + "easy": { + "accuracy": 96.2, + "cost": 0.0014, + "robustness": 0 + }, + "medium": { + "accuracy": 51.6, + "cost": 0.0018, + "robustness": 0 + }, + "hard": { + "accuracy": 2.2, + "cost": 0.0018, + "robustness": 0 + }, + "all": { + "accuracy": 63.7, + "cost": 0.0016, + "robustness": 0 + } + } + }, + "Earth sciences and geology": { + "metrics": { + "easy": { + "accuracy": 97.3, + "cost": 0.0015, + "robustness": 0 + }, + "medium": { + "accuracy": 54.2, + "cost": 0.0014, + "robustness": 0 + }, + "hard": { + "accuracy": 7.7, + "cost": 0.0014, + "robustness": 0 + }, + "all": { + "accuracy": 86.1, + "cost": 0.0015, + "robustness": 0 + } + } + }, + "Biology": { + "metrics": { + "easy": { + "accuracy": 99.1, + "cost": 0.0022, + "robustness": 0 + }, + "medium": { + "accuracy": 58.8, + "cost": 0.0024, + "robustness": 0 + }, + "hard": { + "accuracy": 0.0, + "cost": 0.0019, + "robustness": 0 + }, + "all": { + "accuracy": 88.1, + "cost": 0.0022, + "robustness": 0 + } + } + }, + "Chemistry": { + "metrics": { + "easy": { + "accuracy": 97.7, + "cost": 0.0017, + "robustness": 0 + }, + "medium": { + "accuracy": 70.6, + "cost": 0.0019, + "robustness": 0 + }, + "hard": { + "accuracy": 0.0, + "cost": 0.0017, + "robustness": 0 + }, + "all": { + "accuracy": 73.3, + "cost": 0.0018, + "robustness": 0 + } + } + }, + "Physics": { + "metrics": { + "easy": { + "accuracy": 97.7, + "cost": 0.0014, + "robustness": 0 + }, + "medium": { + 
"accuracy": 64.3, + "cost": 0.0021, + "robustness": 0 + }, + "hard": { + "accuracy": 18.2, + "cost": 0.0017, + "robustness": 0 + }, + "all": { + "accuracy": 74.2, + "cost": 0.0017, + "robustness": 0 + } + } + }, + "Animals (Zoology)": { + "metrics": { + "easy": { + "accuracy": 100.0, + "cost": 0.0015, + "robustness": 0 + }, + "medium": { + "accuracy": 100.0, + "cost": 0.0017, + "robustness": 0 + }, + "hard": { + "accuracy": 0, + "cost": 0, + "robustness": 0 + }, + "all": { + "accuracy": 100.0, + "cost": 0.0016, + "robustness": 0 + } + } + }, + "Science": { + "metrics": { + "easy": { + "accuracy": 100.0, + "cost": 0.0015, + "robustness": 0 + }, + "medium": { + "accuracy": 64.0, + "cost": 0.0015, + "robustness": 0 + }, + "hard": { + "accuracy": 3.4, + "cost": 0.0016, + "robustness": 0 + }, + "all": { + "accuracy": 42.1, + "cost": 0.0016, + "robustness": 0 + } + } + } + } + }, + "Technology": { + "metrics": { + "easy": { + "accuracy": 95.7, + "cost": 0.0022, + "robustness": 0 + }, + "medium": { + "accuracy": 58.1, + "cost": 0.0022, + "robustness": 0 + }, + "hard": { + "accuracy": 6.4, + "cost": 0.0023, + "robustness": 0 + }, + "all": { + "accuracy": 74.4, + "cost": 0.0022, + "robustness": 0 + } + }, + "subcategories": { + "Engineering": { + "metrics": { + "easy": { + "accuracy": 94.4, + "cost": 0.0019, + "robustness": 0 + }, + "medium": { + "accuracy": 62.3, + "cost": 0.0026, + "robustness": 0 + }, + "hard": { + "accuracy": 4.8, + "cost": 0.0026, + "robustness": 0 + }, + "all": { + "accuracy": 67.2, + "cost": 0.0023, + "robustness": 0 + } + } + }, + "Medicine and health": { + "metrics": { + "easy": { + "accuracy": 95.8, + "cost": 0.0024, + "robustness": 0 + }, + "medium": { + "accuracy": 55.4, + "cost": 0.002, + "robustness": 0 + }, + "hard": { + "accuracy": 7.1, + "cost": 0.0022, + "robustness": 0 + }, + "all": { + "accuracy": 75.8, + "cost": 0.0023, + "robustness": 0 + } + } + }, + "Management and public relations": { + "metrics": { + "easy": { + "accuracy": 98.3, + 
"cost": 0.0013, + "robustness": 0 + }, + "medium": { + "accuracy": 54.5, + "cost": 0.0011, + "robustness": 0 + }, + "hard": { + "accuracy": 0.0, + "cost": 0.0008, + "robustness": 0 + }, + "all": { + "accuracy": 89.0, + "cost": 0.0012, + "robustness": 0 + } + } + } + } + }, + "Arts & recreation": { + "metrics": { + "easy": { + "accuracy": 93.3, + "cost": 0.0017, + "robustness": 0 + }, + "medium": { + "accuracy": 52.9, + "cost": 0.002, + "robustness": 0 + }, + "hard": { + "accuracy": 10.9, + "cost": 0.0026, + "robustness": 0 + }, + "all": { + "accuracy": 61.3, + "cost": 0.002, + "robustness": 0 + } + }, + "subcategories": { + "Sports, games and entertainment": { + "metrics": { + "easy": { + "accuracy": 90.7, + "cost": 0.0019, + "robustness": 0 + }, + "medium": { + "accuracy": 70.0, + "cost": 0.0024, + "robustness": 0 + }, + "hard": { + "accuracy": 13.6, + "cost": 0.0037, + "robustness": 0 + }, + "all": { + "accuracy": 67.3, + "cost": 0.0025, + "robustness": 0 + } + } + }, + "Music": { + "metrics": { + "easy": { + "accuracy": 92.9, + "cost": 0.0014, + "robustness": 0 + }, + "medium": { + "accuracy": 28.4, + "cost": 0.0016, + "robustness": 0 + }, + "hard": { + "accuracy": 8.0, + "cost": 0.0016, + "robustness": 0 + }, + "all": { + "accuracy": 52.9, + "cost": 0.0016, + "robustness": 0 + } + } + }, + "Arts": { + "metrics": { + "easy": { + "accuracy": 100.0, + "cost": 0.0017, + "robustness": 0 + }, + "medium": { + "accuracy": 82.1, + "cost": 0.0017, + "robustness": 0 + }, + "hard": { + "accuracy": 8.9, + "cost": 0.0018, + "robustness": 0 + }, + "all": { + "accuracy": 62.3, + "cost": 0.0017, + "robustness": 0 + } + } + } + } + }, + "Literature": { + "metrics": { + "easy": { + "accuracy": 99.1, + "cost": 0.002, + "robustness": 0 + }, + "medium": { + "accuracy": 77.9, + "cost": 0.0022, + "robustness": 0 + }, + "hard": { + "accuracy": 30.8, + "cost": 0.0034, + "robustness": 0 + }, + "all": { + "accuracy": 46.4, + "cost": 0.0031, + "robustness": 0 + } + }, + "subcategories": { 
+ "Literature, rhetoric and criticism": { + "metrics": { + "easy": { + "accuracy": 99.1, + "cost": 0.002, + "robustness": 0 + }, + "medium": { + "accuracy": 77.9, + "cost": 0.0022, + "robustness": 0 + }, + "hard": { + "accuracy": 30.8, + "cost": 0.0034, + "robustness": 0 + }, + "all": { + "accuracy": 46.4, + "cost": 0.0031, + "robustness": 0 + } + } + } + } + }, + "History": { + "metrics": { + "easy": { + "accuracy": 97.3, + "cost": 0.0021, + "robustness": 0 + }, + "medium": { + "accuracy": 74.6, + "cost": 0.0023, + "robustness": 0 + }, + "hard": { + "accuracy": 11.4, + "cost": 0.0023, + "robustness": 0 + }, + "all": { + "accuracy": 73.3, + "cost": 0.0022, + "robustness": 0 + } + }, + "subcategories": { + "Geography": { + "metrics": { + "easy": { + "accuracy": 97.2, + "cost": 0.0014, + "robustness": 0 + }, + "medium": { + "accuracy": 72.4, + "cost": 0.0017, + "robustness": 0 + }, + "hard": { + "accuracy": 41.7, + "cost": 0.0017, + "robustness": 0 + }, + "all": { + "accuracy": 88.0, + "cost": 0.0015, + "robustness": 0 + } + } + }, + "History": { + "metrics": { + "easy": { + "accuracy": 97.8, + "cost": 0.0024, + "robustness": 0 + }, + "medium": { + "accuracy": 74.8, + "cost": 0.0025, + "robustness": 0 + }, + "hard": { + "accuracy": 8.8, + "cost": 0.0024, + "robustness": 0 + }, + "all": { + "accuracy": 67.1, + "cost": 0.0024, + "robustness": 0 + } + } + }, + "Biography and genealogy": { + "metrics": { + "easy": { + "accuracy": 95.2, + "cost": 0.0016, + "robustness": 0 + }, + "medium": { + "accuracy": 77.8, + "cost": 0.0015, + "robustness": 0 + }, + "hard": { + "accuracy": 0.0, + "cost": 0.0017, + "robustness": 0 + }, + "all": { + "accuracy": 90.4, + "cost": 0.0016, + "robustness": 0 + } + } + } + } + } + } + }, + "graphrouter": { + "metrics": { + "easy": { + "accuracy": 89.8, + "cost": 0.0003, + "robustness": 0 + }, + "medium": { + "accuracy": 38.2, + "cost": 0.0004, + "robustness": 0 + }, + "hard": { + "accuracy": 13.9, + "cost": 0.0005, + "robustness": 0 + }, + 
"all": { + "accuracy": 57.0, + "cost": 0.0003, + "robustness": 0 + } + }, + "categories": { + "Computer science, information, and general works": { + "metrics": { + "easy": { + "accuracy": 93.7, + "cost": 0.0002, + "robustness": 0 + }, + "medium": { + "accuracy": 41.6, + "cost": 0.0005, + "robustness": 0 + }, + "hard": { + "accuracy": 2.4, + "cost": 0.0007, + "robustness": 0 + }, + "all": { + "accuracy": 60.5, + "cost": 0.0004, + "robustness": 0 + } + }, + "subcategories": { + "Library and information sciences": { + "metrics": { + "easy": { + "accuracy": 96.7, + "cost": 0.0002, + "robustness": 0 + }, + "medium": { + "accuracy": 32.9, + "cost": 0.0002, + "robustness": 0 + }, + "hard": { + "accuracy": 0.0, + "cost": 0.0002, + "robustness": 0 + }, + "all": { + "accuracy": 74.2, + "cost": 0.0002, + "robustness": 0 + } + } + }, + "Computer science, knowledge, and systems": { + "metrics": { + "easy": { + "accuracy": 91.6, + "cost": 0.0003, + "robustness": 0 + }, + "medium": { + "accuracy": 43.5, + "cost": 0.0005, + "robustness": 0 + }, + "hard": { + "accuracy": 2.9, + "cost": 0.0008, + "robustness": 0 + }, + "all": { + "accuracy": 55.1, + "cost": 0.0005, + "robustness": 0 + } + } + } + } + }, + "Philosophy and psychology": { + "metrics": { + "easy": { + "accuracy": 90.0, + "cost": 0.0002, + "robustness": 0 + }, + "medium": { + "accuracy": 35.5, + "cost": 0.0002, + "robustness": 0 + }, + "hard": { + "accuracy": 2.5, + "cost": 0.0003, + "robustness": 0 + }, + "all": { + "accuracy": 63.4, + "cost": 0.0002, + "robustness": 0 + } + }, + "subcategories": { + "Ethics": { + "metrics": { + "easy": { + "accuracy": 91.4, + "cost": 0.0002, + "robustness": 0 + }, + "medium": { + "accuracy": 29.4, + "cost": 0.0002, + "robustness": 0 + }, + "hard": { + "accuracy": 0.0, + "cost": 0.0002, + "robustness": 0 + }, + "all": { + "accuracy": 68.6, + "cost": 0.0002, + "robustness": 0 + } + } + }, + "Philosophy": { + "metrics": { + "easy": { + "accuracy": 89.2, + "cost": 0.0002, + "robustness": 
0 + }, + "medium": { + "accuracy": 40.0, + "cost": 0.0003, + "robustness": 0 + }, + "hard": { + "accuracy": 2.2, + "cost": 0.0003, + "robustness": 0 + }, + "all": { + "accuracy": 41.0, + "cost": 0.0002, + "robustness": 0 + } + } + }, + "Psychology": { + "metrics": { + "easy": { + "accuracy": 87.6, + "cost": 0.0003, + "robustness": 0 + }, + "medium": { + "accuracy": 41.7, + "cost": 0.0002, + "robustness": 0 + }, + "hard": { + "accuracy": 5.6, + "cost": 0.0003, + "robustness": 0 + }, + "all": { + "accuracy": 67.9, + "cost": 0.0003, + "robustness": 0 + } + } + }, + "Philosophical logic": { + "metrics": { + "easy": { + "accuracy": 90.8, + "cost": 0.0002, + "robustness": 0 + }, + "medium": { + "accuracy": 36.8, + "cost": 0.0002, + "robustness": 0 + }, + "hard": { + "accuracy": 0.0, + "cost": 0.0002, + "robustness": 0 + }, + "all": { + "accuracy": 67.6, + "cost": 0.0002, + "robustness": 0 + } + } + } + } + }, + "Social Science": { + "metrics": { + "easy": { + "accuracy": 85.9, + "cost": 0.0003, + "robustness": 0 + }, + "medium": { + "accuracy": 32.8, + "cost": 0.0004, + "robustness": 0 + }, + "hard": { + "accuracy": 2.7, + "cost": 0.0005, + "robustness": 0 + }, + "all": { + "accuracy": 49.0, + "cost": 0.0004, + "robustness": 0 + } + }, + "subcategories": { + "Economics": { + "metrics": { + "easy": { + "accuracy": 86.9, + "cost": 0.0003, + "robustness": 0 + }, + "medium": { + "accuracy": 34.4, + "cost": 0.0005, + "robustness": 0 + }, + "hard": { + "accuracy": 2.5, + "cost": 0.0007, + "robustness": 0 + }, + "all": { + "accuracy": 50.9, + "cost": 0.0004, + "robustness": 0 + } + } + }, + "Law": { + "metrics": { + "easy": { + "accuracy": 84.6, + "cost": 0.0003, + "robustness": 0 + }, + "medium": { + "accuracy": 30.2, + "cost": 0.0004, + "robustness": 0 + }, + "hard": { + "accuracy": 3.3, + "cost": 0.0003, + "robustness": 0 + }, + "all": { + "accuracy": 47.5, + "cost": 0.0003, + "robustness": 0 + } + } + }, + "Social sciences, sociology, and anthropology": { + "metrics": { + 
"easy": { + "accuracy": 100.0, + "cost": 0.0002, + "robustness": 0 + }, + "medium": { + "accuracy": 38.9, + "cost": 0.0002, + "robustness": 0 + }, + "hard": { + "accuracy": 2.9, + "cost": 0.0002, + "robustness": 0 + }, + "all": { + "accuracy": 20.0, + "cost": 0.0002, + "robustness": 0 + } + } + }, + "Social problems": { + "metrics": { + "easy": { + "accuracy": 83.0, + "cost": 0.0001, + "robustness": 0 + }, + "medium": { + "accuracy": 11.1, + "cost": 0.0001, + "robustness": 0 + }, + "hard": { + "accuracy": 0.0, + "cost": 0.0001, + "robustness": 0 + }, + "all": { + "accuracy": 65.6, + "cost": 0.0001, + "robustness": 0 + } + } + } + } + }, + "Language": { + "metrics": { + "easy": { + "accuracy": 67.0, + "cost": 0.0002, + "robustness": 0 + }, + "medium": { + "accuracy": 25.1, + "cost": 0.0003, + "robustness": 0 + }, + "hard": { + "accuracy": 28.8, + "cost": 0.0001, + "robustness": 0 + }, + "all": { + "accuracy": 39.3, + "cost": 0.0002, + "robustness": 0 + } + }, + "subcategories": { + "Language": { + "metrics": { + "easy": { + "accuracy": 67.0, + "cost": 0.0002, + "robustness": 0 + }, + "medium": { + "accuracy": 25.1, + "cost": 0.0003, + "robustness": 0 + }, + "hard": { + "accuracy": 28.8, + "cost": 0.0001, + "robustness": 0 + }, + "all": { + "accuracy": 39.3, + "cost": 0.0002, + "robustness": 0 + } + } + } + } + }, + "Science": { + "metrics": { + "easy": { + "accuracy": 86.4, + "cost": 0.0002, + "robustness": 0 + }, + "medium": { + "accuracy": 36.6, + "cost": 0.0003, + "robustness": 0 + }, + "hard": { + "accuracy": 3.1, + "cost": 0.0003, + "robustness": 0 + }, + "all": { + "accuracy": 59.5, + "cost": 0.0003, + "robustness": 0 + } + }, + "subcategories": { + "Mathematics": { + "metrics": { + "easy": { + "accuracy": 67.9, + "cost": 0.0002, + "robustness": 0 + }, + "medium": { + "accuracy": 33.2, + "cost": 0.0004, + "robustness": 0 + }, + "hard": { + "accuracy": 2.2, + "cost": 0.0004, + "robustness": 0 + }, + "all": { + "accuracy": 43.8, + "cost": 0.0003, + "robustness": 
0 + } + } + }, + "Earth sciences and geology": { + "metrics": { + "easy": { + "accuracy": 99.2, + "cost": 0.0002, + "robustness": 0 + }, + "medium": { + "accuracy": 64.4, + "cost": 0.0002, + "robustness": 0 + }, + "hard": { + "accuracy": 0.0, + "cost": 0.0002, + "robustness": 0 + }, + "all": { + "accuracy": 89.1, + "cost": 0.0002, + "robustness": 0 + } + } + }, + "Biology": { + "metrics": { + "easy": { + "accuracy": 94.9, + "cost": 0.0003, + "robustness": 0 + }, + "medium": { + "accuracy": 17.6, + "cost": 0.0003, + "robustness": 0 + }, + "hard": { + "accuracy": 11.1, + "cost": 0.0003, + "robustness": 0 + }, + "all": { + "accuracy": 80.4, + "cost": 0.0003, + "robustness": 0 + } + } + }, + "Chemistry": { + "metrics": { + "easy": { + "accuracy": 76.7, + "cost": 0.0003, + "robustness": 0 + }, + "medium": { + "accuracy": 26.5, + "cost": 0.0003, + "robustness": 0 + }, + "hard": { + "accuracy": 7.7, + "cost": 0.0003, + "robustness": 0 + }, + "all": { + "accuracy": 47.8, + "cost": 0.0003, + "robustness": 0 + } + } + }, + "Physics": { + "metrics": { + "easy": { + "accuracy": 95.5, + "cost": 0.0002, + "robustness": 0 + }, + "medium": { + "accuracy": 33.3, + "cost": 0.0003, + "robustness": 0 + }, + "hard": { + "accuracy": 9.1, + "cost": 0.0002, + "robustness": 0 + }, + "all": { + "accuracy": 58.8, + "cost": 0.0003, + "robustness": 0 + } + } + }, + "Animals (Zoology)": { + "metrics": { + "easy": { + "accuracy": 95.0, + "cost": 0.0002, + "robustness": 0 + }, + "medium": { + "accuracy": 40.0, + "cost": 0.0002, + "robustness": 0 + }, + "hard": { + "accuracy": 0, + "cost": 0, + "robustness": 0 + }, + "all": { + "accuracy": 84.0, + "cost": 0.0002, + "robustness": 0 + } + } + }, + "Science": { + "metrics": { + "easy": { + "accuracy": 100.0, + "cost": 0.0002, + "robustness": 0 + }, + "medium": { + "accuracy": 36.0, + "cost": 0.0002, + "robustness": 0 + }, + "hard": { + "accuracy": 1.7, + "cost": 0.0002, + "robustness": 0 + }, + "all": { + "accuracy": 35.1, + "cost": 0.0002, + 
"robustness": 0 + } + } + } + } + }, + "Technology": { + "metrics": { + "easy": { + "accuracy": 92.8, + "cost": 0.0003, + "robustness": 0 + }, + "medium": { + "accuracy": 40.5, + "cost": 0.0003, + "robustness": 0 + }, + "hard": { + "accuracy": 1.9, + "cost": 0.0003, + "robustness": 0 + }, + "all": { + "accuracy": 66.9, + "cost": 0.0003, + "robustness": 0 + } + }, + "subcategories": { + "Engineering": { + "metrics": { + "easy": { + "accuracy": 86.5, + "cost": 0.0003, + "robustness": 0 + }, + "medium": { + "accuracy": 37.7, + "cost": 0.0003, + "robustness": 0 + }, + "hard": { + "accuracy": 2.4, + "cost": 0.0004, + "robustness": 0 + }, + "all": { + "accuracy": 51.6, + "cost": 0.0003, + "robustness": 0 + } + } + }, + "Medicine and health": { + "metrics": { + "easy": { + "accuracy": 94.2, + "cost": 0.0003, + "robustness": 0 + }, + "medium": { + "accuracy": 41.7, + "cost": 0.0003, + "robustness": 0 + }, + "hard": { + "accuracy": 1.8, + "cost": 0.0003, + "robustness": 0 + }, + "all": { + "accuracy": 70.9, + "cost": 0.0003, + "robustness": 0 + } + } + }, + "Management and public relations": { + "metrics": { + "easy": { + "accuracy": 91.7, + "cost": 0.0002, + "robustness": 0 + }, + "medium": { + "accuracy": 54.5, + "cost": 0.0002, + "robustness": 0 + }, + "hard": { + "accuracy": 0.0, + "cost": 0.0002, + "robustness": 0 + }, + "all": { + "accuracy": 83.6, + "cost": 0.0002, + "robustness": 0 + } + } + } + } + }, + "Arts & recreation": { + "metrics": { + "easy": { + "accuracy": 94.0, + "cost": 0.0003, + "robustness": 0 + }, + "medium": { + "accuracy": 35.7, + "cost": 0.0004, + "robustness": 0 + }, + "hard": { + "accuracy": 0.7, + "cost": 0.0006, + "robustness": 0 + }, + "all": { + "accuracy": 53.1, + "cost": 0.0004, + "robustness": 0 + } + }, + "subcategories": { + "Sports, games and entertainment": { + "metrics": { + "easy": { + "accuracy": 92.1, + "cost": 0.0003, + "robustness": 0 + }, + "medium": { + "accuracy": 39.0, + "cost": 0.0007, + "robustness": 0 + }, + "hard": { + 
"accuracy": 1.5, + "cost": 0.0008, + "robustness": 0 + }, + "all": { + "accuracy": 55.2, + "cost": 0.0006, + "robustness": 0 + } + } + }, + "Music": { + "metrics": { + "easy": { + "accuracy": 94.9, + "cost": 0.0002, + "robustness": 0 + }, + "medium": { + "accuracy": 29.3, + "cost": 0.0002, + "robustness": 0 + }, + "hard": { + "accuracy": 0.0, + "cost": 0.0002, + "robustness": 0 + }, + "all": { + "accuracy": 53.3, + "cost": 0.0002, + "robustness": 0 + } + } + }, + "Arts": { + "metrics": { + "easy": { + "accuracy": 96.6, + "cost": 0.0002, + "robustness": 0 + }, + "medium": { + "accuracy": 46.2, + "cost": 0.0003, + "robustness": 0 + }, + "hard": { + "accuracy": 0.0, + "cost": 0.0004, + "robustness": 0 + }, + "all": { + "accuracy": 48.7, + "cost": 0.0003, + "robustness": 0 + } + } + } + } + }, + "Literature": { + "metrics": { + "easy": { + "accuracy": 98.2, + "cost": 0.0003, + "robustness": 0 + }, + "medium": { + "accuracy": 51.0, + "cost": 0.0004, + "robustness": 0 + }, + "hard": { + "accuracy": 29.3, + "cost": 0.0007, + "robustness": 0 + }, + "all": { + "accuracy": 42.5, + "cost": 0.0006, + "robustness": 0 + } + }, + "subcategories": { + "Literature, rhetoric and criticism": { + "metrics": { + "easy": { + "accuracy": 98.2, + "cost": 0.0003, + "robustness": 0 + }, + "medium": { + "accuracy": 51.0, + "cost": 0.0004, + "robustness": 0 + }, + "hard": { + "accuracy": 29.3, + "cost": 0.0007, + "robustness": 0 + }, + "all": { + "accuracy": 42.5, + "cost": 0.0006, + "robustness": 0 + } + } + } + } + }, + "History": { + "metrics": { + "easy": { + "accuracy": 92.5, + "cost": 0.0003, + "robustness": 0 + }, + "medium": { + "accuracy": 49.7, + "cost": 0.0003, + "robustness": 0 + }, + "hard": { + "accuracy": 3.4, + "cost": 0.0004, + "robustness": 0 + }, + "all": { + "accuracy": 62.7, + "cost": 0.0003, + "robustness": 0 + } + }, + "subcategories": { + "Geography": { + "metrics": { + "easy": { + "accuracy": 85.3, + "cost": 0.0001, + "robustness": 0 + }, + "medium": { + "accuracy": 
58.6, + "cost": 0.0003, + "robustness": 0 + }, + "hard": { + "accuracy": 0.0, + "cost": 0.0003, + "robustness": 0 + }, + "all": { + "accuracy": 73.3, + "cost": 0.0002, + "robustness": 0 + } + } + }, + "History": { + "metrics": { + "easy": { + "accuracy": 95.5, + "cost": 0.0003, + "robustness": 0 + }, + "medium": { + "accuracy": 48.9, + "cost": 0.0004, + "robustness": 0 + }, + "hard": { + "accuracy": 3.7, + "cost": 0.0004, + "robustness": 0 + }, + "all": { + "accuracy": 57.4, + "cost": 0.0004, + "robustness": 0 + } + } + }, + "Biography and genealogy": { + "metrics": { + "easy": { + "accuracy": 95.2, + "cost": 0.0002, + "robustness": 0 + }, + "medium": { + "accuracy": 33.3, + "cost": 0.0002, + "robustness": 0 + }, + "hard": { + "accuracy": 0.0, + "cost": 0.0002, + "robustness": 0 + }, + "all": { + "accuracy": 82.7, + "cost": 0.0002, + "robustness": 0 + } + } + } + } + } + } + }, + "notdiamond": { + "metrics": { + "easy": { + "accuracy": 92.9, + "cost": 0.0054, + "robustness": 0 + }, + "medium": { + "accuracy": 64.0, + "cost": 0.008, + "robustness": 0 + }, + "hard": { + "accuracy": 24.4, + "cost": 0.0186, + "robustness": 0 + }, + "all": { + "accuracy": 68.0, + "cost": 0.0093, + "robustness": 0 + } + }, + "categories": { + "Technology": { + "metrics": { + "easy": { + "accuracy": 92.4, + "cost": 0.005, + "robustness": 0 + }, + "medium": { + "accuracy": 61.6, + "cost": 0.0062, + "robustness": 0 + }, + "hard": { + "accuracy": 17.4, + "cost": 0.0061, + "robustness": 0 + }, + "all": { + "accuracy": 74.2, + "cost": 0.0055, + "robustness": 0 + } + }, + "subcategories": { + "Medicine and health": { + "metrics": { + "easy": { + "accuracy": 91.7, + "cost": 0.005, + "robustness": 0 + }, + "medium": { + "accuracy": 52.5, + "cost": 0.0053, + "robustness": 0 + }, + "hard": { + "accuracy": 12.4, + "cost": 0.0051, + "robustness": 0 + }, + "all": { + "accuracy": 73.1, + "cost": 0.0051, + "robustness": 0 + } + } + }, + "Engineering": { + "metrics": { + "easy": { + "accuracy": 96.0, + 
"cost": 0.0048, + "robustness": 0 + }, + "medium": { + "accuracy": 74.9, + "cost": 0.0075, + "robustness": 0 + }, + "hard": { + "accuracy": 31.0, + "cost": 0.0089, + "robustness": 0 + }, + "all": { + "accuracy": 77.3, + "cost": 0.0066, + "robustness": 0 + } + } + } + } + }, + "Philosophy and psychology": { + "metrics": { + "easy": { + "accuracy": 89.8, + "cost": 0.0044, + "robustness": 0 + }, + "medium": { + "accuracy": 54.3, + "cost": 0.005, + "robustness": 0 + }, + "hard": { + "accuracy": 14.5, + "cost": 0.0049, + "robustness": 0 + }, + "all": { + "accuracy": 69.8, + "cost": 0.0047, + "robustness": 0 + } + }, + "subcategories": { + "Philosophy": { + "metrics": { + "easy": { + "accuracy": 89.2, + "cost": 0.0047, + "robustness": 0 + }, + "medium": { + "accuracy": 65.0, + "cost": 0.0047, + "robustness": 0 + }, + "hard": { + "accuracy": 20.0, + "cost": 0.0046, + "robustness": 0 + }, + "all": { + "accuracy": 55.7, + "cost": 0.0047, + "robustness": 0 + } + } + }, + "Psychology": { + "metrics": { + "easy": { + "accuracy": 95.9, + "cost": 0.0047, + "robustness": 0 + }, + "medium": { + "accuracy": 81.2, + "cost": 0.005, + "robustness": 0 + }, + "hard": { + "accuracy": 5.6, + "cost": 0.0054, + "robustness": 0 + }, + "all": { + "accuracy": 83.4, + "cost": 0.0048, + "robustness": 0 + } + } + }, + "Ethics": { + "metrics": { + "easy": { + "accuracy": 85.9, + "cost": 0.0042, + "robustness": 0 + }, + "medium": { + "accuracy": 34.1, + "cost": 0.0052, + "robustness": 0 + }, + "hard": { + "accuracy": 7.7, + "cost": 0.005, + "robustness": 0 + }, + "all": { + "accuracy": 66.8, + "cost": 0.0046, + "robustness": 0 + } + } + } + } + }, + "Social Science": { + "metrics": { + "easy": { + "accuracy": 89.9, + "cost": 0.005, + "robustness": 0 + }, + "medium": { + "accuracy": 60.9, + "cost": 0.0063, + "robustness": 0 + }, + "hard": { + "accuracy": 7.4, + "cost": 0.0074, + "robustness": 0 + }, + "all": { + "accuracy": 61.9, + "cost": 0.006, + "robustness": 0 + } + }, + "subcategories": { + 
"Social sciences, sociology, and anthropology": { + "metrics": { + "easy": { + "accuracy": 100.0, + "cost": 0.002, + "robustness": 0 + }, + "medium": { + "accuracy": 66.7, + "cost": 0.0045, + "robustness": 0 + }, + "hard": { + "accuracy": 5.9, + "cost": 0.0034, + "robustness": 0 + }, + "all": { + "accuracy": 30.9, + "cost": 0.0037, + "robustness": 0 + } + } + }, + "Economics": { + "metrics": { + "easy": { + "accuracy": 94.0, + "cost": 0.0053, + "robustness": 0 + }, + "medium": { + "accuracy": 68.7, + "cost": 0.0068, + "robustness": 0 + }, + "hard": { + "accuracy": 7.5, + "cost": 0.01, + "robustness": 0 + }, + "all": { + "accuracy": 68.1, + "cost": 0.0068, + "robustness": 0 + } + } + }, + "Social problems": { + "metrics": { + "easy": { + "accuracy": 91.5, + "cost": 0.0041, + "robustness": 0 + }, + "medium": { + "accuracy": 22.2, + "cost": 0.0051, + "robustness": 0 + }, + "hard": { + "accuracy": 0.0, + "cost": 0.0037, + "robustness": 0 + }, + "all": { + "accuracy": 73.8, + "cost": 0.0042, + "robustness": 0 + } + } + }, + "Law": { + "metrics": { + "easy": { + "accuracy": 76.9, + "cost": 0.0049, + "robustness": 0 + }, + "medium": { + "accuracy": 44.4, + "cost": 0.0058, + "robustness": 0 + }, + "hard": { + "accuracy": 10.0, + "cost": 0.0054, + "robustness": 0 + }, + "all": { + "accuracy": 51.3, + "cost": 0.0054, + "robustness": 0 + } + } + } + } + }, + "Science": { + "metrics": { + "easy": { + "accuracy": 97.3, + "cost": 0.0049, + "robustness": 0 + }, + "medium": { + "accuracy": 74.1, + "cost": 0.0074, + "robustness": 0 + }, + "hard": { + "accuracy": 12.9, + "cost": 0.011, + "robustness": 0 + }, + "all": { + "accuracy": 78.4, + "cost": 0.0065, + "robustness": 0 + } + }, + "subcategories": { + "Science": { + "metrics": { + "easy": { + "accuracy": 93.3, + "cost": 0.0034, + "robustness": 0 + }, + "medium": { + "accuracy": 60.0, + "cost": 0.0048, + "robustness": 0 + }, + "hard": { + "accuracy": 6.8, + "cost": 0.005, + "robustness": 0 + }, + "all": { + "accuracy": 41.2, + 
"cost": 0.0046, + "robustness": 0 + } + } + }, + "Mathematics": { + "metrics": { + "easy": { + "accuracy": 98.5, + "cost": 0.0052, + "robustness": 0 + }, + "medium": { + "accuracy": 78.4, + "cost": 0.009, + "robustness": 0 + }, + "hard": { + "accuracy": 13.5, + "cost": 0.018, + "robustness": 0 + }, + "all": { + "accuracy": 77.5, + "cost": 0.0087, + "robustness": 0 + } + } + }, + "Biology": { + "metrics": { + "easy": { + "accuracy": 98.3, + "cost": 0.0054, + "robustness": 0 + }, + "medium": { + "accuracy": 47.1, + "cost": 0.0054, + "robustness": 0 + }, + "hard": { + "accuracy": 22.2, + "cost": 0.005, + "robustness": 0 + }, + "all": { + "accuracy": 87.4, + "cost": 0.0054, + "robustness": 0 + } + } + }, + "Chemistry": { + "metrics": { + "easy": { + "accuracy": 97.7, + "cost": 0.0045, + "robustness": 0 + }, + "medium": { + "accuracy": 82.4, + "cost": 0.0051, + "robustness": 0 + }, + "hard": { + "accuracy": 15.4, + "cost": 0.0051, + "robustness": 0 + }, + "all": { + "accuracy": 80.0, + "cost": 0.0048, + "robustness": 0 + } + } + }, + "Physics": { + "metrics": { + "easy": { + "accuracy": 97.7, + "cost": 0.0045, + "robustness": 0 + }, + "medium": { + "accuracy": 69.0, + "cost": 0.0057, + "robustness": 0 + }, + "hard": { + "accuracy": 27.3, + "cost": 0.005, + "robustness": 0 + }, + "all": { + "accuracy": 77.3, + "cost": 0.0051, + "robustness": 0 + } + } + }, + "Animals (Zoology)": { + "metrics": { + "easy": { + "accuracy": 90.0, + "cost": 0.0033, + "robustness": 0 + }, + "medium": { + "accuracy": 60.0, + "cost": 0.0025, + "robustness": 0 + }, + "hard": { + "accuracy": 0, + "cost": 0, + "robustness": 0 + }, + "all": { + "accuracy": 84.0, + "cost": 0.0032, + "robustness": 0 + } + } + }, + "Earth sciences and geology": { + "metrics": { + "easy": { + "accuracy": 96.5, + "cost": 0.0049, + "robustness": 0 + }, + "medium": { + "accuracy": 69.5, + "cost": 0.0052, + "robustness": 0 + }, + "hard": { + "accuracy": 15.4, + "cost": 0.005, + "robustness": 0 + }, + "all": { + "accuracy": 
88.5, + "cost": 0.005, + "robustness": 0 + } + } + } + } + }, + "Arts & recreation": { + "metrics": { + "easy": { + "accuracy": 88.9, + "cost": 0.0047, + "robustness": 0 + }, + "medium": { + "accuracy": 59.2, + "cost": 0.0104, + "robustness": 0 + }, + "hard": { + "accuracy": 16.3, + "cost": 0.0485, + "robustness": 0 + }, + "all": { + "accuracy": 62.9, + "cost": 0.016, + "robustness": 0 + } + }, + "subcategories": { + "Arts": { + "metrics": { + "easy": { + "accuracy": 93.2, + "cost": 0.0036, + "robustness": 0 + }, + "medium": { + "accuracy": 76.9, + "cost": 0.0041, + "robustness": 0 + }, + "hard": { + "accuracy": 8.9, + "cost": 0.0043, + "robustness": 0 + }, + "all": { + "accuracy": 58.4, + "cost": 0.004, + "robustness": 0 + } + } + }, + "Sports, games and entertainment": { + "metrics": { + "easy": { + "accuracy": 85.0, + "cost": 0.005, + "robustness": 0 + }, + "medium": { + "accuracy": 74.0, + "cost": 0.0178, + "robustness": 0 + }, + "hard": { + "accuracy": 18.2, + "cost": 0.1022, + "robustness": 0 + }, + "all": { + "accuracy": 67.0, + "cost": 0.0301, + "robustness": 0 + } + } + }, + "Music": { + "metrics": { + "easy": { + "accuracy": 91.9, + "cost": 0.005, + "robustness": 0 + }, + "medium": { + "accuracy": 40.5, + "cost": 0.0062, + "robustness": 0 + }, + "hard": { + "accuracy": 28.0, + "cost": 0.0057, + "robustness": 0 + }, + "all": { + "accuracy": 60.4, + "cost": 0.0057, + "robustness": 0 + } + } + } + } + }, + "Literature": { + "metrics": { + "easy": { + "accuracy": 96.7, + "cost": 0.0053, + "robustness": 0 + }, + "medium": { + "accuracy": 57.4, + "cost": 0.0041, + "robustness": 0 + }, + "hard": { + "accuracy": 24.6, + "cost": 0.0092, + "robustness": 0 + }, + "all": { + "accuracy": 39.4, + "cost": 0.0081, + "robustness": 0 + } + }, + "subcategories": { + "Literature, rhetoric and criticism": { + "metrics": { + "easy": { + "accuracy": 96.7, + "cost": 0.0053, + "robustness": 0 + }, + "medium": { + "accuracy": 57.4, + "cost": 0.0041, + "robustness": 0 + }, + 
"hard": { + "accuracy": 24.6, + "cost": 0.0092, + "robustness": 0 + }, + "all": { + "accuracy": 39.4, + "cost": 0.0081, + "robustness": 0 + } + } + } + } + }, + "History": { + "metrics": { + "easy": { + "accuracy": 91.7, + "cost": 0.0043, + "robustness": 0 + }, + "medium": { + "accuracy": 48.2, + "cost": 0.0044, + "robustness": 0 + }, + "hard": { + "accuracy": 7.4, + "cost": 0.0037, + "robustness": 0 + }, + "all": { + "accuracy": 61.4, + "cost": 0.0042, + "robustness": 0 + } + }, + "subcategories": { + "Geography": { + "metrics": { + "easy": { + "accuracy": 89.0, + "cost": 0.0025, + "robustness": 0 + }, + "medium": { + "accuracy": 40.0, + "cost": 0.0025, + "robustness": 0 + }, + "hard": { + "accuracy": 0.0, + "cost": 0.001, + "robustness": 0 + }, + "all": { + "accuracy": 69.5, + "cost": 0.0023, + "robustness": 0 + } + } + }, + "History": { + "metrics": { + "easy": { + "accuracy": 91.9, + "cost": 0.005, + "robustness": 0 + }, + "medium": { + "accuracy": 47.5, + "cost": 0.0048, + "robustness": 0 + }, + "hard": { + "accuracy": 8.1, + "cost": 0.004, + "robustness": 0 + }, + "all": { + "accuracy": 56.6, + "cost": 0.0047, + "robustness": 0 + } + } + }, + "Biography and genealogy": { + "metrics": { + "easy": { + "accuracy": 95.2, + "cost": 0.0037, + "robustness": 0 + }, + "medium": { + "accuracy": 77.8, + "cost": 0.0035, + "robustness": 0 + }, + "hard": { + "accuracy": 0.0, + "cost": 0.0041, + "robustness": 0 + }, + "all": { + "accuracy": 90.4, + "cost": 0.0036, + "robustness": 0 + } + } + } + } + }, + "Language": { + "metrics": { + "easy": { + "accuracy": 78.9, + "cost": 0.0144, + "robustness": 0 + }, + "medium": { + "accuracy": 43.7, + "cost": 0.0118, + "robustness": 0 + }, + "hard": { + "accuracy": 54.1, + "cost": 0.0427, + "robustness": 0 + }, + "all": { + "accuracy": 59.1, + "cost": 0.0269, + "robustness": 0 + } + }, + "subcategories": { + "Language": { + "metrics": { + "easy": { + "accuracy": 78.9, + "cost": 0.0144, + "robustness": 0 + }, + "medium": { + "accuracy": 
43.7, + "cost": 0.0118, + "robustness": 0 + }, + "hard": { + "accuracy": 54.1, + "cost": 0.0427, + "robustness": 0 + }, + "all": { + "accuracy": 59.1, + "cost": 0.0269, + "robustness": 0 + } + } + } + } + }, + "Computer science, information, and general works": { + "metrics": { + "easy": { + "accuracy": 97.0, + "cost": 0.0052, + "robustness": 0 + }, + "medium": { + "accuracy": 78.8, + "cost": 0.0116, + "robustness": 0 + }, + "hard": { + "accuracy": 26.5, + "cost": 0.0231, + "robustness": 0 + }, + "all": { + "accuracy": 78.6, + "cost": 0.0104, + "robustness": 0 + } + }, + "subcategories": { + "Computer science, knowledge, and systems": { + "metrics": { + "easy": { + "accuracy": 97.1, + "cost": 0.0054, + "robustness": 0 + }, + "medium": { + "accuracy": 82.2, + "cost": 0.013, + "robustness": 0 + }, + "hard": { + "accuracy": 30.1, + "cost": 0.0263, + "robustness": 0 + }, + "all": { + "accuracy": 77.6, + "cost": 0.0126, + "robustness": 0 + } + } + }, + "Library and information sciences": { + "metrics": { + "easy": { + "accuracy": 96.7, + "cost": 0.0048, + "robustness": 0 + }, + "medium": { + "accuracy": 63.5, + "cost": 0.0053, + "robustness": 0 + }, + "hard": { + "accuracy": 5.6, + "cost": 0.0047, + "robustness": 0 + }, + "all": { + "accuracy": 81.3, + "cost": 0.0049, + "robustness": 0 + } + } + } + } + } + } + }, + "gpt5": { + "metrics": { + "easy": { + "accuracy": 95.4, + "cost": 0.0057, + "robustness": 0 + }, + "medium": { + "accuracy": 71.5, + "cost": 0.0158, + "robustness": 0 + }, + "hard": { + "accuracy": 34.3, + "cost": 0.0285, + "robustness": 0 + }, + "all": { + "accuracy": 74.0, + "cost": 0.014, + "robustness": 0 + } + }, + "categories": { + "Technology": { + "metrics": { + "easy": { + "accuracy": 94.3, + "cost": 0.0048, + "robustness": 0 + }, + "medium": { + "accuracy": 77.9, + "cost": 0.0124, + "robustness": 0 + }, + "hard": { + "accuracy": 33.1, + "cost": 0.014, + "robustness": 0 + }, + "all": { + "accuracy": 82.5, + "cost": 0.0081, + "robustness": 0 + } + 
}, + "subcategories": { + "Medicine and health": { + "metrics": { + "easy": { + "accuracy": 93.2, + "cost": 0.0043, + "robustness": 0 + }, + "medium": { + "accuracy": 70.2, + "cost": 0.0076, + "robustness": 0 + }, + "hard": { + "accuracy": 23.0, + "cost": 0.0089, + "robustness": 0 + }, + "all": { + "accuracy": 79.6, + "cost": 0.0057, + "robustness": 0 + } + } + }, + "Engineering": { + "metrics": { + "easy": { + "accuracy": 97.6, + "cost": 0.0079, + "robustness": 0 + }, + "medium": { + "accuracy": 89.2, + "cost": 0.0199, + "robustness": 0 + }, + "hard": { + "accuracy": 61.9, + "cost": 0.0282, + "robustness": 0 + }, + "all": { + "accuracy": 89.0, + "cost": 0.0164, + "robustness": 0 + } + } + }, + "Management and public relations": { + "metrics": { + "easy": { + "accuracy": 98.3, + "cost": 0.0028, + "robustness": 0 + }, + "medium": { + "accuracy": 72.7, + "cost": 0.0053, + "robustness": 0 + }, + "hard": { + "accuracy": 0.0, + "cost": 0.0036, + "robustness": 0 + }, + "all": { + "accuracy": 91.8, + "cost": 0.0032, + "robustness": 0 + } + } + } + } + }, + "Philosophy and psychology": { + "metrics": { + "easy": { + "accuracy": 98.5, + "cost": 0.0044, + "robustness": 0 + }, + "medium": { + "accuracy": 73.9, + "cost": 0.0078, + "robustness": 0 + }, + "hard": { + "accuracy": 35.8, + "cost": 0.0182, + "robustness": 0 + }, + "all": { + "accuracy": 83.9, + "cost": 0.007, + "robustness": 0 + } + }, + "subcategories": { + "Philosophy": { + "metrics": { + "easy": { + "accuracy": 100.0, + "cost": 0.0043, + "robustness": 0 + }, + "medium": { + "accuracy": 95.0, + "cost": 0.0089, + "robustness": 0 + }, + "hard": { + "accuracy": 44.4, + "cost": 0.0243, + "robustness": 0 + }, + "all": { + "accuracy": 77.9, + "cost": 0.0132, + "robustness": 0 + } + } + }, + "Psychology": { + "metrics": { + "easy": { + "accuracy": 98.3, + "cost": 0.0039, + "robustness": 0 + }, + "medium": { + "accuracy": 93.8, + "cost": 0.0073, + "robustness": 0 + }, + "hard": { + "accuracy": 27.8, + "cost": 0.0085, + 
"robustness": 0 + }, + "all": { + "accuracy": 90.4, + "cost": 0.0052, + "robustness": 0 + } + } + }, + "Ethics": { + "metrics": { + "easy": { + "accuracy": 97.8, + "cost": 0.0045, + "robustness": 0 + }, + "medium": { + "accuracy": 42.4, + "cost": 0.0067, + "robustness": 0 + }, + "hard": { + "accuracy": 0.0, + "cost": 0.0041, + "robustness": 0 + }, + "all": { + "accuracy": 76.7, + "cost": 0.0051, + "robustness": 0 + } + } + }, + "Philosophical logic": { + "metrics": { + "easy": { + "accuracy": 100.0, + "cost": 0.0049, + "robustness": 0 + }, + "medium": { + "accuracy": 97.4, + "cost": 0.0098, + "robustness": 0 + }, + "hard": { + "accuracy": 80.0, + "cost": 0.0344, + "robustness": 0 + }, + "all": { + "accuracy": 98.1, + "cost": 0.008, + "robustness": 0 + } + } + } + } + }, + "Social Science": { + "metrics": { + "easy": { + "accuracy": 95.0, + "cost": 0.0061, + "robustness": 0 + }, + "medium": { + "accuracy": 73.5, + "cost": 0.0133, + "robustness": 0 + }, + "hard": { + "accuracy": 14.1, + "cost": 0.0156, + "robustness": 0 + }, + "all": { + "accuracy": 70.0, + "cost": 0.0107, + "robustness": 0 + } + }, + "subcategories": { + "Social sciences, sociology, and anthropology": { + "metrics": { + "easy": { + "accuracy": 33.3, + "cost": 0.0055, + "robustness": 0 + }, + "medium": { + "accuracy": 61.1, + "cost": 0.0142, + "robustness": 0 + }, + "hard": { + "accuracy": 14.7, + "cost": 0.0183, + "robustness": 0 + }, + "all": { + "accuracy": 30.9, + "cost": 0.0163, + "robustness": 0 + } + } + }, + "Economics": { + "metrics": { + "easy": { + "accuracy": 97.3, + "cost": 0.0054, + "robustness": 0 + }, + "medium": { + "accuracy": 75.5, + "cost": 0.0125, + "robustness": 0 + }, + "hard": { + "accuracy": 16.2, + "cost": 0.0155, + "robustness": 0 + }, + "all": { + "accuracy": 73.7, + "cost": 0.01, + "robustness": 0 + } + } + }, + "Social problems": { + "metrics": { + "easy": { + "accuracy": 95.7, + "cost": 0.0034, + "robustness": 0 + }, + "medium": { + "accuracy": 33.3, + "cost": 0.0051, + 
"robustness": 0 + }, + "hard": { + "accuracy": 0.0, + "cost": 0.003, + "robustness": 0 + }, + "all": { + "accuracy": 78.7, + "cost": 0.0036, + "robustness": 0 + } + } + }, + "Law": { + "metrics": { + "easy": { + "accuracy": 90.8, + "cost": 0.0101, + "robustness": 0 + }, + "medium": { + "accuracy": 77.8, + "cost": 0.0164, + "robustness": 0 + }, + "hard": { + "accuracy": 10.0, + "cost": 0.0147, + "robustness": 0 + }, + "all": { + "accuracy": 70.3, + "cost": 0.0135, + "robustness": 0 + } + } + } + } + }, + "Science": { + "metrics": { + "easy": { + "accuracy": 98.8, + "cost": 0.0043, + "robustness": 0 + }, + "medium": { + "accuracy": 83.3, + "cost": 0.0128, + "robustness": 0 + }, + "hard": { + "accuracy": 35.1, + "cost": 0.028, + "robustness": 0 + }, + "all": { + "accuracy": 85.2, + "cost": 0.0102, + "robustness": 0 + } + }, + "subcategories": { + "Science": { + "metrics": { + "easy": { + "accuracy": 100.0, + "cost": 0.003, + "robustness": 0 + }, + "medium": { + "accuracy": 68.0, + "cost": 0.0085, + "robustness": 0 + }, + "hard": { + "accuracy": 11.9, + "cost": 0.0161, + "robustness": 0 + }, + "all": { + "accuracy": 47.4, + "cost": 0.011, + "robustness": 0 + } + } + }, + "Mathematics": { + "metrics": { + "easy": { + "accuracy": 96.9, + "cost": 0.005, + "robustness": 0 + }, + "medium": { + "accuracy": 85.6, + "cost": 0.0156, + "robustness": 0 + }, + "hard": { + "accuracy": 50.6, + "cost": 0.0438, + "robustness": 0 + }, + "all": { + "accuracy": 85.4, + "cost": 0.0151, + "robustness": 0 + } + } + }, + "Biology": { + "metrics": { + "easy": { + "accuracy": 100.0, + "cost": 0.0043, + "robustness": 0 + }, + "medium": { + "accuracy": 70.6, + "cost": 0.0093, + "robustness": 0 + }, + "hard": { + "accuracy": 44.4, + "cost": 0.0118, + "robustness": 0 + }, + "all": { + "accuracy": 93.0, + "cost": 0.0053, + "robustness": 0 + } + } + }, + "Chemistry": { + "metrics": { + "easy": { + "accuracy": 100.0, + "cost": 0.0043, + "robustness": 0 + }, + "medium": { + "accuracy": 94.1, + "cost": 
0.0099, + "robustness": 0 + }, + "hard": { + "accuracy": 15.4, + "cost": 0.018, + "robustness": 0 + }, + "all": { + "accuracy": 85.6, + "cost": 0.0084, + "robustness": 0 + } + } + }, + "Physics": { + "metrics": { + "easy": { + "accuracy": 100.0, + "cost": 0.0052, + "robustness": 0 + }, + "medium": { + "accuracy": 88.1, + "cost": 0.0108, + "robustness": 0 + }, + "hard": { + "accuracy": 54.5, + "cost": 0.0129, + "robustness": 0 + }, + "all": { + "accuracy": 89.7, + "cost": 0.0085, + "robustness": 0 + } + } + }, + "Animals (Zoology)": { + "metrics": { + "easy": { + "accuracy": 100.0, + "cost": 0.0028, + "robustness": 0 + }, + "medium": { + "accuracy": 80.0, + "cost": 0.007, + "robustness": 0 + }, + "hard": { + "accuracy": 0, + "cost": 0, + "robustness": 0 + }, + "all": { + "accuracy": 96.0, + "cost": 0.0036, + "robustness": 0 + } + } + }, + "Earth sciences and geology": { + "metrics": { + "easy": { + "accuracy": 99.6, + "cost": 0.0036, + "robustness": 0 + }, + "medium": { + "accuracy": 74.6, + "cost": 0.0076, + "robustness": 0 + }, + "hard": { + "accuracy": 30.8, + "cost": 0.009, + "robustness": 0 + }, + "all": { + "accuracy": 92.4, + "cost": 0.0046, + "robustness": 0 + } + } + } + } + }, + "Arts & recreation": { + "metrics": { + "easy": { + "accuracy": 95.6, + "cost": 0.0187, + "robustness": 0 + }, + "medium": { + "accuracy": 74.1, + "cost": 0.0488, + "robustness": 0 + }, + "hard": { + "accuracy": 31.3, + "cost": 0.1064, + "robustness": 0 + }, + "all": { + "accuracy": 74.3, + "cost": 0.0481, + "robustness": 0 + } + }, + "subcategories": { + "Arts": { + "metrics": { + "easy": { + "accuracy": 98.3, + "cost": 0.0039, + "robustness": 0 + }, + "medium": { + "accuracy": 92.3, + "cost": 0.0117, + "robustness": 0 + }, + "hard": { + "accuracy": 32.1, + "cost": 0.0335, + "robustness": 0 + }, + "all": { + "accuracy": 72.7, + "cost": 0.0166, + "robustness": 0 + } + } + }, + "Sports, games and entertainment": { + "metrics": { + "easy": { + "accuracy": 93.6, + "cost": 0.033, + 
"robustness": 0 + }, + "medium": { + "accuracy": 82.0, + "cost": 0.0847, + "robustness": 0 + }, + "hard": { + "accuracy": 33.3, + "cost": 0.1953, + "robustness": 0 + }, + "all": { + "accuracy": 76.8, + "cost": 0.0849, + "robustness": 0 + } + } + }, + "Music": { + "metrics": { + "easy": { + "accuracy": 97.0, + "cost": 0.0072, + "robustness": 0 + }, + "medium": { + "accuracy": 61.2, + "cost": 0.0305, + "robustness": 0 + }, + "hard": { + "accuracy": 24.0, + "cost": 0.0347, + "robustness": 0 + }, + "all": { + "accuracy": 72.1, + "cost": 0.0213, + "robustness": 0 + } + } + } + } + }, + "Literature": { + "metrics": { + "easy": { + "accuracy": 97.9, + "cost": 0.0035, + "robustness": 0 + }, + "medium": { + "accuracy": 81.9, + "cost": 0.0129, + "robustness": 0 + }, + "hard": { + "accuracy": 36.0, + "cost": 0.0191, + "robustness": 0 + }, + "all": { + "accuracy": 50.4, + "cost": 0.016, + "robustness": 0 + } + }, + "subcategories": { + "Literature, rhetoric and criticism": { + "metrics": { + "easy": { + "accuracy": 97.9, + "cost": 0.0035, + "robustness": 0 + }, + "medium": { + "accuracy": 81.9, + "cost": 0.0129, + "robustness": 0 + }, + "hard": { + "accuracy": 36.0, + "cost": 0.0191, + "robustness": 0 + }, + "all": { + "accuracy": 50.4, + "cost": 0.016, + "robustness": 0 + } + } + } + } + }, + "History": { + "metrics": { + "easy": { + "accuracy": 98.2, + "cost": 0.0042, + "robustness": 0 + }, + "medium": { + "accuracy": 78.0, + "cost": 0.008, + "robustness": 0 + }, + "hard": { + "accuracy": 26.2, + "cost": 0.0165, + "robustness": 0 + }, + "all": { + "accuracy": 76.6, + "cost": 0.0079, + "robustness": 0 + } + }, + "subcategories": { + "Geography": { + "metrics": { + "easy": { + "accuracy": 100.0, + "cost": 0.0032, + "robustness": 0 + }, + "medium": { + "accuracy": 85.0, + "cost": 0.0064, + "robustness": 0 + }, + "hard": { + "accuracy": 58.3, + "cost": 0.0151, + "robustness": 0 + }, + "all": { + "accuracy": 92.4, + "cost": 0.0052, + "robustness": 0 + } + } + }, + "History": { + 
"metrics": { + "easy": { + "accuracy": 97.8, + "cost": 0.0047, + "robustness": 0 + }, + "medium": { + "accuracy": 75.5, + "cost": 0.0085, + "robustness": 0 + }, + "hard": { + "accuracy": 23.5, + "cost": 0.0167, + "robustness": 0 + }, + "all": { + "accuracy": 71.3, + "cost": 0.0091, + "robustness": 0 + } + } + }, + "Biography and genealogy": { + "metrics": { + "easy": { + "accuracy": 97.6, + "cost": 0.0028, + "robustness": 0 + }, + "medium": { + "accuracy": 100.0, + "cost": 0.0029, + "robustness": 0 + }, + "hard": { + "accuracy": 0.0, + "cost": 0.0028, + "robustness": 0 + }, + "all": { + "accuracy": 96.2, + "cost": 0.0028, + "robustness": 0 + } + } + } + } + }, + "Language": { + "metrics": { + "easy": { + "accuracy": 85.6, + "cost": 0.0058, + "robustness": 0 + }, + "medium": { + "accuracy": 32.3, + "cost": 0.0076, + "robustness": 0 + }, + "hard": { + "accuracy": 54.0, + "cost": 0.0135, + "robustness": 0 + }, + "all": { + "accuracy": 58.3, + "cost": 0.0098, + "robustness": 0 + } + }, + "subcategories": { + "Language": { + "metrics": { + "easy": { + "accuracy": 85.6, + "cost": 0.0058, + "robustness": 0 + }, + "medium": { + "accuracy": 32.3, + "cost": 0.0076, + "robustness": 0 + }, + "hard": { + "accuracy": 54.0, + "cost": 0.0135, + "robustness": 0 + }, + "all": { + "accuracy": 58.3, + "cost": 0.0098, + "robustness": 0 + } + } + } + } + }, + "Computer science, information, and general works": { + "metrics": { + "easy": { + "accuracy": 92.1, + "cost": 0.0044, + "robustness": 0 + }, + "medium": { + "accuracy": 61.0, + "cost": 0.0148, + "robustness": 0 + }, + "hard": { + "accuracy": 23.7, + "cost": 0.0501, + "robustness": 0 + }, + "all": { + "accuracy": 69.9, + "cost": 0.0158, + "robustness": 0 + } + }, + "subcategories": { + "Computer science, knowledge, and systems": { + "metrics": { + "easy": { + "accuracy": 88.3, + "cost": 0.0049, + "robustness": 0 + }, + "medium": { + "accuracy": 58.9, + "cost": 0.0165, + "robustness": 0 + }, + "hard": { + "accuracy": 25.4, + "cost": 
0.0574, + "robustness": 0 + }, + "all": { + "accuracy": 64.1, + "cost": 0.0202, + "robustness": 0 + } + } + }, + "Library and information sciences": { + "metrics": { + "easy": { + "accuracy": 97.8, + "cost": 0.0036, + "robustness": 0 + }, + "medium": { + "accuracy": 70.6, + "cost": 0.0074, + "robustness": 0 + }, + "hard": { + "accuracy": 13.9, + "cost": 0.0075, + "robustness": 0 + }, + "all": { + "accuracy": 84.3, + "cost": 0.0048, + "robustness": 0 + } + } + } + } + } + } + }, + "azure": { + "metrics": { + "easy": { + "accuracy": 93.5, + "cost": 0.0003, + "robustness": 0 + }, + "medium": { + "accuracy": 61.3, + "cost": 0.0007, + "robustness": 0 + }, + "hard": { + "accuracy": 26.5, + "cost": 0.0009, + "robustness": 0 + }, + "all": { + "accuracy": 68.1, + "cost": 0.0005, + "robustness": 0 + } + }, + "categories": { + "Technology": { + "metrics": { + "easy": { + "accuracy": 93.7, + "cost": 0.0003, + "robustness": 0 + }, + "medium": { + "accuracy": 70.3, + "cost": 0.0007, + "robustness": 0 + }, + "hard": { + "accuracy": 23.9, + "cost": 0.0007, + "robustness": 0 + }, + "all": { + "accuracy": 78.3, + "cost": 0.0005, + "robustness": 0 + } + }, + "subcategories": { + "Medicine and health": { + "metrics": { + "easy": { + "accuracy": 92.8, + "cost": 0.0003, + "robustness": 0 + }, + "medium": { + "accuracy": 61.0, + "cost": 0.0004, + "robustness": 0 + }, + "hard": { + "accuracy": 15.0, + "cost": 0.0004, + "robustness": 0 + }, + "all": { + "accuracy": 76.1, + "cost": 0.0003, + "robustness": 0 + } + } + }, + "Engineering": { + "metrics": { + "easy": { + "accuracy": 98.4, + "cost": 0.0005, + "robustness": 0 + }, + "medium": { + "accuracy": 83.8, + "cost": 0.001, + "robustness": 0 + }, + "hard": { + "accuracy": 47.6, + "cost": 0.0015, + "robustness": 0 + }, + "all": { + "accuracy": 84.8, + "cost": 0.0009, + "robustness": 0 + } + } + } + } + }, + "Philosophy and psychology": { + "metrics": { + "easy": { + "accuracy": 92.4, + "cost": 0.0003, + "robustness": 0 + }, + "medium": { + 
"accuracy": 51.4, + "cost": 0.0004, + "robustness": 0 + }, + "hard": { + "accuracy": 7.9, + "cost": 0.0005, + "robustness": 0 + }, + "all": { + "accuracy": 69.5, + "cost": 0.0003, + "robustness": 0 + } + }, + "subcategories": { + "Philosophy": { + "metrics": { + "easy": { + "accuracy": 86.5, + "cost": 0.0003, + "robustness": 0 + }, + "medium": { + "accuracy": 62.5, + "cost": 0.0005, + "robustness": 0 + }, + "hard": { + "accuracy": 11.1, + "cost": 0.0007, + "robustness": 0 + }, + "all": { + "accuracy": 50.8, + "cost": 0.0005, + "robustness": 0 + } + } + }, + "Psychology": { + "metrics": { + "easy": { + "accuracy": 95.0, + "cost": 0.0002, + "robustness": 0 + }, + "medium": { + "accuracy": 77.1, + "cost": 0.0004, + "robustness": 0 + }, + "hard": { + "accuracy": 5.6, + "cost": 0.0003, + "robustness": 0 + }, + "all": { + "accuracy": 81.8, + "cost": 0.0003, + "robustness": 0 + } + } + }, + "Ethics": { + "metrics": { + "easy": { + "accuracy": 91.8, + "cost": 0.0003, + "robustness": 0 + }, + "medium": { + "accuracy": 31.8, + "cost": 0.0004, + "robustness": 0 + }, + "hard": { + "accuracy": 0.0, + "cost": 0.0003, + "robustness": 0 + }, + "all": { + "accuracy": 69.4, + "cost": 0.0003, + "robustness": 0 + } + } + } + } + }, + "Social Science": { + "metrics": { + "easy": { + "accuracy": 93.3, + "cost": 0.0003, + "robustness": 0 + }, + "medium": { + "accuracy": 65.1, + "cost": 0.0007, + "robustness": 0 + }, + "hard": { + "accuracy": 6.7, + "cost": 0.0006, + "robustness": 0 + }, + "all": { + "accuracy": 64.7, + "cost": 0.0005, + "robustness": 0 + } + }, + "subcategories": { + "Social sciences, sociology, and anthropology": { + "metrics": { + "easy": { + "accuracy": 100.0, + "cost": 0.0001, + "robustness": 0 + }, + "medium": { + "accuracy": 33.3, + "cost": 0.0006, + "robustness": 0 + }, + "hard": { + "accuracy": 2.9, + "cost": 0.0006, + "robustness": 0 + }, + "all": { + "accuracy": 18.2, + "cost": 0.0006, + "robustness": 0 + } + } + }, + "Economics": { + "metrics": { + "easy": { + 
"accuracy": 93.4, + "cost": 0.0003, + "robustness": 0 + }, + "medium": { + "accuracy": 70.6, + "cost": 0.0007, + "robustness": 0 + }, + "hard": { + "accuracy": 7.5, + "cost": 0.0006, + "robustness": 0 + }, + "all": { + "accuracy": 68.5, + "cost": 0.0005, + "robustness": 0 + } + } + }, + "Social problems": { + "metrics": { + "easy": { + "accuracy": 100.0, + "cost": 0.0002, + "robustness": 0 + }, + "medium": { + "accuracy": 44.4, + "cost": 0.0003, + "robustness": 0 + }, + "hard": { + "accuracy": 0.0, + "cost": 0.0002, + "robustness": 0 + }, + "all": { + "accuracy": 83.6, + "cost": 0.0002, + "robustness": 0 + } + } + }, + "Law": { + "metrics": { + "easy": { + "accuracy": 87.7, + "cost": 0.0004, + "robustness": 0 + }, + "medium": { + "accuracy": 62.9, + "cost": 0.0006, + "robustness": 0 + }, + "hard": { + "accuracy": 10.0, + "cost": 0.0005, + "robustness": 0 + }, + "all": { + "accuracy": 63.1, + "cost": 0.0005, + "robustness": 0 + } + } + } + } + }, + "Science": { + "metrics": { + "easy": { + "accuracy": 97.0, + "cost": 0.0002, + "robustness": 0 + }, + "medium": { + "accuracy": 75.0, + "cost": 0.0006, + "robustness": 0 + }, + "hard": { + "accuracy": 27.8, + "cost": 0.0012, + "robustness": 0 + }, + "all": { + "accuracy": 80.6, + "cost": 0.0005, + "robustness": 0 + } + }, + "subcategories": { + "Science": { + "metrics": { + "easy": { + "accuracy": 96.7, + "cost": 0.0002, + "robustness": 0 + }, + "medium": { + "accuracy": 20.0, + "cost": 0.0004, + "robustness": 0 + }, + "hard": { + "accuracy": 3.4, + "cost": 0.0006, + "robustness": 0 + }, + "all": { + "accuracy": 31.6, + "cost": 0.0005, + "robustness": 0 + } + } + }, + "Mathematics": { + "metrics": { + "easy": { + "accuracy": 96.9, + "cost": 0.0003, + "robustness": 0 + }, + "medium": { + "accuracy": 80.4, + "cost": 0.0008, + "robustness": 0 + }, + "hard": { + "accuracy": 40.4, + "cost": 0.0019, + "robustness": 0 + }, + "all": { + "accuracy": 81.7, + "cost": 0.0007, + "robustness": 0 + } + } + }, + "Biology": { + 
"metrics": { + "easy": { + "accuracy": 98.3, + "cost": 0.0003, + "robustness": 0 + }, + "medium": { + "accuracy": 64.7, + "cost": 0.0004, + "robustness": 0 + }, + "hard": { + "accuracy": 22.2, + "cost": 0.0004, + "robustness": 0 + }, + "all": { + "accuracy": 89.5, + "cost": 0.0003, + "robustness": 0 + } + } + }, + "Chemistry": { + "metrics": { + "easy": { + "accuracy": 93.0, + "cost": 0.0004, + "robustness": 0 + }, + "medium": { + "accuracy": 91.2, + "cost": 0.0006, + "robustness": 0 + }, + "hard": { + "accuracy": 30.8, + "cost": 0.0005, + "robustness": 0 + }, + "all": { + "accuracy": 83.3, + "cost": 0.0005, + "robustness": 0 + } + } + }, + "Physics": { + "metrics": { + "easy": { + "accuracy": 97.7, + "cost": 0.0003, + "robustness": 0 + }, + "medium": { + "accuracy": 73.8, + "cost": 0.0006, + "robustness": 0 + }, + "hard": { + "accuracy": 54.5, + "cost": 0.0006, + "robustness": 0 + }, + "all": { + "accuracy": 82.5, + "cost": 0.0005, + "robustness": 0 + } + } + }, + "Animals (Zoology)": { + "metrics": { + "easy": { + "accuracy": 100.0, + "cost": 0.0002, + "robustness": 0 + }, + "medium": { + "accuracy": 60.0, + "cost": 0.0003, + "robustness": 0 + }, + "hard": { + "accuracy": 0, + "cost": 0, + "robustness": 0 + }, + "all": { + "accuracy": 92.0, + "cost": 0.0002, + "robustness": 0 + } + } + }, + "Earth sciences and geology": { + "metrics": { + "easy": { + "accuracy": 96.9, + "cost": 0.0002, + "robustness": 0 + }, + "medium": { + "accuracy": 71.2, + "cost": 0.0004, + "robustness": 0 + }, + "hard": { + "accuracy": 30.8, + "cost": 0.0004, + "robustness": 0 + }, + "all": { + "accuracy": 89.7, + "cost": 0.0003, + "robustness": 0 + } + } + } + } + }, + "Arts & recreation": { + "metrics": { + "easy": { + "accuracy": 90.9, + "cost": 0.0006, + "robustness": 0 + }, + "medium": { + "accuracy": 52.9, + "cost": 0.0012, + "robustness": 0 + }, + "hard": { + "accuracy": 10.9, + "cost": 0.0017, + "robustness": 0 + }, + "all": { + "accuracy": 60.3, + "cost": 0.001, + "robustness": 0 + 
} + }, + "subcategories": { + "Arts": { + "metrics": { + "easy": { + "accuracy": 94.9, + "cost": 0.0002, + "robustness": 0 + }, + "medium": { + "accuracy": 51.3, + "cost": 0.0006, + "robustness": 0 + }, + "hard": { + "accuracy": 5.4, + "cost": 0.0009, + "robustness": 0 + }, + "all": { + "accuracy": 51.3, + "cost": 0.0006, + "robustness": 0 + } + } + }, + "Sports, games and entertainment": { + "metrics": { + "easy": { + "accuracy": 87.9, + "cost": 0.0009, + "robustness": 0 + }, + "medium": { + "accuracy": 52.0, + "cost": 0.0016, + "robustness": 0 + }, + "hard": { + "accuracy": 15.2, + "cost": 0.0025, + "robustness": 0 + }, + "all": { + "accuracy": 60.5, + "cost": 0.0015, + "robustness": 0 + } + } + }, + "Music": { + "metrics": { + "easy": { + "accuracy": 92.9, + "cost": 0.0004, + "robustness": 0 + }, + "medium": { + "accuracy": 54.3, + "cost": 0.0011, + "robustness": 0 + }, + "hard": { + "accuracy": 12.0, + "cost": 0.0012, + "robustness": 0 + }, + "all": { + "accuracy": 65.8, + "cost": 0.0008, + "robustness": 0 + } + } + } + } + }, + "Literature": { + "metrics": { + "easy": { + "accuracy": 93.1, + "cost": 0.0002, + "robustness": 0 + }, + "medium": { + "accuracy": 45.1, + "cost": 0.0006, + "robustness": 0 + }, + "hard": { + "accuracy": 24.8, + "cost": 0.0006, + "robustness": 0 + }, + "all": { + "accuracy": 38.0, + "cost": 0.0006, + "robustness": 0 + } + }, + "subcategories": { + "Literature, rhetoric and criticism": { + "metrics": { + "easy": { + "accuracy": 93.1, + "cost": 0.0002, + "robustness": 0 + }, + "medium": { + "accuracy": 45.1, + "cost": 0.0006, + "robustness": 0 + }, + "hard": { + "accuracy": 24.8, + "cost": 0.0006, + "robustness": 0 + }, + "all": { + "accuracy": 38.0, + "cost": 0.0006, + "robustness": 0 + } + } + } + } + }, + "History": { + "metrics": { + "easy": { + "accuracy": 97.0, + "cost": 0.0003, + "robustness": 0 + }, + "medium": { + "accuracy": 55.5, + "cost": 0.0004, + "robustness": 0 + }, + "hard": { + "accuracy": 4.0, + "cost": 0.0006, + 
"robustness": 0 + }, + "all": { + "accuracy": 65.3, + "cost": 0.0004, + "robustness": 0 + } + }, + "subcategories": { + "Geography": { + "metrics": { + "easy": { + "accuracy": 97.3, + "cost": 0.0002, + "robustness": 0 + }, + "medium": { + "accuracy": 42.1, + "cost": 0.0004, + "robustness": 0 + }, + "hard": { + "accuracy": 8.3, + "cost": 0.0007, + "robustness": 0 + }, + "all": { + "accuracy": 76.9, + "cost": 0.0003, + "robustness": 0 + } + } + }, + "History": { + "metrics": { + "easy": { + "accuracy": 96.4, + "cost": 0.0003, + "robustness": 0 + }, + "medium": { + "accuracy": 56.6, + "cost": 0.0005, + "robustness": 0 + }, + "hard": { + "accuracy": 3.7, + "cost": 0.0006, + "robustness": 0 + }, + "all": { + "accuracy": 60.0, + "cost": 0.0004, + "robustness": 0 + } + } + }, + "Biography and genealogy": { + "metrics": { + "easy": { + "accuracy": 100.0, + "cost": 0.0002, + "robustness": 0 + }, + "medium": { + "accuracy": 66.7, + "cost": 0.0003, + "robustness": 0 + }, + "hard": { + "accuracy": 0.0, + "cost": 0.0003, + "robustness": 0 + }, + "all": { + "accuracy": 92.3, + "cost": 0.0002, + "robustness": 0 + } + } + } + } + }, + "Language": { + "metrics": { + "easy": { + "accuracy": 84.7, + "cost": 0.0003, + "robustness": 0 + }, + "medium": { + "accuracy": 41.2, + "cost": 0.0003, + "robustness": 0 + }, + "hard": { + "accuracy": 56.4, + "cost": 0.0006, + "robustness": 0 + }, + "all": { + "accuracy": 61.3, + "cost": 0.0004, + "robustness": 0 + } + }, + "subcategories": { + "Language": { + "metrics": { + "easy": { + "accuracy": 84.7, + "cost": 0.0003, + "robustness": 0 + }, + "medium": { + "accuracy": 41.2, + "cost": 0.0003, + "robustness": 0 + }, + "hard": { + "accuracy": 56.4, + "cost": 0.0006, + "robustness": 0 + }, + "all": { + "accuracy": 61.3, + "cost": 0.0004, + "robustness": 0 + } + } + } + } + }, + "Computer science, information, and general works": { + "metrics": { + "easy": { + "accuracy": 91.9, + "cost": 0.0003, + "robustness": 0 + }, + "medium": { + "accuracy": 
58.2, + "cost": 0.0006, + "robustness": 0 + }, + "hard": { + "accuracy": 31.8, + "cost": 0.0018, + "robustness": 0 + }, + "all": { + "accuracy": 70.3, + "cost": 0.0007, + "robustness": 0 + } + }, + "subcategories": { + "Computer science, knowledge, and systems": { + "metrics": { + "easy": { + "accuracy": 89.7, + "cost": 0.0003, + "robustness": 0 + }, + "medium": { + "accuracy": 58.6, + "cost": 0.0007, + "robustness": 0 + }, + "hard": { + "accuracy": 36.4, + "cost": 0.0021, + "robustness": 0 + }, + "all": { + "accuracy": 67.0, + "cost": 0.0008, + "robustness": 0 + } + } + }, + "Library and information sciences": { + "metrics": { + "easy": { + "accuracy": 95.3, + "cost": 0.0002, + "robustness": 0 + }, + "medium": { + "accuracy": 56.5, + "cost": 0.0004, + "robustness": 0 + }, + "hard": { + "accuracy": 5.6, + "cost": 0.0003, + "robustness": 0 + }, + "all": { + "accuracy": 78.8, + "cost": 0.0003, + "robustness": 0 + } + } + } + } + } + } + }, + "vllm": { + "metrics": { + "easy": { + "accuracy": 95.4, + "cost": 0.0016, + "robustness": 0 + }, + "medium": { + "accuracy": 58.2, + "cost": 0.0017, + "robustness": 0 + }, + "hard": { + "accuracy": 21.5, + "cost": 0.0019, + "robustness": 0 + }, + "all": { + "accuracy": 67.3, + "cost": 0.0017, + "robustness": 0 + } + }, + "categories": { + "Computer science, information, and general works": { + "metrics": { + "easy": { + "accuracy": 95.4, + "cost": 0.0012, + "robustness": 0 + }, + "medium": { + "accuracy": 58.7, + "cost": 0.0018, + "robustness": 0 + }, + "hard": { + "accuracy": 7.3, + "cost": 0.0022, + "robustness": 0 + }, + "all": { + "accuracy": 67.9, + "cost": 0.0016, + "robustness": 0 + } + }, + "subcategories": { + "Library and information sciences": { + "metrics": { + "easy": { + "accuracy": 94.5, + "cost": 0.001, + "robustness": 0 + }, + "medium": { + "accuracy": 57.6, + "cost": 0.0009, + "robustness": 0 + }, + "hard": { + "accuracy": 2.8, + "cost": 0.001, + "robustness": 0 + }, + "all": { + "accuracy": 78.3, + "cost": 
0.001, + "robustness": 0 + } + } + }, + "Computer science, knowledge, and systems": { + "metrics": { + "easy": { + "accuracy": 95.9, + "cost": 0.0014, + "robustness": 0 + }, + "medium": { + "accuracy": 58.9, + "cost": 0.002, + "robustness": 0 + }, + "hard": { + "accuracy": 8.1, + "cost": 0.0024, + "robustness": 0 + }, + "all": { + "accuracy": 63.7, + "cost": 0.0018, + "robustness": 0 + } + } + } + } + }, + "Philosophy and psychology": { + "metrics": { + "easy": { + "accuracy": 91.9, + "cost": 0.0017, + "robustness": 0 + }, + "medium": { + "accuracy": 54.0, + "cost": 0.0017, + "robustness": 0 + }, + "hard": { + "accuracy": 4.9, + "cost": 0.0017, + "robustness": 0 + }, + "all": { + "accuracy": 70.4, + "cost": 0.0017, + "robustness": 0 + } + }, + "subcategories": { + "Ethics": { + "metrics": { + "easy": { + "accuracy": 88.1, + "cost": 0.0014, + "robustness": 0 + }, + "medium": { + "accuracy": 37.6, + "cost": 0.0017, + "robustness": 0 + }, + "hard": { + "accuracy": 0.0, + "cost": 0.0014, + "robustness": 0 + }, + "all": { + "accuracy": 68.9, + "cost": 0.0015, + "robustness": 0 + } + } + }, + "Philosophy": { + "metrics": { + "easy": { + "accuracy": 91.9, + "cost": 0.0016, + "robustness": 0 + }, + "medium": { + "accuracy": 62.5, + "cost": 0.0018, + "robustness": 0 + }, + "hard": { + "accuracy": 6.7, + "cost": 0.0018, + "robustness": 0 + }, + "all": { + "accuracy": 50.8, + "cost": 0.0017, + "robustness": 0 + } + } + }, + "Psychology": { + "metrics": { + "easy": { + "accuracy": 97.5, + "cost": 0.002, + "robustness": 0 + }, + "medium": { + "accuracy": 70.8, + "cost": 0.002, + "robustness": 0 + }, + "hard": { + "accuracy": 5.6, + "cost": 0.0019, + "robustness": 0 + }, + "all": { + "accuracy": 81.8, + "cost": 0.002, + "robustness": 0 + } + } + }, + "Philosophical logic": { + "metrics": { + "easy": { + "accuracy": 92.3, + "cost": 0.0018, + "robustness": 0 + }, + "medium": { + "accuracy": 60.5, + "cost": 0.0014, + "robustness": 0 + }, + "hard": { + "accuracy": 0.0, + "cost": 
0.0016, + "robustness": 0 + }, + "all": { + "accuracy": 76.9, + "cost": 0.0016, + "robustness": 0 + } + } + } + } + }, + "Social Science": { + "metrics": { + "easy": { + "accuracy": 95.0, + "cost": 0.0011, + "robustness": 0 + }, + "medium": { + "accuracy": 60.9, + "cost": 0.0013, + "robustness": 0 + }, + "hard": { + "accuracy": 9.4, + "cost": 0.0017, + "robustness": 0 + }, + "all": { + "accuracy": 64.4, + "cost": 0.0013, + "robustness": 0 + } + }, + "subcategories": { + "Economics": { + "metrics": { + "easy": { + "accuracy": 94.5, + "cost": 0.001, + "robustness": 0 + }, + "medium": { + "accuracy": 60.1, + "cost": 0.0012, + "robustness": 0 + }, + "hard": { + "accuracy": 10.0, + "cost": 0.0018, + "robustness": 0 + }, + "all": { + "accuracy": 65.5, + "cost": 0.0012, + "robustness": 0 + } + } + }, + "Law": { + "metrics": { + "easy": { + "accuracy": 95.4, + "cost": 0.0014, + "robustness": 0 + }, + "medium": { + "accuracy": 65.1, + "cost": 0.0016, + "robustness": 0 + }, + "hard": { + "accuracy": 10.0, + "cost": 0.0016, + "robustness": 0 + }, + "all": { + "accuracy": 67.1, + "cost": 0.0015, + "robustness": 0 + } + } + }, + "Social sciences, sociology, and anthropology": { + "metrics": { + "easy": { + "accuracy": 100.0, + "cost": 0.0007, + "robustness": 0 + }, + "medium": { + "accuracy": 66.7, + "cost": 0.0015, + "robustness": 0 + }, + "hard": { + "accuracy": 5.9, + "cost": 0.0015, + "robustness": 0 + }, + "all": { + "accuracy": 30.9, + "cost": 0.0014, + "robustness": 0 + } + } + }, + "Social problems": { + "metrics": { + "easy": { + "accuracy": 95.7, + "cost": 0.0014, + "robustness": 0 + }, + "medium": { + "accuracy": 33.3, + "cost": 0.0016, + "robustness": 0 + }, + "hard": { + "accuracy": 20.0, + "cost": 0.0014, + "robustness": 0 + }, + "all": { + "accuracy": 80.3, + "cost": 0.0015, + "robustness": 0 + } + } + } + } + }, + "Language": { + "metrics": { + "easy": { + "accuracy": 87.6, + "cost": 0.0011, + "robustness": 0 + }, + "medium": { + "accuracy": 34.7, + "cost": 
0.0017, + "robustness": 0 + }, + "hard": { + "accuracy": 50.1, + "cost": 0.0012, + "robustness": 0 + }, + "all": { + "accuracy": 57.6, + "cost": 0.0013, + "robustness": 0 + } + }, + "subcategories": { + "Language": { + "metrics": { + "easy": { + "accuracy": 87.6, + "cost": 0.0011, + "robustness": 0 + }, + "medium": { + "accuracy": 34.7, + "cost": 0.0017, + "robustness": 0 + }, + "hard": { + "accuracy": 50.1, + "cost": 0.0012, + "robustness": 0 + }, + "all": { + "accuracy": 57.6, + "cost": 0.0013, + "robustness": 0 + } + } + } + } + }, + "Science": { + "metrics": { + "easy": { + "accuracy": 97.5, + "cost": 0.0015, + "robustness": 0 + }, + "medium": { + "accuracy": 61.8, + "cost": 0.0015, + "robustness": 0 + }, + "hard": { + "accuracy": 5.2, + "cost": 0.0018, + "robustness": 0 + }, + "all": { + "accuracy": 73.7, + "cost": 0.0016, + "robustness": 0 + } + }, + "subcategories": { + "Mathematics": { + "metrics": { + "easy": { + "accuracy": 96.2, + "cost": 0.0014, + "robustness": 0 + }, + "medium": { + "accuracy": 61.6, + "cost": 0.0016, + "robustness": 0 + }, + "hard": { + "accuracy": 3.4, + "cost": 0.0022, + "robustness": 0 + }, + "all": { + "accuracy": 68.1, + "cost": 0.0016, + "robustness": 0 + } + } + }, + "Earth sciences and geology": { + "metrics": { + "easy": { + "accuracy": 98.1, + "cost": 0.0012, + "robustness": 0 + }, + "medium": { + "accuracy": 66.1, + "cost": 0.0011, + "robustness": 0 + }, + "hard": { + "accuracy": 15.4, + "cost": 0.0011, + "robustness": 0 + }, + "all": { + "accuracy": 89.1, + "cost": 0.0012, + "robustness": 0 + } + } + }, + "Biology": { + "metrics": { + "easy": { + "accuracy": 98.3, + "cost": 0.0022, + "robustness": 0 + }, + "medium": { + "accuracy": 47.1, + "cost": 0.002, + "robustness": 0 + }, + "hard": { + "accuracy": 22.2, + "cost": 0.0022, + "robustness": 0 + }, + "all": { + "accuracy": 87.4, + "cost": 0.0022, + "robustness": 0 + } + } + }, + "Chemistry": { + "metrics": { + "easy": { + "accuracy": 97.7, + "cost": 0.0016, + "robustness": 
0 + }, + "medium": { + "accuracy": 61.8, + "cost": 0.0015, + "robustness": 0 + }, + "hard": { + "accuracy": 0.0, + "cost": 0.0012, + "robustness": 0 + }, + "all": { + "accuracy": 70.0, + "cost": 0.0015, + "robustness": 0 + } + } + }, + "Physics": { + "metrics": { + "easy": { + "accuracy": 97.7, + "cost": 0.0015, + "robustness": 0 + }, + "medium": { + "accuracy": 57.1, + "cost": 0.0016, + "robustness": 0 + }, + "hard": { + "accuracy": 9.1, + "cost": 0.0015, + "robustness": 0 + }, + "all": { + "accuracy": 70.1, + "cost": 0.0015, + "robustness": 0 + } + } + }, + "Animals (Zoology)": { + "metrics": { + "easy": { + "accuracy": 100.0, + "cost": 0.0017, + "robustness": 0 + }, + "medium": { + "accuracy": 100.0, + "cost": 0.0017, + "robustness": 0 + }, + "hard": { + "accuracy": 0, + "cost": 0, + "robustness": 0 + }, + "all": { + "accuracy": 100.0, + "cost": 0.0017, + "robustness": 0 + } + } + }, + "Science": { + "metrics": { + "easy": { + "accuracy": 100.0, + "cost": 0.0017, + "robustness": 0 + }, + "medium": { + "accuracy": 64.0, + "cost": 0.0015, + "robustness": 0 + }, + "hard": { + "accuracy": 3.4, + "cost": 0.0017, + "robustness": 0 + }, + "all": { + "accuracy": 42.1, + "cost": 0.0016, + "robustness": 0 + } + } + } + } + }, + "Technology": { + "metrics": { + "easy": { + "accuracy": 96.1, + "cost": 0.002, + "robustness": 0 + }, + "medium": { + "accuracy": 60.7, + "cost": 0.0017, + "robustness": 0 + }, + "hard": { + "accuracy": 8.3, + "cost": 0.0021, + "robustness": 0 + }, + "all": { + "accuracy": 75.6, + "cost": 0.0019, + "robustness": 0 + } + }, + "subcategories": { + "Engineering": { + "metrics": { + "easy": { + "accuracy": 93.7, + "cost": 0.0011, + "robustness": 0 + }, + "medium": { + "accuracy": 67.1, + "cost": 0.0013, + "robustness": 0 + }, + "hard": { + "accuracy": 11.9, + "cost": 0.0015, + "robustness": 0 + }, + "all": { + "accuracy": 70.1, + "cost": 0.0012, + "robustness": 0 + } + } + }, + "Medicine and health": { + "metrics": { + "easy": { + "accuracy": 96.2, + 
"cost": 0.0023, + "robustness": 0 + }, + "medium": { + "accuracy": 56.6, + "cost": 0.002, + "robustness": 0 + }, + "hard": { + "accuracy": 7.1, + "cost": 0.0023, + "robustness": 0 + }, + "all": { + "accuracy": 76.4, + "cost": 0.0022, + "robustness": 0 + } + } + }, + "Management and public relations": { + "metrics": { + "easy": { + "accuracy": 100.0, + "cost": 0.0008, + "robustness": 0 + }, + "medium": { + "accuracy": 54.5, + "cost": 0.0008, + "robustness": 0 + }, + "hard": { + "accuracy": 0.0, + "cost": 0.0004, + "robustness": 0 + }, + "all": { + "accuracy": 90.4, + "cost": 0.0008, + "robustness": 0 + } + } + } + } + }, + "Arts & recreation": { + "metrics": { + "easy": { + "accuracy": 95.6, + "cost": 0.0018, + "robustness": 0 + }, + "medium": { + "accuracy": 51.8, + "cost": 0.0019, + "robustness": 0 + }, + "hard": { + "accuracy": 6.1, + "cost": 0.002, + "robustness": 0 + }, + "all": { + "accuracy": 60.9, + "cost": 0.0019, + "robustness": 0 + } + }, + "subcategories": { + "Sports, games and entertainment": { + "metrics": { + "easy": { + "accuracy": 94.3, + "cost": 0.0018, + "robustness": 0 + }, + "medium": { + "accuracy": 59.0, + "cost": 0.0023, + "robustness": 0 + }, + "hard": { + "accuracy": 6.1, + "cost": 0.0023, + "robustness": 0 + }, + "all": { + "accuracy": 63.7, + "cost": 0.0021, + "robustness": 0 + } + } + }, + "Music": { + "metrics": { + "easy": { + "accuracy": 94.9, + "cost": 0.0017, + "robustness": 0 + }, + "medium": { + "accuracy": 35.3, + "cost": 0.0017, + "robustness": 0 + }, + "hard": { + "accuracy": 0.0, + "cost": 0.0016, + "robustness": 0 + }, + "all": { + "accuracy": 56.2, + "cost": 0.0017, + "robustness": 0 + } + } + }, + "Arts": { + "metrics": { + "easy": { + "accuracy": 100.0, + "cost": 0.0017, + "robustness": 0 + }, + "medium": { + "accuracy": 82.1, + "cost": 0.0018, + "robustness": 0 + }, + "hard": { + "accuracy": 8.9, + "cost": 0.0018, + "robustness": 0 + }, + "all": { + "accuracy": 62.3, + "cost": 0.0017, + "robustness": 0 + } + } + } + } + 
}, + "Literature": { + "metrics": { + "easy": { + "accuracy": 98.2, + "cost": 0.0018, + "robustness": 0 + }, + "medium": { + "accuracy": 81.1, + "cost": 0.0018, + "robustness": 0 + }, + "hard": { + "accuracy": 33.7, + "cost": 0.0022, + "robustness": 0 + }, + "all": { + "accuracy": 48.7, + "cost": 0.0021, + "robustness": 0 + } + }, + "subcategories": { + "Literature, rhetoric and criticism": { + "metrics": { + "easy": { + "accuracy": 98.2, + "cost": 0.0018, + "robustness": 0 + }, + "medium": { + "accuracy": 81.1, + "cost": 0.0018, + "robustness": 0 + }, + "hard": { + "accuracy": 33.7, + "cost": 0.0022, + "robustness": 0 + }, + "all": { + "accuracy": 48.7, + "cost": 0.0021, + "robustness": 0 + } + } + } + } + }, + "History": { + "metrics": { + "easy": { + "accuracy": 97.1, + "cost": 0.0016, + "robustness": 0 + }, + "medium": { + "accuracy": 65.5, + "cost": 0.0017, + "robustness": 0 + }, + "hard": { + "accuracy": 11.4, + "cost": 0.0016, + "robustness": 0 + }, + "all": { + "accuracy": 70.9, + "cost": 0.0016, + "robustness": 0 + } + }, + "subcategories": { + "Geography": { + "metrics": { + "easy": { + "accuracy": 96.3, + "cost": 0.0014, + "robustness": 0 + }, + "medium": { + "accuracy": 75.9, + "cost": 0.0017, + "robustness": 0 + }, + "hard": { + "accuracy": 33.3, + "cost": 0.0017, + "robustness": 0 + }, + "all": { + "accuracy": 87.3, + "cost": 0.0015, + "robustness": 0 + } + } + }, + "History": { + "metrics": { + "easy": { + "accuracy": 97.8, + "cost": 0.0017, + "robustness": 0 + }, + "medium": { + "accuracy": 64.0, + "cost": 0.0017, + "robustness": 0 + }, + "hard": { + "accuracy": 9.6, + "cost": 0.0016, + "robustness": 0 + }, + "all": { + "accuracy": 64.3, + "cost": 0.0017, + "robustness": 0 + } + } + }, + "Biography and genealogy": { + "metrics": { + "easy": { + "accuracy": 95.2, + "cost": 0.0016, + "robustness": 0 + }, + "medium": { + "accuracy": 55.6, + "cost": 0.0013, + "robustness": 0 + }, + "hard": { + "accuracy": 0.0, + "cost": 0.0017, + "robustness": 0 + }, + 
"all": { + "accuracy": 86.5, + "cost": 0.0016, + "robustness": 0 + } + } + } + } + } + } + }, + "mirt_bert": { + "metrics": { + "easy": { + "accuracy": 96.0, + "cost": 0.0001, + "robustness": 0 + }, + "medium": { + "accuracy": 57.3, + "cost": 0.0002, + "robustness": 0 + }, + "hard": { + "accuracy": 19.6, + "cost": 0.0002, + "robustness": 0 + }, + "all": { + "accuracy": 66.9, + "cost": 0.0002, + "robustness": 0 + } + }, + "categories": { + "Computer science, information, and general works": { + "metrics": { + "easy": { + "accuracy": 97.7, + "cost": 0.0001, + "robustness": 0 + }, + "medium": { + "accuracy": 65.6, + "cost": 0.0003, + "robustness": 0 + }, + "hard": { + "accuracy": 16.3, + "cost": 0.0008, + "robustness": 0 + }, + "all": { + "accuracy": 72.9, + "cost": 0.0003, + "robustness": 0 + } + }, + "subcategories": { + "Library and information sciences": { + "metrics": { + "easy": { + "accuracy": 97.1, + "cost": 0.0001, + "robustness": 0 + }, + "medium": { + "accuracy": 50.6, + "cost": 0.0001, + "robustness": 0 + }, + "hard": { + "accuracy": 0.0, + "cost": 0.0001, + "robustness": 0 + }, + "all": { + "accuracy": 78.3, + "cost": 0.0001, + "robustness": 0 + } + } + }, + "Computer science, knowledge, and systems": { + "metrics": { + "easy": { + "accuracy": 98.1, + "cost": 0.0001, + "robustness": 0 + }, + "medium": { + "accuracy": 69.0, + "cost": 0.0003, + "robustness": 0 + }, + "hard": { + "accuracy": 19.1, + "cost": 0.0009, + "robustness": 0 + }, + "all": { + "accuracy": 70.7, + "cost": 0.0004, + "robustness": 0 + } + } + } + } + }, + "Philosophy and psychology": { + "metrics": { + "easy": { + "accuracy": 96.3, + "cost": 0.0001, + "robustness": 0 + }, + "medium": { + "accuracy": 48.8, + "cost": 0.0001, + "robustness": 0 + }, + "hard": { + "accuracy": 1.2, + "cost": 0.0001, + "robustness": 0 + }, + "all": { + "accuracy": 71.0, + "cost": 0.0001, + "robustness": 0 + } + }, + "subcategories": { + "Ethics": { + "metrics": { + "easy": { + "accuracy": 96.2, + "cost": 
0.0001, + "robustness": 0 + }, + "medium": { + "accuracy": 32.9, + "cost": 0.0001, + "robustness": 0 + }, + "hard": { + "accuracy": 0.0, + "cost": 0.0001, + "robustness": 0 + }, + "all": { + "accuracy": 72.8, + "cost": 0.0001, + "robustness": 0 + } + } + }, + "Philosophy": { + "metrics": { + "easy": { + "accuracy": 94.6, + "cost": 0.0001, + "robustness": 0 + }, + "medium": { + "accuracy": 45.0, + "cost": 0.0001, + "robustness": 0 + }, + "hard": { + "accuracy": 2.2, + "cost": 0.0001, + "robustness": 0 + }, + "all": { + "accuracy": 44.3, + "cost": 0.0001, + "robustness": 0 + } + } + }, + "Psychology": { + "metrics": { + "easy": { + "accuracy": 95.9, + "cost": 0.0001, + "robustness": 0 + }, + "medium": { + "accuracy": 68.8, + "cost": 0.0001, + "robustness": 0 + }, + "hard": { + "accuracy": 0.0, + "cost": 0.0001, + "robustness": 0 + }, + "all": { + "accuracy": 79.7, + "cost": 0.0001, + "robustness": 0 + } + } + }, + "Philosophical logic": { + "metrics": { + "easy": { + "accuracy": 98.5, + "cost": 0.0001, + "robustness": 0 + }, + "medium": { + "accuracy": 63.2, + "cost": 0.0001, + "robustness": 0 + }, + "hard": { + "accuracy": 0.0, + "cost": 0.0001, + "robustness": 0 + }, + "all": { + "accuracy": 81.5, + "cost": 0.0001, + "robustness": 0 + } + } + } + } + }, + "Social Science": { + "metrics": { + "easy": { + "accuracy": 96.3, + "cost": 0.0001, + "robustness": 0 + }, + "medium": { + "accuracy": 53.8, + "cost": 0.0002, + "robustness": 0 + }, + "hard": { + "accuracy": 2.7, + "cost": 0.0002, + "robustness": 0 + }, + "all": { + "accuracy": 61.0, + "cost": 0.0001, + "robustness": 0 + } + }, + "subcategories": { + "Economics": { + "metrics": { + "easy": { + "accuracy": 97.3, + "cost": 0.0001, + "robustness": 0 + }, + "medium": { + "accuracy": 54.6, + "cost": 0.0002, + "robustness": 0 + }, + "hard": { + "accuracy": 1.2, + "cost": 0.0002, + "robustness": 0 + }, + "all": { + "accuracy": 62.9, + "cost": 0.0002, + "robustness": 0 + } + } + }, + "Law": { + "metrics": { + "easy": { + 
"accuracy": 93.8, + "cost": 0.0001, + "robustness": 0 + }, + "medium": { + "accuracy": 52.4, + "cost": 0.0001, + "robustness": 0 + }, + "hard": { + "accuracy": 6.7, + "cost": 0.0001, + "robustness": 0 + }, + "all": { + "accuracy": 60.8, + "cost": 0.0001, + "robustness": 0 + } + } + }, + "Social sciences, sociology, and anthropology": { + "metrics": { + "easy": { + "accuracy": 100.0, + "cost": 0.0001, + "robustness": 0 + }, + "medium": { + "accuracy": 66.7, + "cost": 0.0001, + "robustness": 0 + }, + "hard": { + "accuracy": 2.9, + "cost": 0.0001, + "robustness": 0 + }, + "all": { + "accuracy": 29.1, + "cost": 0.0001, + "robustness": 0 + } + } + }, + "Social problems": { + "metrics": { + "easy": { + "accuracy": 95.7, + "cost": 0.0001, + "robustness": 0 + }, + "medium": { + "accuracy": 22.2, + "cost": 0.0001, + "robustness": 0 + }, + "hard": { + "accuracy": 0.0, + "cost": 0.0001, + "robustness": 0 + }, + "all": { + "accuracy": 77.0, + "cost": 0.0001, + "robustness": 0 + } + } + } + } + }, + "Language": { + "metrics": { + "easy": { + "accuracy": 86.6, + "cost": 0.0001, + "robustness": 0 + }, + "medium": { + "accuracy": 34.1, + "cost": 0.0001, + "robustness": 0 + }, + "hard": { + "accuracy": 51.4, + "cost": 0.0001, + "robustness": 0 + }, + "all": { + "accuracy": 57.8, + "cost": 0.0001, + "robustness": 0 + } + }, + "subcategories": { + "Language": { + "metrics": { + "easy": { + "accuracy": 86.6, + "cost": 0.0001, + "robustness": 0 + }, + "medium": { + "accuracy": 34.1, + "cost": 0.0001, + "robustness": 0 + }, + "hard": { + "accuracy": 51.4, + "cost": 0.0001, + "robustness": 0 + }, + "all": { + "accuracy": 57.8, + "cost": 0.0001, + "robustness": 0 + } + } + } + } + }, + "Science": { + "metrics": { + "easy": { + "accuracy": 98.1, + "cost": 0.0001, + "robustness": 0 + }, + "medium": { + "accuracy": 66.0, + "cost": 0.0002, + "robustness": 0 + }, + "hard": { + "accuracy": 4.6, + "cost": 0.0003, + "robustness": 0 + }, + "all": { + "accuracy": 75.2, + "cost": 0.0001, + 
"robustness": 0 + } + }, + "subcategories": { + "Mathematics": { + "metrics": { + "easy": { + "accuracy": 97.3, + "cost": 0.0001, + "robustness": 0 + }, + "medium": { + "accuracy": 67.2, + "cost": 0.0002, + "robustness": 0 + }, + "hard": { + "accuracy": 5.6, + "cost": 0.0006, + "robustness": 0 + }, + "all": { + "accuracy": 71.2, + "cost": 0.0002, + "robustness": 0 + } + } + }, + "Earth sciences and geology": { + "metrics": { + "easy": { + "accuracy": 99.2, + "cost": 0.0001, + "robustness": 0 + }, + "medium": { + "accuracy": 64.4, + "cost": 0.0001, + "robustness": 0 + }, + "hard": { + "accuracy": 0.0, + "cost": 0.0001, + "robustness": 0 + }, + "all": { + "accuracy": 89.1, + "cost": 0.0001, + "robustness": 0 + } + } + }, + "Biology": { + "metrics": { + "easy": { + "accuracy": 98.3, + "cost": 0.0001, + "robustness": 0 + }, + "medium": { + "accuracy": 70.6, + "cost": 0.0001, + "robustness": 0 + }, + "hard": { + "accuracy": 0.0, + "cost": 0.0001, + "robustness": 0 + }, + "all": { + "accuracy": 88.8, + "cost": 0.0001, + "robustness": 0 + } + } + }, + "Chemistry": { + "metrics": { + "easy": { + "accuracy": 97.7, + "cost": 0.0001, + "robustness": 0 + }, + "medium": { + "accuracy": 73.5, + "cost": 0.0001, + "robustness": 0 + }, + "hard": { + "accuracy": 0.0, + "cost": 0.0001, + "robustness": 0 + }, + "all": { + "accuracy": 74.4, + "cost": 0.0001, + "robustness": 0 + } + } + }, + "Physics": { + "metrics": { + "easy": { + "accuracy": 95.5, + "cost": 0.0001, + "robustness": 0 + }, + "medium": { + "accuracy": 69.0, + "cost": 0.0001, + "robustness": 0 + }, + "hard": { + "accuracy": 9.1, + "cost": 0.0001, + "robustness": 0 + }, + "all": { + "accuracy": 74.2, + "cost": 0.0001, + "robustness": 0 + } + } + }, + "Animals (Zoology)": { + "metrics": { + "easy": { + "accuracy": 100.0, + "cost": 0.0001, + "robustness": 0 + }, + "medium": { + "accuracy": 60.0, + "cost": 0.0001, + "robustness": 0 + }, + "hard": { + "accuracy": 0, + "cost": 0, + "robustness": 0 + }, + "all": { + "accuracy": 
92.0, + "cost": 0.0001, + "robustness": 0 + } + } + }, + "Science": { + "metrics": { + "easy": { + "accuracy": 96.7, + "cost": 0.0001, + "robustness": 0 + }, + "medium": { + "accuracy": 40.0, + "cost": 0.0001, + "robustness": 0 + }, + "hard": { + "accuracy": 5.1, + "cost": 0.0001, + "robustness": 0 + }, + "all": { + "accuracy": 36.8, + "cost": 0.0001, + "robustness": 0 + } + } + } + } + }, + "Technology": { + "metrics": { + "easy": { + "accuracy": 94.4, + "cost": 0.0001, + "robustness": 0 + }, + "medium": { + "accuracy": 53.8, + "cost": 0.0001, + "robustness": 0 + }, + "hard": { + "accuracy": 5.7, + "cost": 0.0001, + "robustness": 0 + }, + "all": { + "accuracy": 72.3, + "cost": 0.0001, + "robustness": 0 + } + }, + "subcategories": { + "Engineering": { + "metrics": { + "easy": { + "accuracy": 97.6, + "cost": 0.0001, + "robustness": 0 + }, + "medium": { + "accuracy": 68.9, + "cost": 0.0002, + "robustness": 0 + }, + "hard": { + "accuracy": 7.1, + "cost": 0.0002, + "robustness": 0 + }, + "all": { + "accuracy": 71.9, + "cost": 0.0002, + "robustness": 0 + } + } + }, + "Medicine and health": { + "metrics": { + "easy": { + "accuracy": 93.2, + "cost": 0.0001, + "robustness": 0 + }, + "medium": { + "accuracy": 43.8, + "cost": 0.0001, + "robustness": 0 + }, + "hard": { + "accuracy": 5.3, + "cost": 0.0001, + "robustness": 0 + }, + "all": { + "accuracy": 71.2, + "cost": 0.0001, + "robustness": 0 + } + } + }, + "Management and public relations": { + "metrics": { + "easy": { + "accuracy": 100.0, + "cost": 0.0001, + "robustness": 0 + }, + "medium": { + "accuracy": 45.5, + "cost": 0.0001, + "robustness": 0 + }, + "hard": { + "accuracy": 0.0, + "cost": 0.0001, + "robustness": 0 + }, + "all": { + "accuracy": 89.0, + "cost": 0.0001, + "robustness": 0 + } + } + } + } + }, + "Arts & recreation": { + "metrics": { + "easy": { + "accuracy": 94.3, + "cost": 0.0001, + "robustness": 0 + }, + "medium": { + "accuracy": 49.0, + "cost": 0.0001, + "robustness": 0 + }, + "hard": { + "accuracy": 
3.4, + "cost": 0.0002, + "robustness": 0 + }, + "all": { + "accuracy": 58.7, + "cost": 0.0001, + "robustness": 0 + } + }, + "subcategories": { + "Sports, games and entertainment": { + "metrics": { + "easy": { + "accuracy": 90.0, + "cost": 0.0001, + "robustness": 0 + }, + "medium": { + "accuracy": 53.0, + "cost": 0.0002, + "robustness": 0 + }, + "hard": { + "accuracy": 6.1, + "cost": 0.0003, + "robustness": 0 + }, + "all": { + "accuracy": 59.8, + "cost": 0.0002, + "robustness": 0 + } + } + }, + "Music": { + "metrics": { + "easy": { + "accuracy": 98.0, + "cost": 0.0001, + "robustness": 0 + }, + "medium": { + "accuracy": 39.7, + "cost": 0.0001, + "robustness": 0 + }, + "hard": { + "accuracy": 0.0, + "cost": 0.0001, + "robustness": 0 + }, + "all": { + "accuracy": 59.6, + "cost": 0.0001, + "robustness": 0 + } + } + }, + "Arts": { + "metrics": { + "easy": { + "accuracy": 98.3, + "cost": 0.0001, + "robustness": 0 + }, + "medium": { + "accuracy": 66.7, + "cost": 0.0001, + "robustness": 0 + }, + "hard": { + "accuracy": 1.8, + "cost": 0.0001, + "robustness": 0 + }, + "all": { + "accuracy": 55.2, + "cost": 0.0001, + "robustness": 0 + } + } + } + } + }, + "Literature": { + "metrics": { + "easy": { + "accuracy": 99.3, + "cost": 0.0001, + "robustness": 0 + }, + "medium": { + "accuracy": 77.3, + "cost": 0.0001, + "robustness": 0 + }, + "hard": { + "accuracy": 27.2, + "cost": 0.0002, + "robustness": 0 + }, + "all": { + "accuracy": 43.7, + "cost": 0.0002, + "robustness": 0 + } + }, + "subcategories": { + "Literature, rhetoric and criticism": { + "metrics": { + "easy": { + "accuracy": 99.3, + "cost": 0.0001, + "robustness": 0 + }, + "medium": { + "accuracy": 77.3, + "cost": 0.0001, + "robustness": 0 + }, + "hard": { + "accuracy": 27.2, + "cost": 0.0002, + "robustness": 0 + }, + "all": { + "accuracy": 43.7, + "cost": 0.0002, + "robustness": 0 + } + } + } + } + }, + "History": { + "metrics": { + "easy": { + "accuracy": 97.3, + "cost": 0.0001, + "robustness": 0 + }, + "medium": { + 
"accuracy": 64.4, + "cost": 0.0001, + "robustness": 0 + }, + "hard": { + "accuracy": 6.0, + "cost": 0.0001, + "robustness": 0 + }, + "all": { + "accuracy": 69.6, + "cost": 0.0001, + "robustness": 0 + } + }, + "subcategories": { + "Geography": { + "metrics": { + "easy": { + "accuracy": 100.0, + "cost": 0.0001, + "robustness": 0 + }, + "medium": { + "accuracy": 72.4, + "cost": 0.0001, + "robustness": 0 + }, + "hard": { + "accuracy": 33.3, + "cost": 0.0001, + "robustness": 0 + }, + "all": { + "accuracy": 89.3, + "cost": 0.0001, + "robustness": 0 + } + } + }, + "History": { + "metrics": { + "easy": { + "accuracy": 96.9, + "cost": 0.0001, + "robustness": 0 + }, + "medium": { + "accuracy": 62.6, + "cost": 0.0001, + "robustness": 0 + }, + "hard": { + "accuracy": 3.7, + "cost": 0.0001, + "robustness": 0 + }, + "all": { + "accuracy": 61.8, + "cost": 0.0001, + "robustness": 0 + } + } + }, + "Biography and genealogy": { + "metrics": { + "easy": { + "accuracy": 92.9, + "cost": 0.0001, + "robustness": 0 + }, + "medium": { + "accuracy": 66.7, + "cost": 0.0001, + "robustness": 0 + }, + "hard": { + "accuracy": 0.0, + "cost": 0.0001, + "robustness": 0 + }, + "all": { + "accuracy": 86.5, + "cost": 0.0001, + "robustness": 0 + } + } + } + } + } + } + }, + "nirt_bert": { + "metrics": { + "easy": { + "accuracy": 93.5, + "cost": 0.0005, + "robustness": 0 + }, + "medium": { + "accuracy": 46.4, + "cost": 0.0007, + "robustness": 0 + }, + "hard": { + "accuracy": 17.4, + "cost": 0.0009, + "robustness": 0 + }, + "all": { + "accuracy": 62.0, + "cost": 0.0007, + "robustness": 0 + } + }, + "categories": { + "Computer science, information, and general works": { + "metrics": { + "easy": { + "accuracy": 94.9, + "cost": 0.0005, + "robustness": 0 + }, + "medium": { + "accuracy": 51.3, + "cost": 0.0009, + "robustness": 0 + }, + "hard": { + "accuracy": 6.5, + "cost": 0.0016, + "robustness": 0 + }, + "all": { + "accuracy": 65.1, + "cost": 0.0008, + "robustness": 0 + } + }, + "subcategories": { + "Library 
and information sciences": { + "metrics": { + "easy": { + "accuracy": 96.0, + "cost": 0.0004, + "robustness": 0 + }, + "medium": { + "accuracy": 45.9, + "cost": 0.0003, + "robustness": 0 + }, + "hard": { + "accuracy": 2.8, + "cost": 0.0003, + "robustness": 0 + }, + "all": { + "accuracy": 76.8, + "cost": 0.0003, + "robustness": 0 + } + } + }, + "Computer science, knowledge, and systems": { + "metrics": { + "easy": { + "accuracy": 94.3, + "cost": 0.0006, + "robustness": 0 + }, + "medium": { + "accuracy": 52.5, + "cost": 0.0011, + "robustness": 0 + }, + "hard": { + "accuracy": 7.2, + "cost": 0.0018, + "robustness": 0 + }, + "all": { + "accuracy": 60.5, + "cost": 0.001, + "robustness": 0 + } + } + } + } + }, + "Philosophy and psychology": { + "metrics": { + "easy": { + "accuracy": 93.1, + "cost": 0.0005, + "robustness": 0 + }, + "medium": { + "accuracy": 47.9, + "cost": 0.0005, + "robustness": 0 + }, + "hard": { + "accuracy": 3.7, + "cost": 0.0004, + "robustness": 0 + }, + "all": { + "accuracy": 69.1, + "cost": 0.0005, + "robustness": 0 + } + }, + "subcategories": { + "Ethics": { + "metrics": { + "easy": { + "accuracy": 89.7, + "cost": 0.0004, + "robustness": 0 + }, + "medium": { + "accuracy": 38.8, + "cost": 0.0005, + "robustness": 0 + }, + "hard": { + "accuracy": 0.0, + "cost": 0.0004, + "robustness": 0 + }, + "all": { + "accuracy": 70.3, + "cost": 0.0004, + "robustness": 0 + } + } + }, + "Philosophy": { + "metrics": { + "easy": { + "accuracy": 97.3, + "cost": 0.0005, + "robustness": 0 + }, + "medium": { + "accuracy": 47.5, + "cost": 0.0006, + "robustness": 0 + }, + "hard": { + "accuracy": 4.4, + "cost": 0.0004, + "robustness": 0 + }, + "all": { + "accuracy": 46.7, + "cost": 0.0005, + "robustness": 0 + } + } + }, + "Psychology": { + "metrics": { + "easy": { + "accuracy": 98.3, + "cost": 0.0005, + "robustness": 0 + }, + "medium": { + "accuracy": 56.2, + "cost": 0.0005, + "robustness": 0 + }, + "hard": { + "accuracy": 5.6, + "cost": 0.0007, + "robustness": 0 + }, + 
"all": { + "accuracy": 78.6, + "cost": 0.0005, + "robustness": 0 + } + } + }, + "Philosophical logic": { + "metrics": { + "easy": { + "accuracy": 90.8, + "cost": 0.0007, + "robustness": 0 + }, + "medium": { + "accuracy": 57.9, + "cost": 0.0006, + "robustness": 0 + }, + "hard": { + "accuracy": 0.0, + "cost": 0.0003, + "robustness": 0 + }, + "all": { + "accuracy": 75.0, + "cost": 0.0007, + "robustness": 0 + } + } + } + } + }, + "Social Science": { + "metrics": { + "easy": { + "accuracy": 93.3, + "cost": 0.0006, + "robustness": 0 + }, + "medium": { + "accuracy": 47.4, + "cost": 0.0008, + "robustness": 0 + }, + "hard": { + "accuracy": 5.4, + "cost": 0.001, + "robustness": 0 + }, + "all": { + "accuracy": 58.0, + "cost": 0.0008, + "robustness": 0 + } + }, + "subcategories": { + "Economics": { + "metrics": { + "easy": { + "accuracy": 94.0, + "cost": 0.0006, + "robustness": 0 + }, + "medium": { + "accuracy": 48.5, + "cost": 0.0009, + "robustness": 0 + }, + "hard": { + "accuracy": 7.5, + "cost": 0.0014, + "robustness": 0 + }, + "all": { + "accuracy": 60.3, + "cost": 0.0009, + "robustness": 0 + } + } + }, + "Law": { + "metrics": { + "easy": { + "accuracy": 89.2, + "cost": 0.0007, + "robustness": 0 + }, + "medium": { + "accuracy": 50.8, + "cost": 0.0009, + "robustness": 0 + }, + "hard": { + "accuracy": 6.7, + "cost": 0.0007, + "robustness": 0 + }, + "all": { + "accuracy": 58.2, + "cost": 0.0008, + "robustness": 0 + } + } + }, + "Social sciences, sociology, and anthropology": { + "metrics": { + "easy": { + "accuracy": 100.0, + "cost": 0.0003, + "robustness": 0 + }, + "medium": { + "accuracy": 44.4, + "cost": 0.0003, + "robustness": 0 + }, + "hard": { + "accuracy": 0.0, + "cost": 0.0004, + "robustness": 0 + }, + "all": { + "accuracy": 20.0, + "cost": 0.0004, + "robustness": 0 + } + } + }, + "Social problems": { + "metrics": { + "easy": { + "accuracy": 95.7, + "cost": 0.0003, + "robustness": 0 + }, + "medium": { + "accuracy": 11.1, + "cost": 0.0004, + "robustness": 0 + }, + 
"hard": { + "accuracy": 0.0, + "cost": 0.0004, + "robustness": 0 + }, + "all": { + "accuracy": 75.4, + "cost": 0.0003, + "robustness": 0 + } + } + } + } + }, + "Language": { + "metrics": { + "easy": { + "accuracy": 84.7, + "cost": 0.0003, + "robustness": 0 + }, + "medium": { + "accuracy": 26.3, + "cost": 0.0006, + "robustness": 0 + }, + "hard": { + "accuracy": 51.9, + "cost": 0.0004, + "robustness": 0 + }, + "all": { + "accuracy": 55.6, + "cost": 0.0004, + "robustness": 0 + } + }, + "subcategories": { + "Language": { + "metrics": { + "easy": { + "accuracy": 84.7, + "cost": 0.0003, + "robustness": 0 + }, + "medium": { + "accuracy": 26.3, + "cost": 0.0006, + "robustness": 0 + }, + "hard": { + "accuracy": 51.9, + "cost": 0.0004, + "robustness": 0 + }, + "all": { + "accuracy": 55.6, + "cost": 0.0004, + "robustness": 0 + } + } + } + } + }, + "Science": { + "metrics": { + "easy": { + "accuracy": 94.8, + "cost": 0.0005, + "robustness": 0 + }, + "medium": { + "accuracy": 55.8, + "cost": 0.0006, + "robustness": 0 + }, + "hard": { + "accuracy": 1.0, + "cost": 0.0007, + "robustness": 0 + }, + "all": { + "accuracy": 69.8, + "cost": 0.0006, + "robustness": 0 + } + }, + "subcategories": { + "Mathematics": { + "metrics": { + "easy": { + "accuracy": 91.2, + "cost": 0.0005, + "robustness": 0 + }, + "medium": { + "accuracy": 57.6, + "cost": 0.0006, + "robustness": 0 + }, + "hard": { + "accuracy": 1.1, + "cost": 0.0008, + "robustness": 0 + }, + "all": { + "accuracy": 63.9, + "cost": 0.0006, + "robustness": 0 + } + } + }, + "Earth sciences and geology": { + "metrics": { + "easy": { + "accuracy": 98.1, + "cost": 0.0004, + "robustness": 0 + }, + "medium": { + "accuracy": 61.0, + "cost": 0.0006, + "robustness": 0 + }, + "hard": { + "accuracy": 0.0, + "cost": 0.0005, + "robustness": 0 + }, + "all": { + "accuracy": 87.6, + "cost": 0.0005, + "robustness": 0 + } + } + }, + "Biology": { + "metrics": { + "easy": { + "accuracy": 95.7, + "cost": 0.0006, + "robustness": 0 + }, + "medium": { + 
"accuracy": 41.2, + "cost": 0.0009, + "robustness": 0 + }, + "hard": { + "accuracy": 0.0, + "cost": 0.001, + "robustness": 0 + }, + "all": { + "accuracy": 83.2, + "cost": 0.0007, + "robustness": 0 + } + } + }, + "Chemistry": { + "metrics": { + "easy": { + "accuracy": 88.4, + "cost": 0.0006, + "robustness": 0 + }, + "medium": { + "accuracy": 55.9, + "cost": 0.0006, + "robustness": 0 + }, + "hard": { + "accuracy": 0.0, + "cost": 0.0008, + "robustness": 0 + }, + "all": { + "accuracy": 63.3, + "cost": 0.0006, + "robustness": 0 + } + } + }, + "Physics": { + "metrics": { + "easy": { + "accuracy": 97.7, + "cost": 0.0005, + "robustness": 0 + }, + "medium": { + "accuracy": 57.1, + "cost": 0.0008, + "robustness": 0 + }, + "hard": { + "accuracy": 0.0, + "cost": 0.0007, + "robustness": 0 + }, + "all": { + "accuracy": 69.1, + "cost": 0.0007, + "robustness": 0 + } + } + }, + "Animals (Zoology)": { + "metrics": { + "easy": { + "accuracy": 95.0, + "cost": 0.0006, + "robustness": 0 + }, + "medium": { + "accuracy": 40.0, + "cost": 0.0007, + "robustness": 0 + }, + "hard": { + "accuracy": 0, + "cost": 0, + "robustness": 0 + }, + "all": { + "accuracy": 84.0, + "cost": 0.0006, + "robustness": 0 + } + } + }, + "Science": { + "metrics": { + "easy": { + "accuracy": 100.0, + "cost": 0.0004, + "robustness": 0 + }, + "medium": { + "accuracy": 36.0, + "cost": 0.0004, + "robustness": 0 + }, + "hard": { + "accuracy": 1.7, + "cost": 0.0005, + "robustness": 0 + }, + "all": { + "accuracy": 35.1, + "cost": 0.0004, + "robustness": 0 + } + } + } + } + }, + "Technology": { + "metrics": { + "easy": { + "accuracy": 93.3, + "cost": 0.0006, + "robustness": 0 + }, + "medium": { + "accuracy": 47.1, + "cost": 0.0007, + "robustness": 0 + }, + "hard": { + "accuracy": 5.1, + "cost": 0.0007, + "robustness": 0 + }, + "all": { + "accuracy": 69.6, + "cost": 0.0007, + "robustness": 0 + } + }, + "subcategories": { + "Engineering": { + "metrics": { + "easy": { + "accuracy": 94.4, + "cost": 0.0005, + "robustness": 0 + 
}, + "medium": { + "accuracy": 57.5, + "cost": 0.0009, + "robustness": 0 + }, + "hard": { + "accuracy": 7.1, + "cost": 0.0008, + "robustness": 0 + }, + "all": { + "accuracy": 65.1, + "cost": 0.0007, + "robustness": 0 + } + } + }, + "Medicine and health": { + "metrics": { + "easy": { + "accuracy": 92.6, + "cost": 0.0007, + "robustness": 0 + }, + "medium": { + "accuracy": 41.3, + "cost": 0.0006, + "robustness": 0 + }, + "hard": { + "accuracy": 4.4, + "cost": 0.0007, + "robustness": 0 + }, + "all": { + "accuracy": 70.1, + "cost": 0.0007, + "robustness": 0 + } + } + }, + "Management and public relations": { + "metrics": { + "easy": { + "accuracy": 98.3, + "cost": 0.0003, + "robustness": 0 + }, + "medium": { + "accuracy": 18.2, + "cost": 0.0003, + "robustness": 0 + }, + "hard": { + "accuracy": 0.0, + "cost": 0.0003, + "robustness": 0 + }, + "all": { + "accuracy": 83.6, + "cost": 0.0003, + "robustness": 0 + } + } + } + } + }, + "Arts & recreation": { + "metrics": { + "easy": { + "accuracy": 91.9, + "cost": 0.0005, + "robustness": 0 + }, + "medium": { + "accuracy": 32.2, + "cost": 0.0008, + "robustness": 0 + }, + "hard": { + "accuracy": 1.4, + "cost": 0.001, + "robustness": 0 + }, + "all": { + "accuracy": 51.1, + "cost": 0.0007, + "robustness": 0 + } + }, + "subcategories": { + "Sports, games and entertainment": { + "metrics": { + "easy": { + "accuracy": 88.6, + "cost": 0.0007, + "robustness": 0 + }, + "medium": { + "accuracy": 41.0, + "cost": 0.0013, + "robustness": 0 + }, + "hard": { + "accuracy": 3.0, + "cost": 0.0016, + "robustness": 0 + }, + "all": { + "accuracy": 54.6, + "cost": 0.0011, + "robustness": 0 + } + } + }, + "Music": { + "metrics": { + "easy": { + "accuracy": 97.0, + "cost": 0.0004, + "robustness": 0 + }, + "medium": { + "accuracy": 25.9, + "cost": 0.0005, + "robustness": 0 + }, + "hard": { + "accuracy": 0.0, + "cost": 0.0005, + "robustness": 0 + }, + "all": { + "accuracy": 52.5, + "cost": 0.0004, + "robustness": 0 + } + } + }, + "Arts": { + "metrics": { 
+ "easy": { + "accuracy": 91.5, + "cost": 0.0003, + "robustness": 0 + }, + "medium": { + "accuracy": 28.2, + "cost": 0.0004, + "robustness": 0 + }, + "hard": { + "accuracy": 0.0, + "cost": 0.0006, + "robustness": 0 + }, + "all": { + "accuracy": 42.2, + "cost": 0.0004, + "robustness": 0 + } + } + } + } + }, + "Literature": { + "metrics": { + "easy": { + "accuracy": 95.6, + "cost": 0.0006, + "robustness": 0 + }, + "medium": { + "accuracy": 48.9, + "cost": 0.0007, + "robustness": 0 + }, + "hard": { + "accuracy": 25.3, + "cost": 0.0012, + "robustness": 0 + }, + "all": { + "accuracy": 38.9, + "cost": 0.001, + "robustness": 0 + } + }, + "subcategories": { + "Literature, rhetoric and criticism": { + "metrics": { + "easy": { + "accuracy": 95.6, + "cost": 0.0006, + "robustness": 0 + }, + "medium": { + "accuracy": 48.9, + "cost": 0.0007, + "robustness": 0 + }, + "hard": { + "accuracy": 25.3, + "cost": 0.0012, + "robustness": 0 + }, + "all": { + "accuracy": 38.9, + "cost": 0.001, + "robustness": 0 + } + } + } + } + }, + "History": { + "metrics": { + "easy": { + "accuracy": 94.9, + "cost": 0.0006, + "robustness": 0 + }, + "medium": { + "accuracy": 44.6, + "cost": 0.0007, + "robustness": 0 + }, + "hard": { + "accuracy": 2.0, + "cost": 0.0007, + "robustness": 0 + }, + "all": { + "accuracy": 62.4, + "cost": 0.0006, + "robustness": 0 + } + }, + "subcategories": { + "Geography": { + "metrics": { + "easy": { + "accuracy": 96.3, + "cost": 0.0003, + "robustness": 0 + }, + "medium": { + "accuracy": 44.8, + "cost": 0.0003, + "robustness": 0 + }, + "hard": { + "accuracy": 0.0, + "cost": 0.0004, + "robustness": 0 + }, + "all": { + "accuracy": 78.7, + "cost": 0.0003, + "robustness": 0 + } + } + }, + "History": { + "metrics": { + "easy": { + "accuracy": 94.6, + "cost": 0.0008, + "robustness": 0 + }, + "medium": { + "accuracy": 44.6, + "cost": 0.0008, + "robustness": 0 + }, + "hard": { + "accuracy": 2.2, + "cost": 0.0008, + "robustness": 0 + }, + "all": { + "accuracy": 55.4, + "cost": 
0.0008, + "robustness": 0 + } + } + }, + "Biography and genealogy": { + "metrics": { + "easy": { + "accuracy": 92.9, + "cost": 0.0003, + "robustness": 0 + }, + "medium": { + "accuracy": 44.4, + "cost": 0.0005, + "robustness": 0 + }, + "hard": { + "accuracy": 0.0, + "cost": 0.0008, + "robustness": 0 + }, + "all": { + "accuracy": 82.7, + "cost": 0.0003, + "robustness": 0 + } + } + } + } + } + } + }, + "routellm": { + "metrics": { + "easy": { + "accuracy": 79.5, + "cost": 0.0002, + "robustness": 0 + }, + "medium": { + "accuracy": 24.1, + "cost": 0.0003, + "robustness": 0 + }, + "hard": { + "accuracy": 9.5, + "cost": 0.0004, + "robustness": 0 + }, + "all": { + "accuracy": 47.0, + "cost": 0.0003, + "robustness": 0 + } + }, + "categories": { + "Computer science, information, and general works": { + "metrics": { + "easy": { + "accuracy": 74.7, + "cost": 0.0002, + "robustness": 0 + }, + "medium": { + "accuracy": 16.2, + "cost": 0.0005, + "robustness": 0 + }, + "hard": { + "accuracy": 0.8, + "cost": 0.0007, + "robustness": 0 + }, + "all": { + "accuracy": 42.5, + "cost": 0.0004, + "robustness": 0 + } + }, + "subcategories": { + "Library and information sciences": { + "metrics": { + "easy": { + "accuracy": 85.8, + "cost": 0.0001, + "robustness": 0 + }, + "medium": { + "accuracy": 35.3, + "cost": 0.0001, + "robustness": 0 + }, + "hard": { + "accuracy": 2.8, + "cost": 0.0001, + "robustness": 0 + }, + "all": { + "accuracy": 67.4, + "cost": 0.0001, + "robustness": 0 + } + } + }, + "Computer science, knowledge, and systems": { + "metrics": { + "easy": { + "accuracy": 67.5, + "cost": 0.0002, + "robustness": 0 + }, + "medium": { + "accuracy": 11.9, + "cost": 0.0006, + "robustness": 0 + }, + "hard": { + "accuracy": 0.5, + "cost": 0.0008, + "robustness": 0 + }, + "all": { + "accuracy": 32.7, + "cost": 0.0005, + "robustness": 0 + } + } + } + } + }, + "Philosophy and psychology": { + "metrics": { + "easy": { + "accuracy": 79.9, + "cost": 0.0002, + "robustness": 0 + }, + "medium": { + 
"accuracy": 20.4, + "cost": 0.0002, + "robustness": 0 + }, + "hard": { + "accuracy": 0.0, + "cost": 0.0002, + "robustness": 0 + }, + "all": { + "accuracy": 52.7, + "cost": 0.0002, + "robustness": 0 + } + }, + "subcategories": { + "Ethics": { + "metrics": { + "easy": { + "accuracy": 82.7, + "cost": 0.0002, + "robustness": 0 + }, + "medium": { + "accuracy": 15.3, + "cost": 0.0002, + "robustness": 0 + }, + "hard": { + "accuracy": 0.0, + "cost": 0.0002, + "robustness": 0 + }, + "all": { + "accuracy": 58.7, + "cost": 0.0002, + "robustness": 0 + } + } + }, + "Philosophy": { + "metrics": { + "easy": { + "accuracy": 75.7, + "cost": 0.0002, + "robustness": 0 + }, + "medium": { + "accuracy": 30.0, + "cost": 0.0002, + "robustness": 0 + }, + "hard": { + "accuracy": 0.0, + "cost": 0.0001, + "robustness": 0 + }, + "all": { + "accuracy": 32.8, + "cost": 0.0002, + "robustness": 0 + } + } + }, + "Psychology": { + "metrics": { + "easy": { + "accuracy": 75.2, + "cost": 0.0002, + "robustness": 0 + }, + "medium": { + "accuracy": 18.8, + "cost": 0.0002, + "robustness": 0 + }, + "hard": { + "accuracy": 0.0, + "cost": 0.0002, + "robustness": 0 + }, + "all": { + "accuracy": 53.5, + "cost": 0.0002, + "robustness": 0 + } + } + }, + "Philosophical logic": { + "metrics": { + "easy": { + "accuracy": 83.1, + "cost": 0.0002, + "robustness": 0 + }, + "medium": { + "accuracy": 23.7, + "cost": 0.0002, + "robustness": 0 + }, + "hard": { + "accuracy": 0.0, + "cost": 0.0003, + "robustness": 0 + }, + "all": { + "accuracy": 58.3, + "cost": 0.0002, + "robustness": 0 + } + } + } + } + }, + "Social Science": { + "metrics": { + "easy": { + "accuracy": 75.5, + "cost": 0.0002, + "robustness": 0 + }, + "medium": { + "accuracy": 20.2, + "cost": 0.0003, + "robustness": 0 + }, + "hard": { + "accuracy": 2.7, + "cost": 0.0004, + "robustness": 0 + }, + "all": { + "accuracy": 40.0, + "cost": 0.0003, + "robustness": 0 + } + }, + "subcategories": { + "Economics": { + "metrics": { + "easy": { + "accuracy": 71.0, + 
"cost": 0.0002, + "robustness": 0 + }, + "medium": { + "accuracy": 21.5, + "cost": 0.0003, + "robustness": 0 + }, + "hard": { + "accuracy": 2.5, + "cost": 0.0005, + "robustness": 0 + }, + "all": { + "accuracy": 39.2, + "cost": 0.0003, + "robustness": 0 + } + } + }, + "Law": { + "metrics": { + "easy": { + "accuracy": 72.3, + "cost": 0.0003, + "robustness": 0 + }, + "medium": { + "accuracy": 11.1, + "cost": 0.0004, + "robustness": 0 + }, + "hard": { + "accuracy": 3.3, + "cost": 0.0004, + "robustness": 0 + }, + "all": { + "accuracy": 34.8, + "cost": 0.0004, + "robustness": 0 + } + } + }, + "Social sciences, sociology, and anthropology": { + "metrics": { + "easy": { + "accuracy": 100.0, + "cost": 0.0001, + "robustness": 0 + }, + "medium": { + "accuracy": 33.3, + "cost": 0.0001, + "robustness": 0 + }, + "hard": { + "accuracy": 2.9, + "cost": 0.0001, + "robustness": 0 + }, + "all": { + "accuracy": 18.2, + "cost": 0.0001, + "robustness": 0 + } + } + }, + "Social problems": { + "metrics": { + "easy": { + "accuracy": 95.7, + "cost": 0.0001, + "robustness": 0 + }, + "medium": { + "accuracy": 33.3, + "cost": 0.0001, + "robustness": 0 + }, + "hard": { + "accuracy": 0.0, + "cost": 0.0001, + "robustness": 0 + }, + "all": { + "accuracy": 78.7, + "cost": 0.0001, + "robustness": 0 + } + } + } + } + }, + "Language": { + "metrics": { + "easy": { + "accuracy": 69.9, + "cost": 0.0002, + "robustness": 0 + }, + "medium": { + "accuracy": 58.7, + "cost": 0.0002, + "robustness": 0 + }, + "hard": { + "accuracy": 30.7, + "cost": 0.0003, + "robustness": 0 + }, + "all": { + "accuracy": 49.1, + "cost": 0.0003, + "robustness": 0 + } + }, + "subcategories": { + "Language": { + "metrics": { + "easy": { + "accuracy": 69.9, + "cost": 0.0002, + "robustness": 0 + }, + "medium": { + "accuracy": 58.7, + "cost": 0.0002, + "robustness": 0 + }, + "hard": { + "accuracy": 30.7, + "cost": 0.0003, + "robustness": 0 + }, + "all": { + "accuracy": 49.1, + "cost": 0.0003, + "robustness": 0 + } + } + } + } + }, + 
"Science": { + "metrics": { + "easy": { + "accuracy": 73.6, + "cost": 0.0002, + "robustness": 0 + }, + "medium": { + "accuracy": 16.7, + "cost": 0.0002, + "robustness": 0 + }, + "hard": { + "accuracy": 1.0, + "cost": 0.0002, + "robustness": 0 + }, + "all": { + "accuracy": 46.0, + "cost": 0.0002, + "robustness": 0 + } + }, + "subcategories": { + "Mathematics": { + "metrics": { + "easy": { + "accuracy": 50.0, + "cost": 0.0002, + "robustness": 0 + }, + "medium": { + "accuracy": 13.6, + "cost": 0.0002, + "robustness": 0 + }, + "hard": { + "accuracy": 1.1, + "cost": 0.0002, + "robustness": 0 + }, + "all": { + "accuracy": 27.6, + "cost": 0.0002, + "robustness": 0 + } + } + }, + "Earth sciences and geology": { + "metrics": { + "easy": { + "accuracy": 90.7, + "cost": 0.0002, + "robustness": 0 + }, + "medium": { + "accuracy": 23.7, + "cost": 0.0001, + "robustness": 0 + }, + "hard": { + "accuracy": 0.0, + "cost": 0.0001, + "robustness": 0 + }, + "all": { + "accuracy": 75.2, + "cost": 0.0002, + "robustness": 0 + } + } + }, + "Biology": { + "metrics": { + "easy": { + "accuracy": 88.0, + "cost": 0.0002, + "robustness": 0 + }, + "medium": { + "accuracy": 23.5, + "cost": 0.0002, + "robustness": 0 + }, + "hard": { + "accuracy": 0.0, + "cost": 0.0002, + "robustness": 0 + }, + "all": { + "accuracy": 74.8, + "cost": 0.0002, + "robustness": 0 + } + } + }, + "Chemistry": { + "metrics": { + "easy": { + "accuracy": 62.8, + "cost": 0.0002, + "robustness": 0 + }, + "medium": { + "accuracy": 8.8, + "cost": 0.0002, + "robustness": 0 + }, + "hard": { + "accuracy": 7.7, + "cost": 0.0002, + "robustness": 0 + }, + "all": { + "accuracy": 34.4, + "cost": 0.0002, + "robustness": 0 + } + } + }, + "Physics": { + "metrics": { + "easy": { + "accuracy": 68.2, + "cost": 0.0002, + "robustness": 0 + }, + "medium": { + "accuracy": 23.8, + "cost": 0.0002, + "robustness": 0 + }, + "hard": { + "accuracy": 0.0, + "cost": 0.0002, + "robustness": 0 + }, + "all": { + "accuracy": 41.2, + "cost": 0.0002, + 
"robustness": 0 + } + } + }, + "Animals (Zoology)": { + "metrics": { + "easy": { + "accuracy": 90.0, + "cost": 0.0001, + "robustness": 0 + }, + "medium": { + "accuracy": 40.0, + "cost": 0.0001, + "robustness": 0 + }, + "hard": { + "accuracy": 0, + "cost": 0, + "robustness": 0 + }, + "all": { + "accuracy": 80.0, + "cost": 0.0001, + "robustness": 0 + } + } + }, + "Science": { + "metrics": { + "easy": { + "accuracy": 90.0, + "cost": 0.0002, + "robustness": 0 + }, + "medium": { + "accuracy": 20.0, + "cost": 0.0001, + "robustness": 0 + }, + "hard": { + "accuracy": 0.0, + "cost": 0.0001, + "robustness": 0 + }, + "all": { + "accuracy": 28.1, + "cost": 0.0001, + "robustness": 0 + } + } + } + } + }, + "Technology": { + "metrics": { + "easy": { + "accuracy": 86.3, + "cost": 0.0002, + "robustness": 0 + }, + "medium": { + "accuracy": 25.2, + "cost": 0.0003, + "robustness": 0 + }, + "hard": { + "accuracy": 3.8, + "cost": 0.0003, + "robustness": 0 + }, + "all": { + "accuracy": 58.7, + "cost": 0.0002, + "robustness": 0 + } + }, + "subcategories": { + "Engineering": { + "metrics": { + "easy": { + "accuracy": 70.6, + "cost": 0.0002, + "robustness": 0 + }, + "medium": { + "accuracy": 24.6, + "cost": 0.0003, + "robustness": 0 + }, + "hard": { + "accuracy": 9.5, + "cost": 0.0003, + "robustness": 0 + }, + "all": { + "accuracy": 40.0, + "cost": 0.0003, + "robustness": 0 + } + } + }, + "Medicine and health": { + "metrics": { + "easy": { + "accuracy": 88.4, + "cost": 0.0003, + "robustness": 0 + }, + "medium": { + "accuracy": 25.2, + "cost": 0.0002, + "robustness": 0 + }, + "hard": { + "accuracy": 0.9, + "cost": 0.0002, + "robustness": 0 + }, + "all": { + "accuracy": 63.0, + "cost": 0.0003, + "robustness": 0 + } + } + }, + "Management and public relations": { + "metrics": { + "easy": { + "accuracy": 96.7, + "cost": 0.0001, + "robustness": 0 + }, + "medium": { + "accuracy": 36.4, + "cost": 0.0001, + "robustness": 0 + }, + "hard": { + "accuracy": 50.0, + "cost": 0.0001, + "robustness": 0 + 
}, + "all": { + "accuracy": 86.3, + "cost": 0.0001, + "robustness": 0 + } + } + } + } + }, + "Arts & recreation": { + "metrics": { + "easy": { + "accuracy": 84.9, + "cost": 0.0002, + "robustness": 0 + }, + "medium": { + "accuracy": 29.0, + "cost": 0.0003, + "robustness": 0 + }, + "hard": { + "accuracy": 0.7, + "cost": 0.0003, + "robustness": 0 + }, + "all": { + "accuracy": 46.9, + "cost": 0.0002, + "robustness": 0 + } + }, + "subcategories": { + "Sports, games and entertainment": { + "metrics": { + "easy": { + "accuracy": 77.9, + "cost": 0.0002, + "robustness": 0 + }, + "medium": { + "accuracy": 30.0, + "cost": 0.0004, + "robustness": 0 + }, + "hard": { + "accuracy": 1.5, + "cost": 0.0005, + "robustness": 0 + }, + "all": { + "accuracy": 45.8, + "cost": 0.0004, + "robustness": 0 + } + } + }, + "Music": { + "metrics": { + "easy": { + "accuracy": 88.9, + "cost": 0.0001, + "robustness": 0 + }, + "medium": { + "accuracy": 31.9, + "cost": 0.0002, + "robustness": 0 + }, + "hard": { + "accuracy": 0.0, + "cost": 0.0002, + "robustness": 0 + }, + "all": { + "accuracy": 52.1, + "cost": 0.0002, + "robustness": 0 + } + } + }, + "Arts": { + "metrics": { + "easy": { + "accuracy": 94.9, + "cost": 0.0001, + "robustness": 0 + }, + "medium": { + "accuracy": 17.9, + "cost": 0.0001, + "robustness": 0 + }, + "hard": { + "accuracy": 0.0, + "cost": 0.0001, + "robustness": 0 + }, + "all": { + "accuracy": 40.9, + "cost": 0.0001, + "robustness": 0 + } + } + } + } + }, + "Literature": { + "metrics": { + "easy": { + "accuracy": 90.8, + "cost": 0.0002, + "robustness": 0 + }, + "medium": { + "accuracy": 23.4, + "cost": 0.0002, + "robustness": 0 + }, + "hard": { + "accuracy": 13.5, + "cost": 0.0004, + "robustness": 0 + }, + "all": { + "accuracy": 26.9, + "cost": 0.0004, + "robustness": 0 + } + }, + "subcategories": { + "Literature, rhetoric and criticism": { + "metrics": { + "easy": { + "accuracy": 90.8, + "cost": 0.0002, + "robustness": 0 + }, + "medium": { + "accuracy": 23.4, + "cost": 0.0002, + 
"robustness": 0 + }, + "hard": { + "accuracy": 13.5, + "cost": 0.0004, + "robustness": 0 + }, + "all": { + "accuracy": 26.9, + "cost": 0.0004, + "robustness": 0 + } + } + } + } + }, + "History": { + "metrics": { + "easy": { + "accuracy": 86.4, + "cost": 0.0002, + "robustness": 0 + }, + "medium": { + "accuracy": 31.1, + "cost": 0.0003, + "robustness": 0 + }, + "hard": { + "accuracy": 2.0, + "cost": 0.0002, + "robustness": 0 + }, + "all": { + "accuracy": 54.4, + "cost": 0.0002, + "robustness": 0 + } + }, + "subcategories": { + "Geography": { + "metrics": { + "easy": { + "accuracy": 94.5, + "cost": 0.0001, + "robustness": 0 + }, + "medium": { + "accuracy": 41.4, + "cost": 0.0001, + "robustness": 0 + }, + "hard": { + "accuracy": 0.0, + "cost": 0.0001, + "robustness": 0 + }, + "all": { + "accuracy": 76.7, + "cost": 0.0001, + "robustness": 0 + } + } + }, + "History": { + "metrics": { + "easy": { + "accuracy": 82.1, + "cost": 0.0003, + "robustness": 0 + }, + "medium": { + "accuracy": 28.8, + "cost": 0.0003, + "robustness": 0 + }, + "hard": { + "accuracy": 1.5, + "cost": 0.0002, + "robustness": 0 + }, + "all": { + "accuracy": 45.2, + "cost": 0.0003, + "robustness": 0 + } + } + }, + "Biography and genealogy": { + "metrics": { + "easy": { + "accuracy": 88.1, + "cost": 0.0001, + "robustness": 0 + }, + "medium": { + "accuracy": 33.3, + "cost": 0.0001, + "robustness": 0 + }, + "hard": { + "accuracy": 100.0, + "cost": 0.0001, + "robustness": 0 + }, + "all": { + "accuracy": 78.8, + "cost": 0.0001, + "robustness": 0 + } + } + } + } + } + } + }, + "routerbench_knn": { + "metrics": { + "easy": { + "accuracy": 89.3, + "cost": 0.0031, + "robustness": 0 + }, + "medium": { + "accuracy": 42.2, + "cost": 0.0051, + "robustness": 0 + }, + "hard": { + "accuracy": 17.1, + "cost": 0.0057, + "robustness": 0 + }, + "all": { + "accuracy": 58.7, + "cost": 0.0043, + "robustness": 0 + } + }, + "categories": { + "Computer science, information, and general works": { + "metrics": { + "easy": { + 
"accuracy": 87.0, + "cost": 0.0024, + "robustness": 0 + }, + "medium": { + "accuracy": 39.6, + "cost": 0.0067, + "robustness": 0 + }, + "hard": { + "accuracy": 1.6, + "cost": 0.0111, + "robustness": 0 + }, + "all": { + "accuracy": 56.4, + "cost": 0.0053, + "robustness": 0 + } + }, + "subcategories": { + "Library and information sciences": { + "metrics": { + "easy": { + "accuracy": 81.5, + "cost": 0.0013, + "robustness": 0 + }, + "medium": { + "accuracy": 36.5, + "cost": 0.0013, + "robustness": 0 + }, + "hard": { + "accuracy": 0.0, + "cost": 0.0012, + "robustness": 0 + }, + "all": { + "accuracy": 64.4, + "cost": 0.0013, + "robustness": 0 + } + } + }, + "Computer science, knowledge, and systems": { + "metrics": { + "easy": { + "accuracy": 90.7, + "cost": 0.0031, + "robustness": 0 + }, + "medium": { + "accuracy": 40.3, + "cost": 0.0079, + "robustness": 0 + }, + "hard": { + "accuracy": 1.9, + "cost": 0.0128, + "robustness": 0 + }, + "all": { + "accuracy": 53.3, + "cost": 0.0069, + "robustness": 0 + } + } + } + } + }, + "Philosophy and psychology": { + "metrics": { + "easy": { + "accuracy": 89.0, + "cost": 0.0033, + "robustness": 0 + }, + "medium": { + "accuracy": 38.4, + "cost": 0.0038, + "robustness": 0 + }, + "hard": { + "accuracy": 3.7, + "cost": 0.0026, + "robustness": 0 + }, + "all": { + "accuracy": 63.9, + "cost": 0.0034, + "robustness": 0 + } + }, + "subcategories": { + "Ethics": { + "metrics": { + "easy": { + "accuracy": 84.3, + "cost": 0.0032, + "robustness": 0 + }, + "medium": { + "accuracy": 30.6, + "cost": 0.004, + "robustness": 0 + }, + "hard": { + "accuracy": 7.7, + "cost": 0.0034, + "robustness": 0 + }, + "all": { + "accuracy": 64.7, + "cost": 0.0035, + "robustness": 0 + } + } + }, + "Philosophy": { + "metrics": { + "easy": { + "accuracy": 97.3, + "cost": 0.0024, + "robustness": 0 + }, + "medium": { + "accuracy": 37.5, + "cost": 0.0032, + "robustness": 0 + }, + "hard": { + "accuracy": 4.4, + "cost": 0.0018, + "robustness": 0 + }, + "all": { + "accuracy": 
43.4, + "cost": 0.0024, + "robustness": 0 + } + } + }, + "Psychology": { + "metrics": { + "easy": { + "accuracy": 91.7, + "cost": 0.0035, + "robustness": 0 + }, + "medium": { + "accuracy": 56.2, + "cost": 0.0037, + "robustness": 0 + }, + "hard": { + "accuracy": 0.0, + "cost": 0.0036, + "robustness": 0 + }, + "all": { + "accuracy": 73.8, + "cost": 0.0035, + "robustness": 0 + } + } + }, + "Philosophical logic": { + "metrics": { + "easy": { + "accuracy": 92.3, + "cost": 0.0036, + "robustness": 0 + }, + "medium": { + "accuracy": 34.2, + "cost": 0.0041, + "robustness": 0 + }, + "hard": { + "accuracy": 0.0, + "cost": 0.004, + "robustness": 0 + }, + "all": { + "accuracy": 67.6, + "cost": 0.0038, + "robustness": 0 + } + } + } + } + }, + "Social Science": { + "metrics": { + "easy": { + "accuracy": 87.9, + "cost": 0.0037, + "robustness": 0 + }, + "medium": { + "accuracy": 41.5, + "cost": 0.0058, + "robustness": 0 + }, + "hard": { + "accuracy": 2.0, + "cost": 0.0065, + "robustness": 0 + }, + "all": { + "accuracy": 52.9, + "cost": 0.0051, + "robustness": 0 + } + }, + "subcategories": { + "Economics": { + "metrics": { + "easy": { + "accuracy": 84.7, + "cost": 0.004, + "robustness": 0 + }, + "medium": { + "accuracy": 40.5, + "cost": 0.0068, + "robustness": 0 + }, + "hard": { + "accuracy": 3.8, + "cost": 0.0097, + "robustness": 0 + }, + "all": { + "accuracy": 52.6, + "cost": 0.0062, + "robustness": 0 + } + } + }, + "Law": { + "metrics": { + "easy": { + "accuracy": 90.8, + "cost": 0.0044, + "robustness": 0 + }, + "medium": { + "accuracy": 44.4, + "cost": 0.005, + "robustness": 0 + }, + "hard": { + "accuracy": 0.0, + "cost": 0.0048, + "robustness": 0 + }, + "all": { + "accuracy": 55.1, + "cost": 0.0047, + "robustness": 0 + } + } + }, + "Social sciences, sociology, and anthropology": { + "metrics": { + "easy": { + "accuracy": 100.0, + "cost": 0.0012, + "robustness": 0 + }, + "medium": { + "accuracy": 50.0, + "cost": 0.0011, + "robustness": 0 + }, + "hard": { + "accuracy": 0.0, + 
"cost": 0.0011, + "robustness": 0 + }, + "all": { + "accuracy": 21.8, + "cost": 0.0011, + "robustness": 0 + } + } + }, + "Social problems": { + "metrics": { + "easy": { + "accuracy": 95.7, + "cost": 0.0018, + "robustness": 0 + }, + "medium": { + "accuracy": 22.2, + "cost": 0.0019, + "robustness": 0 + }, + "hard": { + "accuracy": 0.0, + "cost": 0.0017, + "robustness": 0 + }, + "all": { + "accuracy": 77.0, + "cost": 0.0018, + "robustness": 0 + } + } + } + } + }, + "Language": { + "metrics": { + "easy": { + "accuracy": 86.1, + "cost": 0.0021, + "robustness": 0 + }, + "medium": { + "accuracy": 46.7, + "cost": 0.0034, + "robustness": 0 + }, + "hard": { + "accuracy": 49.4, + "cost": 0.0037, + "robustness": 0 + }, + "all": { + "accuracy": 59.7, + "cost": 0.0032, + "robustness": 0 + } + }, + "subcategories": { + "Language": { + "metrics": { + "easy": { + "accuracy": 86.1, + "cost": 0.0021, + "robustness": 0 + }, + "medium": { + "accuracy": 46.7, + "cost": 0.0034, + "robustness": 0 + }, + "hard": { + "accuracy": 49.4, + "cost": 0.0037, + "robustness": 0 + }, + "all": { + "accuracy": 59.7, + "cost": 0.0032, + "robustness": 0 + } + } + } + } + }, + "Science": { + "metrics": { + "easy": { + "accuracy": 91.3, + "cost": 0.0029, + "robustness": 0 + }, + "medium": { + "accuracy": 39.6, + "cost": 0.0054, + "robustness": 0 + }, + "hard": { + "accuracy": 3.6, + "cost": 0.0072, + "robustness": 0 + }, + "all": { + "accuracy": 63.2, + "cost": 0.0043, + "robustness": 0 + } + }, + "subcategories": { + "Mathematics": { + "metrics": { + "easy": { + "accuracy": 89.3, + "cost": 0.0039, + "robustness": 0 + }, + "medium": { + "accuracy": 40.4, + "cost": 0.0069, + "robustness": 0 + }, + "hard": { + "accuracy": 4.5, + "cost": 0.013, + "robustness": 0 + }, + "all": { + "accuracy": 56.4, + "cost": 0.0065, + "robustness": 0 + } + } + }, + "Earth sciences and geology": { + "metrics": { + "easy": { + "accuracy": 91.5, + "cost": 0.0013, + "robustness": 0 + }, + "medium": { + "accuracy": 22.0, + "cost": 
0.001, + "robustness": 0 + }, + "hard": { + "accuracy": 15.4, + "cost": 0.0009, + "robustness": 0 + }, + "all": { + "accuracy": 76.1, + "cost": 0.0012, + "robustness": 0 + } + } + }, + "Biology": { + "metrics": { + "easy": { + "accuracy": 93.2, + "cost": 0.0043, + "robustness": 0 + }, + "medium": { + "accuracy": 29.4, + "cost": 0.0054, + "robustness": 0 + }, + "hard": { + "accuracy": 0.0, + "cost": 0.005, + "robustness": 0 + }, + "all": { + "accuracy": 79.7, + "cost": 0.0045, + "robustness": 0 + } + } + }, + "Chemistry": { + "metrics": { + "easy": { + "accuracy": 95.3, + "cost": 0.0044, + "robustness": 0 + }, + "medium": { + "accuracy": 52.9, + "cost": 0.0059, + "robustness": 0 + }, + "hard": { + "accuracy": 0.0, + "cost": 0.0051, + "robustness": 0 + }, + "all": { + "accuracy": 65.6, + "cost": 0.005, + "robustness": 0 + } + } + }, + "Physics": { + "metrics": { + "easy": { + "accuracy": 90.9, + "cost": 0.003, + "robustness": 0 + }, + "medium": { + "accuracy": 50.0, + "cost": 0.0056, + "robustness": 0 + }, + "hard": { + "accuracy": 0.0, + "cost": 0.0043, + "robustness": 0 + }, + "all": { + "accuracy": 62.9, + "cost": 0.0043, + "robustness": 0 + } + } + }, + "Animals (Zoology)": { + "metrics": { + "easy": { + "accuracy": 85.0, + "cost": 0.0014, + "robustness": 0 + }, + "medium": { + "accuracy": 60.0, + "cost": 0.0014, + "robustness": 0 + }, + "hard": { + "accuracy": 0, + "cost": 0, + "robustness": 0 + }, + "all": { + "accuracy": 80.0, + "cost": 0.0014, + "robustness": 0 + } + } + }, + "Science": { + "metrics": { + "easy": { + "accuracy": 100.0, + "cost": 0.0017, + "robustness": 0 + }, + "medium": { + "accuracy": 40.0, + "cost": 0.0012, + "robustness": 0 + }, + "hard": { + "accuracy": 1.7, + "cost": 0.0013, + "robustness": 0 + }, + "all": { + "accuracy": 36.0, + "cost": 0.0014, + "robustness": 0 + } + } + } + } + }, + "Technology": { + "metrics": { + "easy": { + "accuracy": 87.2, + "cost": 0.0039, + "robustness": 0 + }, + "medium": { + "accuracy": 33.8, + "cost": 
0.0051, + "robustness": 0 + }, + "hard": { + "accuracy": 3.2, + "cost": 0.0047, + "robustness": 0 + }, + "all": { + "accuracy": 61.8, + "cost": 0.0043, + "robustness": 0 + } + }, + "subcategories": { + "Engineering": { + "metrics": { + "easy": { + "accuracy": 84.9, + "cost": 0.0039, + "robustness": 0 + }, + "medium": { + "accuracy": 35.3, + "cost": 0.0075, + "robustness": 0 + }, + "hard": { + "accuracy": 0.0, + "cost": 0.0073, + "robustness": 0 + }, + "all": { + "accuracy": 49.6, + "cost": 0.0061, + "robustness": 0 + } + } + }, + "Medicine and health": { + "metrics": { + "easy": { + "accuracy": 87.4, + "cost": 0.0042, + "robustness": 0 + }, + "medium": { + "accuracy": 33.5, + "cost": 0.0036, + "robustness": 0 + }, + "hard": { + "accuracy": 4.4, + "cost": 0.0038, + "robustness": 0 + }, + "all": { + "accuracy": 64.8, + "cost": 0.004, + "robustness": 0 + } + } + }, + "Management and public relations": { + "metrics": { + "easy": { + "accuracy": 90.0, + "cost": 0.0012, + "robustness": 0 + }, + "medium": { + "accuracy": 18.2, + "cost": 0.0009, + "robustness": 0 + }, + "hard": { + "accuracy": 0.0, + "cost": 0.001, + "robustness": 0 + }, + "all": { + "accuracy": 76.7, + "cost": 0.0012, + "robustness": 0 + } + } + } + } + }, + "Arts & recreation": { + "metrics": { + "easy": { + "accuracy": 90.9, + "cost": 0.0028, + "robustness": 0 + }, + "medium": { + "accuracy": 47.8, + "cost": 0.0046, + "robustness": 0 + }, + "hard": { + "accuracy": 7.5, + "cost": 0.0059, + "robustness": 0 + }, + "all": { + "accuracy": 57.7, + "cost": 0.0041, + "robustness": 0 + } + }, + "subcategories": { + "Sports, games and entertainment": { + "metrics": { + "easy": { + "accuracy": 89.3, + "cost": 0.0039, + "robustness": 0 + }, + "medium": { + "accuracy": 61.0, + "cost": 0.0082, + "robustness": 0 + }, + "hard": { + "accuracy": 6.1, + "cost": 0.0107, + "robustness": 0 + }, + "all": { + "accuracy": 62.1, + "cost": 0.0068, + "robustness": 0 + } + } + }, + "Music": { + "metrics": { + "easy": { + 
"accuracy": 89.9, + "cost": 0.0021, + "robustness": 0 + }, + "medium": { + "accuracy": 30.2, + "cost": 0.0027, + "robustness": 0 + }, + "hard": { + "accuracy": 16.0, + "cost": 0.003, + "robustness": 0 + }, + "all": { + "accuracy": 53.3, + "cost": 0.0025, + "robustness": 0 + } + } + }, + "Arts": { + "metrics": { + "easy": { + "accuracy": 96.6, + "cost": 0.0016, + "robustness": 0 + }, + "medium": { + "accuracy": 66.7, + "cost": 0.0013, + "robustness": 0 + }, + "hard": { + "accuracy": 5.4, + "cost": 0.0014, + "robustness": 0 + }, + "all": { + "accuracy": 55.8, + "cost": 0.0015, + "robustness": 0 + } + } + } + } + }, + "Literature": { + "metrics": { + "easy": { + "accuracy": 95.0, + "cost": 0.0027, + "robustness": 0 + }, + "medium": { + "accuracy": 71.7, + "cost": 0.0029, + "robustness": 0 + }, + "hard": { + "accuracy": 25.6, + "cost": 0.0051, + "robustness": 0 + }, + "all": { + "accuracy": 41.3, + "cost": 0.0045, + "robustness": 0 + } + }, + "subcategories": { + "Literature, rhetoric and criticism": { + "metrics": { + "easy": { + "accuracy": 95.0, + "cost": 0.0027, + "robustness": 0 + }, + "medium": { + "accuracy": 71.7, + "cost": 0.0029, + "robustness": 0 + }, + "hard": { + "accuracy": 25.6, + "cost": 0.0051, + "robustness": 0 + }, + "all": { + "accuracy": 41.3, + "cost": 0.0045, + "robustness": 0 + } + } + } + } + }, + "History": { + "metrics": { + "easy": { + "accuracy": 93.6, + "cost": 0.0029, + "robustness": 0 + }, + "medium": { + "accuracy": 57.6, + "cost": 0.0034, + "robustness": 0 + }, + "hard": { + "accuracy": 6.7, + "cost": 0.0034, + "robustness": 0 + }, + "all": { + "accuracy": 66.0, + "cost": 0.0031, + "robustness": 0 + } + }, + "subcategories": { + "Geography": { + "metrics": { + "easy": { + "accuracy": 89.9, + "cost": 0.001, + "robustness": 0 + }, + "medium": { + "accuracy": 58.6, + "cost": 0.0013, + "robustness": 0 + }, + "hard": { + "accuracy": 25.0, + "cost": 0.0011, + "robustness": 0 + }, + "all": { + "accuracy": 78.7, + "cost": 0.0011, + 
"robustness": 0 + } + } + }, + "History": { + "metrics": { + "easy": { + "accuracy": 94.6, + "cost": 0.004, + "robustness": 0 + }, + "medium": { + "accuracy": 57.6, + "cost": 0.004, + "robustness": 0 + }, + "hard": { + "accuracy": 5.1, + "cost": 0.0036, + "robustness": 0 + }, + "all": { + "accuracy": 59.8, + "cost": 0.0039, + "robustness": 0 + } + } + }, + "Biography and genealogy": { + "metrics": { + "easy": { + "accuracy": 97.6, + "cost": 0.0017, + "robustness": 0 + }, + "medium": { + "accuracy": 55.6, + "cost": 0.0016, + "robustness": 0 + }, + "hard": { + "accuracy": 0.0, + "cost": 0.0018, + "robustness": 0 + }, + "all": { + "accuracy": 88.5, + "cost": 0.0017, + "robustness": 0 + } + } + } + } + } + } + }, + "routerbench_mlp": { + "metrics": { + "easy": { + "accuracy": 93.5, + "cost": 0.0036, + "robustness": 0 + }, + "medium": { + "accuracy": 45.5, + "cost": 0.0057, + "robustness": 0 + }, + "hard": { + "accuracy": 17.0, + "cost": 0.0064, + "robustness": 0 + }, + "all": { + "accuracy": 61.6, + "cost": 0.0048, + "robustness": 0 + } + }, + "categories": { + "Computer science, information, and general works": { + "metrics": { + "easy": { + "accuracy": 94.8, + "cost": 0.0032, + "robustness": 0 + }, + "medium": { + "accuracy": 43.7, + "cost": 0.0081, + "robustness": 0 + }, + "hard": { + "accuracy": 1.6, + "cost": 0.0142, + "robustness": 0 + }, + "all": { + "accuracy": 61.6, + "cost": 0.0068, + "robustness": 0 + } + }, + "subcategories": { + "Library and information sciences": { + "metrics": { + "easy": { + "accuracy": 94.2, + "cost": 0.0024, + "robustness": 0 + }, + "medium": { + "accuracy": 47.1, + "cost": 0.0021, + "robustness": 0 + }, + "hard": { + "accuracy": 0.0, + "cost": 0.0023, + "robustness": 0 + }, + "all": { + "accuracy": 75.5, + "cost": 0.0023, + "robustness": 0 + } + } + }, + "Computer science, knowledge, and systems": { + "metrics": { + "easy": { + "accuracy": 95.2, + "cost": 0.0037, + "robustness": 0 + }, + "medium": { + "accuracy": 43.0, + "cost": 
0.0095, + "robustness": 0 + }, + "hard": { + "accuracy": 1.9, + "cost": 0.0163, + "robustness": 0 + }, + "all": { + "accuracy": 56.2, + "cost": 0.0085, + "robustness": 0 + } + } + } + } + }, + "Philosophy and psychology": { + "metrics": { + "easy": { + "accuracy": 90.0, + "cost": 0.0035, + "robustness": 0 + }, + "medium": { + "accuracy": 39.3, + "cost": 0.0039, + "robustness": 0 + }, + "hard": { + "accuracy": 3.7, + "cost": 0.0025, + "robustness": 0 + }, + "all": { + "accuracy": 64.7, + "cost": 0.0035, + "robustness": 0 + } + }, + "subcategories": { + "Ethics": { + "metrics": { + "easy": { + "accuracy": 84.3, + "cost": 0.0032, + "robustness": 0 + }, + "medium": { + "accuracy": 30.6, + "cost": 0.004, + "robustness": 0 + }, + "hard": { + "accuracy": 7.7, + "cost": 0.0034, + "robustness": 0 + }, + "all": { + "accuracy": 64.7, + "cost": 0.0035, + "robustness": 0 + } + } + }, + "Philosophy": { + "metrics": { + "easy": { + "accuracy": 97.3, + "cost": 0.0028, + "robustness": 0 + }, + "medium": { + "accuracy": 40.0, + "cost": 0.0033, + "robustness": 0 + }, + "hard": { + "accuracy": 4.4, + "cost": 0.0017, + "robustness": 0 + }, + "all": { + "accuracy": 44.3, + "cost": 0.0026, + "robustness": 0 + } + } + }, + "Psychology": { + "metrics": { + "easy": { + "accuracy": 95.0, + "cost": 0.004, + "robustness": 0 + }, + "medium": { + "accuracy": 58.3, + "cost": 0.0039, + "robustness": 0 + }, + "hard": { + "accuracy": 0.0, + "cost": 0.0035, + "robustness": 0 + }, + "all": { + "accuracy": 76.5, + "cost": 0.0039, + "robustness": 0 + } + } + }, + "Philosophical logic": { + "metrics": { + "easy": { + "accuracy": 92.3, + "cost": 0.0036, + "robustness": 0 + }, + "medium": { + "accuracy": 34.2, + "cost": 0.0041, + "robustness": 0 + }, + "hard": { + "accuracy": 0.0, + "cost": 0.004, + "robustness": 0 + }, + "all": { + "accuracy": 67.6, + "cost": 0.0038, + "robustness": 0 + } + } + } + } + }, + "Social Science": { + "metrics": { + "easy": { + "accuracy": 93.3, + "cost": 0.0043, + 
"robustness": 0 + }, + "medium": { + "accuracy": 46.6, + "cost": 0.0063, + "robustness": 0 + }, + "hard": { + "accuracy": 3.4, + "cost": 0.0067, + "robustness": 0 + }, + "all": { + "accuracy": 57.3, + "cost": 0.0055, + "robustness": 0 + } + }, + "subcategories": { + "Economics": { + "metrics": { + "easy": { + "accuracy": 91.8, + "cost": 0.0048, + "robustness": 0 + }, + "medium": { + "accuracy": 46.0, + "cost": 0.0072, + "robustness": 0 + }, + "hard": { + "accuracy": 5.0, + "cost": 0.01, + "robustness": 0 + }, + "all": { + "accuracy": 58.0, + "cost": 0.0067, + "robustness": 0 + } + } + }, + "Law": { + "metrics": { + "easy": { + "accuracy": 93.8, + "cost": 0.0048, + "robustness": 0 + }, + "medium": { + "accuracy": 49.2, + "cost": 0.006, + "robustness": 0 + }, + "hard": { + "accuracy": 3.3, + "cost": 0.0052, + "robustness": 0 + }, + "all": { + "accuracy": 58.9, + "cost": 0.0053, + "robustness": 0 + } + } + }, + "Social sciences, sociology, and anthropology": { + "metrics": { + "easy": { + "accuracy": 100.0, + "cost": 0.0012, + "robustness": 0 + }, + "medium": { + "accuracy": 55.6, + "cost": 0.0012, + "robustness": 0 + }, + "hard": { + "accuracy": 0.0, + "cost": 0.0011, + "robustness": 0 + }, + "all": { + "accuracy": 23.6, + "cost": 0.0012, + "robustness": 0 + } + } + }, + "Social problems": { + "metrics": { + "easy": { + "accuracy": 97.9, + "cost": 0.0019, + "robustness": 0 + }, + "medium": { + "accuracy": 22.2, + "cost": 0.0019, + "robustness": 0 + }, + "hard": { + "accuracy": 0.0, + "cost": 0.0017, + "robustness": 0 + }, + "all": { + "accuracy": 78.7, + "cost": 0.0019, + "robustness": 0 + } + } + } + } + }, + "Language": { + "metrics": { + "easy": { + "accuracy": 88.0, + "cost": 0.0023, + "robustness": 0 + }, + "medium": { + "accuracy": 46.1, + "cost": 0.0036, + "robustness": 0 + }, + "hard": { + "accuracy": 49.5, + "cost": 0.0038, + "robustness": 0 + }, + "all": { + "accuracy": 60.2, + "cost": 0.0033, + "robustness": 0 + } + }, + "subcategories": { + "Language": { 
+ "metrics": { + "easy": { + "accuracy": 88.0, + "cost": 0.0023, + "robustness": 0 + }, + "medium": { + "accuracy": 46.1, + "cost": 0.0036, + "robustness": 0 + }, + "hard": { + "accuracy": 49.5, + "cost": 0.0038, + "robustness": 0 + }, + "all": { + "accuracy": 60.2, + "cost": 0.0033, + "robustness": 0 + } + } + } + } + }, + "Science": { + "metrics": { + "easy": { + "accuracy": 95.9, + "cost": 0.0036, + "robustness": 0 + }, + "medium": { + "accuracy": 44.7, + "cost": 0.006, + "robustness": 0 + }, + "hard": { + "accuracy": 3.1, + "cost": 0.0074, + "robustness": 0 + }, + "all": { + "accuracy": 67.2, + "cost": 0.0048, + "robustness": 0 + } + }, + "subcategories": { + "Mathematics": { + "metrics": { + "easy": { + "accuracy": 92.4, + "cost": 0.0042, + "robustness": 0 + }, + "medium": { + "accuracy": 44.4, + "cost": 0.0073, + "robustness": 0 + }, + "hard": { + "accuracy": 3.4, + "cost": 0.0131, + "robustness": 0 + }, + "all": { + "accuracy": 59.2, + "cost": 0.0068, + "robustness": 0 + } + } + }, + "Earth sciences and geology": { + "metrics": { + "easy": { + "accuracy": 97.7, + "cost": 0.0027, + "robustness": 0 + }, + "medium": { + "accuracy": 37.3, + "cost": 0.0027, + "robustness": 0 + }, + "hard": { + "accuracy": 15.4, + "cost": 0.0023, + "robustness": 0 + }, + "all": { + "accuracy": 83.6, + "cost": 0.0027, + "robustness": 0 + } + } + }, + "Biology": { + "metrics": { + "easy": { + "accuracy": 97.4, + "cost": 0.0046, + "robustness": 0 + }, + "medium": { + "accuracy": 29.4, + "cost": 0.006, + "robustness": 0 + }, + "hard": { + "accuracy": 0.0, + "cost": 0.005, + "robustness": 0 + }, + "all": { + "accuracy": 83.2, + "cost": 0.0048, + "robustness": 0 + } + } + }, + "Chemistry": { + "metrics": { + "easy": { + "accuracy": 97.7, + "cost": 0.0043, + "robustness": 0 + }, + "medium": { + "accuracy": 55.9, + "cost": 0.0061, + "robustness": 0 + }, + "hard": { + "accuracy": 0.0, + "cost": 0.0054, + "robustness": 0 + }, + "all": { + "accuracy": 67.8, + "cost": 0.0051, + "robustness": 
0 + } + } + }, + "Physics": { + "metrics": { + "easy": { + "accuracy": 95.5, + "cost": 0.0034, + "robustness": 0 + }, + "medium": { + "accuracy": 54.8, + "cost": 0.0059, + "robustness": 0 + }, + "hard": { + "accuracy": 0.0, + "cost": 0.0044, + "robustness": 0 + }, + "all": { + "accuracy": 67.0, + "cost": 0.0046, + "robustness": 0 + } + } + }, + "Animals (Zoology)": { + "metrics": { + "easy": { + "accuracy": 100.0, + "cost": 0.0017, + "robustness": 0 + }, + "medium": { + "accuracy": 60.0, + "cost": 0.0014, + "robustness": 0 + }, + "hard": { + "accuracy": 0, + "cost": 0, + "robustness": 0 + }, + "all": { + "accuracy": 92.0, + "cost": 0.0016, + "robustness": 0 + } + } + }, + "Science": { + "metrics": { + "easy": { + "accuracy": 100.0, + "cost": 0.0017, + "robustness": 0 + }, + "medium": { + "accuracy": 40.0, + "cost": 0.0012, + "robustness": 0 + }, + "hard": { + "accuracy": 1.7, + "cost": 0.0013, + "robustness": 0 + }, + "all": { + "accuracy": 36.0, + "cost": 0.0014, + "robustness": 0 + } + } + } + } + }, + "Technology": { + "metrics": { + "easy": { + "accuracy": 91.6, + "cost": 0.0043, + "robustness": 0 + }, + "medium": { + "accuracy": 38.1, + "cost": 0.0058, + "robustness": 0 + }, + "hard": { + "accuracy": 3.8, + "cost": 0.0052, + "robustness": 0 + }, + "all": { + "accuracy": 65.7, + "cost": 0.0049, + "robustness": 0 + } + }, + "subcategories": { + "Engineering": { + "metrics": { + "easy": { + "accuracy": 90.5, + "cost": 0.0046, + "robustness": 0 + }, + "medium": { + "accuracy": 38.3, + "cost": 0.0086, + "robustness": 0 + }, + "hard": { + "accuracy": 0.0, + "cost": 0.0081, + "robustness": 0 + }, + "all": { + "accuracy": 53.1, + "cost": 0.007, + "robustness": 0 + } + } + }, + "Medicine and health": { + "metrics": { + "easy": { + "accuracy": 91.4, + "cost": 0.0045, + "robustness": 0 + }, + "medium": { + "accuracy": 37.6, + "cost": 0.004, + "robustness": 0 + }, + "hard": { + "accuracy": 5.3, + "cost": 0.0042, + "robustness": 0 + }, + "all": { + "accuracy": 68.4, + 
"cost": 0.0043, + "robustness": 0 + } + } + }, + "Management and public relations": { + "metrics": { + "easy": { + "accuracy": 96.7, + "cost": 0.0022, + "robustness": 0 + }, + "medium": { + "accuracy": 45.5, + "cost": 0.0022, + "robustness": 0 + }, + "hard": { + "accuracy": 0.0, + "cost": 0.0024, + "robustness": 0 + }, + "all": { + "accuracy": 86.3, + "cost": 0.0022, + "robustness": 0 + } + } + } + } + }, + "Arts & recreation": { + "metrics": { + "easy": { + "accuracy": 94.0, + "cost": 0.0031, + "robustness": 0 + }, + "medium": { + "accuracy": 49.4, + "cost": 0.0048, + "robustness": 0 + }, + "hard": { + "accuracy": 7.5, + "cost": 0.0059, + "robustness": 0 + }, + "all": { + "accuracy": 59.6, + "cost": 0.0043, + "robustness": 0 + } + }, + "subcategories": { + "Sports, games and entertainment": { + "metrics": { + "easy": { + "accuracy": 91.4, + "cost": 0.004, + "robustness": 0 + }, + "medium": { + "accuracy": 63.0, + "cost": 0.0083, + "robustness": 0 + }, + "hard": { + "accuracy": 6.1, + "cost": 0.0107, + "robustness": 0 + }, + "all": { + "accuracy": 63.7, + "cost": 0.0069, + "robustness": 0 + } + } + }, + "Music": { + "metrics": { + "easy": { + "accuracy": 93.9, + "cost": 0.0027, + "robustness": 0 + }, + "medium": { + "accuracy": 30.2, + "cost": 0.0029, + "robustness": 0 + }, + "hard": { + "accuracy": 16.0, + "cost": 0.0033, + "robustness": 0 + }, + "all": { + "accuracy": 55.0, + "cost": 0.0029, + "robustness": 0 + } + } + }, + "Arts": { + "metrics": { + "easy": { + "accuracy": 100.0, + "cost": 0.0018, + "robustness": 0 + }, + "medium": { + "accuracy": 71.8, + "cost": 0.0014, + "robustness": 0 + }, + "hard": { + "accuracy": 5.4, + "cost": 0.0014, + "robustness": 0 + }, + "all": { + "accuracy": 58.4, + "cost": 0.0016, + "robustness": 0 + } + } + } + } + }, + "Literature": { + "metrics": { + "easy": { + "accuracy": 95.4, + "cost": 0.0028, + "robustness": 0 + }, + "medium": { + "accuracy": 65.3, + "cost": 0.0024, + "robustness": 0 + }, + "hard": { + "accuracy": 24.7, + 
"cost": 0.0057, + "robustness": 0 + }, + "all": { + "accuracy": 40.1, + "cost": 0.0049, + "robustness": 0 + } + }, + "subcategories": { + "Literature, rhetoric and criticism": { + "metrics": { + "easy": { + "accuracy": 95.4, + "cost": 0.0028, + "robustness": 0 + }, + "medium": { + "accuracy": 65.3, + "cost": 0.0024, + "robustness": 0 + }, + "hard": { + "accuracy": 24.7, + "cost": 0.0057, + "robustness": 0 + }, + "all": { + "accuracy": 40.1, + "cost": 0.0049, + "robustness": 0 + } + } + } + } + }, + "History": { + "metrics": { + "easy": { + "accuracy": 96.0, + "cost": 0.0032, + "robustness": 0 + }, + "medium": { + "accuracy": 62.1, + "cost": 0.0038, + "robustness": 0 + }, + "hard": { + "accuracy": 6.7, + "cost": 0.0036, + "robustness": 0 + }, + "all": { + "accuracy": 68.4, + "cost": 0.0035, + "robustness": 0 + } + }, + "subcategories": { + "Geography": { + "metrics": { + "easy": { + "accuracy": 95.4, + "cost": 0.0013, + "robustness": 0 + }, + "medium": { + "accuracy": 58.6, + "cost": 0.0013, + "robustness": 0 + }, + "hard": { + "accuracy": 25.0, + "cost": 0.0011, + "robustness": 0 + }, + "all": { + "accuracy": 82.7, + "cost": 0.0013, + "robustness": 0 + } + } + }, + "History": { + "metrics": { + "easy": { + "accuracy": 96.0, + "cost": 0.0044, + "robustness": 0 + }, + "medium": { + "accuracy": 62.6, + "cost": 0.0045, + "robustness": 0 + }, + "hard": { + "accuracy": 5.1, + "cost": 0.0038, + "robustness": 0 + }, + "all": { + "accuracy": 61.8, + "cost": 0.0043, + "robustness": 0 + } + } + }, + "Biography and genealogy": { + "metrics": { + "easy": { + "accuracy": 97.6, + "cost": 0.0019, + "robustness": 0 + }, + "medium": { + "accuracy": 66.7, + "cost": 0.0018, + "robustness": 0 + }, + "hard": { + "accuracy": 0.0, + "cost": 0.0018, + "robustness": 0 + }, + "all": { + "accuracy": 90.4, + "cost": 0.0019, + "robustness": 0 + } + } + } + } + } + } + }, "routerdc": { + "metrics": { + "easy": { + "accuracy": 54.3, + "cost": 0.0001, + "robustness": 0 + }, + "medium": { + 
"accuracy": 14.4, + "cost": 0.0001, + "robustness": 0 + }, + "hard": { + "accuracy": 8.7, + "cost": 0.0001, + "robustness": 0 + }, + "all": { + "accuracy": 32.0, + "cost": 0.0001, + "robustness": 0 + } + }, "categories": { - "math": { - "easy": { "accuracy": 41.2, "robustness": 98.1 }, - "medium": { "accuracy": 33.5, "robustness": 94.0 }, - "hard": { "accuracy": 21.0, "robustness": 90.3 } + "Computer science, information, and general works": { + "metrics": { + "easy": { + "accuracy": 55.3, + "cost": 0.0, + "robustness": 0 + }, + "medium": { + "accuracy": 11.0, + "cost": 0.0001, + "robustness": 0 + }, + "hard": { + "accuracy": 1.2, + "cost": 0.0002, + "robustness": 0 + }, + "all": { + "accuracy": 31.2, + "cost": 0.0001, + "robustness": 0 + } + }, + "subcategories": { + "Library and information sciences": { + "metrics": { + "easy": { + "accuracy": 58.2, + "cost": 0.0, + "robustness": 0 + }, + "medium": { + "accuracy": 16.5, + "cost": 0.0, + "robustness": 0 + }, + "hard": { + "accuracy": 8.3, + "cost": 0.0, + "robustness": 0 + }, + "all": { + "accuracy": 44.7, + "cost": 0.0, + "robustness": 0 + } + } + }, + "Computer science, knowledge, and systems": { + "metrics": { + "easy": { + "accuracy": 53.3, + "cost": 0.0001, + "robustness": 0 + }, + "medium": { + "accuracy": 9.8, + "cost": 0.0001, + "robustness": 0 + }, + "hard": { + "accuracy": 0.0, + "cost": 0.0002, + "robustness": 0 + }, + "all": { + "accuracy": 25.9, + "cost": 0.0001, + "robustness": 0 + } + } + } + } + }, + "Philosophy and psychology": { + "metrics": { + "easy": { + "accuracy": 52.9, + "cost": 0.0, + "robustness": 0 + }, + "medium": { + "accuracy": 19.0, + "cost": 0.0001, + "robustness": 0 + }, + "hard": { + "accuracy": 7.4, + "cost": 0.0, + "robustness": 0 + }, + "all": { + "accuracy": 37.4, + "cost": 0.0001, + "robustness": 0 + } + }, + "subcategories": { + "Ethics": { + "metrics": { + "easy": { + "accuracy": 60.0, + "cost": 0.0, + "robustness": 0 + }, + "medium": { + "accuracy": 30.6, + "cost": 0.0, + 
"robustness": 0 + }, + "hard": { + "accuracy": 7.7, + "cost": 0.0, + "robustness": 0 + }, + "all": { + "accuracy": 48.8, + "cost": 0.0, + "robustness": 0 + } + } + }, + "Philosophy": { + "metrics": { + "easy": { + "accuracy": 32.4, + "cost": 0.0, + "robustness": 0 + }, + "medium": { + "accuracy": 10.0, + "cost": 0.0001, + "robustness": 0 + }, + "hard": { + "accuracy": 2.2, + "cost": 0.0, + "robustness": 0 + }, + "all": { + "accuracy": 13.9, + "cost": 0.0, + "robustness": 0 + } + } + }, + "Psychology": { + "metrics": { + "easy": { + "accuracy": 49.6, + "cost": 0.0001, + "robustness": 0 + }, + "medium": { + "accuracy": 18.8, + "cost": 0.0001, + "robustness": 0 + }, + "hard": { + "accuracy": 11.1, + "cost": 0.0001, + "robustness": 0 + }, + "all": { + "accuracy": 38.0, + "cost": 0.0001, + "robustness": 0 + } + } + }, + "Philosophical logic": { + "metrics": { + "easy": { + "accuracy": 50.8, + "cost": 0.0001, + "robustness": 0 + }, + "medium": { + "accuracy": 2.6, + "cost": 0.0001, + "robustness": 0 + }, + "hard": { + "accuracy": 40.0, + "cost": 0.0001, + "robustness": 0 + }, + "all": { + "accuracy": 33.3, + "cost": 0.0001, + "robustness": 0 + } + } + } + } + }, + "Social Science": { + "metrics": { + "easy": { + "accuracy": 46.3, + "cost": 0.0001, + "robustness": 0 + }, + "medium": { + "accuracy": 11.1, + "cost": 0.0001, + "robustness": 0 + }, + "hard": { + "accuracy": 2.7, + "cost": 0.0001, + "robustness": 0 + }, + "all": { + "accuracy": 24.3, + "cost": 0.0001, + "robustness": 0 + } + }, + "subcategories": { + "Economics": { + "metrics": { + "easy": { + "accuracy": 47.0, + "cost": 0.0001, + "robustness": 0 + }, + "medium": { + "accuracy": 10.4, + "cost": 0.0001, + "robustness": 0 + }, + "hard": { + "accuracy": 3.8, + "cost": 0.0002, + "robustness": 0 + }, + "all": { + "accuracy": 24.9, + "cost": 0.0001, + "robustness": 0 + } + } + }, + "Law": { + "metrics": { + "easy": { + "accuracy": 29.2, + "cost": 0.0001, + "robustness": 0 + }, + "medium": { + "accuracy": 6.3, + 
"cost": 0.0001, + "robustness": 0 + }, + "hard": { + "accuracy": 3.3, + "cost": 0.0001, + "robustness": 0 + }, + "all": { + "accuracy": 15.2, + "cost": 0.0001, + "robustness": 0 + } + } + }, + "Social sciences, sociology, and anthropology": { + "metrics": { + "easy": { + "accuracy": 33.3, + "cost": 0.0, + "robustness": 0 + }, + "medium": { + "accuracy": 5.6, + "cost": 0.0, + "robustness": 0 + }, + "hard": { + "accuracy": 0.0, + "cost": 0.0, + "robustness": 0 + }, + "all": { + "accuracy": 3.6, + "cost": 0.0, + "robustness": 0 + } + } + }, + "Social problems": { + "metrics": { + "easy": { + "accuracy": 68.1, + "cost": 0.0, + "robustness": 0 + }, + "medium": { + "accuracy": 66.7, + "cost": 0.0, + "robustness": 0 + }, + "hard": { + "accuracy": 0.0, + "cost": 0.0, + "robustness": 0 + }, + "all": { + "accuracy": 62.3, + "cost": 0.0, + "robustness": 0 + } + } + } + } + }, + "Language": { + "metrics": { + "easy": { + "accuracy": 47.8, + "cost": 0.0, + "robustness": 0 + }, + "medium": { + "accuracy": 24.0, + "cost": 0.0001, + "robustness": 0 + }, + "hard": { + "accuracy": 24.2, + "cost": 0.0001, + "robustness": 0 + }, + "all": { + "accuracy": 31.2, + "cost": 0.0001, + "robustness": 0 + } + }, + "subcategories": { + "Language": { + "metrics": { + "easy": { + "accuracy": 47.8, + "cost": 0.0, + "robustness": 0 + }, + "medium": { + "accuracy": 24.0, + "cost": 0.0001, + "robustness": 0 + }, + "hard": { + "accuracy": 24.2, + "cost": 0.0001, + "robustness": 0 + }, + "all": { + "accuracy": 31.2, + "cost": 0.0001, + "robustness": 0 + } + } + } + } + }, + "Science": { + "metrics": { + "easy": { + "accuracy": 57.0, + "cost": 0.0001, + "robustness": 0 + }, + "medium": { + "accuracy": 12.0, + "cost": 0.0001, + "robustness": 0 + }, + "hard": { + "accuracy": 4.1, + "cost": 0.0001, + "robustness": 0 + }, + "all": { + "accuracy": 35.8, + "cost": 0.0001, + "robustness": 0 + } + }, + "subcategories": { + "Mathematics": { + "metrics": { + "easy": { + "accuracy": 50.4, + "cost": 0.0001, + 
"robustness": 0 + }, + "medium": { + "accuracy": 11.6, + "cost": 0.0001, + "robustness": 0 + }, + "hard": { + "accuracy": 2.2, + "cost": 0.0001, + "robustness": 0 + }, + "all": { + "accuracy": 27.1, + "cost": 0.0001, + "robustness": 0 + } + } + }, + "Earth sciences and geology": { + "metrics": { + "easy": { + "accuracy": 67.4, + "cost": 0.0, + "robustness": 0 + }, + "medium": { + "accuracy": 8.5, + "cost": 0.0, + "robustness": 0 + }, + "hard": { + "accuracy": 15.4, + "cost": 0.0, + "robustness": 0 + }, + "all": { + "accuracy": 54.8, + "cost": 0.0, + "robustness": 0 + } + } + }, + "Biology": { + "metrics": { + "easy": { + "accuracy": 61.5, + "cost": 0.0001, + "robustness": 0 + }, + "medium": { + "accuracy": 23.5, + "cost": 0.0001, + "robustness": 0 + }, + "hard": { + "accuracy": 11.1, + "cost": 0.0001, + "robustness": 0 + }, + "all": { + "accuracy": 53.8, + "cost": 0.0001, + "robustness": 0 + } + } + }, + "Chemistry": { + "metrics": { + "easy": { + "accuracy": 44.2, + "cost": 0.0001, + "robustness": 0 + }, + "medium": { + "accuracy": 23.5, + "cost": 0.0001, + "robustness": 0 + }, + "hard": { + "accuracy": 15.4, + "cost": 0.0001, + "robustness": 0 + }, + "all": { + "accuracy": 32.2, + "cost": 0.0001, + "robustness": 0 + } + } + }, + "Physics": { + "metrics": { + "easy": { + "accuracy": 40.9, + "cost": 0.0001, + "robustness": 0 + }, + "medium": { + "accuracy": 9.5, + "cost": 0.0001, + "robustness": 0 + }, + "hard": { + "accuracy": 9.1, + "cost": 0.0001, + "robustness": 0 + }, + "all": { + "accuracy": 23.7, + "cost": 0.0001, + "robustness": 0 + } + } + }, + "Animals (Zoology)": { + "metrics": { + "easy": { + "accuracy": 55.0, + "cost": 0.0, + "robustness": 0 + }, + "medium": { + "accuracy": 0.0, + "cost": 0.0, + "robustness": 0 + }, + "hard": { + "accuracy": 0, + "cost": 0, + "robustness": 0 + }, + "all": { + "accuracy": 44.0, + "cost": 0.0, + "robustness": 0 + } + } + }, + "Science": { + "metrics": { + "easy": { + "accuracy": 50.0, + "cost": 0.0, + "robustness": 0 + 
}, + "medium": { + "accuracy": 8.0, + "cost": 0.0, + "robustness": 0 + }, + "hard": { + "accuracy": 0.0, + "cost": 0.0, + "robustness": 0 + }, + "all": { + "accuracy": 14.9, + "cost": 0.0, + "robustness": 0 + } + } + } + } + }, + "Technology": { + "metrics": { + "easy": { + "accuracy": 57.4, + "cost": 0.0001, + "robustness": 0 + }, + "medium": { + "accuracy": 18.6, + "cost": 0.0001, + "robustness": 0 + }, + "hard": { + "accuracy": 1.9, + "cost": 0.0001, + "robustness": 0 + }, + "all": { + "accuracy": 39.5, + "cost": 0.0001, + "robustness": 0 + } + }, + "subcategories": { + "Engineering": { + "metrics": { + "easy": { + "accuracy": 49.2, + "cost": 0.0001, + "robustness": 0 + }, + "medium": { + "accuracy": 15.6, + "cost": 0.0001, + "robustness": 0 + }, + "hard": { + "accuracy": 0.0, + "cost": 0.0001, + "robustness": 0 + }, + "all": { + "accuracy": 26.3, + "cost": 0.0001, + "robustness": 0 + } + } + }, + "Medicine and health": { + "metrics": { + "easy": { + "accuracy": 57.6, + "cost": 0.0001, + "robustness": 0 + }, + "medium": { + "accuracy": 20.2, + "cost": 0.0001, + "robustness": 0 + }, + "hard": { + "accuracy": 2.7, + "cost": 0.0001, + "robustness": 0 + }, + "all": { + "accuracy": 42.2, + "cost": 0.0001, + "robustness": 0 + } + } + }, + "Management and public relations": { + "metrics": { + "easy": { + "accuracy": 71.7, + "cost": 0.0, + "robustness": 0 + }, + "medium": { + "accuracy": 27.3, + "cost": 0.0, + "robustness": 0 + }, + "hard": { + "accuracy": 0.0, + "cost": 0.0, + "robustness": 0 + }, + "all": { + "accuracy": 63.0, + "cost": 0.0, + "robustness": 0 + } + } + } + } + }, + "Arts & recreation": { + "metrics": { + "easy": { + "accuracy": 52.7, + "cost": 0.0001, + "robustness": 0 + }, + "medium": { + "accuracy": 15.3, + "cost": 0.0001, + "robustness": 0 + }, + "hard": { + "accuracy": 0.7, + "cost": 0.0001, + "robustness": 0 + }, + "all": { + "accuracy": 28.1, + "cost": 0.0001, + "robustness": 0 + } + }, + "subcategories": { + "Sports, games and entertainment": { 
+ "metrics": { + "easy": { + "accuracy": 43.6, + "cost": 0.0001, + "robustness": 0 + }, + "medium": { + "accuracy": 11.0, + "cost": 0.0001, + "robustness": 0 + }, + "hard": { + "accuracy": 0.0, + "cost": 0.0002, + "robustness": 0 + }, + "all": { + "accuracy": 23.5, + "cost": 0.0001, + "robustness": 0 + } + } + }, + "Music": { + "metrics": { + "easy": { + "accuracy": 59.6, + "cost": 0.0, + "robustness": 0 + }, + "medium": { + "accuracy": 22.4, + "cost": 0.0, + "robustness": 0 + }, + "hard": { + "accuracy": 0.0, + "cost": 0.0, + "robustness": 0 + }, + "all": { + "accuracy": 35.4, + "cost": 0.0, + "robustness": 0 + } + } + }, + "Arts": { + "metrics": { + "easy": { + "accuracy": 62.7, + "cost": 0.0, + "robustness": 0 + }, + "medium": { + "accuracy": 5.1, + "cost": 0.0, + "robustness": 0 + }, + "hard": { + "accuracy": 1.8, + "cost": 0.0, + "robustness": 0 + }, + "all": { + "accuracy": 26.0, + "cost": 0.0, + "robustness": 0 + } + } + } + } + }, + "Literature": { + "metrics": { + "easy": { + "accuracy": 57.3, + "cost": 0.0001, + "robustness": 0 + }, + "medium": { + "accuracy": 9.5, + "cost": 0.0001, + "robustness": 0 + }, + "hard": { + "accuracy": 12.4, + "cost": 0.0001, + "robustness": 0 + }, + "all": { + "accuracy": 19.4, + "cost": 0.0001, + "robustness": 0 + } + }, + "subcategories": { + "Literature, rhetoric and criticism": { + "metrics": { + "easy": { + "accuracy": 57.3, + "cost": 0.0001, + "robustness": 0 + }, + "medium": { + "accuracy": 9.5, + "cost": 0.0001, + "robustness": 0 + }, + "hard": { + "accuracy": 12.4, + "cost": 0.0001, + "robustness": 0 + }, + "all": { + "accuracy": 19.4, + "cost": 0.0001, + "robustness": 0 + } + } + } + } + }, + "History": { + "metrics": { + "easy": { + "accuracy": 51.6, + "cost": 0.0001, + "robustness": 0 + }, + "medium": { + "accuracy": 10.2, + "cost": 0.0001, + "robustness": 0 + }, + "hard": { + "accuracy": 2.7, + "cost": 0.0001, + "robustness": 0 + }, + "all": { + "accuracy": 30.7, + "cost": 0.0001, + "robustness": 0 + } + }, + 
"subcategories": { + "Geography": { + "metrics": { + "easy": { + "accuracy": 60.6, + "cost": 0.0, + "robustness": 0 + }, + "medium": { + "accuracy": 6.9, + "cost": 0.0, + "robustness": 0 + }, + "hard": { + "accuracy": 0.0, + "cost": 0.0, + "robustness": 0 + }, + "all": { + "accuracy": 45.3, + "cost": 0.0, + "robustness": 0 + } + } + }, + "History": { + "metrics": { + "easy": { + "accuracy": 48.9, + "cost": 0.0001, + "robustness": 0 + }, + "medium": { + "accuracy": 10.8, + "cost": 0.0001, + "robustness": 0 + }, + "hard": { + "accuracy": 2.2, + "cost": 0.0001, + "robustness": 0 + }, + "all": { + "accuracy": 25.5, + "cost": 0.0001, + "robustness": 0 + } + } + }, + "Biography and genealogy": { + "metrics": { + "easy": { + "accuracy": 42.9, + "cost": 0.0, + "robustness": 0 + }, + "medium": { + "accuracy": 11.1, + "cost": 0.0, + "robustness": 0 + }, + "hard": { + "accuracy": 100.0, + "cost": 0.0, + "robustness": 0 + }, + "all": { + "accuracy": 38.5, + "cost": 0.0, + "robustness": 0 + } + } + } + } + } + } + }, + "chayan": { + "metrics": { + "easy": { + "accuracy": 90.7, + "cost": 0.0002, + "robustness": 0 + }, + "medium": { + "accuracy": 55.6, + "cost": 0.0006, + "robustness": 0 + }, + "hard": { + "accuracy": 24.1, + "cost": 0.0012, + "robustness": 0 + }, + "all": { + "accuracy": 64.9, + "cost": 0.0006, + "robustness": 0 + } + }, + "categories": { + "Computer science, information, and general works": { + "metrics": { + "easy": { + "accuracy": 91.9, + "cost": 0.0002, + "robustness": 0 + }, + "medium": { + "accuracy": 68.2, + "cost": 0.0005, + "robustness": 0 + }, + "hard": { + "accuracy": 27.8, + "cost": 0.0016, + "robustness": 0 + }, + "all": { + "accuracy": 72.9, + "cost": 0.0005, + "robustness": 0 + } + }, + "subcategories": { + "Library and information sciences": { + "metrics": { + "easy": { + "accuracy": 88.7, + "cost": 0.0002, + "robustness": 0 + }, + "medium": { + "accuracy": 44.7, + "cost": 0.0003, + "robustness": 0 + }, + "hard": { + "accuracy": 2.8, + "cost": 
0.0002, + "robustness": 0 + }, + "all": { + "accuracy": 71.5, + "cost": 0.0002, + "robustness": 0 + } + } + }, + "Computer science, knowledge, and systems": { + "metrics": { + "easy": { + "accuracy": 94.0, + "cost": 0.0001, + "robustness": 0 + }, + "medium": { + "accuracy": 73.5, + "cost": 0.0005, + "robustness": 0 + }, + "hard": { + "accuracy": 32.1, + "cost": 0.0018, + "robustness": 0 + }, + "all": { + "accuracy": 73.4, + "cost": 0.0006, + "robustness": 0 + } + } + } + } + }, + "Philosophy and psychology": { + "metrics": { + "easy": { + "accuracy": 90.2, + "cost": 0.0002, + "robustness": 0 + }, + "medium": { + "accuracy": 51.2, + "cost": 0.0004, + "robustness": 0 + }, + "hard": { + "accuracy": 9.9, + "cost": 0.0004, + "robustness": 0 + }, + "all": { + "accuracy": 69.1, + "cost": 0.0003, + "robustness": 0 + } + }, + "subcategories": { + "Ethics": { + "metrics": { + "easy": { + "accuracy": 88.6, + "cost": 0.0002, + "robustness": 0 + }, + "medium": { + "accuracy": 30.6, + "cost": 0.0004, + "robustness": 0 + }, + "hard": { + "accuracy": 0.0, + "cost": 0.0002, + "robustness": 0 + }, + "all": { + "accuracy": 67.1, + "cost": 0.0003, + "robustness": 0 + } + } + }, + "Philosophy": { + "metrics": { + "easy": { + "accuracy": 86.5, + "cost": 0.0002, + "robustness": 0 + }, + "medium": { + "accuracy": 62.5, + "cost": 0.0004, + "robustness": 0 + }, + "hard": { + "accuracy": 13.3, + "cost": 0.0005, + "robustness": 0 + }, + "all": { + "accuracy": 51.6, + "cost": 0.0004, + "robustness": 0 + } + } + }, + "Psychology": { + "metrics": { + "easy": { + "accuracy": 92.6, + "cost": 0.0002, + "robustness": 0 + }, + "medium": { + "accuracy": 60.4, + "cost": 0.0003, + "robustness": 0 + }, + "hard": { + "accuracy": 0.0, + "cost": 0.0002, + "robustness": 0 + }, + "all": { + "accuracy": 75.4, + "cost": 0.0002, + "robustness": 0 + } + } + }, + "Philosophical logic": { + "metrics": { + "easy": { + "accuracy": 92.3, + "cost": 0.0001, + "robustness": 0 + }, + "medium": { + "accuracy": 73.7, + 
"cost": 0.0004, + "robustness": 0 + }, + "hard": { + "accuracy": 40.0, + "cost": 0.0009, + "robustness": 0 + }, + "all": { + "accuracy": 83.3, + "cost": 0.0003, + "robustness": 0 + } + } + } + } + }, + "Social Science": { + "metrics": { + "easy": { + "accuracy": 90.3, + "cost": 0.0002, + "robustness": 0 + }, + "medium": { + "accuracy": 56.5, + "cost": 0.0006, + "robustness": 0 + }, + "hard": { + "accuracy": 6.0, + "cost": 0.001, + "robustness": 0 + }, + "all": { + "accuracy": 60.1, + "cost": 0.0005, + "robustness": 0 + } + }, + "subcategories": { + "Economics": { + "metrics": { + "easy": { + "accuracy": 89.6, + "cost": 0.0002, + "robustness": 0 + }, + "medium": { + "accuracy": 60.7, + "cost": 0.0007, + "robustness": 0 + }, + "hard": { + "accuracy": 7.5, + "cost": 0.0015, + "robustness": 0 + }, + "all": { + "accuracy": 63.1, + "cost": 0.0006, + "robustness": 0 + } + } + }, + "Law": { + "metrics": { + "easy": { + "accuracy": 89.2, + "cost": 0.0003, + "robustness": 0 + }, + "medium": { + "accuracy": 50.8, + "cost": 0.0005, + "robustness": 0 + }, + "hard": { + "accuracy": 6.7, + "cost": 0.0003, + "robustness": 0 + }, + "all": { + "accuracy": 58.2, + "cost": 0.0004, + "robustness": 0 + } + } + }, + "Social sciences, sociology, and anthropology": { + "metrics": { + "easy": { + "accuracy": 100.0, + "cost": 0.0005, + "robustness": 0 + }, + "medium": { + "accuracy": 61.1, + "cost": 0.0005, + "robustness": 0 + }, + "hard": { + "accuracy": 2.9, + "cost": 0.0005, + "robustness": 0 + }, + "all": { + "accuracy": 27.3, + "cost": 0.0005, + "robustness": 0 + } + } + }, + "Social problems": { + "metrics": { + "easy": { + "accuracy": 93.6, + "cost": 0.0001, + "robustness": 0 + }, + "medium": { + "accuracy": 11.1, + "cost": 0.0002, + "robustness": 0 + }, + "hard": { + "accuracy": 0.0, + "cost": 0.0002, + "robustness": 0 + }, + "all": { + "accuracy": 73.8, + "cost": 0.0001, + "robustness": 0 + } + } + } + } + }, + "Language": { + "metrics": { + "easy": { + "accuracy": 76.1, + "cost": 
0.0002, + "robustness": 0 + }, + "medium": { + "accuracy": 38.3, + "cost": 0.0004, + "robustness": 0 + }, + "hard": { + "accuracy": 49.9, + "cost": 0.0006, + "robustness": 0 + }, + "all": { + "accuracy": 54.9, + "cost": 0.0004, + "robustness": 0 + } + }, + "subcategories": { + "Language": { + "metrics": { + "easy": { + "accuracy": 76.1, + "cost": 0.0002, + "robustness": 0 + }, + "medium": { + "accuracy": 38.3, + "cost": 0.0004, + "robustness": 0 + }, + "hard": { + "accuracy": 49.9, + "cost": 0.0006, + "robustness": 0 + }, + "all": { + "accuracy": 54.9, + "cost": 0.0004, + "robustness": 0 + } + } + } + } + }, + "Science": { + "metrics": { + "easy": { + "accuracy": 94.7, + "cost": 0.0002, + "robustness": 0 + }, + "medium": { + "accuracy": 62.7, + "cost": 0.0005, + "robustness": 0 + }, + "hard": { + "accuracy": 17.0, + "cost": 0.0009, + "robustness": 0 + }, + "all": { + "accuracy": 74.1, + "cost": 0.0004, + "robustness": 0 + } + }, + "subcategories": { + "Mathematics": { + "metrics": { + "easy": { + "accuracy": 93.1, + "cost": 0.0003, + "robustness": 0 + }, + "medium": { + "accuracy": 62.0, + "cost": 0.0006, + "robustness": 0 + }, + "hard": { + "accuracy": 27.0, + "cost": 0.0013, + "robustness": 0 + }, + "all": { + "accuracy": 70.4, + "cost": 0.0006, + "robustness": 0 + } + } + }, + "Earth sciences and geology": { + "metrics": { + "easy": { + "accuracy": 96.1, + "cost": 0.0002, + "robustness": 0 + }, + "medium": { + "accuracy": 57.6, + "cost": 0.0002, + "robustness": 0 + }, + "hard": { + "accuracy": 0.0, + "cost": 0.0002, + "robustness": 0 + }, + "all": { + "accuracy": 85.5, + "cost": 0.0002, + "robustness": 0 + } + } + }, + "Biology": { + "metrics": { + "easy": { + "accuracy": 97.4, + "cost": 0.0001, + "robustness": 0 + }, + "medium": { + "accuracy": 47.1, + "cost": 0.0004, + "robustness": 0 + }, + "hard": { + "accuracy": 11.1, + "cost": 0.0002, + "robustness": 0 + }, + "all": { + "accuracy": 86.0, + "cost": 0.0002, + "robustness": 0 + } + } + }, + "Chemistry": { + 
"metrics": { + "easy": { + "accuracy": 95.3, + "cost": 0.0002, + "robustness": 0 + }, + "medium": { + "accuracy": 76.5, + "cost": 0.0004, + "robustness": 0 + }, + "hard": { + "accuracy": 7.7, + "cost": 0.0002, + "robustness": 0 + }, + "all": { + "accuracy": 75.6, + "cost": 0.0003, + "robustness": 0 + } + } + }, + "Physics": { + "metrics": { + "easy": { + "accuracy": 88.6, + "cost": 0.0001, + "robustness": 0 + }, + "medium": { + "accuracy": 76.2, + "cost": 0.0004, + "robustness": 0 + }, + "hard": { + "accuracy": 36.4, + "cost": 0.0006, + "robustness": 0 + }, + "all": { + "accuracy": 77.3, + "cost": 0.0003, + "robustness": 0 + } + } + }, + "Animals (Zoology)": { + "metrics": { + "easy": { + "accuracy": 90.0, + "cost": 0.0, + "robustness": 0 + }, + "medium": { + "accuracy": 40.0, + "cost": 0.0002, + "robustness": 0 + }, + "hard": { + "accuracy": 0, + "cost": 0, + "robustness": 0 + }, + "all": { + "accuracy": 80.0, + "cost": 0.0001, + "robustness": 0 + } + } + }, + "Science": { + "metrics": { + "easy": { + "accuracy": 96.7, + "cost": 0.0001, + "robustness": 0 + }, + "medium": { + "accuracy": 56.0, + "cost": 0.0005, + "robustness": 0 + }, + "hard": { + "accuracy": 5.1, + "cost": 0.0006, + "robustness": 0 + }, + "all": { + "accuracy": 40.4, + "cost": 0.0004, + "robustness": 0 + } + } + } + } + }, + "Technology": { + "metrics": { + "easy": { + "accuracy": 93.1, + "cost": 0.0002, + "robustness": 0 + }, + "medium": { + "accuracy": 53.1, + "cost": 0.0005, + "robustness": 0 + }, + "hard": { + "accuracy": 13.4, + "cost": 0.0004, + "robustness": 0 + }, + "all": { + "accuracy": 72.1, + "cost": 0.0003, + "robustness": 0 + } + }, + "subcategories": { + "Engineering": { + "metrics": { + "easy": { + "accuracy": 93.7, + "cost": 0.0003, + "robustness": 0 + }, + "medium": { + "accuracy": 70.1, + "cost": 0.0008, + "robustness": 0 + }, + "hard": { + "accuracy": 33.3, + "cost": 0.0008, + "robustness": 0 + }, + "all": { + "accuracy": 74.3, + "cost": 0.0006, + "robustness": 0 + } + } + }, + 
"Medicine and health": { + "metrics": { + "easy": { + "accuracy": 92.8, + "cost": 0.0002, + "robustness": 0 + }, + "medium": { + "accuracy": 42.6, + "cost": 0.0003, + "robustness": 0 + }, + "hard": { + "accuracy": 6.2, + "cost": 0.0003, + "robustness": 0 + }, + "all": { + "accuracy": 70.7, + "cost": 0.0002, + "robustness": 0 + } + } + }, + "Management and public relations": { + "metrics": { + "easy": { + "accuracy": 95.0, + "cost": 0.0002, + "robustness": 0 + }, + "medium": { + "accuracy": 27.3, + "cost": 0.0, + "robustness": 0 + }, + "hard": { + "accuracy": 0.0, + "cost": 0.0001, + "robustness": 0 + }, + "all": { + "accuracy": 82.2, + "cost": 0.0001, + "robustness": 0 + } + } + } + } + }, + "Arts & recreation": { + "metrics": { + "easy": { + "accuracy": 81.5, + "cost": 0.0006, + "robustness": 0 + }, + "medium": { + "accuracy": 32.9, + "cost": 0.0013, + "robustness": 0 + }, + "hard": { + "accuracy": 6.1, + "cost": 0.0015, + "robustness": 0 + }, + "all": { + "accuracy": 48.0, + "cost": 0.001, + "robustness": 0 + } + }, + "subcategories": { + "Sports, games and entertainment": { + "metrics": { + "easy": { + "accuracy": 70.7, + "cost": 0.0009, + "robustness": 0 + }, + "medium": { + "accuracy": 26.0, + "cost": 0.0023, + "robustness": 0 + }, + "hard": { + "accuracy": 3.0, + "cost": 0.0026, + "robustness": 0 + }, + "all": { + "accuracy": 41.5, + "cost": 0.0017, + "robustness": 0 + } + } + }, + "Music": { + "metrics": { + "easy": { + "accuracy": 85.9, + "cost": 0.0004, + "robustness": 0 + }, + "medium": { + "accuracy": 27.6, + "cost": 0.0008, + "robustness": 0 + }, + "hard": { + "accuracy": 24.0, + "cost": 0.0007, + "robustness": 0 + }, + "all": { + "accuracy": 51.2, + "cost": 0.0006, + "robustness": 0 + } + } + }, + "Arts": { + "metrics": { + "easy": { + "accuracy": 100.0, + "cost": 0.0002, + "robustness": 0 + }, + "medium": { + "accuracy": 66.7, + "cost": 0.0003, + "robustness": 0 + }, + "hard": { + "accuracy": 1.8, + "cost": 0.0004, + "robustness": 0 + }, + "all": { + 
"accuracy": 55.8, + "cost": 0.0003, + "robustness": 0 + } + } + } + } + }, + "Literature": { + "metrics": { + "easy": { + "accuracy": 97.3, + "cost": 0.0005, + "robustness": 0 + }, + "medium": { + "accuracy": 64.7, + "cost": 0.0009, + "robustness": 0 + }, + "hard": { + "accuracy": 29.2, + "cost": 0.002, + "robustness": 0 + }, + "all": { + "accuracy": 43.6, + "cost": 0.0016, + "robustness": 0 + } + }, + "subcategories": { + "Literature, rhetoric and criticism": { + "metrics": { + "easy": { + "accuracy": 97.3, + "cost": 0.0005, + "robustness": 0 + }, + "medium": { + "accuracy": 64.7, + "cost": 0.0009, + "robustness": 0 + }, + "hard": { + "accuracy": 29.2, + "cost": 0.002, + "robustness": 0 + }, + "all": { + "accuracy": 43.6, + "cost": 0.0016, + "robustness": 0 + } + } + } + } }, - "coding": { - "easy": { "accuracy": 55.0, "robustness": 99.0 }, - "medium": { "accuracy": 43.3, "robustness": 95.0 }, - "hard": { "accuracy": 27.1, "robustness": 92.4 } + "History": { + "metrics": { + "easy": { + "accuracy": 89.0, + "cost": 0.0003, + "robustness": 0 + }, + "medium": { + "accuracy": 61.0, + "cost": 0.0004, + "robustness": 0 + }, + "hard": { + "accuracy": 8.1, + "cost": 0.0005, + "robustness": 0 + }, + "all": { + "accuracy": 64.7, + "cost": 0.0004, + "robustness": 0 + } + }, + "subcategories": { + "Geography": { + "metrics": { + "easy": { + "accuracy": 91.7, + "cost": 0.0002, + "robustness": 0 + }, + "medium": { + "accuracy": 69.0, + "cost": 0.0004, + "robustness": 0 + }, + "hard": { + "accuracy": 16.7, + "cost": 0.0004, + "robustness": 0 + }, + "all": { + "accuracy": 81.3, + "cost": 0.0002, + "robustness": 0 + } + } + }, + "History": { + "metrics": { + "easy": { + "accuracy": 86.5, + "cost": 0.0004, + "robustness": 0 + }, + "medium": { + "accuracy": 59.7, + "cost": 0.0005, + "robustness": 0 + }, + "hard": { + "accuracy": 7.4, + "cost": 0.0005, + "robustness": 0 + }, + "all": { + "accuracy": 57.4, + "cost": 0.0004, + "robustness": 0 + } + } + }, + "Biography and genealogy": { 
+ "metrics": { + "easy": { + "accuracy": 95.2, + "cost": 0.0001, + "robustness": 0 + }, + "medium": { + "accuracy": 55.6, + "cost": 0.0, + "robustness": 0 + }, + "hard": { + "accuracy": 0.0, + "cost": 0.0, + "robustness": 0 + }, + "all": { + "accuracy": 86.5, + "cost": 0.0001, + "robustness": 0 + } + } + } + } } } } diff --git a/src/data/routers.json b/src/data/routers.json new file mode 100644 index 0000000..f15c882 --- /dev/null +++ b/src/data/routers.json @@ -0,0 +1,312 @@ +{ + "RouterDC": { + "name": "RouterDC", + "type": "open-source", + "description": "Dual contrastive learning-based router with cost optimization", + "affiliation": "SUSTech", + "modelPool": [ + "mistralai/Mistral-7B-v0.1", + "meta-math/MetaMath-Mistral-7B", + "itpossible/Chinese-Mistral-7B-v0.1", + "HuggingFaceH4/zephyr-7b-beta", + "cognitivecomputations/dolphin-2.6-mistral-7b", + "meta-llama/llama-3-8b-instruct", + "cognitivecomputations/dolphin-2.9-llama3-8b" + ], + "paperUrl": "https://arxiv.org/abs/2409.19886", + "websiteUrl": "https://arxiv.org/abs/2409.19886", + "githubUrl": "https://github.com/shuhao02/RouterDC" + }, + "azure": { + "name": "Azure-Router", + "type": "closed-source", + "description": "Microsoft Azure's model routing service", + "affiliation": "Microsoft", + "modelPool": [ + "gpt-4.1", + "gpt-4.1-mini", + "gpt-4.1-nano", + "o4-mini", + "gpt-5-mini", + "gpt-5-nano", + "gpt-5-chat" + ], + "paperUrl": "https://learn.microsoft.com/en-us/azure/ai-foundry/openai/concepts/model-router", + "websiteUrl": "https://ai.azure.com/catalog/models/model-router" + }, + "carrot": { + "name": "CARROT", + "type": "open-source", + "description": "Cost-aware routing with dual contrastive learning approach", + "affiliation": "UMich", + "modelPool": [ + "aws-claude-3-5-sonnet-v1", + "aws-titan-text-premier-v1", + "openai-gpt-4o", + "openai-gpt-4o-mini", + "wxai-granite-3-2b-instruct-8k-max-tokens", + "wxai-granite-3-8b-instruct-8k-max-tokens", + "wxai-llama-3-1-70b-instruct", + 
"wxai-llama-3-1-8b-instruct", + "wxai-llama-3-2-1b-instruct", + "wxai-llama-3-2-3b-instruct", + "wxai-llama-3-3-70b-instruct", + "wxai-mixtral-8x7b-instruct-v01", + "wxai-llama-3-405b-instruct" + ], + "paperUrl": "https://arxiv.org/abs/2502.03261", + "websiteUrl": "https://arxiv.org/abs/2502.03261", + "githubUrl": "https://github.com/somerstep/CARROT", + "huggingfaceUrl": "https://huggingface.co/CARROT-LLM-Routing" + }, + "gpt5": { + "name": "GPT-5", + "type": "closed-source", + "description": "OpenAI's internal routing system for GPT model family", + "affiliation": "OpenAI", + "modelPool": ["gpt-5-chat"], + "paperUrl": "https://openai.com/index/introducing-gpt-5/", + "websiteUrl": "https://openai.com/index/introducing-gpt-5/" + }, + "graphrouter": { + "name": "GraphRouter", + "type": "open-source", + "description": "Graph neural network-based routing with semantic understanding", + "affiliation": "UIUC", + "modelPool": [ + "meta-llama/llama-3-8b-instruct", + "mistralai/mixtral-8x7b-chat", + "nousresearch/nous-34b-chat", + "meta/llama-2-7b-chat", + "mistralai/mistral-7b-chat", + "meta/llama-3-70b-chat", + "meta/llama-3-turbo-8b-chat", + "meta/llama-3-turbo-70b-chat", + "meta/llama-3.1-turbo-70b-chat", + "qwen/qwen-1.5-72b-chat" + ], + "paperUrl": "https://arxiv.org/abs/2410.03834", + "websiteUrl": "https://arxiv.org/abs/2410.03834", + "githubUrl": "https://github.com/ulab-uiuc/GraphRouter" + }, + "universal_router": { + "name": "Universal Router", + "type": "open-source", + "description": "Universal baseline that mirrors the RouterBench model mix", + "affiliation": "RouterArena", + "modelPool": [ + "WizardLM/WizardLM-13B-V1.2", + "claude-instant-v1", + "claude-v1", + "claude-v2", + "gpt-3.5-turbo-1106", + "gpt-4-1106-preview", + "meta/codellama-34b-instruct", + "meta/llama-2-70b-chat", + "mistralai/mistral-7b-chat", + "mistralai/mixtral-8x7b-chat", + "zero-one-ai/Yi-34B-Chat" + ], + "paperUrl": "https://huggingface.co/datasets/withmartian/routerbench", + 
"websiteUrl": "https://huggingface.co/datasets/withmartian/routerbench" + }, + "mirt_bert": { + "name": "MIRT-BERT", + "type": "open-source", + "description": "Multi-item response theory with BERT embeddings", + "affiliation": "USTC", + "modelPool": [ + "glm-4-air", + "glm-4-flash", + "glm-4-plus", + "gpt-4o", + "gpt-4o-mini", + "gpt-4o-mini-cot", + "deepseek-coder", + "deepseek-chat", + "qwen25-32b-int4", + "qwen25-7b-instruct", + "qwen25-72b-instruct", + "qwq-32b-preview", + "qwen25-math-7b-instruct", + "llama31-8b-instruct", + "llama31-70b-instruct", + "llama31-405b-instruct", + "mixtral-8x7b-instruct", + "mistral-7b-instruct-v02", + "ministral-8b-instruct-2410", + "gemini15-flash", + "claude35-haiku20241022" + ], + "paperUrl": "https://arxiv.org/pdf/2506.01048", + "websiteUrl": "https://arxiv.org/pdf/2506.01048", + "githubUrl": "https://github.com/Mercidaiha/IRT-Router" + }, + "nirt_bert": { + "name": "NIRT-BERT", + "type": "open-source", + "description": "Neural item response theory with BERT-based routing", + "affiliation": "USTC", + "modelPool": [ + "glm-4-air", + "glm-4-flash", + "glm-4-plus", + "gpt-4o", + "gpt-4o-mini", + "gpt-4o-mini-cot", + "deepseek-coder", + "deepseek-chat", + "qwen25-32b-int4", + "qwen25-7b-instruct", + "qwen25-72b-instruct", + "qwq-32b-preview", + "qwen25-math-7b-instruct", + "llama31-8b-instruct", + "llama31-70b-instruct", + "llama31-405b-instruct", + "mixtral-8x7b-instruct", + "mistral-7b-instruct-v02", + "ministral-8b-instruct-2410", + "gemini15-flash", + "claude35-haiku20241022" + ], + "paperUrl": "https://arxiv.org/pdf/2506.01048", + "websiteUrl": "https://arxiv.org/pdf/2506.01048", + "githubUrl": "https://github.com/Mercidaiha/IRT-Router" + }, + "notdiamond": { + "name": "NotDiamond", + "type": "closed-source", + "description": "Closed-source routing service with access to 60+ models", + "affiliation": "NotDiamond", + "modelPool": [ + "openai/gpt-4o", + "openai/gpt-4o-mini", + "openai/gpt-3.5-turbo-0125", + "openai/gpt-4.1", + 
"openai/gpt-4.1-mini", + "openai/gpt-4.1-nano", + "openai/o1-mini", + + "anthropic/claude-3-7-sonnet-20250219", + "anthropic/claude-3-5-haiku-20241022", + "anthropic/claude-3-haiku-20240307", + "anthropic/claude-sonnet-4-20250514", + + "google/gemini-1.5-pro-latest", + "google/gemini-1.5-flash-latest", + "google/gemini-2.0-flash-001", + "google/gemini-2.5-flash", + "google/gemini-2.5-pro", + + "mistral/codestral-latest", + "mistral/open-mixtral-8x7b", + "mistral/mistral-large-latest", + "mistral/mistral-medium-latest", + "mistral/mistral-small-latest", + "mistral/open-mistral-7b", + "mistral/open-mistral-nemo", + + "togetherai/Meta-Llama-3.1-8B-Instruct-Turbo", + "togetherai/Meta-Llama-3.1-70B-Instruct-Turbo", + "togetherai/Meta-Llama-3.1-405B-Instruct-Turbo" + ], + "paperUrl": "https://www.notdiamond.ai/", + "websiteUrl": "https://www.notdiamond.ai/" + }, + "routellm": { + "name": "RouteLLM", + "type": "open-source", + "description": "Binary selection between strong and weak models", + "affiliation": "Berkeley", + "modelPool": [ + "openai-gpt-4o", + "mixtral-8x7b-instruct" + ], + "paperUrl": "https://arxiv.org/abs/2406.18665", + "websiteUrl": "https://arxiv.org/abs/2406.18665", + "githubUrl": "https://github.com/lm-sys/RouteLLM", + "huggingfaceUrl": "https://huggingface.co/routellm" + }, + "routerbench_knn": { + "name": "RouterBench-KNN", + "type": "open-source", + "description": "K-Nearest Neighbors-based router benchmark", + "affiliation": "Martian", + "modelPool": [ + "WizardLM/WizardLM-13B-V1.2", + "claude-instant-v1", + "claude-v1", + "claude-v2", + "gpt-3.5-turbo-1106", + "gpt-4-1106-preview", + "meta/codellama-34b-instruct", + "meta/llama-2-70b-chat", + "mistralai/mistral-7b-chat", + "mistralai/mixtral-8x7b-chat", + "zero-one-ai/Yi-34B-Chat" + ], + "paperUrl": "https://arxiv.org/pdf/2403.12031", + "websiteUrl": "https://arxiv.org/pdf/2403.12031", + "githubUrl": "https://github.com/withmartian/routerbench", + "huggingfaceUrl": 
"https://huggingface.co/datasets/withmartian/routerbench" + }, + "routerbench_mlp": { + "name": "RouterBench-MLP", + "type": "open-source", + "description": "Multi-Layer Perceptron-based router benchmark", + "affiliation": "Martian", + "modelPool": [ + "WizardLM/WizardLM-13B-V1.2", + "claude-instant-v1", + "claude-v1", + "claude-v2", + "gpt-3.5-turbo-1106", + "gpt-4-1106-preview", + "meta/codellama-34b-instruct", + "meta/llama-2-70b-chat", + "mistralai/mistral-7b-chat", + "mistralai/mixtral-8x7b-chat", + "zero-one-ai/Yi-34B-Chat" + ], + "paperUrl": "https://arxiv.org/pdf/2403.12031", + "websiteUrl": "https://arxiv.org/pdf/2403.12031", + "githubUrl": "https://github.com/withmartian/routerbench", + "huggingfaceUrl": "https://huggingface.co/datasets/withmartian/routerbench" + }, + "vllm": { + "name": "vLLM-SR", + "type": "open-source", + "description": "vLLM-based routing service", + "affiliation": "vLLM SR Team", + "modelPool": [ + "meta-llama/llama-3-8b-instruct", + "mistralai/mistral-7b-chat", + "meta-llama/llama-3-70b-chat", + "qwen/qwen-2.5-72b-instruct", + "mistralai/mixtral-8x7b-chat", + "meta-llama/llama-3.1-405b-instruct", + "openai/gpt-4o", + "anthropic/claude-3.5-sonnet" + ], + "paperUrl": "https://vllm-semantic-router.com/", + "websiteUrl": "https://vllm-semantic-router.com/", + "githubUrl": "https://github.com/vllm-project/semantic-router", + "huggingfaceUrl": "https://huggingface.co/llm-semantic-router" + }, + "chayan": { + "name": "Chayan", + "type": "open-source", + "description": "Chayan-based routing service", + "affiliation": "Adaptive Classifier", + "modelPool": [ + "openai/gpt-4o-mini", + "google/gemini-2.5-flash-lite", + "google/gemini-2.5-flash", + "openai/gpt-4o" + ], + "paperUrl": "https://huggingface.co/adaptive-classifier/chayan", + "websiteUrl": "https://huggingface.co/adaptive-classifier/chayan", + "huggingfaceUrl": "https://huggingface.co/adaptive-classifier/chayan" + } +} diff --git a/src/pages/LeaderboardPage.css 
b/src/pages/LeaderboardPage.css index 8c7f758..bac938a 100644 --- a/src/pages/LeaderboardPage.css +++ b/src/pages/LeaderboardPage.css @@ -10,6 +10,22 @@ padding: 0 1rem; } + +.leaderboard-full-bleed { + width: 100vw; + position: relative; + left: 50%; + right: 50%; + margin-left: -50vw; + padding: 0 1rem; + margin-bottom: 2rem; +} + +.leaderboard-full-bleed .leaderboard-container { + width:90vw; + margin: 0 auto; +} + .page-header { text-align: center; margin-bottom: 2rem; @@ -207,195 +223,950 @@ transform-origin: left; } -.beta-label-link:hover .beta-label-text::after { - transform: scaleX(1); +.beta-label-link:hover .beta-label-text::after { + transform: scaleX(1); +} + + +.beta-value { + font-variant-numeric: tabular-nums; + color: #2563eb; +} + +.beta-slider { + width: 100%; + accent-color: #2563eb; +} + +.beta-hints { + display: flex; + justify-content: space-between; + font-size: 0.9rem; + color: #363636; +} + +.beta-weights { + display: flex; + gap: 1rem; + align-items: center; + flex-wrap: wrap; + font-size: 0.9rem; + color: #1f2937; +} + +.beta-weight { + display: flex; + flex-direction: column; + background: #eef2ff; + border-radius: 8px; + padding: 0.5rem 0.75rem; +} + +.beta-weight strong { + font-size: 1rem; + font-weight: 700; +} + +.beta-weight-pill { + background: #dbeafe; + border-radius: 999px; + padding: 0.5rem 1rem; + font-weight: 600; + color: #1d4ed8; + +} + +.filter-group { + display: flex; + flex-direction: column; + gap: 0.25rem; +} + +.filter-group label { + font-weight: 500; + color: #6b7280; + font-size: 0.8rem; +} + +.filter-select { + padding: 0.5rem 0.75rem; + background: white; + border: 1px solid #d1d5db; + border-radius: 6px; + color: #1f2937; + font-size: 0.9rem; + cursor: pointer; + transition: border-color 0.2s ease; +} + +.filter-select:focus { + outline: none; + border-color: #2563eb; +} + +.leaderboard-container { + background: white; + border-radius: 12px; + border: 1px solid #e5e7eb; + overflow: hidden; + margin-bottom: 
2rem; + width: 100%; + box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1); +} + +.leaderboard-scroll { + width: 100%; + overflow-x: auto; + overflow-y: hidden; + -webkit-overflow-scrolling: touch; +} + +.leaderboard-scroll::-webkit-scrollbar { + height: 8px; +} + +.leaderboard-scroll::-webkit-scrollbar-thumb { + background: #cbd5f5; + border-radius: 999px; +} + +.leaderboard-scroll::-webkit-scrollbar-track { + background: transparent; +} + + +:root { + --lb-grid: + minmax(0, 0.5fr) /* Select */ + minmax(0, 0.7fr) /* Rank */ + minmax(0, 2.0fr) /* Name */ + minmax(0, 1.1fr) /* Affiliation */ + minmax(0, 0.9fr) /* Arena */ + minmax(0, 0.9fr) /* Accuracy */ + minmax(0, 0.9fr) /* Cost/1k */ + minmax(0, 0.9fr) /* Opt. Select */ + minmax(0, 0.9fr) /* Opt. Cost */ + minmax(0, 0.9fr) /* Opt. Acc */ + minmax(0, 0.9fr) /* Latency */ + minmax(0, 0.9fr); /* Robust */ +} + + +.leaderboard-header { + display: grid; + grid-template-columns: var(--lb-grid); + gap: 0.75rem; + padding: 1rem 1.5rem; + background: #f8fafc; + font-weight: 600; + color: #6b7280; + font-size: 0.85rem; + text-transform: uppercase; + letter-spacing: 0.05em; + border-bottom: 2px solid #ddd; + align-items: center; +} + +.leaderboard-header > div, +.leaderboard-row > div { + padding: 0.75rem 0.75rem; + overflow: hidden; + text-overflow: ellipsis; + white-space: nowrap; +} + + + +.leaderboard-body { + max-height: none; + overflow: visible; +} + +.leaderboard-row { + display: grid; + grid-template-columns: var(--lb-grid); + gap: 0.5rem; + padding: 0.5rem 0.75rem; + border-bottom: 1px solid #eee; + transition: all 0.2s ease; + align-items: center; + background: white; +} + +.leaderboard-row > div { + padding: 8px 12px; + overflow: hidden; + /* text-overflow: ellipsis; */ + white-space: nowrap; + text-align: center; +} + + +.leaderboard-row:hover { + background: #f8fafc; +} + +.leaderboard-row:nth-child(even) { + background: #f9fafb; +} + +.leaderboard-row:nth-child(even):hover { + background: #f3f4f6; +} + +.router-info { 
+ display: flex; + flex-direction: column; + gap: 0.35rem; + align-items: flex-start; +} + +.select-col { + display: flex; + justify-content: center; + align-items: center; +} + +.select-col input[type='checkbox'] { + width: 16px; + height: 16px; + cursor: pointer; +} + +.router-model-card { + margin-top: 0.75rem; + background: #ffffff; + border-radius: 16px; + padding: 1.25rem 1.5rem; + border: 1px solid rgba(226, 232, 240, 0.9); + display: flex; + flex-direction: column; + gap: 1rem; + box-shadow: 0 12px 30px rgba(15, 23, 42, 0.08); +} + +.router-card-header { + display: flex; + justify-content: space-between; + gap: 1rem; + flex-wrap: wrap; +} + +.router-card-title { + display: flex; + flex-direction: column; + gap: 0.3rem; +} + +.router-type-pill { + display: inline-flex; + align-items: center; + gap: 0.3rem; + padding: 0.2rem 0.75rem; + border-radius: 999px; + font-size: 0.75rem; + font-weight: 700; + letter-spacing: 0.05em; + text-transform: uppercase; + width: fit-content; +} + +.router-type-pill.open-source { + + background: rgba(59, 130, 246, 0.18); + color: #1d4ed8; +} + +.router-type-pill.closed-source { + background: rgba(16, 185, 129, 0.15); + color: #047857; +} + +.router-card-affiliation { + margin: 0; + font-size: 0.95rem; + color: #475569; +} + +.router-card-actions { + display: flex; + flex-wrap: wrap; + gap: 0.5rem; + justify-content: flex-end; +} + +.card-label { + margin: 0; + font-size: 0.75rem; + letter-spacing: 0.05em; + color: #94a3b8; + text-transform: uppercase; +} + +.router-card-description { + margin: 0; + color: #475569; + line-height: 1.4; +} + +.router-card-section { + border: 1px solid #e2e8f0; + border-radius: 12px; + padding: 0.75rem 1rem; + background: #f8fafc; +} + +.model-pool { + display: flex; + flex-wrap: wrap; + gap: 0.35rem; + overflow-x: auto; + padding-bottom: 0.25rem; +} + +.model-chip { + border-radius: 999px; + padding: 0.2rem 0.7rem; + background: white; + border: 1px solid #e2e8f0; + font-size: 0.85rem; + color: 
#475569; + white-space: nowrap; +} + +.card-compare-btn { + border: none; + border-radius: 999px; + padding: 0.35rem 0.9rem; + font-weight: 600; + cursor: pointer; + background: #dbeafe; + color: #1d4ed8; +} + +.card-compare-btn.selected { + background: #1d4ed8; + color: white; +} + +.card-compare-btn:disabled { + opacity: 0.5; + cursor: not-allowed; +} + +.card-solo-btn { + border: 1px solid #1d4ed8; + border-radius: 999px; + padding: 0.35rem 1rem; + background: #1d4ed8; + color: white; + font-weight: 600; + cursor: pointer; + transition: box-shadow 0.2s ease; +} + +.card-solo-btn:hover { + box-shadow: 0 8px 20px rgba(29, 78, 216, 0.25); +} + +.router-card-metrics-grid { + display: grid; + grid-template-columns: repeat(auto-fit, minmax(140px, 1fr)); + gap: 0.75rem; +} + +.router-card-metric { + border: 1px solid #e2e8f0; + border-radius: 12px; + padding: 0.65rem 0.75rem; + background: #ffffff; + display: flex; + flex-direction: column; + gap: 0.3rem; +} + +.router-card-metric .metric-label { + margin: 0; + font-size: 0.75rem; + letter-spacing: 0.05em; + color: #94a3b8; + text-transform: uppercase; +} + +.router-card-metric .metric-value { + margin: 0; + font-size: 1.25rem; + color: #0f172a; + font-weight: 700; +} + +.router-card-links { + display: flex; + flex-wrap: wrap; + gap: 0.5rem; +} + +.router-card-link { + display: inline-flex; + align-items: center; + gap: 0.35rem; + font-size: 0.85rem; + font-weight: 600; + color: #2563eb; + text-decoration: none; + padding: 0.3rem 0.65rem; + border-radius: 999px; + background: rgba(37, 99, 235, 0.08); +} + +.router-card-link .hf-logo { + width: 14px; + height: 14px; +} + +.compare-fab { + position: fixed; + bottom: 2rem; + right: 2rem; + background: #1d4ed8; + color: white; + border: none; + border-radius: 999px; + padding: 0.85rem 1.4rem; + font-weight: 600; + box-shadow: 0 15px 35px rgba(37, 99, 235, 0.35); + cursor: pointer; + display: flex; + align-items: center; + gap: 0.5rem; + z-index: 50; +} + 
+.compare-modal-overlay { + position: fixed; + inset: 0; + background: rgba(15, 23, 42, 0.65); + display: flex; + justify-content: center; + align-items: center; + z-index: 1000; + padding: 1rem; +} + +.compare-modal { + background: white; + border-radius: 24px; + padding: 1.5rem; + width: min(1200px, 100%); + height: 82vh; /* 👈 FIXED height */ + overflow: scroll; /* 👈 prevent modal scrolling */ + display: flex; + flex-direction: column; +} + +.compare-modal-body { + min-height: 0; +} +.compare-modal-header { + display: flex; + justify-content: space-between; + align-items: flex-start; + gap: 1rem; +} + +.compare-modal-header h3 { + margin: 0; + font-size: 1.4rem; +} + +.compare-modal-header p { + margin: 0.15rem 0 0; + color: #64748b; +} + +.compare-modal-body { + display: grid; + grid-template-columns: 320px minmax(0, 1fr); + gap: 1.5rem; + align-items: stretch; +} + +@media (max-width: 768px) { + .compare-modal-overlay { + align-items: flex-start; + padding: 0; + overflow-y: auto; + } + + .compare-modal { + width: 100%; + max-height: none; + min-height: 100vh; + border-radius: 0; + padding: 1rem; + } + + .compare-modal-body { + grid-template-columns: 1fr; + } +} + + +.compare-modal-sidebar { + display: flex; + flex-direction: column; + gap: 1rem; +} + +.compare-modal-main { + display: flex; + flex-direction: column; + gap: 1rem; +} + +.compare-main-controls { + display: flex; + align-items: center; + justify-content: space-between; + gap: 1rem; + width: 100%; +} + + +.compare-main-controls .link-button { + white-space: nowrap; +} + +.compare-toolbar { + display: flex; + align-items: center; + gap: 0.75rem; + width: 100%; + flex-wrap: wrap; /* 👈 allow wrapping */ +} + + + +.compare-toolbar-left { + display: flex; + flex-wrap: wrap; + gap: 0.5rem; +} + +.compare-toolbar-center { + display: flex; + justify-content: center; +} + +.compare-toolbar-right { + display: flex; + align-items: center; + gap: 0.5rem; + margin-left: auto; /* 👈 push right on desktop */ + 
flex-wrap: wrap; +} + +.scope-dropdown { + position: relative; + + width: 220px; + max-width: 100%; +} + +.scope-dropdown-toggle { + width: 100%; + display: flex; + align-items: center; + justify-content: space-between; + gap: 0.25rem; + padding: 0.45rem 0.85rem; + border-radius: 999px; + border: 1px solid #e2e8f0; + background: #ffffff; + + font-size: 0.9rem; + font-weight: 600; + color: #1f2937; + transition: border-color 0.2s ease, box-shadow 0.2s ease; +} + +.scope-dropdown-toggle.open { + border-color: #2563eb; + box-shadow: 0 0 0 3px rgba(37, 99, 235, 0.15); +} + +.scope-dropdown-menu { + position: absolute; + top: calc(100% + 0.4rem); + right: 0; + width: 100%; + min-width: 200px; + background: #ffffff; + border: 1px solid #e2e8f0; + border-radius: 14px; + box-shadow: 0 18px 36px rgba(15, 23, 42, 0.2); + padding: 0.35rem; + display: flex; + + flex-direction: column; + z-index: 20; +} + +.scope-dropdown-option { + border: none; + background: transparent; + padding: 0.45rem 0.6rem; + text-align: left; + border-radius: 10px; + font-size: 0.9rem; + font-weight: 500; + color: #1f2937; + cursor: pointer; + transition: background 0.15s ease, color 0.15s ease; + overflow: hidden; + text-overflow: ellipsis; + white-space: nowrap; + max-width: 100%; +} + +.scope-dropdown-option:hover, +.scope-dropdown-option.selected { + background: #eff6ff; + color: #1d4ed8; +} + +@media (max-width: 640px) { + .scope-dropdown { + width: 100%; + } +} + +.axis-label { + font-size: 0.85rem; + fill: #1f2937; + paint-order: stroke; + stroke: rgba(255, 255, 255, 0.9); + stroke-width: 3px; + stroke-linecap: butt; + stroke-linejoin: round; +} + +.axis-label--clickable { + cursor: pointer; +} + +.scope-dropdown-label { + flex: 1; /* take remaining space */ + min-width: 0; /* 🚨 critical for flex ellipsis */ + overflow: hidden; + text-overflow: ellipsis; + white-space: nowrap; +} + + + + +.compare-modal-chart.primary { + min-height: 480px; +} + +@media (max-width: 768px) { + 
.compare-modal-chart.primary { + min-height: 360px; + } +} + +.view-toggle { + display: flex; + flex-wrap: wrap; + gap: 0.5rem; + justify-content: flex-start; +} + +.control-section { + display: flex; + flex-direction: column; + gap: 0.45rem; +} + +.compare-search-input { + display: flex; + align-items: center; + border: 1px solid #e2e8f0; + border-radius: 12px; + padding: 0.35rem 0.75rem; +} + +.compare-search-input input { + border: none; + outline: none; + width: 100%; + font-size: 0.95rem; +} + +.search-results { + display: flex; + flex-direction: column; + gap: 0.4rem; + border: 1px solid #e2e8f0; + border-radius: 14px; + background: #fff; + box-shadow: 0 12px 32px rgba(15, 23, 42, 0.12); + padding: 0.4rem; + max-height: 180px; + overflow-y: auto; +} + +.search-results.collapsed { + min-height: 0; + max-height: 0; + height: 0; + padding: 0; + border: none; + box-shadow: none; + overflow: hidden; +} + +.search-result-item { + display: flex; + justify-content: space-between; + align-items: center; + border: none; + border-radius: 10px; + padding: 0.55rem 0.85rem; + background: transparent; + font-weight: 500; + color: #334155; + cursor: pointer; + transition: all 0.2s ease; +} + +.search-result-item:not(.disabled):hover { + background: #eff6ff; + color: #1d4ed8; +} + +.pill-group { + display: flex; + gap: 0.35rem; + flex-wrap: nowrap; + overflow-x: auto; + padding-bottom: 0.35rem; +} + +.pill-button { + border: 1px solid #e2e8f0; + border-radius: 999px; + background: white; + padding: 0.25rem 0.9rem; + font-size: 0.9rem; + font-weight: 500; + color: #475569; + cursor: pointer; + transition: all 0.2s ease; + white-space: nowrap; } +.pill-button.active { + background: #1d4ed8; + border-color: #1d4ed8; + color: white; + box-shadow: 0 6px 20px rgba(29, 78, 216, 0.2); +} -.beta-value { - font-variant-numeric: tabular-nums; +.link-button { + border: none; + background: none; color: #2563eb; + font-weight: 600; + cursor: pointer; + padding: 0; } -.beta-slider { - 
width: 100%; - accent-color: #2563eb; +.helper-text { + font-size: 0.85rem; + color: #94a3b8; } -.beta-hints { +.selected-router-chips { display: flex; - justify-content: space-between; - font-size: 0.9rem; - color: #363636; + gap: 0.35rem; + overflow-x: auto; + padding-bottom: 0.35rem; } -.beta-weights { - display: flex; - gap: 1rem; +.router-chip { + border: 1px solid #e2e8f0; + border-radius: 999px; + padding: 0.3rem 0.9rem; + background: #fff; + font-weight: 600; + cursor: pointer; + white-space: nowrap; + display: inline-flex; align-items: center; - flex-wrap: wrap; - font-size: 0.9rem; - color: #1f2937; + gap: 0.35rem; } -.beta-weight { +.router-chip-dot { + width: 0.5rem; + height: 0.5rem; + border-radius: 999px; + display: inline-flex; +} + +.compare-modal-chart { + min-height: 320px; display: flex; flex-direction: column; - background: #eef2ff; - border-radius: 8px; - padding: 0.5rem 0.75rem; + justify-content: center; } -.beta-weight strong { - font-size: 1rem; - font-weight: 700; +.difficulty-bar-panel, +.deferral-chart-panel { + min-height: 320px; + display: flex; + flex-direction: column; + justify-content: center; } -.beta-weight-pill { - background: #dbeafe; - border-radius: 999px; - padding: 0.5rem 1rem; - font-weight: 600; - color: #1d4ed8; +@media (max-width: 768px) { + .compare-modal-chart { + min-height: 260px; + } + .difficulty-bar-panel, + .deferral-chart-panel { + min-height: 260px; + } } -.filter-group { - display: flex; - flex-direction: column; - gap: 0.25rem; +.difficulty-bar-panel { + background: #f8fafc; + border-radius: 16px; + padding: 1rem; + border: 1px solid #e2e8f0; } -.filter-group label { - font-weight: 500; - color: #6b7280; - font-size: 0.8rem; +.difficulty-bar-panel.empty { + display: flex; + justify-content: center; + align-items: center; + min-height: 200px; + color: #94a3b8; + border-style: dashed; } -.filter-select { - padding: 0.5rem 0.75rem; - background: white; - border: 1px solid #d1d5db; - border-radius: 6px; - 
color: #1f2937; - font-size: 0.9rem; - cursor: pointer; - transition: border-color 0.2s ease; +.deferral-chart-panel { + background: #f8fafc; + border-radius: 16px; + padding: 1rem; + border: 1px solid #e2e8f0; } -.filter-select:focus { - outline: none; - border-color: #2563eb; +.deferral-chart-panel.empty { + display: flex; + justify-content: center; + align-items: center; + min-height: 200px; + color: #94a3b8; + border-style: dashed; } -.leaderboard-container { - background: white; - border-radius: 12px; - border: 1px solid #e5e7eb; - overflow: hidden; - margin-bottom: 2rem; - width: 100%; - box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1); +.panel-header { + display: flex; + align-items: flex-start; + justify-content: space-between; + margin-bottom: 0.75rem; } -.leaderboard-scroll { - width: 100%; - overflow-x: auto; - overflow-y: hidden; - -webkit-overflow-scrolling: touch; +.panel-eyebrow { + font-size: 0.75rem; + text-transform: uppercase; + letter-spacing: 0.08em; + font-weight: 600; + color: #94a3b8; + margin: 0; } -.leaderboard-scroll::-webkit-scrollbar { - height: 8px; +.panel-title { + margin: 0.2rem 0 0; + font-size: 1.05rem; + color: #0f172a; } -.leaderboard-scroll::-webkit-scrollbar-thumb { - background: #cbd5f5; - border-radius: 999px; +.panel-context { + margin: 0.2rem 0 0; + font-size: 0.85rem; + color: #64748b; } -.leaderboard-scroll::-webkit-scrollbar-track { - background: transparent; +.filter-select { + border: 1px solid #e2e8f0; + border-radius: 10px; + padding: 0.45rem 0.75rem; + background: white; + font-size: 0.95rem; + color: #0f172a; } - -:root { - --lb-grid: - minmax(0, 0.7fr) /* Rank */ - minmax(0, 1.5fr) /* Name */ - minmax(0, 1.3fr) /* Affiliation */ - minmax(0, 1.6fr) /* Type */ - minmax(0, 0.9fr) /* Arena */ - minmax(0, 1fr) /* Accuracy */ - minmax(0, 0.9fr) /* Cost/1k */ - minmax(0, 1.1fr) /* Opt. Select */ - minmax(0, 0.9fr) /* Opt. Cost */ - minmax(0, 0.9fr) /* Opt. 
Acc */ - minmax(0, 0.9fr) /* Latency */ - minmax(0, 0.9fr) /* Robust */ +.deferral-tooltip { + background: white; + border: 1px solid #e2e8f0; + border-radius: 12px; + padding: 0.65rem 0.9rem; + box-shadow: 0 10px 30px rgba(15, 23, 42, 0.15); + font-size: 0.85rem; + color: #0f172a; } - -.leaderboard-header { - display: grid; - grid-template-columns: var(--lb-grid); - gap: 0.75rem; - padding: 1rem 1.5rem; - background: #f8fafc; +.deferral-tooltip-name { + margin: 0 0 0.35rem; font-weight: 600; - color: #6b7280; - font-size: 0.85rem; - text-transform: uppercase; - letter-spacing: 0.05em; - border-bottom: 2px solid #ddd; - align-items: center; + color: #111827; } -.leaderboard-header > div, -.leaderboard-row > div { - padding: 0.75rem 0.75rem; - overflow: hidden; - text-overflow: ellipsis; - white-space: nowrap; +.control-section label { + font-size: 0.85rem; + font-weight: 600; + color: #0f172a; } - - -.leaderboard-body { - max-height: none; - overflow: visible; +.helper-text { + font-size: 0.85rem; + color: #94a3b8; } -.leaderboard-row { - display: grid; - grid-template-columns: var(--lb-grid); - gap: 0.5rem; - padding: 0.5rem 0.75rem; - border-bottom: 1px solid #eee; - transition: all 0.2s ease; +.model-card-modal-overlay { + position: fixed; + inset: 0; + background: rgba(15, 23, 42, 0.65); + display: flex; + justify-content: center; align-items: center; - background: white; -} - -.leaderboard-row > div { - padding: 8px 12px; - overflow: hidden; - /* text-overflow: ellipsis; */ - white-space: nowrap; - text-align: center; + z-index: 1000; + padding: 1rem; } - -.leaderboard-row:hover { - background: #f8fafc; +.model-card-modal { + background: white; + border-radius: 24px; + padding: 1.5rem; + width: min(620px, 100%); + max-height: 90vh; + overflow-y: auto; } -.leaderboard-row:nth-child(even) { - background: #f9fafb; +.model-card-modal-header { + display: flex; + justify-content: space-between; + align-items: center; + margin: 0; + padding: 0; } 
-.leaderboard-row:nth-child(even):hover { - background: #f3f4f6; +.model-card-modal-header h3 { + margin: 0; } .leaderboard-row.top-three { @@ -488,8 +1259,7 @@ } .metrics-col, -.rank-col, -.type-col { +.rank-col { white-space: nowrap; } @@ -530,6 +1300,11 @@ color: #92400e; } +.router-link-pill.primary { + background: rgba(15, 23, 42, 0.08); + color: #111827; +} + .router-link-pill:hover { background: #2563eb; color: #ffffff; @@ -550,21 +1325,12 @@ } .router-name-link { - text-decoration: none; + background: none; + border: none; + padding: 0; + text-align: left; color: inherit; - transition: color 0.2s ease; -} - -.router-name-link:hover { - color: #2563eb; -} - -.router-name-link .router-name { - margin: 0; - transition: color 0.2s ease; - display: inline; - text-decoration: none; - position: relative; + cursor: pointer; } .router-name-link .router-name::after { @@ -600,34 +1366,42 @@ overflow-wrap: break-word; } -.type-col { +.router-name-row { display: flex; - justify-content: center; align-items: center; - width: 120px; - flex-shrink: 0; + gap: 0.5rem; + flex-wrap: wrap; } -.type-badge { - display: inline-block; - width: auto; /* grows with text */ - padding: 0.25rem 0.75rem; - border-radius: 12px; - font-size: 0.75rem; +.router-type-indicator { + display: inline-flex; + align-items: center; + gap: 0.25rem; + padding: 0.15rem 0.5rem; + border-radius: 999px; + font-size: 0.7rem; font-weight: 600; text-transform: uppercase; - white-space: normal; /* ✅ allows wrapping */ - text-align: center; + letter-spacing: 0.04em; } -.type-badge.open-source { - background: #3b82f6; - color: white; +.router-type-indicator svg { + width: 12px; + height: 12px; } -.type-badge.closed-source { - background: #10b981; - color: white; +.router-type-indicator.open-source { + background: rgba(59, 130, 246, 0.15); + color: #1d4ed8; +} + +.router-type-indicator.closed-source { + background: rgba(16, 185, 129, 0.18); + color: #047857; +} + +.router-type-text { + line-height: 1; } 
.metrics-col { @@ -649,6 +1423,18 @@ color: #1f2937; } +.score--best { + background: rgba(34, 197, 94, 0.18); + color: #065f46; + border-radius: 999px; + padding: 0.1rem 0.45rem; + font-weight: 700; +} + +.score--best--invert { + color: #0f172a; +} + .progress-bar { width: 80px; height: 6px; @@ -927,6 +1713,21 @@ min-width: 1200px; } + .compare-modal-body { + grid-template-columns: minmax(0, 1fr); + } + + .compare-modal-sidebar { + flex-direction: row; + flex-wrap: wrap; + gap: 1rem; + } + + .compare-modal-main { + order: 3; + gap: 1rem; + } + .controls { flex-direction: column; align-items: stretch; diff --git a/src/pages/LeaderboardPage.tsx b/src/pages/LeaderboardPage.tsx index 796f9f5..e1652a5 100644 --- a/src/pages/LeaderboardPage.tsx +++ b/src/pages/LeaderboardPage.tsx @@ -1,9 +1,11 @@ import React, { useState, useMemo } from 'react'; -import { Trophy, Search, Medal, Github } from 'lucide-react'; +import { Trophy, Search, Medal, Github, Layers, Link as LinkIcon, Unlock, Lock } from 'lucide-react'; import { Router } from '../types'; import { routers } from '../data/routerData'; import SpiderChart from '../components/SpiderChart'; import DeferralCurve from '../components/DeferralCurve'; +import RouterModelCard from '../components/RouterModelCard'; +import CompareModal from '../components/CompareModal'; import './LeaderboardPage.css'; import 'katex/dist/katex.min.css'; import { InlineMath, BlockMath } from 'react-katex'; @@ -14,6 +16,14 @@ type RouterWithDynamicArena = Router & { dynamicArena: number }; const COST_MIN = 0.0044; const COST_MAX = 200; +const DEFAULT_BETA = 0.1; +const defaultCostWeight = DEFAULT_BETA / (1 + DEFAULT_BETA); + +// 👇 snapping behavior +const SNAP_TARGET = defaultCostWeight; // ~0.0909 +const SNAP_THRESHOLD = 0.015; // how close before snapping + + const computeNormalizedCost = (costPer1k: number): number => { // const safeCost = Math.max(costPer1k, COST_MIN); const numerator = Math.log2(COST_MAX) - Math.log2(costPer1k); @@ -31,9 +41,6 
@@ const computeArenaScore = (router: Router, beta: number): number => { return (((1 + beta) * accuracy * normalizedCost) / denominator) * 100; }; -const DEFAULT_BETA = 0.1; -const defaultCostWeight = DEFAULT_BETA / (1 + DEFAULT_BETA); - const LeaderboardPage: React.FC = () => { const [searchTerm, setSearchTerm] = useState(''); const [filterType, setFilterType] = useState<'all' | 'open-source' | 'closed-source'>('all'); @@ -43,8 +50,13 @@ const LeaderboardPage: React.FC = () => { const [activeTab, setActiveTab] = useState<'spider' | 'deferral'>('spider'); const [costWeight, setCostWeight] = useState(defaultCostWeight); + const [selectedCompareIds, setSelectedCompareIds] = useState([]); + const [isCompareModalOpen, setIsCompareModalOpen] = useState(false); const beta = costWeight / (1 - costWeight); const accuracyWeight = 1 - costWeight; + const MAX_COMPARE = 3; + const maxCompareReached = selectedCompareIds.length >= MAX_COMPARE; + const [modelCardRouter, setModelCardRouter] = useState(null); // Deferral curve data const openSourcePoints = { @@ -65,17 +77,6 @@ const LeaderboardPage: React.FC = () => { 'vLLM-SR': { accuracy: 0.6665, cost_per_1k: 1.61393 }, }; - // Helper function to calculate average score for overall ranking - const calculateAverageScore = (metrics: Router['metrics']): number => { - const scores: number[] = [metrics.arenaScore]; - if (metrics.optimalSelectionScore !== null) scores.push(metrics.optimalSelectionScore); - if (metrics.optimalCostScore !== null) scores.push(metrics.optimalCostScore); - if (metrics.optimalAccScore !== null) scores.push(metrics.optimalAccScore); - if (metrics.robustnessScore !== null) scores.push(metrics.robustnessScore); - if (metrics.latencyScore !== null) scores.push(metrics.latencyScore); - return scores.reduce((sum, score) => sum + score, 0) / scores.length; - }; - const filteredAndSortedRouters = useMemo(() => { const metricKeyMap = { arena: 'arenaScore', @@ -118,6 +119,64 @@ const LeaderboardPage: React.FC = () => 
{ }); }, [searchTerm, filterType, activeMetric, beta]); + /** Best value seen so far for each metric column; an entry stays null until a non-null value is recorded. */ type MetricBestMap = { + dynamicArena: number | null; + accuracy: number | null; + costPer1k: number | null; + optimalSelectionScore: number | null; + optimalCostScore: number | null; + optimalAccScore: number | null; + latencyScore: number | null; + robustnessScore: number | null; + }; + /** Walks filteredAndSortedRouters and collects the best value per metric; recomputed only when that list changes. */ + const bestMetricValues = useMemo(() => { + const best: MetricBestMap = { + dynamicArena: null, + accuracy: null, + costPer1k: null, + optimalSelectionScore: null, + optimalCostScore: null, + optimalAccScore: null, + latencyScore: null, + robustnessScore: null, + }; + /* Stores value under key when it beats the current best; null/undefined values are skipped. With favorLower set, a smaller value wins instead of a larger one. */ + const updateBest = ( + key: keyof MetricBestMap, + value: number | null | undefined, + favorLower = false + ) => { + if (value === null || value === undefined) return; + const current = best[key]; + if (current === null) { + best[key] = value; + return; + } + if ((!favorLower && value > current) || (favorLower && value < current)) { + best[key] = value; + } + }; + + filteredAndSortedRouters.forEach(router => { + updateBest('dynamicArena', router.dynamicArena); + updateBest('accuracy', router.metrics.accuracy); + updateBest('costPer1k', router.metrics.costPer1k, true); /* favorLower: the smallest cost is recorded */ + updateBest('optimalSelectionScore', router.metrics.optimalSelectionScore); + updateBest('optimalCostScore', router.metrics.optimalCostScore); + updateBest('optimalAccScore', router.metrics.optimalAccScore); + updateBest('latencyScore', router.metrics.latencyScore); + updateBest('robustnessScore', router.metrics.robustnessScore); + }); + + return best; + }, [filteredAndSortedRouters]); + /** Returns true when value lies within 0.0001 of best; false if value is null/undefined or best is null. */ + const isBestValue = (value: number | null | undefined, best: number | null) => { + if (value === null || value === undefined || best === null) return false; + return Math.abs(value - best) < 0.0001; + }; + // const getRankBadge = (rank: number) => { // if (rank === 1) return 'rank-1'; // if (rank === 2) return 'rank-2'; @@ -151,6 +210,34 @@ const LeaderboardPage: React.FC = () => { } }; + const
toggleCompareSelection = (routerId: string) => {
+    /* Toggle semantics: drop the id when it is already selected; otherwise
+       append it, unless MAX_COMPARE ids are already selected, in which case
+       the selection is returned unchanged. */
+    setSelectedCompareIds(prev => {
+      if (prev.includes(routerId)) {
+        return prev.filter(id => id !== routerId);
+      }
+      if (prev.length >= MAX_COMPARE) {
+        return prev;
+      }
+      return [...prev, routerId];
+    });
+  };
+
+  /**
+   * Replaces the comparison selection with just this router, opens the
+   * compare modal and clears modelCardRouter.
+   */
+  const handleSoloCompare = (routerId: string) => {
+    setSelectedCompareIds([routerId]);
+    setIsCompareModalOpen(true);
+    setModelCardRouter(null);
+  };
+
+  /**
+   * Removes a router from the comparison selection and closes the compare
+   * modal once nothing would be left selected.
+   */
+  const handleRemoveFromCompare = (routerId: string) => {
+    /* The updater stays pure (it only derives the next list). React may call
+       updater functions more than once (e.g. in Strict Mode), so the modal
+       state must not be changed from inside it. */
+    setSelectedCompareIds(prev => prev.filter(id => id !== routerId));
+    /* Decide about closing from the current render's selection: the list
+       becomes empty exactly when it holds no id other than routerId. */
+    const willBeEmpty = selectedCompareIds.every(id => id === routerId);
+    if (willBeEmpty) {
+      setIsCompareModalOpen(false);
+    }
+  };
+
   return (
@@ -262,10 +349,19 @@ const LeaderboardPage: React.FC = () => { step={0.01} value={costWeight} onChange={event => { - const value = parseFloat(event.target.value); - const clamped = Math.min(0.95, Math.max(0.05, value)); - setCostWeight(clamped); + let value = parseFloat(event.target.value); + + // Clamp first + value = Math.min(0.95, Math.max(0.05, value)); + + // Snap to default if close enough + if (Math.abs(value - SNAP_TARGET) < SNAP_THRESHOLD) { + value = SNAP_TARGET; + } + + setCostWeight(value); }} + className="beta-slider" />
@@ -282,13 +378,16 @@ const LeaderboardPage: React.FC = () => {
{/* Leaderboard Table */} +
+
+ +
Rank
Router
Affiliation
-
Type
Arena
Accuracy
Cost/1K
@@ -299,29 +398,62 @@ const LeaderboardPage: React.FC = () => {
Robust
+
{filteredAndSortedRouters.map((router, index) => { + const isSelectedForCompare = selectedCompareIds.includes(router.id); const primaryLink = router.websiteUrl || router.paperUrl || router.githubUrl; + const TypeIcon = router.type === 'open-source' ? Unlock : Lock; + const typeLabel = router.type === 'open-source' ? 'Open' : 'Closed'; + const typeDescription = + router.type === 'open-source' ? 'Open-source router' : 'Closed-source router'; return (
+
+ toggleCompareSelection(router.id)} + disabled={!isSelectedForCompare && maxCompareReached} + aria-label={`Select ${router.name} for comparison`} + /> +
{renderRankBadge(index + 1)}
- {primaryLink ? ( - + + + + + + +
+ {(primaryLink || router.githubUrl || router.huggingfaceUrl) && (
+ {primaryLink && ( + + + + )} {router.githubUrl && ( { {router.affiliation}
-
- {router.type} -
-
- {router.dynamicArena.toFixed(1)} + + {router.dynamicArena.toFixed(1)} +
- {router.metrics.accuracy.toFixed(1)} + + {router.metrics.accuracy.toFixed(1)} +
- ${router.metrics.costPer1k.toFixed(2)} + + ${router.metrics.costPer1k.toFixed(2)} +
- - {router.metrics.optimalSelectionScore !== null - ? router.metrics.optimalSelectionScore.toFixed(1) - : '—'} - + {router.metrics.optimalSelectionScore !== null ? ( + + {router.metrics.optimalSelectionScore.toFixed(1)} + + ) : ( + + )}
- - {router.metrics.optimalCostScore !== null - ? router.metrics.optimalCostScore.toFixed(1) - : '—'} - + {router.metrics.optimalCostScore !== null ? ( + + {router.metrics.optimalCostScore.toFixed(1)} + + ) : ( + + )}
- - {router.metrics.optimalAccScore !== null - ? router.metrics.optimalAccScore.toFixed(1) - : '—'} - + {router.metrics.optimalAccScore !== null ? ( + + {router.metrics.optimalAccScore.toFixed(1)} + + ) : ( + + )}
- - {router.metrics.latencyScore !== null - ? router.metrics.latencyScore.toFixed(1) - : '—'} - + {router.metrics.latencyScore !== null ? ( + + {router.metrics.latencyScore.toFixed(1)} + + ) : ( + + )}
- - {router.metrics.robustnessScore !== null - ? router.metrics.robustnessScore.toFixed(1) - : '—'} - + {router.metrics.robustnessScore !== null ? ( + + {router.metrics.robustnessScore.toFixed(1)} + + ) : ( + + )}
-
+
); })}
+
+ {modelCardRouter && ( +
setModelCardRouter(null)}> +
event.stopPropagation()}> +
+

{modelCardRouter.name}

+ +
+ +
+
+ )} + {selectedCompareIds.length > 0 && ( + + )} + {isCompareModalOpen && selectedCompareIds.length > 0 && ( + setIsCompareModalOpen(false)} + onAdd={toggleCompareSelection} + onRemove={handleRemoveFromCompare} + maxSelected={MAX_COMPARE} + /> + )} {/* Visualizations Section with Tabs */}
@@ -676,6 +896,7 @@ platforms.
+ );