diff --git a/src/guidellm/benchmark/outputs/html.py b/src/guidellm/benchmark/outputs/html.py
index 34cf71073..318d9d4de 100644
--- a/src/guidellm/benchmark/outputs/html.py
+++ b/src/guidellm/benchmark/outputs/html.py
@@ -29,7 +29,7 @@
GenerativeBenchmark,
GenerativeBenchmarksReport,
)
-from guidellm.schemas import DistributionSummary
+from guidellm.schemas import DistributionSummary, Percentiles
from guidellm.settings import settings
from guidellm.utils import camelize_str, recursive_key_update
from guidellm.utils.text import load_text
@@ -190,6 +190,24 @@ def percentile_rows(self) -> list[dict[str, str | float]]:
filter(lambda row: row["percentile"] in ["p50", "p90", "p95", "p99"], rows)
)
+ def model_dump(self, **kwargs) -> dict:
+ """
+        Override model_dump to drop consecutive duplicate percentile values.
+
+ This prevents visualization errors when distributions have limited data
+ points causing multiple percentiles to collapse to the same value.
+
+ :param kwargs: Arguments to pass to parent model_dump
+ :return: Dictionary with filtered percentiles
+ """
+ data = super().model_dump(**kwargs)
+
+ if "percentiles" in data and data["percentiles"]:
+ filtered_percentiles = _filter_duplicate_percentiles(data["percentiles"])
+ data["percentiles"] = filtered_percentiles
+
+ return data
+
@classmethod
def from_distribution_summary(
cls, distribution: DistributionSummary
@@ -222,6 +240,39 @@ def _create_html_report(js_data: dict[str, str], output_path: Path) -> Path:
return output_path
+def _filter_duplicate_percentiles(percentiles: dict[str, float]) -> dict[str, float]:
+ """
+ Filter out consecutive duplicate percentile values.
+
+    When distributions have very few data points, multiple percentiles can share
+    the same value, which causes visualization libraries to fail. This function
+    keeps only the largest percentile in each run of equal values, since that
+    percentile is the tightest upper bound the data supports for the shared
+    value.
+
+ :param percentiles: Dictionary of percentile names to values
+ :return: Filtered percentiles dictionary with no consecutive duplicates
+ """
+ if not percentiles:
+ return percentiles
+
+ percentile_order = list(Percentiles.model_fields.keys())
+
+ # Iterate in reverse to keep the largest percentile for each value
+ filtered = {}
+ previous_value = None
+
+ for key in reversed(percentile_order):
+ if key in percentiles:
+ current_value = percentiles[key]
+ if previous_value is None or current_value != previous_value:
+ filtered[key] = current_value
+ previous_value = current_value
+
+ # Restore original order
+ return {key: filtered[key] for key in percentile_order if key in filtered}
+
+
def _inject_data(js_data: dict[str, str], html: str) -> str:
"""
Inject JavaScript data into HTML head section.
diff --git a/tests/unit/benchmark/test_html_output.py b/tests/unit/benchmark/test_html_output.py
new file mode 100644
index 000000000..39c46a763
--- /dev/null
+++ b/tests/unit/benchmark/test_html_output.py
@@ -0,0 +1,164 @@
+## WRITTEN BY AI ##
+from guidellm.benchmark.outputs.html import _filter_duplicate_percentiles
+from guidellm.schemas import Percentiles
+
+
+def test_filter_all_same_values():
+ """Test filtering when all percentiles have the same value."""
+ percentiles = {
+ "p001": 15.288091352804853,
+ "p01": 15.288091352804853,
+ "p05": 15.288091352804853,
+ "p10": 15.288091352804853,
+ "p25": 15.288091352804853,
+ "p50": 15.288091352804853,
+ "p75": 15.288091352804853,
+ "p90": 15.288091352804853,
+ "p95": 15.288091352804853,
+ "p99": 15.288091352804853,
+ "p999": 15.288091352804853,
+ }
+
+ filtered = _filter_duplicate_percentiles(percentiles)
+
+ # Should only keep the largest (p999) for mathematical accuracy
+ assert filtered == {"p999": 15.288091352804853}
+
+
+def test_filter_consecutive_duplicates():
+ """Test filtering when some consecutive percentiles have the same value."""
+ percentiles = {
+ "p001": 15.288091352804853,
+ "p01": 15.288091352804853,
+ "p05": 15.288091352804853,
+ "p10": 15.288091352804853,
+ "p25": 15.288091352804853,
+ "p50": 16.41327511776994, # Different value
+ "p75": 16.41327511776994,
+ "p90": 17.03541629998259, # Different value
+ "p95": 17.03541629998259,
+ "p99": 17.03541629998259,
+ "p999": 17.03541629998259,
+ }
+
+ filtered = _filter_duplicate_percentiles(percentiles)
+
+ # Should keep largest of each group for mathematical accuracy
+ assert filtered == {
+ "p25": 15.288091352804853,
+ "p75": 16.41327511776994,
+ "p999": 17.03541629998259,
+ }
+
+
+def test_mixed_duplicate_groups():
+    """Test that unique values survive and duplicate groups collapse to one."""
+ percentiles = {
+ "p001": 13.181080445834912,
+ "p01": 13.181080445834912, # Same as p001
+ "p05": 13.530595573836457, # Different
+ "p10": 13.843972502554365,
+ "p25": 14.086376978251748,
+ "p50": 14.403258051191058,
+ "p75": 14.738608817056042,
+ "p90": 15.18136631856698,
+ "p95": 15.7213110894772,
+ "p99": 15.7213110894772, # Same as p95
+ "p999": 15.7213110894772, # Same as p99
+ }
+
+ filtered = _filter_duplicate_percentiles(percentiles)
+
+ # Should keep largest of each duplicate group (e.g. p999 instead of p95)
+ assert filtered == {
+ "p01": 13.181080445834912,
+ "p05": 13.530595573836457,
+ "p10": 13.843972502554365,
+ "p25": 14.086376978251748,
+ "p50": 14.403258051191058,
+ "p75": 14.738608817056042,
+ "p90": 15.18136631856698,
+ "p999": 15.7213110894772,
+ }
+
+
+def test_empty_percentiles():
+ """Test with empty percentiles dictionary."""
+ filtered = _filter_duplicate_percentiles({})
+ assert filtered == {}
+
+
+def test_single_percentile():
+ """Test with only one percentile."""
+ percentiles = {"p50": 14.403258051191058}
+ filtered = _filter_duplicate_percentiles(percentiles)
+ assert filtered == {"p50": 14.403258051191058}
+
+
+def test_two_different_values():
+ """Test with two different values."""
+ percentiles = {
+ "p25": 14.086376978251748,
+ "p50": 14.403258051191058,
+ }
+ filtered = _filter_duplicate_percentiles(percentiles)
+ assert filtered == percentiles
+
+
+def test_partial_percentiles():
+ """Test that order is maintained even with partial percentiles."""
+ percentiles = {
+ "p50": 16.41327511776994,
+ "p10": 15.288091352804853,
+ "p90": 17.03541629998259,
+ }
+
+ filtered = _filter_duplicate_percentiles(percentiles)
+
+ # Should maintain order from percentile_order list
+ assert list(filtered.keys()) == ["p10", "p50", "p90"]
+
+
+def test_model_dump_filters_duplicates():
+ """Test that model_dump applies percentile filtering."""
+ from guidellm.benchmark.outputs.html import _TabularDistributionSummary
+
+ # Create a distribution with duplicate percentiles (typical of small datasets)
+ dist = _TabularDistributionSummary(
+ mean=15.5,
+ median=15.288091352804853,
+ mode=15.288091352804853,
+ variance=0.1,
+ std_dev=0.316,
+ min=15.288091352804853,
+ max=17.03541629998259,
+ count=3,
+ total_sum=46.5,
+ percentiles=Percentiles(
+ p001=15.288091352804853,
+ p01=15.288091352804853,
+ p05=15.288091352804853,
+ p10=15.288091352804853,
+ p25=15.288091352804853,
+ p50=16.41327511776994,
+ p75=16.41327511776994,
+ p90=17.03541629998259,
+ p95=17.03541629998259,
+ p99=17.03541629998259,
+ p999=17.03541629998259,
+ ),
+ )
+
+ data = dist.model_dump()
+
+ # Check that percentiles were filtered, keeping largest of each group
+ assert data["percentiles"] == {
+ "p25": 15.288091352804853,
+ "p75": 16.41327511776994,
+ "p999": 17.03541629998259,
+ }
+
+ # Ensure other fields remain unchanged
+ assert data["mean"] == 15.5
+ assert data["median"] == 15.288091352804853
+ assert data["count"] == 3