Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .github/dashboard/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -342,7 +342,7 @@ <h1>Benchmark Dashboard</h1>
1. CONSTANTS & COLOR PALETTE
================================================================ */
// HSL-based palette: each model gets a distinct base hue; backend offsets keep
// ATOM and ATOM-vLLM visually related but not identical.
// ATOM, ATOM-vLLM, and ATOM-SGLang visually related but not identical.
const MODEL_HUES = {
'DeepSeek-R1-0528': 210, // blue
'DeepSeek-R1-0528-mtp3': 175, // cyan/teal — distinct from base DeepSeek
Expand All @@ -356,6 +356,7 @@ <h1>Benchmark Dashboard</h1>
const BACKEND_HUE_OFFSETS = {
'ATOM': 0,
'ATOM-vLLM': 28,
'ATOM-SGLang': -30,
};
const FALLBACK_HUES = [45, 330, 190, 30]; // yellow, pink, teal, amber
let fallbackHueIdx = 0;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/usr/bin/env python3
"""OOT-specific regression summary built on top of shared summarize helpers."""
"""Regression summary built on top of shared summarize helpers."""

from __future__ import annotations

Expand Down Expand Up @@ -39,7 +39,7 @@ def build_report(

def main() -> int:
parser = argparse.ArgumentParser(
description="Print only the OOT regression report without the full results table"
description="Print only the regression report without the full results table"
)
parser.add_argument(
"--result-dir",
Expand Down Expand Up @@ -88,4 +88,4 @@ def main() -> int:


if __name__ == "__main__":
raise SystemExit(main())
raise SystemExit(main())
19 changes: 12 additions & 7 deletions .github/scripts/oot_benchmark_summary.py → .github/scripts/plugin_benchmark_summary.py
100644 → 100755
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/usr/bin/env python3
"""Generate a resilient OOT benchmark summary table.
"""Generate a resilient benchmark summary table.

This script is intentionally tolerant of partial or total benchmark failure:
- missing result JSON => case is marked FAIL
Expand Down Expand Up @@ -143,12 +143,12 @@ def _build_rows(result_dir: Path, matrix_payload: dict) -> list[dict]:
return rows


def _print_markdown_table(rows: list[dict], run_url: str | None) -> None:
def _print_markdown_table(rows: list[dict], run_url: str | None, title: str) -> None:
total_cases = len(rows)
passed_cases = sum(1 for row in rows if row["status"] == "PASS")
failed_cases = total_cases - passed_cases

print("## OOT Benchmark Summary\n")
print(f"## {title}\n")
if run_url:
print(f"Run: {run_url}\n")
print(
Expand Down Expand Up @@ -185,11 +185,11 @@ def _print_markdown_table(rows: list[dict], run_url: str | None) -> None:


def main() -> int:
parser = argparse.ArgumentParser(description="Summarize OOT benchmark results")
parser = argparse.ArgumentParser(description="Summarize benchmark results")
parser.add_argument(
"--result-dir",
required=True,
help="Directory containing downloaded OOT benchmark JSON files",
help="Directory containing downloaded benchmark JSON files",
)
parser.add_argument(
"--matrix-json",
Expand All @@ -206,12 +206,17 @@ def main() -> int:
default=None,
help="Optional path to write a structured summary report",
)
parser.add_argument(
"--title",
default="Benchmark Summary",
help="Title for the markdown report",
)
args = parser.parse_args()

matrix_payload = json.loads(args.matrix_json)
rows = _build_rows(Path(args.result_dir), matrix_payload)

_print_markdown_table(rows, args.run_url)
_print_markdown_table(rows, args.run_url, args.title)

if args.output_json:
report = {
Expand All @@ -229,4 +234,4 @@ def main() -> int:


if __name__ == "__main__":
raise SystemExit(main())
raise SystemExit(main())
33 changes: 20 additions & 13 deletions ...hub/scripts/oot_benchmark_to_dashboard.py → .../scripts/plugin_benchmark_to_dashboard.py
100644 → 100755
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/usr/bin/env python3
"""Convert OOT benchmark JSON results to github-action-benchmark input."""
"""Convert benchmark JSON results to github-action-benchmark input."""

from __future__ import annotations

Expand All @@ -9,7 +9,6 @@
from pathlib import Path

VARIANT_RE = re.compile(r"-(mtp\d*)-")
DEFAULT_BACKEND = "ATOM-vLLM"


def derive_model_name(result_path: Path, payload: dict) -> str:
Expand Down Expand Up @@ -59,11 +58,11 @@ def is_dashboard_publish_allowed(payload: dict) -> bool:
return str(publish_flag).strip().lower() not in {"0", "false", "no"}


def build_entries(result_dir: Path, run_url: str | None) -> list[dict]:
def build_entries(result_dir: Path, run_url: str | None, default_backend: str) -> list[dict]:
entries: list[dict] = []

for result_path in sorted(result_dir.glob("*.json")):
if result_path.name == "regression_report.json":
if result_path.name == "regression_report.json" or result_path.name.endswith("_benchmark_summary.json"):
continue

try:
Expand All @@ -81,20 +80,23 @@ def build_entries(result_dir: Path, run_url: str | None) -> list[dict]:
isl = int(payload.get("random_input_len", 0))
osl = int(payload.get("random_output_len", 0))
conc = int(payload.get("max_concurrency", 0))
label_prefix = f"{DEFAULT_BACKEND}::{model} {isl}/{osl} c={conc}"
label_prefix = f"{default_backend}::{model} {isl}/{osl} c={conc}"
extra = f"Run: {run_url}" if run_url else ""
gpu_name = payload.get("gpu_name", "")
gpu_vram = payload.get("gpu_vram_gb", 0)
rocm_ver = payload.get("rocm_version", "")
oot_image_tag = payload.get("oot_image_tag", "")

# Support both OOT and SGLang image tag fields
image_tag = payload.get("oot_image_tag", payload.get("sglang_image_tag", ""))

if gpu_name:
extra += f" | GPU: {gpu_name}"
if gpu_vram:
extra += f" | VRAM: {gpu_vram}GB"
if rocm_ver:
extra += f" | ROCm: {rocm_ver}"
if oot_image_tag:
extra += f" | Docker: {oot_image_tag}"
if image_tag:
extra += f" | Docker: {image_tag}"
extra = extra or None

append_metric(
Expand Down Expand Up @@ -145,26 +147,31 @@ def build_entries(result_dir: Path, run_url: str | None) -> list[dict]:

def main() -> None:
parser = argparse.ArgumentParser(
description="Convert OOT benchmark JSON files to github-action-benchmark input"
description="Convert benchmark JSON files to github-action-benchmark input"
)
parser.add_argument(
"result_dir", help="Directory containing OOT benchmark JSON files"
"result_dir", help="Directory containing benchmark JSON files"
)
parser.add_argument("--output", required=True, help="Output JSON path")
parser.add_argument(
"--run-url",
default=None,
help="Optional GitHub Actions run URL added to each metric as extra metadata",
)
parser.add_argument(
"--default-backend",
required=True,
help="Default backend name (e.g. ATOM-SGLang or ATOM-vLLM)",
)
args = parser.parse_args()

result_dir = Path(args.result_dir)
entries = build_entries(result_dir, args.run_url)
entries = build_entries(result_dir, args.run_url, args.default_backend)

output_path = Path(args.output)
output_path.write_text(json.dumps(entries, indent=2), encoding="utf-8")
print(f"Generated {len(entries)} OOT entries at {output_path}")
print(f"Generated {len(entries)} entries at {output_path}")


if __name__ == "__main__":
main()
main()
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,6 @@
import json
from pathlib import Path

SKIP_FILENAMES = {"regression_report.json", "oot_benchmark_summary.json"}


def is_dashboard_publish_allowed(payload: dict) -> bool:
publish_flag = payload.get("dashboard_publish_allowed")
Expand All @@ -22,7 +20,7 @@ def is_dashboard_publish_allowed(payload: dict) -> bool:
def validate_result_dir(result_dir: Path) -> bool:
has_valid_result = False
for path in result_dir.rglob("*.json"):
if path.name in SKIP_FILENAMES:
if path.name == "regression_report.json" or path.name.endswith("_benchmark_summary.json"):
continue
try:
payload = json.loads(path.read_text(encoding="utf-8"))
Expand All @@ -38,7 +36,7 @@ def validate_result_dir(result_dir: Path) -> bool:

def main() -> int:
parser = argparse.ArgumentParser(
description="Validate dashboard-eligible OOT benchmark artifacts"
description="Validate dashboard-eligible benchmark artifacts"
)
parser.add_argument("result_dir", help="Directory containing downloaded artifacts")
args = parser.parse_args()
Expand All @@ -47,4 +45,4 @@ def main() -> int:


if __name__ == "__main__":
raise SystemExit(main())
raise SystemExit(main())
39 changes: 24 additions & 15 deletions .github/workflows/atom-sglang-benchmark.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -832,11 +832,12 @@ jobs:
BENCHMARK_MATRIX: ${{ needs.build-benchmark-matrix.outputs.benchmark_matrix }}
RUN_URL: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}
run: |
python3 .github/scripts/sglang_benchmark_summary.py \
python3 .github/scripts/plugin_benchmark_summary.py \
--result-dir . \
--matrix-json "${BENCHMARK_MATRIX}" \
--run-url "${RUN_URL}" \
--output-json sglang_benchmark_summary.json \
--title "SGLang Benchmark Summary" \
>> "$GITHUB_STEP_SUMMARY"

- name: Read summary stats
Expand Down Expand Up @@ -888,7 +889,7 @@ jobs:
continue
fi

if python3 .github/scripts/sglang_benchmark_validate_baseline.py /tmp/baseline_candidate; then
if python3 .github/scripts/plugin_benchmark_validate_baseline.py /tmp/baseline_candidate; then
mv /tmp/baseline_candidate /tmp/baseline
BASELINE_DIR="/tmp/baseline"
echo "Using baseline from run #$PREV_RUN_ID"
Expand Down Expand Up @@ -918,7 +919,7 @@ jobs:
>> "$GITHUB_STEP_SUMMARY"
fi

python3 .github/scripts/sglang_benchmark_regression.py \
python3 .github/scripts/plugin_benchmark_regression.py \
--result-dir . \
$BASELINE_ARG \
--output-json regression_report.json \
Expand All @@ -945,10 +946,11 @@ jobs:
- name: Transform results for benchmark dashboard
if: needs.resolve-atom-source.outputs.publish_to_dashboard == 'true' && steps.summary-stats.outputs.passed_cases != '0'
run: |
python3 .github/scripts/sglang_benchmark_to_dashboard.py \
python3 .github/scripts/plugin_benchmark_to_dashboard.py \
. \
--output benchmark-action-input.json \
--run-url "https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}"
--run-url "https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}" \
--default-backend "ATOM-SGLang"

- name: Store benchmark result to dashboard
if: needs.resolve-atom-source.outputs.publish_to_dashboard == 'true' && steps.summary-stats.outputs.passed_cases != '0'
Expand All @@ -965,19 +967,26 @@ jobs:
max-items-in-chart: 90
github-token: ${{ secrets.GITHUB_TOKEN }}

- name: Deploy custom dashboard to gh-pages
- name: Push dashboard data to gh-pages
if: needs.resolve-atom-source.outputs.publish_to_dashboard == 'true' && steps.summary-stats.outputs.passed_cases != '0'
run: |
git config user.name "github-actions[bot]"
git config user.email "github-actions[bot]@users.noreply.github.com"
CURRENT_SHA=$(git rev-parse HEAD)
cp .github/dashboard/index.html /tmp/dashboard_index.html
cp docs/assets/atom_logo.png /tmp/dashboard_logo.png
git fetch origin gh-pages
git checkout gh-pages
cp /tmp/dashboard_index.html benchmark-dashboard/index.html
cp /tmp/dashboard_logo.png benchmark-dashboard/atom_logo.png
git add benchmark-dashboard/
git diff --cached --quiet || git commit -m "Update SGLang benchmark data and dashboard"
git push origin gh-pages
git checkout "$CURRENT_SHA"

if [ ! -f benchmark-dashboard/data.js ]; then
echo "::error::benchmark-dashboard/data.js was not produced by github-action-benchmark"
exit 1
fi

cp benchmark-dashboard/data.js /tmp/sglang-benchmark-data.js

# Rebuild the publish branch from origin/gh-pages so only dashboard data is updated.
git checkout -B gh-pages-data-only origin/gh-pages
mkdir -p benchmark-dashboard
cp /tmp/sglang-benchmark-data.js benchmark-dashboard/data.js

git add benchmark-dashboard/data.js
git diff --cached --quiet || git commit -m "Update SGLang benchmark dashboard data"
git push origin HEAD:gh-pages
12 changes: 7 additions & 5 deletions .github/workflows/atom-vllm-benchmark.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -973,11 +973,12 @@ jobs:
BENCHMARK_MATRIX: ${{ needs.build-benchmark-matrix.outputs.benchmark_matrix }}
RUN_URL: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}
run: |
python3 .github/scripts/oot_benchmark_summary.py \
python3 .github/scripts/plugin_benchmark_summary.py \
--result-dir . \
--matrix-json "${BENCHMARK_MATRIX}" \
--run-url "${RUN_URL}" \
--output-json oot_benchmark_summary.json \
--title "ATOM-vLLM Benchmark Summary" \
>> "$GITHUB_STEP_SUMMARY"

- name: Read summary stats
Expand Down Expand Up @@ -1029,7 +1030,7 @@ jobs:
continue
fi

if python3 .github/scripts/oot_benchmark_validate_baseline.py /tmp/baseline_candidate; then
if python3 .github/scripts/plugin_benchmark_validate_baseline.py /tmp/baseline_candidate; then
mv /tmp/baseline_candidate /tmp/baseline
BASELINE_DIR="/tmp/baseline"
echo "Using baseline from run #$PREV_RUN_ID"
Expand Down Expand Up @@ -1059,7 +1060,7 @@ jobs:
>> "$GITHUB_STEP_SUMMARY"
fi

python3 .github/scripts/oot_benchmark_regression.py \
python3 .github/scripts/plugin_benchmark_regression.py \
--result-dir . \
$BASELINE_ARG \
--output-json regression_report.json \
Expand All @@ -1086,10 +1087,11 @@ jobs:
- name: Transform results for benchmark dashboard
if: needs.resolve-atom-source.outputs.publish_to_dashboard == 'true' && steps.summary-stats.outputs.passed_cases != '0'
run: |
python3 .github/scripts/oot_benchmark_to_dashboard.py \
python3 .github/scripts/plugin_benchmark_to_dashboard.py \
. \
--output benchmark-action-input.json \
--run-url "https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}"
--run-url "https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}" \
--default-backend "ATOM-vLLM"

- name: Store benchmark result to dashboard
if: needs.resolve-atom-source.outputs.publish_to_dashboard == 'true' && steps.summary-stats.outputs.passed_cases != '0'
Expand Down
Loading
Loading