Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Use the official ML Build container as the base.
# NOTE(review): `:latest` is a moving tag, so rebuilds of this image are not
# reproducible; pin a specific tag or digest once a known-good one is chosen.
FROM us-docker.pkg.dev/ml-oss-artifacts-published/ml-public-container/ml-build:latest

# Switch to root to install system packages.
USER root

# Install clang and llvm, which are required by XLA/tpu-raiden's bazel configuration.
# --no-install-recommends keeps optional packages out of the image, and the apt
# lists are removed in the same layer so they never persist in it.
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        clang \
        llvm \
    && rm -rf /var/lib/apt/lists/*

# NOTE: no USER instruction follows, so the `USER root` set above stays in
# effect and containers started from this image run as root. To run as the
# non-root user of the ml-build base image (if it defines one), add
# `USER <name>` here -- TODO confirm that user's name.
105 changes: 105 additions & 0 deletions benchmarks/benchmark_registry.pbtxt
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
# proto-file: https://github.com/google-ml-infra/actions/blob/main/benchmarking/proto/benchmark_registry.proto
# proto-message: BenchmarkSuite

# float32 variant of the H2D/D2H benchmark (see --dtype in runtime_flags).
benchmarks {
  name: "tpu_raiden_h2d_d2h_perf_test_fp32"
  description: "FP32 Performance Test for TPU Raiden local H2D and D2H offloading/reloading"
  owner: "raiden-dev"

  workload {
    action: "./ml_actions/actions/workload_executors/bazel"
    # Bazel target that is built and run for this benchmark.
    action_inputs {
      key: "target"
      value: "//tpu_raiden/benchmarks:multi_host_perf_test_oss"
    }
    # Flags handed to the test runner. Role/peer are intentionally not
    # hardcoded here so multi-host execution can set them dynamically.
    action_inputs {
      key: "runtime_flags"
      value: "--num_blocks=512 --num_layers=8 --parallelism=1 --dtype=float32 --warmup=5 --iters=100"
    }
  }

  environment_configs {
    id: "tpu-v5e-single-node"
    runner_label: "linux-x86-ct5lp-224-8tpu"
    # NOTE(review): this is the stock ml-build image, not one built from the
    # repository Dockerfile that adds clang/llvm -- confirm this is intended.
    container_image: "us-docker.pkg.dev/ml-oss-artifacts-published/ml-public-container/ml-build:latest"
    workload_action_inputs {
      key: "bazel_run_flags"
      value: "-c opt"
    }
  }

  # Reported metrics; each one requests only the MEAN statistic.
  metrics {
    name: "d2h_time_sec"
    unit: "s"
    stats { stat: MEAN }
  }

  metrics {
    name: "h2d_time_sec"
    unit: "s"
    stats { stat: MEAN }
  }

  metrics {
    name: "d2h_throughput_gbps"
    unit: "Gbps"
    stats { stat: MEAN }
  }

  metrics {
    name: "h2d_throughput_gbps"
    unit: "Gbps"
    stats { stat: MEAN }
  }
}

# bfloat16 variant of the H2D/D2H benchmark (see --dtype in runtime_flags).
benchmarks {
  name: "tpu_raiden_h2d_d2h_perf_test_bf16"
  description: "BF16 Performance Test for TPU Raiden local H2D and D2H offloading/reloading"
  owner: "raiden-dev"

  workload {
    action: "./ml_actions/actions/workload_executors/bazel"
    # Bazel target that is built and run for this benchmark.
    action_inputs {
      key: "target"
      value: "//tpu_raiden/benchmarks:multi_host_perf_test_oss"
    }
    # Flags handed to the test runner. Role/peer are intentionally not
    # hardcoded here so multi-host execution can set them dynamically.
    action_inputs {
      key: "runtime_flags"
      value: "--num_blocks=512 --num_layers=8 --parallelism=1 --dtype=bfloat16 --warmup=5 --iters=100"
    }
  }

  environment_configs {
    id: "tpu-v5e-single-node"
    runner_label: "linux-x86-ct5lp-224-8tpu"
    # NOTE(review): this is the stock ml-build image, not one built from the
    # repository Dockerfile that adds clang/llvm -- confirm this is intended.
    container_image: "us-docker.pkg.dev/ml-oss-artifacts-published/ml-public-container/ml-build:latest"
    workload_action_inputs {
      key: "bazel_run_flags"
      value: "-c opt"
    }
  }

  # Reported metrics; each one requests only the MEAN statistic.
  metrics {
    name: "d2h_time_sec"
    unit: "s"
    stats {
      stat: MEAN
    }
  }

  metrics {
    name: "h2d_time_sec"
    unit: "s"
    stats {
      stat: MEAN
    }
  }

  metrics {
    name: "d2h_throughput_gbps"
    unit: "Gbps"
    stats {
      stat: MEAN
    }
  }

  metrics {
    name: "h2d_throughput_gbps"
    unit: "Gbps"
    stats {
      stat: MEAN
    }
  }
}  # Closes `benchmarks`; this brace was missing, leaving the message unterminated.
46 changes: 46 additions & 0 deletions tpu_raiden/benchmarks/BUILD
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# Copyright 2026 Google LLC.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

load("@rules_python//python:defs.bzl", "py_binary")

# Targets in this package are visible to every other package unless a target
# sets its own `visibility`.
package(default_visibility = ["//visibility:public"])

# Export the benchmark script itself so other packages can reference the
# source file by label, in addition to the py_binary target that wraps it.
exports_files(["multi_host_perf_test_oss.py"])

# Benchmark binary referenced as //tpu_raiden/benchmarks:multi_host_perf_test_oss.
# It is marked testonly, so only other testonly targets may depend on it.
py_binary(
    name = "multi_host_perf_test_oss",
    testonly = True,
    srcs = ["multi_host_perf_test_oss.py"],
    deps = [
        "//tpu_raiden/frameworks/jax:_tpu_raiden_jax",
        "@com_google_absl_py//absl:app",
        "@com_google_absl_py//absl/flags",
        "@jax//jax",
        "@pypi//numpy",
    ],
)
Loading