diff --git a/devops/scripts/benchmarks/benches/compute.py b/devops/scripts/benchmarks/benches/compute.py
index 348fe3be79548..57fd195f48bb2 100644
--- a/devops/scripts/benchmarks/benches/compute.py
+++ b/devops/scripts/benchmarks/benches/compute.py
@@ -61,8 +61,8 @@ def git_url(self) -> str:
         return "https://github.com/intel/compute-benchmarks.git"
 
     def git_hash(self) -> str:
-        # Nov 17, 2025
-        return "932ae79f7cca7e156285fc10a59610927c769e89"
+        # Dec 17, 2025
+        return "420549188cd8900c27cf9b04fd859ebe81876a99"
 
     def setup(self) -> None:
         if options.sycl is None:
@@ -182,7 +182,7 @@ def benchmarks(self) -> list[Benchmark]:
                         GraphApiSinKernelGraph(self, runtime, with_graphs, num_kernels)
                     )
 
-        # Add ULLS benchmarks
+        # Add ULLS benchmarks
         for runtime in list(RUNTIMES):
             if runtime == RUNTIMES.SYCL:
                 benches.append(
@@ -355,6 +355,36 @@ def createTorchMultiQueueBench(variant_name: str, **kwargs):
                 ),
             ]
 
+        # Add TorchSlmSize benchmarks
+        for runtime in filter(lambda x: x != RUNTIMES.UR, RUNTIMES):
+
+            def createTorchSlmSizeBench(variant_name: str, **kwargs):
+                return TorchSlmSize(
+                    self,
+                    runtime,
+                    variant_name,
+                    PROFILERS.TIMER,
+                    **{**kwargs, "warmupIterations": 1},
+                )
+
+            benches += [
+                createTorchSlmSizeBench(
+                    "small",
+                    batchSize=512,
+                    slmNum=1,
+                ),
+                createTorchSlmSizeBench(
+                    "medium",
+                    batchSize=512,
+                    slmNum=1024,
+                ),
+                createTorchSlmSizeBench(
+                    "max",
+                    batchSize=512,
+                    slmNum=-1,
+                ),
+            ]
+
         # Add UR-specific benchmarks
         benches += [
             # TODO: multithread_benchmark_ur fails with segfault
@@ -810,25 +840,31 @@ def _bin_args(self, run_trace: TracingType = TracingType.NONE) -> list[str]:
         return [f"--{k}={v}" for k, v in self._rr_params.items()]
 
 
-class TorchMultiQueue(ComputeBenchmark):
+class TorchBenchmark(ComputeBenchmark):
     def __init__(
-        self, suite, runtime: RUNTIMES, variant_name: str, profiler_type, **kwargs
+        self,
+        suite,
+        runtime: RUNTIMES,
+        bench_name: str,
+        variant_name: str,
+        profiler_type,
+        **kwargs,
     ):
         self._variant_name = variant_name
-        self._smq_params = kwargs
+        self._torch_params = kwargs
         self._iterations_regular = 1000
         self._iterations_trace = 10
         super().__init__(
             suite,
             f"torch_benchmark_{runtime.value}",
-            "KernelSubmitMultiQueue",
+            bench_name,
             runtime,
             profiler_type,
         )
 
     def name(self):
         ret = []
-        for k, v in self._smq_params.items():
+        for k, v in self._torch_params.items():
             ret.append(f"{k} {v}")
         ret.sort()
         return self._bench_name + " " + ", ".join(ret)
@@ -848,10 +884,38 @@ def _supported_runtimes(self) -> list[RUNTIMES]:
     def _bin_args(self, run_trace: TracingType = TracingType.NONE) -> list[str]:
         iters = self._get_iters(run_trace)
         return [f"--iterations={iters}"] + [
-            f"--{k}={v}" for k, v in self._smq_params.items()
+            f"--{k}={v}" for k, v in self._torch_params.items()
         ]
 
 
+class TorchMultiQueue(TorchBenchmark):
+    def __init__(
+        self, suite, runtime: RUNTIMES, variant_name: str, profiler_type, **kwargs
+    ):
+        super().__init__(
+            suite,
+            runtime,
+            "KernelSubmitMultiQueue",
+            variant_name,
+            profiler_type,
+            **kwargs,
+        )
+
+
+class TorchSlmSize(TorchBenchmark):
+    def __init__(
+        self, suite, runtime: RUNTIMES, variant_name: str, profiler_type, **kwargs
+    ):
+        super().__init__(
+            suite,
+            runtime,
+            "KernelSubmitSlmSize",
+            variant_name,
+            profiler_type,
+            **kwargs,
+        )
+
+
 class QueueInOrderMemcpy(ComputeBenchmark):
     def __init__(self, bench, isCopyOnly, source, destination, size, profiler_type):
         self._is_copy_only = isCopyOnly
diff --git a/devops/scripts/benchmarks/tests/test_integration.py b/devops/scripts/benchmarks/tests/test_integration.py
index 6b546f80cb3a4..f35b06ed1fc1e 100644
--- a/devops/scripts/benchmarks/tests/test_integration.py
+++ b/devops/scripts/benchmarks/tests/test_integration.py
@@ -3,6 +3,7 @@
 # See LICENSE.TXT
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
+import argparse
 import json
 import os
 import shutil
@@ -15,6 +16,7 @@
 sys.path.append(f"{os.path.dirname(__file__)}/../")
 from utils.workdir_version import INTERNAL_WORKDIR_VERSION
 
+VERBOSE_LOGS = False
 
 DataJson = namedtuple("DataJson", ["runs", "metadata", "tags", "names"])
 DataJsonRun = namedtuple("DataJsonRun", ["name", "results"])
@@ -65,7 +67,7 @@ def run_main(self, *args):
 
         # TODO: not yet tested:
         # "--detect-version", "sycl,compute_runtime"
-        procesResult = subprocess.run(
+        proc = subprocess.run(
            [
                 "./devops/scripts/benchmarks/main.py",
                 self.WORKDIR_DIR,
@@ -86,13 +88,14 @@ def run_main(self, *args):
                 "--stddev-threshold",
                 "999999999.9",
                 "--exit-on-failure",
+                "--verbose" if VERBOSE_LOGS else "--log-level=info",
                 *args,
             ],
             capture_output=True,
         )
-        print("MAIN_PY_STDOUT:\n" + procesResult.stdout.decode())
-        print("MAIN_PY_STDERR:\n" + procesResult.stderr.decode())
-        return procesResult.returncode
+        print("MAIN_PY_STDOUT:\n" + proc.stdout.decode() if proc.stdout else "")
+        print("MAIN_PY_STDERR:\n" + proc.stderr.decode() if proc.stderr else "")
+        return proc.returncode
 
     def get_output(self):
         with open(os.path.join(self.OUTPUT_DIR, "data.json")) as f:
@@ -136,9 +139,6 @@ def get_output(self):
         )
 
 
-# add "--verbose" for debug logs
-
-
 class TestE2E(unittest.TestCase):
     def setUp(self):
         # Load test data
@@ -194,6 +194,11 @@ def test_torch_l0(self):
             "KernelSubmitMultiQueue large",
             {"pytorch", "L0"},
         )
+        self._checkCase(
+            "torch_benchmark_l0 batchSize 512, slmNum 1, warmupIterations 1",
+            "KernelSubmitSlmSize small",
+            {"pytorch", "L0"},
+        )
 
     def test_torch_sycl(self):
         self._checkCase(
@@ -201,6 +206,11 @@
             "KernelSubmitMultiQueue medium",
             {"pytorch", "SYCL"},
         )
+        self._checkCase(
+            "torch_benchmark_sycl batchSize 512, slmNum -1, warmupIterations 1",
+            "KernelSubmitSlmSize max",
+            {"pytorch", "SYCL"},
+        )
 
     def test_torch_syclpreview(self):
         self._checkCase(
@@ -208,6 +218,22 @@
             "KernelSubmitMultiQueue small",
             {"pytorch", "SYCL"},
         )
+        self._checkCase(
+            "torch_benchmark_syclpreview batchSize 512, slmNum 1024, warmupIterations 1",
+            "KernelSubmitSlmSize medium",
+            {"pytorch", "SYCL"},
+        )
+
 
 if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="SYCL's benchmark test framework")
+    parser.add_argument(
+        "--verbose",
+        help="Set benchmark framework's logging level to DEBUG.",
+        action="store_true",
+    )
+
+    args = parser.parse_args()
+    VERBOSE_LOGS = args.verbose
+
     unittest.main()