From 33eaedb26edcc1fd9096bbc0352156110ce2ea84 Mon Sep 17 00:00:00 2001 From: not-matthias Date: Thu, 2 Oct 2025 12:18:30 +0200 Subject: [PATCH 1/7] fix(ci): add mode param --- .github/workflows/ci.yml | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 12ed355..e174912 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -85,9 +85,8 @@ jobs: - name: Run the benchmarks uses: CodSpeedHQ/action@main if: matrix.codspeed-mode != 'off' - env: - CODSPEED_PERF_ENABLED: true with: + mode: ${{ matrix.codspeed-mode }} run: examples/google_benchmark_cmake/build/benchmark_example token: ${{ secrets.CODSPEED_TOKEN }} @@ -124,9 +123,8 @@ jobs: - name: Run the benchmarks uses: CodSpeedHQ/action@main if: matrix.codspeed-mode != 'off' - env: - CODSPEED_PERF_ENABLED: true with: + mode: ${{ matrix.codspeed-mode }} run: bazel run //examples/google_benchmark_bazel:my_benchmark --//core:codspeed_mode=${{ matrix.codspeed-mode }} token: ${{ secrets.CODSPEED_TOKEN }} From c897b7b543714fa190f53c2e6c73fee2540ca038 Mon Sep 17 00:00:00 2001 From: not-matthias Date: Tue, 30 Sep 2025 17:13:48 +0200 Subject: [PATCH 2/7] feat: add support for benchmark markers --- core/include/measurement.hpp | 20 ++++++++++++++++ core/instrument-hooks | 2 +- .../include/benchmark/benchmark.h | 13 +++++++--- google_benchmark/src/benchmark.cc | 24 +++++++++++++++++++ 4 files changed, 55 insertions(+), 4 deletions(-) diff --git a/core/include/measurement.hpp b/core/include/measurement.hpp index 89de7be..1644415 100644 --- a/core/include/measurement.hpp +++ b/core/include/measurement.hpp @@ -1,6 +1,7 @@ #ifndef MEASUREMENT_H #define MEASUREMENT_H +#include #include #ifdef _WIN32 #include @@ -49,4 +50,23 @@ ALWAYS_INLINE void measurement_set_executed_benchmark(const std::string& name) { instrument_hooks_executed_benchmark(g_hooks, current_pid, name.c_str()); } +ALWAYS_INLINE uint64_t measurement_current_timestamp() { + 
return instrument_hooks_current_timestamp(); +} + +ALWAYS_INLINE int8_t measurement_add_marker(uint8_t marker_type, + uint64_t timestamp) { + auto pid = getpid(); + return instrument_hooks_add_marker(g_hooks, pid, marker_type, timestamp); +} + +ALWAYS_INLINE void measurement_add_benchmark_timestamps(uint64_t start, + uint64_t end) { + assert(start <= end); + assert(start != 0 && end != 0); + + measurement_add_marker(MARKER_TYPE_BENCHMARK_START, start); + measurement_add_marker(MARKER_TYPE_BENCHMARK_END, end); +} + #endif // MEASUREMENT_H diff --git a/core/instrument-hooks b/core/instrument-hooks index a9ae7a4..0d3de57 160000 --- a/core/instrument-hooks +++ b/core/instrument-hooks @@ -1 +1 @@ -Subproject commit a9ae7a4d897dbb694cb0355c9994141a7c4a1ab9 +Subproject commit 0d3de57fe46ef97714a41ed028096f6f84fdbd2a diff --git a/google_benchmark/include/benchmark/benchmark.h b/google_benchmark/include/benchmark/benchmark.h index f30977b..0e7d96d 100644 --- a/google_benchmark/include/benchmark/benchmark.h +++ b/google_benchmark/include/benchmark/benchmark.h @@ -949,6 +949,9 @@ class BENCHMARK_EXPORT BENCHMARK_INTERNAL_CACHELINE_ALIGNED State { #if defined(CODSPEED_INSTRUMENTATION) || defined(CODSPEED_WALLTIME) codspeed::CodSpeed* codspeed_; #endif +#ifdef CODSPEED_WALLTIME + uint64_t resume_timestamp_; +#endif private: bool started_; @@ -1044,11 +1047,13 @@ struct State::StateIterator { private: friend class State; BENCHMARK_ALWAYS_INLINE - StateIterator() : cached_(0), parent_() {} + StateIterator() : cached_(0), parent_() + {} BENCHMARK_ALWAYS_INLINE explicit StateIterator(State* st) - : cached_(st->skipped() ? 0 : st->max_iterations), parent_(st) {} + : cached_(st->skipped() ? 
0 : st->max_iterations), parent_(st) + {} public: BENCHMARK_ALWAYS_INLINE @@ -1063,7 +1068,9 @@ struct State::StateIterator { BENCHMARK_ALWAYS_INLINE bool operator!=(StateIterator const&) const { - if (BENCHMARK_BUILTIN_EXPECT(cached_ != 0, true)) return true; + if (BENCHMARK_BUILTIN_EXPECT(cached_ != 0, true)) { + return true; + } #ifdef CODSPEED_INSTRUMENTATION measurement_stop(); #endif diff --git a/google_benchmark/src/benchmark.cc b/google_benchmark/src/benchmark.cc index 91c737a..5a7dbb7 100644 --- a/google_benchmark/src/benchmark.cc +++ b/google_benchmark/src/benchmark.cc @@ -19,6 +19,10 @@ #include "codspeed.h" #include "internal_macros.h" +#ifdef CODSPEED_WALLTIME +#include "measurement.hpp" +#endif + #ifndef BENCHMARK_OS_WINDOWS #if !defined(BENCHMARK_OS_FUCHSIA) && !defined(BENCHMARK_OS_QURT) #include @@ -185,6 +189,9 @@ State::State(std::string name, IterationCount max_iters, max_iterations(max_iters), #if defined(CODSPEED_INSTRUMENTATION) || defined(CODSPEED_WALLTIME) codspeed_(codspeed), +#endif +#ifdef CODSPEED_WALLTIME + resume_timestamp_(0), #endif started_(false), finished_(false), @@ -252,9 +259,21 @@ State::State(std::string name, IterationCount max_iters, } void State::PauseTiming() { +#ifdef CODSPEED_WALLTIME + uint64_t pause_timestamp = measurement_current_timestamp(); +#endif + // Add in time accumulated so far BM_CHECK(started_ && !finished_ && !skipped()); timer_->StopTimer(); + +#ifdef CODSPEED_WALLTIME + if (resume_timestamp_ != 0) { + measurement_add_benchmark_timestamps(resume_timestamp_, pause_timestamp); + resume_timestamp_ = 0; + } +#endif + if (perf_counters_measurement_ != nullptr) { std::vector> measurements; if (!perf_counters_measurement_->Stop(measurements)) { @@ -276,6 +295,11 @@ void State::ResumeTiming() { if (perf_counters_measurement_ != nullptr) { perf_counters_measurement_->Start(); } + +#ifdef CODSPEED_WALLTIME + BM_CHECK(resume_timestamp_ == 0); + resume_timestamp_ = measurement_current_timestamp(); +#endif } void 
State::SkipWithMessage(const std::string& msg) { From 7317f262b38a28231c1cc17de6008870608c49a5 Mon Sep 17 00:00:00 2001 From: not-matthias Date: Thu, 2 Oct 2025 11:36:30 +0200 Subject: [PATCH 3/7] feat: add multithread example --- examples/google_benchmark_bazel/helper.hpp | 1 + .../multithread_bench.hpp | 1 + examples/google_benchmark_cmake/helper.hpp | 36 ++++++++++++++++ examples/google_benchmark_cmake/main.cpp | 1 + .../multithread_bench.hpp | 41 +++++++++++++++++++ 5 files changed, 80 insertions(+) create mode 120000 examples/google_benchmark_bazel/helper.hpp create mode 120000 examples/google_benchmark_bazel/multithread_bench.hpp create mode 100644 examples/google_benchmark_cmake/helper.hpp create mode 100644 examples/google_benchmark_cmake/multithread_bench.hpp diff --git a/examples/google_benchmark_bazel/helper.hpp b/examples/google_benchmark_bazel/helper.hpp new file mode 120000 index 0000000..bc2f02c --- /dev/null +++ b/examples/google_benchmark_bazel/helper.hpp @@ -0,0 +1 @@ +../google_benchmark_cmake/helper.hpp \ No newline at end of file diff --git a/examples/google_benchmark_bazel/multithread_bench.hpp b/examples/google_benchmark_bazel/multithread_bench.hpp new file mode 120000 index 0000000..5709a0d --- /dev/null +++ b/examples/google_benchmark_bazel/multithread_bench.hpp @@ -0,0 +1 @@ +../google_benchmark_cmake/multithread_bench.hpp \ No newline at end of file diff --git a/examples/google_benchmark_cmake/helper.hpp b/examples/google_benchmark_cmake/helper.hpp new file mode 100644 index 0000000..c863208 --- /dev/null +++ b/examples/google_benchmark_cmake/helper.hpp @@ -0,0 +1,36 @@ +#ifndef HELPER_HPP +#define HELPER_HPP + +#ifdef _MSC_VER +#define NOINLINE __declspec(noinline) +#else +#define NOINLINE __attribute__((noinline)) +#endif + +#include + +#include +#include +#include + +NOINLINE static uint64_t recursive_fib(int n) { + if (n <= 1) return n; + return recursive_fib(n - 1) + recursive_fib(n - 2); +} + +NOINLINE static uint64_t 
expensive_operation() { + // Large memory allocation + std::vector data(1024 * 1024, 42); // 8 MiB allocation + + // Expensive recursive computation that will dominate flamegraph + uint64_t fib_result = recursive_fib(30); + + // More expensive work + uint64_t sum = std::accumulate(data.begin(), data.end(), uint64_t(0)); + benchmark::DoNotOptimize(sum); + benchmark::DoNotOptimize(fib_result); + + return sum + fib_result; +} + +#endif // HELPER_HPP diff --git a/examples/google_benchmark_cmake/main.cpp b/examples/google_benchmark_cmake/main.cpp index 9dd39dd..15e778f 100644 --- a/examples/google_benchmark_cmake/main.cpp +++ b/examples/google_benchmark_cmake/main.cpp @@ -4,6 +4,7 @@ #include "fibonacci_bench.hpp" #include "fixture_bench.hpp" +#include "multithread_bench.hpp" #include "sleep_bench.hpp" #include "template_bench.hpp" diff --git a/examples/google_benchmark_cmake/multithread_bench.hpp b/examples/google_benchmark_cmake/multithread_bench.hpp new file mode 100644 index 0000000..a67b769 --- /dev/null +++ b/examples/google_benchmark_cmake/multithread_bench.hpp @@ -0,0 +1,41 @@ +#pragma once + +#include + +#include +#include + +#include "helper.hpp" + +// Simple multithreaded benchmark: spawn thread, do work, join +static void BM_SimpleMultithread(benchmark::State& state) { + for (auto _ : state) { + state.PauseTiming(); + std::atomic work_start{false}; + std::atomic work_done{false}; + + std::thread worker([&]() { + // Wait for the signal to start + while (!work_start.load()) { + std::this_thread::yield(); + } + + // Do some CPU work + benchmark::DoNotOptimize(expensive_operation()); + work_done.store(true); + }); + state.ResumeTiming(); + + // Signal the worker to start + work_start.store(true); + + // Wait for worker to complete + while (!work_done.load()) { + std::this_thread::yield(); + } + + worker.join(); + } +} + +BENCHMARK(BM_SimpleMultithread); From 15b5d12d08e7ea212c7073d31368b548e0ba34ef Mon Sep 17 00:00:00 2001 From: not-matthias Date: Wed, 1 Oct 
2025 15:29:46 +0200 Subject: [PATCH 4/7] feat: add bench with `PauseTiming` --- .../pause_timing_bench.hpp | 1 + examples/google_benchmark_cmake/main.cpp | 1 + .../pause_timing_bench.hpp | 40 +++++++++++++++++++ 3 files changed, 42 insertions(+) create mode 120000 examples/google_benchmark_bazel/pause_timing_bench.hpp create mode 100644 examples/google_benchmark_cmake/pause_timing_bench.hpp diff --git a/examples/google_benchmark_bazel/pause_timing_bench.hpp b/examples/google_benchmark_bazel/pause_timing_bench.hpp new file mode 120000 index 0000000..24e68c9 --- /dev/null +++ b/examples/google_benchmark_bazel/pause_timing_bench.hpp @@ -0,0 +1 @@ +../google_benchmark_cmake/pause_timing_bench.hpp \ No newline at end of file diff --git a/examples/google_benchmark_cmake/main.cpp b/examples/google_benchmark_cmake/main.cpp index 15e778f..525cebd 100644 --- a/examples/google_benchmark_cmake/main.cpp +++ b/examples/google_benchmark_cmake/main.cpp @@ -5,6 +5,7 @@ #include "fibonacci_bench.hpp" #include "fixture_bench.hpp" #include "multithread_bench.hpp" +#include "pause_timing_bench.hpp" #include "sleep_bench.hpp" #include "template_bench.hpp" diff --git a/examples/google_benchmark_cmake/pause_timing_bench.hpp b/examples/google_benchmark_cmake/pause_timing_bench.hpp new file mode 100644 index 0000000..9e1c2cc --- /dev/null +++ b/examples/google_benchmark_cmake/pause_timing_bench.hpp @@ -0,0 +1,40 @@ +#pragma once + +#include + +#include +#include + +#include "helper.hpp" + +NOINLINE static uint64_t actual_work() { + std::this_thread::sleep_for(std::chrono::milliseconds(1)); + return 42; +} + +static void BM_large_setup_teardown(benchmark::State& state) { + for (auto _ : state) { + state.PauseTiming(); + benchmark::DoNotOptimize(expensive_operation()); + state.ResumeTiming(); + + benchmark::DoNotOptimize(actual_work()); + + state.PauseTiming(); + benchmark::DoNotOptimize(expensive_operation()); + state.ResumeTiming(); + } +} +// IMPORTANT: Use fixed iterations, otherwise 
we'll run for 10+ minutes +BENCHMARK(BM_large_setup_teardown)->Iterations(100); + +static void BM_large_setup(benchmark::State& state) { + for (auto _ : state) { + state.PauseTiming(); + benchmark::DoNotOptimize(expensive_operation()); + state.ResumeTiming(); + + benchmark::DoNotOptimize(actual_work()); + } +} +BENCHMARK(BM_large_setup); From eaa5ee2c0150568fc2a2b3470ada70e5468207c9 Mon Sep 17 00:00:00 2001 From: not-matthias Date: Thu, 2 Oct 2025 13:04:04 +0200 Subject: [PATCH 5/7] chore: bump instrument-hooks --- core/instrument-hooks | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/instrument-hooks b/core/instrument-hooks index 0d3de57..b1e401a 160000 --- a/core/instrument-hooks +++ b/core/instrument-hooks @@ -1 +1 @@ -Subproject commit 0d3de57fe46ef97714a41ed028096f6f84fdbd2a +Subproject commit b1e401a4d031ad308edb22ed59a52253a1ebe924 From 9fd25eeadc7f00a137238c5718c89b18fe42eda8 Mon Sep 17 00:00:00 2001 From: not-matthias Date: Fri, 3 Oct 2025 18:09:13 +0200 Subject: [PATCH 6/7] fix: ignore instrument-hooks warning --- core/BUILD | 2 ++ core/CMakeLists.txt | 2 ++ 2 files changed, 4 insertions(+) diff --git a/core/BUILD b/core/BUILD index 75c1d8f..d1cdef2 100644 --- a/core/BUILD +++ b/core/BUILD @@ -29,6 +29,8 @@ cc_library( "-Wno-unused-parameter", "-Wno-unused-but-set-variable", "-Wno-type-limits", + "-Wno-format", + "-Wno-format-security", ], }), visibility = ["//visibility:public"], diff --git a/core/CMakeLists.txt b/core/CMakeLists.txt index 6a2bea3..21b2639 100644 --- a/core/CMakeLists.txt +++ b/core/CMakeLists.txt @@ -35,6 +35,8 @@ if(CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang") -Wno-unused-parameter -Wno-unused-but-set-variable -Wno-type-limits + -Wno-format + -Wno-format-security ) elseif(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") target_compile_options( From 7c2430e70c2462ae553fed2ed54c4463c4d321a8 Mon Sep 17 00:00:00 2001 From: not-matthias Date: Fri, 3 Oct 2025 18:54:10 +0200 Subject: [PATCH 7/7] fix: remove memory allocations, use 
fixed iteration count --- examples/google_benchmark_cmake/helper.hpp | 14 ++------------ .../google_benchmark_cmake/pause_timing_bench.hpp | 2 +- 2 files changed, 3 insertions(+), 13 deletions(-) diff --git a/examples/google_benchmark_cmake/helper.hpp b/examples/google_benchmark_cmake/helper.hpp index c863208..2be319c 100644 --- a/examples/google_benchmark_cmake/helper.hpp +++ b/examples/google_benchmark_cmake/helper.hpp @@ -19,18 +19,8 @@ NOINLINE static uint64_t recursive_fib(int n) { } NOINLINE static uint64_t expensive_operation() { - // Large memory allocation - std::vector data(1024 * 1024, 42); // 8 MiB allocation - - // Expensive recursive computation that will dominate flamegraph - uint64_t fib_result = recursive_fib(30); - - // More expensive work - uint64_t sum = std::accumulate(data.begin(), data.end(), uint64_t(0)); - benchmark::DoNotOptimize(sum); - benchmark::DoNotOptimize(fib_result); - - return sum + fib_result; + // Expensive recursive computation that will dominate flamegraph + return 42 + recursive_fib(30); } #endif // HELPER_HPP diff --git a/examples/google_benchmark_cmake/pause_timing_bench.hpp b/examples/google_benchmark_cmake/pause_timing_bench.hpp index 9e1c2cc..5687470 100644 --- a/examples/google_benchmark_cmake/pause_timing_bench.hpp +++ b/examples/google_benchmark_cmake/pause_timing_bench.hpp @@ -37,4 +37,4 @@ static void BM_large_setup(benchmark::State& state) { benchmark::DoNotOptimize(actual_work()); } } -BENCHMARK(BM_large_setup); +BENCHMARK(BM_large_setup)->Iterations(100);