|
| 1 | +/* |
| 2 | + This file is part of libhttpserver |
| 3 | + Copyright (C) 2011-2026 Sebastiano Merlino |
| 4 | +
|
| 5 | + This library is free software; you can redistribute it and/or |
| 6 | + modify it under the terms of the GNU Lesser General Public |
| 7 | + License as published by the Free Software Foundation; either |
| 8 | + version 2.1 of the License, or (at your option) any later version. |
| 9 | +
|
| 10 | + This library is distributed in the hope that it will be useful, |
| 11 | + but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 12 | + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 13 | + Lesser General Public License for more details. |
| 14 | +
|
| 15 | + You should have received a copy of the GNU Lesser General Public |
| 16 | + License along with this library; if not, write to the Free Software |
| 17 | + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 |
| 18 | + USA |
| 19 | +*/ |
| 20 | +// TASK-053 -- v2 dispatch performance acceptance harness. |
| 21 | +// |
| 22 | +// After step 3 of TASK-053 cut `resolve_resource_for_request` over to |
| 23 | +// `lookup_v2()` and removed the v1 fallback, the dispatch hot path is |
| 24 | +// the cache -> exact -> radix -> regex pipeline plus the per-call LRU |
| 25 | +// promotion. The deferred-backlog plan (TASK-053 §"Step 4 -- Bench") |
| 26 | +// fixes two ceilings on that pipeline: |
| 27 | +// |
| 28 | +// (a) cache-hit ceiling -- 200 ns / lookup (median) |
| 29 | +// (b) radix tier ceiling -- 5 us / lookup for 8-segment paths |
| 30 | +// (median, cold-cache) |
| 31 | +// |
| 32 | +// The ceilings are generous (~5-10x the cost on modern x86_64 / arm64 |
| 33 | +// release builds) to absorb CI runner noise, frequency scaling and |
| 34 | +// virtualized hosts without missing a genuine regression. The point |
| 35 | +// is to lock in *order of magnitude*: the v1 dispatch path was |
| 36 | +// dominated by std::regex_match across the full regex map at every |
| 37 | +// miss; v2 is dominated by a single std::unordered_map probe at the |
| 38 | +// cache tier and a per-segment trie walk at the radix tier. If a |
| 39 | +// future change reintroduces O(N) regex scanning on the cache-hit |
| 40 | +// path, (a)'s ceiling catches it; if a future change makes the radix |
| 41 | +// walk allocate-per-segment, (b)'s ceiling catches it. |
| 42 | +// |
| 43 | +// Wired into `make bench` via `bench_targets` in test/Makefile.am; |
| 44 | +// NOT part of `make check`. Sanitizer builds skip with exit 0 so |
| 45 | +// `make bench` stays green on sanitizer hosts. |
| 46 | + |
| 47 | +#define HTTPSERVER_COMPILATION 1 // unlock webserver_test_access |
| 48 | + |
| 49 | +#include <algorithm> |
| 50 | +#include <chrono> |
| 51 | +#include <cstdio> |
| 52 | +#include <cstdlib> |
| 53 | +#include <memory> |
| 54 | +#include <string> |
| 55 | +#include <vector> |
| 56 | + |
| 57 | +#include "httpserver/create_webserver.hpp" |
| 58 | +#include "httpserver/http_resource.hpp" |
| 59 | +#include "httpserver/http_response.hpp" |
| 60 | +#include "httpserver/http_utils.hpp" |
| 61 | +#include "httpserver/webserver.hpp" |
| 62 | +#include "httpserver/detail/webserver_impl.hpp" |
| 63 | + |
| 64 | +namespace hs = httpserver; |
| 65 | + |
// Optimization barrier for benchmark results: forces the compiler to
// treat `value` as observed, so the call that produced it cannot be
// discarded as a dead store.
//
//  * GCC / Clang: an empty `asm volatile` takes the object's address as
//    an input operand and declares a "memory" clobber. No instructions
//    are emitted by the asm body itself.
//  * Other compilers: the object's address is stored into a pointer
//    object that is itself volatile-qualified.
//
// `value` is only observed, never modified.
template <typename T>
[[gnu::always_inline]] inline void do_not_optimize(T const& value) {
#if defined(__GNUC__) || defined(__clang__)
    asm volatile("" : : "r,m"(&value) : "memory");
#else
    // The volatile qualifier must apply to the pointer object (right of
    // the `*`). `volatile const void* sink` would declare an ordinary
    // pointer to volatile data, whose initialization is a plain dead
    // store the optimizer is free to remove together with `value`.
    const void* volatile sink = static_cast<const void*>(&value);
    (void)sink;
#endif
}
| 76 | + |
// Compile-time flag: true when this translation unit is built with a
// sanitizer that the toolchain exposes to the preprocessor.
//
// Two detection styles are tried in order:
//   1. predefined `__SANITIZE_*__` macros;
//   2. `__has_feature(...)` queries, when the compiler provides
//      `__has_feature` at all.
// Both lists cover address, thread, memory and hwaddress sanitizers so
// that a given sanitizer is recognized regardless of which style the
// compiler uses. The undefined-behavior sanitizer is only checked in
// the `__has_feature` branch; this block has no `__SANITIZE_*__` test
// for it.
static constexpr bool kSanitizerBuild =
#if defined(__SANITIZE_ADDRESS__) \
    || defined(__SANITIZE_THREAD__) \
    || defined(__SANITIZE_MEMORY__) \
    || defined(__SANITIZE_HWADDRESS__)
    true
#elif defined(__has_feature)
#  if __has_feature(address_sanitizer) \
      || __has_feature(thread_sanitizer) \
      || __has_feature(memory_sanitizer) \
      || __has_feature(hwaddress_sanitizer) \
      || __has_feature(undefined_behavior_sanitizer)
    true
#  else
    false
#  endif
#else
    false
#endif
    ;  // NOLINT(whitespace/semicolon)
| 96 | + |
| 97 | +namespace { |
| 98 | + |
// Acceptance ceilings taken from step 4 of the TASK-053
// deferred-backlog plan. Both apply to the median per-lookup cost.
constexpr double kCacheHitNsCeiling{200.0};  // cache-hit tier, in ns
constexpr double kRadixUsCeiling{5.0};       // radix tier, in us
// The radix ceiling again, converted to ns so it can be compared
// directly against the ns/call figures the benchmark measures.
constexpr double kRadixNsCeiling{kRadixUsCeiling * 1000.0};
| 103 | + |
| 104 | +class noop_resource : public hs::http_resource { |
| 105 | + public: |
| 106 | + hs::http_response render_get(const hs::http_request&) override { |
| 107 | + return hs::http_response::string("ok"); |
| 108 | + } |
| 109 | +}; |
| 110 | + |
// Sorts `v` ascending IN PLACE and returns its median.
//
//  * odd size  -> the middle element;
//  * even size -> the midpoint of the two middle elements;
//  * empty     -> 0.0 (there is nothing to summarize, and indexing an
//                 empty vector would be undefined behavior).
//
// Callers may rely on `v` being sorted afterwards.
double sort_and_median(std::vector<double>& v) {
    if (v.empty()) {
        return 0.0;
    }
    std::sort(v.begin(), v.end());

    const std::size_t mid = v.size() / 2;
    if (v.size() % 2 == 1) {
        return v[mid];
    }
    // Even count: midpoint of the two central samples, written as
    // lo + (hi - lo) / 2 so the sum of two large values never overflows.
    const double lo = v[mid - 1];
    const double hi = v[mid];
    return lo + (hi - lo) / 2.0;
}
| 115 | + |
// Returns the 99th-percentile sample of `v`, defined here as the
// element at index floor(size * 99 / 100).
//
// Precondition: `v` is sorted ascending (e.g. by sort_and_median).
// Returns 0.0 for an empty vector; without this guard `size() - 1`
// would wrap around and the function would read v[0] of an empty
// vector.
double p99_of_sorted(const std::vector<double>& v) {
    if (v.empty()) {
        return 0.0;
    }
    // floor(0.99 * n) is strictly less than n for every n >= 1, so the
    // index is always in range and needs no clamping.
    const std::size_t idx = (v.size() * 99) / 100;
    return v[idx];
}
| 121 | + |
| 122 | +// Measure a no-arg lambda's median ns/call over OUTER rounds of |
| 123 | +// INNER iterations each. Prints a one-line summary. |
| 124 | +template <typename F> |
| 125 | +double measure_median_ns(const char* label, F op, |
| 126 | + std::size_t outer, std::size_t inner) { |
| 127 | + using clock = std::chrono::steady_clock; |
| 128 | + std::vector<double> samples_ns; |
| 129 | + samples_ns.reserve(outer); |
| 130 | + |
| 131 | + // Warmup: prime instruction cache + branch predictor. The first |
| 132 | + // outer round handles thermal warmup at INNER scale; this short |
| 133 | + // pre-loop just gets the trampoline hot. |
| 134 | + for (std::size_t i = 0; i < 10'000; ++i) { |
| 135 | + op(); |
| 136 | + } |
| 137 | + |
| 138 | + for (std::size_t r = 0; r < outer; ++r) { |
| 139 | + const auto t0 = clock::now(); |
| 140 | + for (std::size_t i = 0; i < inner; ++i) { |
| 141 | + op(); |
| 142 | + } |
| 143 | + const auto t1 = clock::now(); |
| 144 | + const double ns_per_call = |
| 145 | + std::chrono::duration<double, std::nano>(t1 - t0).count() / inner; |
| 146 | + samples_ns.push_back(ns_per_call); |
| 147 | + } |
| 148 | + |
| 149 | + const double min_ns = |
| 150 | + *std::min_element(samples_ns.begin(), samples_ns.end()); |
| 151 | + const double max_ns = |
| 152 | + *std::max_element(samples_ns.begin(), samples_ns.end()); |
| 153 | + const double median = sort_and_median(samples_ns); |
| 154 | + const double p99 = p99_of_sorted(samples_ns); |
| 155 | + std::printf(" %s: median=%.3fns p99=%.3fns (min=%.3fns max=%.3fns)\n", |
| 156 | + label, median, p99, min_ns, max_ns); |
| 157 | + return median; |
| 158 | +} |
| 159 | + |
| 160 | +// Build a webserver with the bench routes registered. We never start |
| 161 | +// the daemon (no MHD, no socket) -- the bench exercises only the |
| 162 | +// route-table data path through webserver_impl::lookup_v2(). |
| 163 | +std::unique_ptr<hs::webserver> make_bench_webserver() { |
| 164 | + auto ws = std::make_unique<hs::webserver>( |
| 165 | + hs::create_webserver(8080) |
| 166 | + .start_method(hs::http::http_utils::INTERNAL_SELECT)); |
| 167 | + // (a) cache-hit target: a single exact path repeatedly looked up. |
| 168 | + ws->register_path("/api/v1/users/me", std::make_shared<noop_resource>()); |
| 169 | + // (b) radix-tier target: an 8-segment parameterized path. Each |
| 170 | + // segment is a wildcard so the lookup walks all 8 levels of the |
| 171 | + // trie under wildcard children. |
| 172 | + ws->register_path( |
| 173 | + "/a/{p1}/b/{p2}/c/{p3}/d/{p4}", |
| 174 | + std::make_shared<noop_resource>()); |
| 175 | + return ws; |
| 176 | +} |
| 177 | + |
| 178 | +} // namespace |
| 179 | + |
| 180 | +int main() { |
| 181 | + if constexpr (kSanitizerBuild) { |
| 182 | + std::printf("bench_route_lookup: skipped (sanitizer build " |
| 183 | + "would distort ns/call)\n"); |
| 184 | + return 0; |
| 185 | + } |
| 186 | + |
| 187 | + // OUTER=51 gives a stable median (26th value when sorted) and a |
| 188 | + // meaningful p99 (50th value). The outer rounds cost < 100 ms |
| 189 | + // total at the inner counts below. |
| 190 | + constexpr std::size_t OUTER = 51; |
| 191 | + constexpr std::size_t INNER_CACHE = 1'000'000; // cache hit is ~ns |
| 192 | + constexpr std::size_t INNER_RADIX = 100'000; // radix walk is ~us |
| 193 | + |
| 194 | + auto ws = make_bench_webserver(); |
| 195 | + auto* impl = hs::webserver_test_access::impl(*ws); |
| 196 | + |
| 197 | + // ----- (a) cache-hit ----- |
| 198 | + // First call warms the cache. Subsequent calls hit it. |
| 199 | + { |
| 200 | + auto warm = impl->lookup_v2( |
| 201 | + hs::http_method::get, std::string("/api/v1/users/me")); |
| 202 | + do_not_optimize(warm); |
| 203 | + } |
| 204 | + |
| 205 | + std::printf("bench_route_lookup (a): cache-hit (/api/v1/users/me)\n"); |
| 206 | + const double median_cache_ns = measure_median_ns( |
| 207 | + "cache-hit", |
| 208 | + [&]() { |
| 209 | + auto r = impl->lookup_v2( |
| 210 | + hs::http_method::get, std::string("/api/v1/users/me")); |
| 211 | + do_not_optimize(r); |
| 212 | + }, |
| 213 | + OUTER, INNER_CACHE); |
| 214 | + |
| 215 | + // ----- (b) radix tier, 8 segments ----- |
| 216 | + // Vary the captured-param values across iterations so the cache |
| 217 | + // does NOT serve the lookup. The PATHS table holds 16 distinct |
| 218 | + // 8-segment URLs; the modulo index rotates through them. The |
| 219 | + // cache is sized so that 16 entries blow it out repeatedly, |
| 220 | + // forcing the radix walk on every lookup. (The path string view |
| 221 | + // dominates the visible work even if the LRU happens to keep |
| 222 | + // one or two warm; the ceiling absorbs that.) |
| 223 | + static const std::vector<std::string> kPaths = { |
| 224 | + "/a/u01/b/v01/c/w01/d/x01", "/a/u02/b/v02/c/w02/d/x02", |
| 225 | + "/a/u03/b/v03/c/w03/d/x03", "/a/u04/b/v04/c/w04/d/x04", |
| 226 | + "/a/u05/b/v05/c/w05/d/x05", "/a/u06/b/v06/c/w06/d/x06", |
| 227 | + "/a/u07/b/v07/c/w07/d/x07", "/a/u08/b/v08/c/w08/d/x08", |
| 228 | + "/a/u09/b/v09/c/w09/d/x09", "/a/u10/b/v10/c/w10/d/x10", |
| 229 | + "/a/u11/b/v11/c/w11/d/x11", "/a/u12/b/v12/c/w12/d/x12", |
| 230 | + "/a/u13/b/v13/c/w13/d/x13", "/a/u14/b/v14/c/w14/d/x14", |
| 231 | + "/a/u15/b/v15/c/w15/d/x15", "/a/u16/b/v16/c/w16/d/x16", |
| 232 | + }; |
| 233 | + std::size_t idx = 0; |
| 234 | + std::printf("bench_route_lookup (b): radix tier, 8-segment " |
| 235 | + "parameterized path\n"); |
| 236 | + const double median_radix_ns = measure_median_ns( |
| 237 | + "radix-8seg", |
| 238 | + [&]() { |
| 239 | + auto r = impl->lookup_v2( |
| 240 | + hs::http_method::get, kPaths[idx]); |
| 241 | + do_not_optimize(r); |
| 242 | + idx = (idx + 1) % kPaths.size(); |
| 243 | + }, |
| 244 | + OUTER, INNER_RADIX); |
| 245 | + |
| 246 | + // ----- Summary + gates ----- |
| 247 | + std::printf("\nbench_route_lookup summary:\n"); |
| 248 | + std::printf(" (a) cache-hit median = %.3f ns/lookup (ceiling %.1f ns)\n", |
| 249 | + median_cache_ns, kCacheHitNsCeiling); |
| 250 | + std::printf(" (b) radix-8seg median = %.3f ns/lookup (ceiling %.0f ns " |
| 251 | + "= %.1f us)\n", |
| 252 | + median_radix_ns, kRadixNsCeiling, kRadixUsCeiling); |
| 253 | + |
| 254 | + int rc = 0; |
| 255 | + if (median_cache_ns > kCacheHitNsCeiling) { |
| 256 | + std::printf("FAIL: (a) cache-hit median %.3f ns exceeds ceiling " |
| 257 | + "%.1f ns\n", |
| 258 | + median_cache_ns, kCacheHitNsCeiling); |
| 259 | + rc = 1; |
| 260 | + } else { |
| 261 | + std::printf("PASS: (a) cache-hit within %.1f ns ceiling\n", |
| 262 | + kCacheHitNsCeiling); |
| 263 | + } |
| 264 | + if (median_radix_ns > kRadixNsCeiling) { |
| 265 | + std::printf("FAIL: (b) radix-8seg median %.3f ns exceeds ceiling " |
| 266 | + "%.0f ns (%.1f us)\n", |
| 267 | + median_radix_ns, kRadixNsCeiling, kRadixUsCeiling); |
| 268 | + rc = 1; |
| 269 | + } else { |
| 270 | + std::printf("PASS: (b) radix-8seg within %.1f us ceiling\n", |
| 271 | + kRadixUsCeiling); |
| 272 | + } |
| 273 | + return rc; |
| 274 | +} |
0 commit comments