From c30662302189e0ce1227a58c90f534dc9be3b3e4 Mon Sep 17 00:00:00 2001 From: Jonas Rembser Date: Wed, 12 Nov 2025 00:04:10 +0100 Subject: [PATCH] [RF] Generalize Garimas AD scaling study and add plotting script --- root/roofit/roofit/BenchmarkUtils.h | 28 +-- root/roofit/roofit/benchCodeSquashAD.cxx | 61 +++++-- .../roofit/benchCodeSquashAD_make_plot.py | 159 ++++++++++++++++++ 3 files changed, 209 insertions(+), 39 deletions(-) create mode 100644 root/roofit/roofit/benchCodeSquashAD_make_plot.py diff --git a/root/roofit/roofit/BenchmarkUtils.h b/root/roofit/roofit/BenchmarkUtils.h index dae6c19b0..9b9a43b2b 100644 --- a/root/roofit/roofit/BenchmarkUtils.h +++ b/root/roofit/roofit/BenchmarkUtils.h @@ -4,7 +4,6 @@ #include #include #include -#include #include "benchmark/benchmark.h" @@ -12,34 +11,11 @@ namespace RooFitADBenchmarksUtils { -enum backend { Reference, BatchMode, CodeSquashNumDiff, CodeSquashAD }; template -void doBenchmarks(F func, int rangeMin, int rangeMax, int step = 1, int numIterations = 1, +void doBenchmarks(const char *name, int backend, F func, std::vector const &range, int numIterations = 1, benchmark::TimeUnit unit = benchmark::kMillisecond) { - // Run the minimization with the reference NLL - benchmark::RegisterBenchmark("NllReferenceMinimization", func) - ->ArgsProduct({{Reference}, benchmark::CreateDenseRange(rangeMin, rangeMax, step)}) - ->Unit(unit) - ->Iterations(numIterations); - - // Run the minimization with the reference NLL (BatchMode) - benchmark::RegisterBenchmark("NllBatchModeMinimization", func) - ->ArgsProduct({{BatchMode}, benchmark::CreateDenseRange(rangeMin, rangeMax, step)}) - ->Unit(unit) - ->Iterations(numIterations); - - // Run the minimization with the code-squashed version with numerical-diff. - benchmark::RegisterBenchmark("NllCodeSquash_NumDiff", func) - ->ArgsProduct({{CodeSquashNumDiff}, benchmark::CreateDenseRange(rangeMin, rangeMax, step)}) - ->Unit(unit) - ->Iterations(numIterations); - - // Run the minimization with the code-squashed version with AD. - benchmark::RegisterBenchmark("NllCodeSquash_AD", func) - ->ArgsProduct({{CodeSquashAD}, benchmark::CreateDenseRange(rangeMin, rangeMax, step)}) - ->Unit(unit) - ->Iterations(numIterations); + benchmark::RegisterBenchmark(name, func)->ArgsProduct({{backend}, range})->Unit(unit)->Iterations(numIterations); } void randomizeParameters(const RooArgSet ¶meters) diff --git a/root/roofit/roofit/benchCodeSquashAD.cxx b/root/roofit/roofit/benchCodeSquashAD.cxx index 0b5ca59c7..add80b245 100644 --- a/root/roofit/roofit/benchCodeSquashAD.cxx +++ b/root/roofit/roofit/benchCodeSquashAD.cxx @@ -62,15 +62,22 @@ std::unique_ptr createModel(RooRealVar &x, std::string const &channel return std::unique_ptr{static_cast(model.cloneTree())}; } +enum backend { + Reference, + BatchMode, + CodeSquashNumDiff, + CodeSquashAD +}; + static void BM_RooFuncWrapper_ManyParams_Minimization(benchmark::State &state) { using namespace RooFit; counter++; - gInterpreter->ProcessLine("gErrorIgnoreLevel = 2001;"); - auto &msg = RooMsgService::instance(); - msg.setGlobalKillBelow(RooFit::WARNING); + // gInterpreter->ProcessLine("gErrorIgnoreLevel = 2001;"); + // auto &msg = RooMsgService::instance(); + // msg.setGlobalKillBelow(RooFit::WARNING); // Generate the same dataset for all backends. RooRandom::randomGenerator()->SetSeed(100); @@ -117,26 +124,28 @@ static void BM_RooFuncWrapper_ManyParams_Minimization(benchmark::State &state) RooArgSet origParams; params.snapshot(origParams); - std::unique_ptr nllRef{model.createNLL(data, EvalBackend::Legacy(), Offset("off"))}; - std::unique_ptr nllRefBatch{model.createNLL(data, EvalBackend::Cpu(), Offset("off"))}; - std::unique_ptr nllFunc{model.createNLL(data, EvalBackend::Codegen(), Offset("off"))}; - std::unique_ptr nllFuncNoGrad{model.createNLL(data, EvalBackend::CodegenNoGrad(), Offset("off"))}; + std::cout << "nparams: " << params.size() << std::endl; + + std::unique_ptr nllRef{model.createNLL(data, EvalBackend::Legacy(), Offset("initial"))}; + std::unique_ptr nllRefBatch{model.createNLL(data, EvalBackend::Cpu(), Offset("initial"))}; + std::unique_ptr nllFunc{model.createNLL(data, EvalBackend::Codegen(), Offset("initial"))}; + std::unique_ptr nllFuncNoGrad{model.createNLL(data, EvalBackend::CodegenNoGrad(), Offset("initial"))}; std::unique_ptr m = nullptr; int code = state.range(0); - if (code == RooFitADBenchmarksUtils::backend::Reference) { + if (code == backend::Reference) { m = std::make_unique(*nllRef); - } else if (code == RooFitADBenchmarksUtils::backend::CodeSquashNumDiff) { + } else if (code == backend::CodeSquashNumDiff) { m = std::make_unique(*nllFuncNoGrad); - } else if (code == RooFitADBenchmarksUtils::backend::BatchMode) { + } else if (code == backend::BatchMode) { m = std::make_unique(*nllRefBatch); - } else if (code == RooFitADBenchmarksUtils::backend::CodeSquashAD) { + } else if (code == backend::CodeSquashAD) { m = std::make_unique(*nllFunc); } for (auto _ : state) { - m->setPrintLevel(-1); + // m->setPrintLevel(-1); m->setStrategy(0); params.assign(origParams); @@ -146,7 +155,33 @@ static void BM_RooFuncWrapper_ManyParams_Minimization(benchmark::State &state) int main(int argc, char **argv) { - RooFitADBenchmarksUtils::doBenchmarks(BM_RooFuncWrapper_ManyParams_Minimization, 2, 2, 1, 20); + std::vector rangeLow{1, 2, 3, 4, 5, 6, 7, 8, 12, 17, 23, 28, 33, 38, 43, 49, 54, 59}; + std::vector rangeHigh{64, 70, 75, 80, 85, 90, 96, 100, 110, 120, 130, 140, + 150, 160, 170, 180, 190, 200, 225, 250, 275, 300, 325}; + std::vector range; + range.insert(range.end(), rangeLow.begin(), rangeLow.end()); + range.insert(range.end(), rangeHigh.begin(), rangeHigh.end()); + + // Run the minimization with the reference NLL + RooFitADBenchmarksUtils::doBenchmarks("NllReferenceMinimization", Reference, + BM_RooFuncWrapper_ManyParams_Minimization, range, 1); + + // Run the minimization with the reference NLL (BatchMode) + RooFitADBenchmarksUtils::doBenchmarks("NllBatchModeMinimization", BatchMode, + BM_RooFuncWrapper_ManyParams_Minimization, range, 1); + + // Run the minimization with the code-squashed version with AD. + RooFitADBenchmarksUtils::doBenchmarks("NllCodeSquash_AD", CodeSquashAD, BM_RooFuncWrapper_ManyParams_Minimization, + range, 1); + // RooFitADBenchmarksUtils::doBenchmarks("NllCodeSquash_AD", CodeSquashAD, + // BM_RooFuncWrapper_ManyParams_Minimization, {250}, 1); + + // Run the minimization with the code-squashed version with numerical-diff. + // We can't go to more than 59 channels here, because since offsetting is + // not supported, the fit will not converge anymore if the number of bins is + // sufficiently high. + RooFitADBenchmarksUtils::doBenchmarks("NllCodeSquash_NumDiff", CodeSquashNumDiff, + BM_RooFuncWrapper_ManyParams_Minimization, rangeLow, 1); benchmark::Initialize(&argc, argv); benchmark::RunSpecifiedBenchmarks(); diff --git a/root/roofit/roofit/benchCodeSquashAD_make_plot.py b/root/roofit/roofit/benchCodeSquashAD_make_plot.py new file mode 100644 index 000000000..b960e1af0 --- /dev/null +++ b/root/roofit/roofit/benchCodeSquashAD_make_plot.py @@ -0,0 +1,159 @@ +#!/usr/bin/env python +# coding: utf-8 + + +# Run on the output of benchCodeSquashAD piped into a file: +# $ ./benchCodeSquashAD &> benchCodeSquashAD.out +filename = "benchCodeSquashAD.out" + + +import numpy as np +import matplotlib.pyplot as plt +import pandas as pd + + +steps = { + "NLL creation": "Creation of NLL object took ", + "Function JIT": "Function JIT time: ", + "Gradient generation": "Gradient generation time: ", + "Gradient to machine code": "Gradient IR to machine code time: ", + "Seeding step": "MnSeedGenerator Evaluated function and gradient in ", + "NegativeG2LineSearch": "NegativeG2LineSearch Done after ", + "Minimization": "VariableMetricBuilder Stop iterating after ", + "Hesse": "MnHesse Done after ", + "Total": "user ", +} + + +def time_string_to_seconds(s): + s = s.replace("ms", "*1e-3") + s = s.replace("s", "") + s = s.replace("min", "*60+") + s = s.replace("m", "*60+") + s = s.replace("μ", "*1e-6") + + return float(eval(s)) + + +with open(filename, "r") as f: + lines = f.read().split("\n") + + +def is_relevant(line): + if "NegativeG2LineSearch" in line: + return False + if "iterations" in line: + return True + if "nparams" in line: + return True + if "FVAL = " in line: + return True + for key, val in steps.items(): + if val in line: + return True + return False + + +lines = list(filter(is_relevant, lines)) + + +names = ["NllReferenceMinimization", "NllBatchModeMinimization", "NllCodeSquash_NumDiff", "NllCodeSquash_AD"] + +backends = { + "NllReferenceMinimization": "legacy", + "NllBatchModeMinimization": "RooFit", + "NllCodeSquash_NumDiff": "Hardcoded", + "NllCodeSquash_AD": "RooFit AD", +} + +nll_line = { + "NllReferenceMinimization": 1, + "NllBatchModeMinimization": 2, + "NllCodeSquash_NumDiff": 3, + "NllCodeSquash_AD": 7, +} + + +def analyze_block(lines): + # print(lines) + data = dict() + + name = lines[12].split("/")[0] + + data["backend"] = backends[name] + + def line_to_seconds(l): + l = l.replace(" min ", "min") + return time_string_to_seconds(" ".join(l.split(" ")[-2:])) + + data["create_nll"] = line_to_seconds(lines[nll_line[name]]) + data["seeding"] = line_to_seconds(lines[9]) + data["migrad"] = line_to_seconds(lines[10]) + data["nparams"] = int(lines[0].split(" ")[-1]) - 1 + data["fval"] = float(lines[11].split(" ")[-1]) + + data["jitting"] = sum(line_to_seconds(lines[i]) for i in [4, 5, 6]) + + return data + + +m_block = 13 + + +datas = [] +i = 0 +while i < len(lines): + datas.append(analyze_block(lines[i : i + m_block])) + i += m_block + + +df = pd.DataFrame(datas) +df = df.query("nparams > 10 and nparams < 2500 and backend != 'legacy'") + + +t = np.arange(0.01, 5.0, 0.01) +s1 = np.sin(2 * np.pi * t) +s2 = np.exp(-t) +s3 = np.sin(4 * np.pi * t) + +f, (a0, a1) = plt.subplots(2, 1, gridspec_kw={"height_ratios": [2, 1]}) + +dfs = dict() + + +for backend, df_g in df.groupby("backend"): + dfs[backend] = df_g + nparams = df_g["nparams"] + vals = df_g.eval("migrad + seeding") + # vals = df_g.eval("fval") + if backend == "codegen_no_grad": + continue + if backend == "RooFit AD": + a0.plot(nparams, vals + 0 * df_g["jitting"], label=backend) + a0.plot(nparams, df_g["jitting"], label="JIT time", color="k", linewidth=1, linestyle="--") + else: + a0.plot(nparams, vals, label=backend) + # plt.plot(nparams, df_g["jitting"], label=backend + "_jit") + +nparams = dfs["RooFit"]["nparams"] + +a0.legend(loc="upper left") + +plt.tick_params("x", labelsize=6) + +# make these tick labels invisible +# plt.tick_params('x', labelbottom=False) +vals = dfs["RooFit"]["migrad"].values / dfs["RooFit AD"]["migrad"].values + +a1.plot(nparams, vals, color="k") + +a1.set_xlabel("Number of parameters") +a0.set_ylabel("Minimization time [s]") +a0.set_ylim(0, 30) +a0.set_xlim(0, nparams.to_numpy()[-1]) + +a1.set_ylabel("AD speedup") +a1.set_ylim(1, 4.2) +a1.set_xlim(0, nparams.to_numpy()[-1]) + +plt.savefig("scaling_study.png")