From c30662302189e0ce1227a58c90f534dc9be3b3e4 Mon Sep 17 00:00:00 2001
From: Jonas Rembser <jonas.rembser@cern.ch>
Date: Wed, 12 Nov 2025 00:04:10 +0100
Subject: [PATCH] [RF] Generalize Garimas AD scaling study and add plotting
 script

---
 root/roofit/roofit/BenchmarkUtils.h           |  28 +--
 root/roofit/roofit/benchCodeSquashAD.cxx      |  61 +++++--
 .../roofit/benchCodeSquashAD_make_plot.py     | 159 ++++++++++++++++++
 3 files changed, 209 insertions(+), 39 deletions(-)
 create mode 100644 root/roofit/roofit/benchCodeSquashAD_make_plot.py
diff --git a/root/roofit/roofit/BenchmarkUtils.h b/root/roofit/roofit/BenchmarkUtils.h
index dae6c19b0..9b9a43b2b 100644
--- a/root/roofit/roofit/BenchmarkUtils.h
+++ b/root/roofit/roofit/BenchmarkUtils.h
@@ -4,7 +4,6 @@
 #include <RooArgSet.h>
 #include <RooRandom.h>
 #include <RooAbsRealLValue.h>
-#include <RooFuncWrapper.h>
 
 #include "benchmark/benchmark.h"
 
@@ -12,34 +11,11 @@
 
 namespace RooFitADBenchmarksUtils {
 
-enum backend { Reference, BatchMode, CodeSquashNumDiff, CodeSquashAD };
 template <typename F>
-void doBenchmarks(F func, int rangeMin, int rangeMax, int step = 1, int numIterations = 1,
+void doBenchmarks(const char *name, int backend, F func, std::vector<long int> const &range, int numIterations = 1,
                   benchmark::TimeUnit unit = benchmark::kMillisecond)
 {
-   // Run the minimization with the reference NLL
-   benchmark::RegisterBenchmark("NllReferenceMinimization", func)
-      ->ArgsProduct({{Reference}, benchmark::CreateDenseRange(rangeMin, rangeMax, step)})
-      ->Unit(unit)
-      ->Iterations(numIterations);
-
-   // Run the minimization with the reference NLL (BatchMode)
-   benchmark::RegisterBenchmark("NllBatchModeMinimization", func)
-      ->ArgsProduct({{BatchMode}, benchmark::CreateDenseRange(rangeMin, rangeMax, step)})
-      ->Unit(unit)
-      ->Iterations(numIterations);
-
-   // Run the minimization with the code-squashed version with numerical-diff.
-   benchmark::RegisterBenchmark("NllCodeSquash_NumDiff", func)
-      ->ArgsProduct({{CodeSquashNumDiff}, benchmark::CreateDenseRange(rangeMin, rangeMax, step)})
-      ->Unit(unit)
-      ->Iterations(numIterations);
-
-   // Run the minimization with the code-squashed version with AD.
-   benchmark::RegisterBenchmark("NllCodeSquash_AD", func)
-      ->ArgsProduct({{CodeSquashAD}, benchmark::CreateDenseRange(rangeMin, rangeMax, step)})
-      ->Unit(unit)
-      ->Iterations(numIterations);
+   benchmark::RegisterBenchmark(name, func)->ArgsProduct({{backend}, range})->Unit(unit)->Iterations(numIterations);
 }
 
 void randomizeParameters(const RooArgSet &parameters)
diff --git a/root/roofit/roofit/benchCodeSquashAD.cxx b/root/roofit/roofit/benchCodeSquashAD.cxx
index 0b5ca59c7..add80b245 100644
--- a/root/roofit/roofit/benchCodeSquashAD.cxx
+++ b/root/roofit/roofit/benchCodeSquashAD.cxx
@@ -62,15 +62,22 @@ std::unique_ptr<RooAbsPdf> createModel(RooRealVar &x, std::string const &channel
    return std::unique_ptr<RooAbsPdf>{static_cast<RooAbsPdf *>(model.cloneTree())};
 }
 
+enum backend {
+   Reference,
+   BatchMode,
+   CodeSquashNumDiff,
+   CodeSquashAD
+};
+
 static void BM_RooFuncWrapper_ManyParams_Minimization(benchmark::State &state)
 {
    using namespace RooFit;
 
    counter++;
 
-   gInterpreter->ProcessLine("gErrorIgnoreLevel = 2001;");
-   auto &msg = RooMsgService::instance();
-   msg.setGlobalKillBelow(RooFit::WARNING);
+   // gInterpreter->ProcessLine("gErrorIgnoreLevel = 2001;");
+   // auto &msg = RooMsgService::instance();
+   // msg.setGlobalKillBelow(RooFit::WARNING);
 
    // Generate the same dataset for all backends.
    RooRandom::randomGenerator()->SetSeed(100);
@@ -117,26 +124,28 @@ static void BM_RooFuncWrapper_ManyParams_Minimization(benchmark::State &state)
    RooArgSet origParams;
    params.snapshot(origParams);
 
-   std::unique_ptr<RooAbsReal> nllRef{model.createNLL(data, EvalBackend::Legacy(), Offset("off"))};
-   std::unique_ptr<RooAbsReal> nllRefBatch{model.createNLL(data, EvalBackend::Cpu(), Offset("off"))};
-   std::unique_ptr<RooAbsReal> nllFunc{model.createNLL(data, EvalBackend::Codegen(), Offset("off"))};
-   std::unique_ptr<RooAbsReal> nllFuncNoGrad{model.createNLL(data, EvalBackend::CodegenNoGrad(), Offset("off"))};
+   std::cout << "nparams: " << params.size() << std::endl;
+
+   std::unique_ptr<RooAbsReal> nllRef{model.createNLL(data, EvalBackend::Legacy(), Offset("initial"))};
+   std::unique_ptr<RooAbsReal> nllRefBatch{model.createNLL(data, EvalBackend::Cpu(), Offset("initial"))};
+   std::unique_ptr<RooAbsReal> nllFunc{model.createNLL(data, EvalBackend::Codegen(), Offset("initial"))};
+   std::unique_ptr<RooAbsReal> nllFuncNoGrad{model.createNLL(data, EvalBackend::CodegenNoGrad(), Offset("initial"))};
 
    std::unique_ptr<RooMinimizer> m = nullptr;
 
    int code = state.range(0);
-   if (code == RooFitADBenchmarksUtils::backend::Reference) {
+   if (code == backend::Reference) {
       m = std::make_unique<RooMinimizer>(*nllRef);
-   } else if (code == RooFitADBenchmarksUtils::backend::CodeSquashNumDiff) {
+   } else if (code == backend::CodeSquashNumDiff) {
       m = std::make_unique<RooMinimizer>(*nllFuncNoGrad);
-   } else if (code == RooFitADBenchmarksUtils::backend::BatchMode) {
+   } else if (code == backend::BatchMode) {
       m = std::make_unique<RooMinimizer>(*nllRefBatch);
-   } else if (code == RooFitADBenchmarksUtils::backend::CodeSquashAD) {
+   } else if (code == backend::CodeSquashAD) {
       m = std::make_unique<RooMinimizer>(*nllFunc);
    }
 
    for (auto _ : state) {
-      m->setPrintLevel(-1);
+      // m->setPrintLevel(-1);
       m->setStrategy(0);
       params.assign(origParams);
 
@@ -146,7 +155,33 @@ static void BM_RooFuncWrapper_ManyParams_Minimization(benchmark::State &state)
 
 int main(int argc, char **argv)
 {
-   RooFitADBenchmarksUtils::doBenchmarks(BM_RooFuncWrapper_ManyParams_Minimization, 2, 2, 1, 20);
+   std::vector<long int> rangeLow{1, 2, 3, 4, 5, 6, 7, 8, 12, 17, 23, 28, 33, 38, 43, 49, 54, 59};
+   std::vector<long int> rangeHigh{64,  70,  75,  80,  85,  90,  96,  100, 110, 120, 130, 140,
+                                   150, 160, 170, 180, 190, 200, 225, 250, 275, 300, 325};
+   std::vector<long int> range;
+   range.insert(range.end(), rangeLow.begin(), rangeLow.end());
+   range.insert(range.end(), rangeHigh.begin(), rangeHigh.end());
+
+   // Run the minimization with the reference NLL
+   RooFitADBenchmarksUtils::doBenchmarks("NllReferenceMinimization", Reference,
+                                         BM_RooFuncWrapper_ManyParams_Minimization, range, 1);
+
+   // Run the minimization with the reference NLL (BatchMode)
+   RooFitADBenchmarksUtils::doBenchmarks("NllBatchModeMinimization", BatchMode,
+                                         BM_RooFuncWrapper_ManyParams_Minimization, range, 1);
+
+   // Run the minimization with the code-squashed version with AD.
+   RooFitADBenchmarksUtils::doBenchmarks("NllCodeSquash_AD", CodeSquashAD, BM_RooFuncWrapper_ManyParams_Minimization,
+                                         range, 1);
+   // RooFitADBenchmarksUtils::doBenchmarks("NllCodeSquash_AD", CodeSquashAD,
+   // BM_RooFuncWrapper_ManyParams_Minimization, {250}, 1);
+
+   // Run the minimization with the code-squashed version with numerical-diff.
+   // We can't go to more than 59 channels here, because since offsetting is
+   // not supported, the fit will not converge anymore if the number of bins is
+   // sufficiently high.
+   RooFitADBenchmarksUtils::doBenchmarks("NllCodeSquash_NumDiff", CodeSquashNumDiff,
+                                         BM_RooFuncWrapper_ManyParams_Minimization, rangeLow, 1);
 
    benchmark::Initialize(&argc, argv);
    benchmark::RunSpecifiedBenchmarks();
diff --git a/root/roofit/roofit/benchCodeSquashAD_make_plot.py b/root/roofit/roofit/benchCodeSquashAD_make_plot.py
new file mode 100644
index 000000000..b960e1af0
--- /dev/null
+++ b/root/roofit/roofit/benchCodeSquashAD_make_plot.py
@@ -0,0 +1,159 @@
+#!/usr/bin/env python
+# coding: utf-8
+
+
+# Run on the output of benchCodeSquashAD piped into a file:
+# $ ./benchCodeSquashAD &> benchCodeSquashAD.out
+filename = "benchCodeSquashAD.out"
+
+
+import numpy as np
+import matplotlib.pyplot as plt
+import pandas as pd
+
+
+steps = {
+    "NLL creation": "Creation of NLL object took ",
+    "Function JIT": "Function JIT time: ",
+    "Gradient generation": "Gradient generation time: ",
+    "Gradient to machine code": "Gradient IR to machine code time: ",
+    "Seeding step": "MnSeedGenerator Evaluated function and gradient in ",
+    "NegativeG2LineSearch": "NegativeG2LineSearch Done after ",
+    "Minimization": "VariableMetricBuilder Stop iterating after ",
+    "Hesse": "MnHesse Done after ",
+    "Total": "user	",
+}
+
+
+def time_string_to_seconds(s):
+    s = s.replace("ms", "*1e-3")
+    s = s.replace("s", "")
+    s = s.replace("min", "*60+")
+    s = s.replace("m", "*60+")
+    s = s.replace("μ", "*1e-6")
+
+    return float(eval(s))
+
+
+with open(filename, "r") as f:
+    lines = f.read().split("\n")
+
+
+def is_relevant(line):
+    if "NegativeG2LineSearch" in line:
+        return False
+    if "iterations" in line:
+        return True
+    if "nparams" in line:
+        return True
+    if "FVAL  = " in line:
+        return True
+    for key, val in steps.items():
+        if val in line:
+            return True
+    return False
+
+
+lines = list(filter(is_relevant, lines))
+
+
+names = ["NllReferenceMinimization", "NllBatchModeMinimization", "NllCodeSquash_NumDiff", "NllCodeSquash_AD"]
+
+backends = {
+    "NllReferenceMinimization": "legacy",
+    "NllBatchModeMinimization": "RooFit",
+    "NllCodeSquash_NumDiff": "Hardcoded",
+    "NllCodeSquash_AD": "RooFit AD",
+}
+
+nll_line = {
+    "NllReferenceMinimization": 1,
+    "NllBatchModeMinimization": 2,
+    "NllCodeSquash_NumDiff": 3,
+    "NllCodeSquash_AD": 7,
+}
+
+
+def analyze_block(lines):
+    # print(lines)
+    data = dict()
+
+    name = lines[12].split("/")[0]
+
+    data["backend"] = backends[name]
+
+    def line_to_seconds(l):
+        l = l.replace(" min ", "min")
+        return time_string_to_seconds(" ".join(l.split(" ")[-2:]))
+
+    data["create_nll"] = line_to_seconds(lines[nll_line[name]])
+    data["seeding"] = line_to_seconds(lines[9])
+    data["migrad"] = line_to_seconds(lines[10])
+    data["nparams"] = int(lines[0].split(" ")[-1]) - 1
+    data["fval"] = float(lines[11].split(" ")[-1])
+
+    data["jitting"] = sum(line_to_seconds(lines[i]) for i in [4, 5, 6])
+
+    return data
+
+
+m_block = 13
+
+
+datas = []
+i = 0
+while i < len(lines):
+    datas.append(analyze_block(lines[i : i + m_block]))
+    i += m_block
+
+
+df = pd.DataFrame(datas)
+df = df.query("nparams > 10 and nparams < 2500 and backend != 'legacy'")
+
+
+t = np.arange(0.01, 5.0, 0.01)
+s1 = np.sin(2 * np.pi * t)
+s2 = np.exp(-t)
+s3 = np.sin(4 * np.pi * t)
+
+f, (a0, a1) = plt.subplots(2, 1, gridspec_kw={"height_ratios": [2, 1]})
+
+dfs = dict()
+
+
+for backend, df_g in df.groupby("backend"):
+    dfs[backend] = df_g
+    nparams = df_g["nparams"]
+    vals = df_g.eval("migrad + seeding")
+    # vals = df_g.eval("fval")
+    if backend == "codegen_no_grad":
+        continue
+    if backend == "RooFit AD":
+        a0.plot(nparams, vals + 0 * df_g["jitting"], label=backend)
+        a0.plot(nparams, df_g["jitting"], label="JIT time", color="k", linewidth=1, linestyle="--")
+    else:
+        a0.plot(nparams, vals, label=backend)
+        # plt.plot(nparams, df_g["jitting"], label=backend + "_jit")
+
+nparams = dfs["RooFit"]["nparams"]
+
+a0.legend(loc="upper left")
+
+plt.tick_params("x", labelsize=6)
+
+# make these tick labels invisible
+# plt.tick_params('x', labelbottom=False)
+vals = dfs["RooFit"]["migrad"].values / dfs["RooFit AD"]["migrad"].values
+
+a1.plot(nparams, vals, color="k")
+
+a1.set_xlabel("Number of parameters")
+a0.set_ylabel("Minimization time [s]")
+a0.set_ylim(0, 30)
+a0.set_xlim(0, nparams.to_numpy()[-1])
+
+a1.set_ylabel("AD speedup")
+a1.set_ylim(1, 4.2)
+a1.set_xlim(0, nparams.to_numpy()[-1])
+
+plt.savefig("scaling_study.png")