From 0ed8d4d1f71754f193323025b225c9cfc121e6f4 Mon Sep 17 00:00:00 2001 From: Sam Avis Date: Fri, 6 Jun 2025 18:07:36 +0100 Subject: [PATCH 1/4] Added pytest-profiling dep. Created unit test folder --- pyproject.toml | 1 + tests/{ => unit}/conftest.py | 0 tests/{ => unit}/test_grid.py | 0 tests/{ => unit}/test_initialization.py | 0 tests/{ => unit}/test_placeholder.py | 0 5 files changed, 1 insertion(+) rename tests/{ => unit}/conftest.py (100%) rename tests/{ => unit}/test_grid.py (100%) rename tests/{ => unit}/test_initialization.py (100%) rename tests/{ => unit}/test_placeholder.py (100%) diff --git a/pyproject.toml b/pyproject.toml index 3c8646a..e90a848 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -14,6 +14,7 @@ dev = [ "pre-commit>=4.2.0", "ruff>=0.11.4", "pytest>=7.0", + "pytest-profiling", ] [tool.pytest.ini_options] diff --git a/tests/conftest.py b/tests/unit/conftest.py similarity index 100% rename from tests/conftest.py rename to tests/unit/conftest.py diff --git a/tests/test_grid.py b/tests/unit/test_grid.py similarity index 100% rename from tests/test_grid.py rename to tests/unit/test_grid.py diff --git a/tests/test_initialization.py b/tests/unit/test_initialization.py similarity index 100% rename from tests/test_initialization.py rename to tests/unit/test_initialization.py diff --git a/tests/test_placeholder.py b/tests/unit/test_placeholder.py similarity index 100% rename from tests/test_placeholder.py rename to tests/unit/test_placeholder.py From 14b5af5724a852a7abe0f1fd64cdf8e91860580e Mon Sep 17 00:00:00 2001 From: Sam Avis Date: Mon, 9 Jun 2025 13:53:42 +0100 Subject: [PATCH 2/4] Add profiling test --- pyproject.toml | 3 ++- tests/{unit => }/conftest.py | 0 tests/profiling/test_profiling.py | 18 ++++++++++++++++++ 3 files changed, 20 insertions(+), 1 deletion(-) rename tests/{unit => }/conftest.py (100%) create mode 100644 tests/profiling/test_profiling.py diff --git a/pyproject.toml b/pyproject.toml index e90a848..85bd93d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,9 +15,10 @@ dev = [ "ruff>=0.11.4", "pytest>=7.0", "pytest-profiling", + "flameprof", ] [tool.pytest.ini_options] testpaths = ["tests"] python_files = "test_*.py" -addopts = "-v" +addopts = "-v -m 'not profiling'" diff --git a/tests/unit/conftest.py b/tests/conftest.py similarity index 100% rename from tests/unit/conftest.py rename to tests/conftest.py diff --git a/tests/profiling/test_profiling.py b/tests/profiling/test_profiling.py new file mode 100644 index 0000000..5a00973 --- /dev/null +++ b/tests/profiling/test_profiling.py @@ -0,0 +1,18 @@ +"""Test(s) to use with pytest-profiling to identify bottlenecks in the code.""" +import pytest + +@pytest.mark.profiling +def test_profiling(monkeypatch): + + # Define the command line arguments + import sys + argv = ['dementpy.py', 'grassland', 'output', '20250402', 'scrubland'] + monkeypatch.setattr(sys, 'argv', argv) + + # Move to subfolder so input and output folders will be correct + import os + os.chdir('src') + + # Run dementpy + import dementpy + dementpy.main() From 10731c005e8ca846008ee7a7833a51c85139724c Mon Sep 17 00:00:00 2001 From: Sam Avis Date: Mon, 9 Jun 2025 14:13:29 +0100 Subject: [PATCH 3/4] Prevent dementpy from running automatically on import --- src/dementpy.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/dementpy.py b/src/dementpy.py index c04f8b6..daa0148 100644 --- a/src/dementpy.py +++ b/src/dementpy.py @@ -103,4 +103,5 @@ def main(): os.chdir('../'+output_folder) export(Output_init, site, 
 outname)
-main()
\ No newline at end of file
+if __name__ == '__main__':
+    main()

From 905c2f8c491387f33f9bcbfa972df5c52d4a9dbc Mon Sep 17 00:00:00 2001
From: "M. A. Kowalski"
Date: Mon, 16 Jun 2025 13:16:35 +0100
Subject: [PATCH 4/4] test: script to track performance regression

Adapted from pyrealm. Allows two profiled runs of the code to be compared and
highlights the functions whose performance has dropped by more than a
threshold.

Note that at the moment DEMENTpy exhibits high variance in the runtime of
individual functions (observed up to 15%), so the check with default settings
(5% tolerance) will likely fail even when comparing identical code versions.
---
 profiling/run_benchmarking.py | 594 ++++++++++++++++++++++++++++++++++
 1 file changed, 594 insertions(+)
 create mode 100644 profiling/run_benchmarking.py

diff --git a/profiling/run_benchmarking.py b/profiling/run_benchmarking.py
new file mode 100644
index 0000000..b9d311d
--- /dev/null
+++ b/profiling/run_benchmarking.py
@@ -0,0 +1,594 @@
+"""Run profile benchmarking and generate benchmarking graphics.
+
+Shamelessly adapted from `pyrealm `_
+with some modifications:
+ - Removed hardcoded `pyrealm`-specific things (paths, docs etc.)
+ - Added classes to the AST map (it turns out they can show up in the profile)
+ - Refactored reading of `pstats` data. Use the `stats` dictionary of `pstats.Stats`
+   directly to avoid problems with the mis-formatted table and retain more precision.
+
+Original licence
+----------------
+
+MIT License
+
+Copyright (c) 2023 David Orme
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+"""
+
+import ast
+import datetime
+import pstats
+import sys
+import textwrap
+from argparse import (
+    ArgumentDefaultsHelpFormatter,
+    ArgumentParser,
+    RawDescriptionHelpFormatter,
+)
+from pathlib import Path
+
+import matplotlib.pyplot as plt
+import numpy as np
+import pandas as pd
+
+
+def get_function_map(root: Path) -> dict[tuple[str, int, str], str]:
+    """Create an AST function map.
+
+    This function uses the AST parser to scan the codebase and identify all of
+    the function definitions that might be referenced within profiling stats. The AST
+    nodes are then used to build a dictionary that maps call information from profiling
+    - which uses line numbers to identify calls within source files - onto call ids from
+    the package AST structure, which will be more stable across versions.
+
+    The dictionary is keyed using (source file name, line number, call name) and the
+    values are strings that identify functions within source files and methods within
+    classes within source files.
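+
+    For example (an illustrative entry, not taken from a real profile), a method
+    ``update`` of a class ``Grid`` whose ``def`` statement is on line 42 of
+    ``src/grid.py`` would appear in the map as::
+
+        {("src/grid.py", 42, "update"): "src/grid.py:Grid.update"}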
+
+    One added difficulty is that the line numbering differs between the two approaches.
+    The line numbers reported by `ast` nodes are the _actual_ line where the `def` or
+    `class` statement is found. The line numbers reported by `pstats` are offset if the
+    callable has decorators, so this mapping function has to find the lowest line number
+    of the function node and any decorator nodes.
+
+    Warning:
+        In order to separate class methods from functions, this function relies on
+        `ast.walk` walking `ast.ClassDef` nodes before `ast.FunctionDef` nodes and then
+        adds a `class_name` attribute to functions in the class body. That attribute can
+        then be checked when the `ast.FunctionDef` node is revisited later in the walk.
+        This is a bit of a hack.
+
+    Returns:
+        A dictionary mapping profile call ids to AST based call id.
+    """
+
+    ast_map = {}
+
+    # Visit all the python modules below the root and add FunctionDef details to the
+    # ast_map dictionary. This relies on a hack - ClassDef are walked before FunctionDef
+    # and so methods can be discovered and annotated with the class_name attribute
+    # before they are added to the ast_map. https://stackoverflow.com/questions/64225210
+    for src_file in root.rglob("*.py"):
+        parsed_ast = ast.parse(src_file.read_text())
+
+        for node in ast.walk(parsed_ast):
+            if isinstance(node, ast.FunctionDef):
+                # Find the line number used by profiling, which includes any decorators
+                lineno = min([d.lineno for d in [*node.decorator_list, node]])  # type: ignore [attr-defined]
+
+                if hasattr(node, "class_name"):
+                    ast_map[(str(src_file), lineno, node.name)] = (
+                        f"{src_file}:{node.class_name}"  # type: ignore [attr-defined]
+                        f".{node.name}"
+                    )
+                else:
+                    ast_map[(str(src_file), lineno, node.name)] = (
+                        f"{src_file}:{node.name}"
+                    )
+            if isinstance(node, ast.ClassDef):
+                # Class initialisation can be expensive enough to show up in the
+                # profile, so classes need to be kept in the AST map as well.
+                lineno = min([d.lineno for d in [*node.decorator_list, node]])  # type: ignore [attr-defined]
+                ast_map[(str(src_file), lineno, node.name)] = f"{src_file}:{node.name}"
+                for child in node.body:
+                    if isinstance(child, ast.FunctionDef):
+                        child.class_name = node.name  # type: ignore [attr-defined]
+
+    return ast_map
+
+
+def func_name_to_str(file_line_name: tuple[str, int, str]) -> str:
+    """Write the `print_stats` formatted function name.
+
+    Args:
+        file_line_name: `pstats.Stats.stats` function info key. `file` is '~' and
+            `line` is 0 when the function is built-in.
+
+    Returns:
+        The function name in the `pstats.Stats.print_stats` format. This follows
+        the `pstats.func_std_string` helper.
+    """
+    file, line_number, name = file_line_name
+    if (file, line_number) == ("~", 0):
+        if name.startswith("<") and name.endswith(">"):
+            return f"{{{name[1:-1]}}}"
+        else:
+            return name
+    else:
+        return f"{file}:{line_number}({name})"
+
+
+def read_profiling_stats(prof_path: Path) -> pd.DataFrame:
+    """Put the profiling data into a DataFrame.
+
+    The format of the DataFrame mimics pstats.Stats.print_stats().
+    It will have the following format:
+
+    | ncalls | tottime | tottime_percall | cumtime | cumtime_percall | filename:lineno(function) |
+    |--------|---------|-----------------|---------|-----------------|---------------------------|
+    | ...    | ...     | ...             | ...     | ...             | ...                       |
+
+    Note
+    ----
+    The times per call are calculated per "external" call for recursive functions,
+    the same as in the `print_stats` output.
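+
+    Example (illustrative; it assumes a ``prof/combined.prof`` file produced by an
+    earlier ``pytest --profile`` run of the profiling test)::
+
+        df = read_profiling_stats(Path("prof/combined.prof"))
+        print(df.sort_values("cumtime", ascending=False).head())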
+    """
+    stats_dict = pstats.Stats(str(prof_path)).stats  # type: ignore [attr-defined]
+
+    column_names = [
+        "ncalls",
+        "tottime",
+        "tottime_percall",
+        "cumtime",
+        "cumtime_percall",
+        "filename:lineno(function)",
+    ]
+
+    data = list()
+    for key, item in stats_dict.items():
+        # We drop the links to the caller info
+        external_calls, total_calls, total_time, cumulative_time, _ = item
+        name = func_name_to_str(key)
+        data.append(
+            (
+                f"{total_calls}"
+                if external_calls == total_calls
+                else f"{external_calls}/{total_calls}",
+                total_time,
+                total_time / external_calls,
+                cumulative_time,
+                cumulative_time / external_calls,
+                name,
+            )
+        )
+    return pd.DataFrame(data, columns=column_names)
+
+
+def convert_and_filter_prof_file(
+    prof_path: Path,
+    label: str,
+    root_dir: Path,
+    exclude: list[str] = ["{.*}", "<.*>", "/lib/", "/tests/"],
+) -> pd.DataFrame:
+    """Convert profiling output to a standard data frame.
+
+    The function reads the contents of a ``.prof`` file (typically
+    ``prof/combined.prof``) generated by running the profiling test suite and returns
+    the profiling data as a standardised `pandas.DataFrame`.
+
+    The profiling results include a field 'filename:lineno(function)', which identifies
+    each profiled code object. Many of these will be from functions outside of the
+    codebase and these are excluded using a list of regex patterns:
+
+    * '/lib/' excludes standard and site packages,
+    * '{.*}' excludes profiling of '{built-in ...}' and similar,
+    * '<.*>' excludes profiling of '<lambda>', '<listcomp>' and similar,
+    * '/tests/' excludes the test functions and classes calling the code.
+
+    The remaining rows should show the performance of just the codebase.
+
+    Args:
+        prof_path: Path to the profiling output.
+        label: A string used to label a particular profiling run, typically a commit SHA.
+        root_dir: Path to the directory with the package sources (e.g. `src`, `mypackage`).
+        exclude: A list of patterns used to exclude rows from the profiling stats.
+    """
+
+    df = read_profiling_stats(prof_path)
+
+    # Reduce to rows not matching any of the regex exclude patterns
+    exclude_rows = pd.DataFrame(
+        [df["filename:lineno(function)"].str.contains(excl) for excl in exclude]
+    ).any()
+    df = df[~exclude_rows]
+
+    # Add a timestamp from the file creation date
+    m_time = datetime.datetime.fromtimestamp(prof_path.stat().st_mtime)
+    df["timestamp"] = m_time.isoformat(timespec="seconds")
+
+    # Split the filename, lineno, and function
+    df[["filename", "lineno", "function"]] = df.pop(
+        "filename:lineno(function)"
+    ).str.extract(r"(.*):(.*?)\((.*)\)", expand=True)
+
+    # Make all source paths relative to the parent of the root directory,
+    # i.e. root_dir/subfolder/file.py
+    df["filename"] = df["filename"].str.replace(
+        str(root_dir.resolve()) + "/", str(root_dir) + "/"
+    )
+
+    # Map the profiling data onto the AST structure for the package
+    ast_map = get_function_map(root_dir)
+
+    # Use the profile data to look up the AST callable id
+    df["process_id"] = df.apply(
+        lambda rw: ast_map[(rw.filename, int(rw.lineno), rw.function)], axis=1
+    )
+
+    # Add the provided git commit SHA for information
+    df["label"] = label
+
+    # Add fields to ignore particular benchmarks and allow regression
+    df["ignore_result"] = False
+    df["ignore_justification"] = ""
+
+    return df
+
+
+def run_benchmark(
+    incoming: pd.DataFrame,
+    database_path: Path,
+    fail_data_path: Path,
+    plot_path: Path | None = None,
+    tolerance: float = 0.05,
+    n_runs: int = 5,
+    update_on_pass: bool = False,
+) -> bool:
+    """Run benchmark checks on profiling results.
+
+    This function takes new profiling results and benchmarks key performance indicators
+    against the best results from the N most recent runs in the profiling database. The
+    incoming data must have all performance indicators within the given tolerance of the
+    fastest of the past N runs for all functionality that is present in both performance
+    datasets.
+
+    If any functionality fails the performance testing, the failing functions are
+    written to a data file along with the observed ('incoming') and target timings. If
+    the performance benchmarking passes, the database can optionally be updated to
+    include the incoming performance data. This is typically only used when a pull
+    request is merged into the develop or main branches, to extend the test database
+    ready for the next benchmarking run.
+
+    Args:
+        incoming: A data frame of incoming profiling data.
+        database_path: The path to the database of previous profiling data. A new
+            database file is created here if it does not already exist.
+        fail_data_path: A path to write out data on functions that fail profiling.
+        plot_path: An optional path for creating a benchmarking plot.
+        tolerance: Fractional acceptable change in performance.
+        n_runs: The number of most recent runs to use.
+        update_on_pass: Should the incoming data be added to the database.
+    """
+
+    if not database_path.exists():
+        print(f"Creating new database: {database_path}")
+        incoming.to_csv(database_path, index=False)
+        return True
+
+    # Read database and reduce to most recent n runs.
+    database = pd.read_csv(database_path)
+    n_recent_timestamps = sorted(database["timestamp"].unique())[-n_runs:]
+    database = database.loc[database["timestamp"].isin(n_recent_timestamps)]
+
+    # Find the best (minimum) previous indicator values for each process_id from those
+    # runs in the database that have not explicitly been ignored. Note that min() here
+    # takes the minimum value for each column within the group.
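+    # For example (illustrative numbers), if "src/grid.py:Grid.update" has a cumtime of
+    # 0.31, 0.27 and 0.29 in the retained runs, its cumtime target becomes 0.27.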
+    kpis = ["tottime_percall", "cumtime_percall", "tottime", "cumtime"]
+    targets = (
+        database.loc[~database["ignore_result"], ["process_id", *kpis]]
+        .groupby("process_id")
+        .min()
+    )
+    targets.columns = targets.columns + "_target"
+
+    # Combine the incoming and database profiling and merge with targets on process_id
+    # to get the observed and target values for each function
+    incoming_sha = incoming["label"].unique()[0]
+    combined = pd.concat([database, incoming])
+    combined["is_incoming"] = combined["label"] == incoming_sha
+    combined = combined.merge(targets, how="left", on="process_id")
+
+    # Calculate the relative KPIs for each kpi
+    # TODO - handle zeros
+    for kpi in kpis:
+        combined["relative_" + kpi] = combined[kpi] / combined[kpi + "_target"]
+
+    # Find rows where performance change of incoming profiling is outside tolerance
+    threshold = 1 + tolerance
+    kpi_fail = [combined[f"relative_{kpi}"] > threshold for kpi in kpis]
+    failing_rows = combined[pd.DataFrame(kpi_fail).any() & combined["is_incoming"]]
+
+    # Plot before handling failure
+    if plot_path is not None:
+        create_benchmark_plot(
+            plot_path=plot_path, combined=combined, threshold=threshold
+        )
+
+    # If any rows fail then save out the rows for all results on the failing
+    # process_ids and return False
+    if not failing_rows.empty:
+        failure_info = combined[combined["process_id"].isin(failing_rows["process_id"])]
+        failure_info.to_csv(fail_data_path, index=False)
+        return False
+
+    # Should the incoming data be added to the database?
+    if update_on_pass:
+        # Drop the fields added by this function and save the updated combined data
+        combined.drop(
+            columns=["is_incoming"]
+            + [f"relative_{kpi}" for kpi in kpis]
+            + [f"{kpi}_target" for kpi in kpis],
+            inplace=True,
+        )
+        combined.to_csv(database_path, index=False)
+
+    return True
+
+
+def create_benchmark_plot(
+    plot_path: Path,
+    combined: pd.DataFrame,
+    threshold: float = 1.05,
+    kpi: str = "cumtime_percall",
+) -> None:
+    """Plot the benchmarking results.
+
+    This function generates a PNG plot that shows the relative performance of each
+    labelled code object. Performance is always relative to the fastest performance
+    found in previous profiling runs and this is shown in square brackets alongside the
+    label.
+
+    The incoming performance is shown as circles and previous versions are shown as
+    numbers. A vertical line indicates the threshold for failing the benchmarking.
+    Previous values that have been explicitly set as `ignore_result` are shown in light
+    grey for information.
+
+    Args:
+        plot_path: An output path for the plot.
+        combined: The combined profiling data to be plotted.
+        threshold: The upper threshold to pass benchmarking.
+        kpi: A string specifying the performance metric to plot.
+    """
+
+    # Sort labels to display in order from most time consuming to least and split into
+    # previous and incoming values.
+    combined = combined.sort_values(by=f"{kpi}_target")
+    incoming = combined[combined["is_incoming"]]
+    database = combined[~combined["is_incoming"]]
+
+    # Get the code version labels for the previous and incoming versions
+    previous_versions = (
+        database[["timestamp", "label"]]
+        .drop_duplicates()
+        .sort_values(by="timestamp", ascending=False)
+    )
+    incoming_version = incoming["label"].unique()[0]
+
+    # A4 portrait
+    plt.figure(figsize=(11.69, 8.27))
+
+    # Plot each previous version using text 1...n as plot symbols, with lighter grey for
+    # values that have been set as ignore_result.
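+    # (matplotlib renders "$...$" marker strings with mathtext, so each previous run is
+    # drawn using its 1-based run index as the point symbol.)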
+    for idx, label in enumerate(previous_versions["label"]):
+        subset = combined[combined["label"] == label]
+
+        plt.scatter(
+            subset[f"relative_{kpi}"],
+            subset["process_id"]
+            + subset[f"{kpi}_target"].map(lambda x: f" [{x:0.3f}]"),
+            marker=f"${idx + 1}$",
+            color=[
+                "lightgray" if val else "dimgrey" for val in subset["ignore_result"]
+            ],
+            label=label,
+        )
+
+    # Plot the incoming data as open circles, blue points when inside tolerance and red
+    # if outside tolerance.
+    fail = [True if val > threshold else False for val in incoming[f"relative_{kpi}"]]
+    plt.scatter(
+        incoming[f"relative_{kpi}"],
+        incoming["process_id"]
+        + incoming[f"{kpi}_target"].map(lambda x: f" [{x:0.3f}]"),
+        marker="o",
+        facecolors="none",
+        color=["firebrick" if val else "royalblue" for val in fail],
+        label=f"{incoming_version} [incoming]",
+    )
+
+    # Add a vertical line for the threshold value and a legend
+    plt.axvline(threshold, linestyle="--", color="indianred", linewidth=0.3)
+    plt.legend(loc="lower right", bbox_to_anchor=(1.0, 1.05), ncol=6, frameon=False)
+    plt.xlabel(f"Relative {kpi}")
+    plt.tight_layout()
+
+    # Colour the labels of failing rows.
+    labels = plt.gca().get_yticklabels()
+    labels = np.array([label.get_text().split()[0] for label in labels])
+    for idx_incoming, process_id in enumerate(incoming["process_id"]):
+        if fail[idx_incoming]:
+            idx_label = np.argwhere(labels == process_id).squeeze()
+            plt.gca().get_yticklabels()[idx_label].set_color("firebrick")
+
+    # Save to file
+    plt.savefig(plot_path, dpi=600)
+
+
+def generate_test_database() -> None:
+    """Function to create a test database.
+
+    This function takes a single profiling output (`prof/combined.prof`) and duplicates
+    the profiling data 5 times with some minor variation to create a local test database
+    for checking the benchmarking processes.
+
+    The function can be run from the package root at the command line using:
+
+    python -c "from profiling.run_benchmarking import *; generate_test_database()"
+    """
+
+    # Create testing database
+    prof_path = Path("prof/combined.prof")
+    df = convert_and_filter_prof_file(prof_path=prof_path, label="test2")
+    copies: list = []
+
+    for test in range(1, 6):
+        df2 = df.copy()
+        df2["label"] = f"test_{test}"
+        df["timestamp"] = df["timestamp"].astype("datetime64[ns]") + np.timedelta64(
+            1, "D"
+        )
+        kpis = ["tottime_percall", "cumtime_percall", "tottime", "cumtime"]
+
+        for kpi in kpis:
+            df2[kpi] = df[kpi] * np.random.uniform(
+                low=0.98, high=1.03, size=len(df[kpi])
+            )
+        copies.append(df2)
+
+    test_db = pd.concat(copies)
+    test_db.to_csv("profiling/test-profiling-database.csv")
+
+
+class RawDescAndDefaultsHelpFormatter(
+    RawDescriptionHelpFormatter, ArgumentDefaultsHelpFormatter
+):
+    """Combine argparse formatter helpers and restrict width."""
+
+    def __init__(self, prog: str) -> None:
+        RawDescriptionHelpFormatter.__init__(self, prog, width=88, max_help_position=30)
+
+
+def run_benchmarking_cli() -> None:
+    """Run the package benchmarking.
+
+    This function runs the standard benchmarking for the package. The profiling
+    tests in the test suite generate a set of combined profile data across the package
+    functionality. This command then reads in that combined profile data and compares
+    it to previous benchmark data.
+
+    The profiling excludes all profiled code objects matching regex patterns provided
+    using the `--exclude` argument. The defaults exclude standard and site packages,
+    built-in code and various other standard code, and are intended to reduce the
+    benchmarking to only code objects within the package.
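+
+    A typical invocation from the package root looks like this (illustrative file
+    names; the label would normally be a commit SHA)::
+
+        python profiling/run_benchmarking.py prof/combined.prof \
+            profiling/profiling-database.csv profiling/benchmark-fails.csv \
+            $(git rev-parse HEAD) --plot-path profiling/benchmark.png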
+    """
+
+    if run_benchmarking_cli.__doc__ is not None:
+        doc = " " + run_benchmarking_cli.__doc__
+    else:
+        doc = "Python in -OO mode"
+
+    parser = ArgumentParser(
+        description=textwrap.dedent(doc),
+        formatter_class=RawDescAndDefaultsHelpFormatter,
+    )
+    parser.add_argument(
+        "prof_path",
+        type=Path,
+        help="Path to pytest-profiling output",
+    )
+    parser.add_argument(
+        "database_path",
+        type=Path,
+        help="Path to benchmarking database",
+    )
+    parser.add_argument(
+        "fail_data_path",
+        type=Path,
+        help="Output path for data on benchmark fails",
+    )
+    parser.add_argument(
+        "label",
+        help="A text label for the incoming profiling results, typically a commit SHA",
+    )
+    parser.add_argument(
+        "--exclude",
+        action="append",
+        help="Exclude profiled code matching a regex pattern, can be repeated",
+        default=["{.*}", "<.*>", "/lib/", "/tests/"],
+    )
+    parser.add_argument(
+        "--n-runs",
+        help="Number of most recent runs to use in benchmarking",
+        type=int,
+        default=5,
+    )
+    parser.add_argument(
+        "--tolerance",
+        help="Tolerance of time cost increase in benchmarking",
+        type=float,
+        default=0.05,
+    )
+    parser.add_argument(
+        "--update-on-pass",
+        help="Update the profiling database if benchmarking passes",
+        action="store_true",
+    )
+    parser.add_argument(
+        "--plot-path", help="Generate a benchmarking plot to this path", type=Path
+    )
+    parser.add_argument(
+        "--root-dir", help="Path to the package directory", type=Path, default="src"
+    )
+
+    args = parser.parse_args()
+
+    # Check that the profiling results exist before trying to read them
+    if not args.prof_path.exists():
+        raise FileNotFoundError(f"Cannot find the profiling file at {args.prof_path}.")
+
+    incoming = convert_and_filter_prof_file(
+        prof_path=args.prof_path,
+        label=args.label,
+        root_dir=args.root_dir,
+        exclude=args.exclude,
+    )
+
+    success = run_benchmark(
+        incoming=incoming,
+        database_path=args.database_path,
+        fail_data_path=args.fail_data_path,
+        tolerance=args.tolerance,
+        n_runs=args.n_runs,
+        update_on_pass=args.update_on_pass,
+        plot_path=args.plot_path,
+    )
+
+    if not success:
+        print("Benchmarking failed.")
+        sys.exit(1)
+
+    print("Benchmarking passed.")
+    sys.exit(0)
+
+
+if __name__ == "__main__":
+    run_benchmarking_cli()