From 0ed8d4d1f71754f193323025b225c9cfc121e6f4 Mon Sep 17 00:00:00 2001 From: Sam Avis Date: Fri, 6 Jun 2025 18:07:36 +0100 Subject: [PATCH 1/4] Added pytest-profiling dep. Created unit test folder --- pyproject.toml | 1 + tests/{ => unit}/conftest.py | 0 tests/{ => unit}/test_grid.py | 0 tests/{ => unit}/test_initialization.py | 0 tests/{ => unit}/test_placeholder.py | 0 5 files changed, 1 insertion(+) rename tests/{ => unit}/conftest.py (100%) rename tests/{ => unit}/test_grid.py (100%) rename tests/{ => unit}/test_initialization.py (100%) rename tests/{ => unit}/test_placeholder.py (100%) diff --git a/pyproject.toml b/pyproject.toml index 3c8646a..e90a848 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -14,6 +14,7 @@ dev = [ "pre-commit>=4.2.0", "ruff>=0.11.4", "pytest>=7.0", + "pytest-profiling", ] [tool.pytest.ini_options] diff --git a/tests/conftest.py b/tests/unit/conftest.py similarity index 100% rename from tests/conftest.py rename to tests/unit/conftest.py diff --git a/tests/test_grid.py b/tests/unit/test_grid.py similarity index 100% rename from tests/test_grid.py rename to tests/unit/test_grid.py diff --git a/tests/test_initialization.py b/tests/unit/test_initialization.py similarity index 100% rename from tests/test_initialization.py rename to tests/unit/test_initialization.py diff --git a/tests/test_placeholder.py b/tests/unit/test_placeholder.py similarity index 100% rename from tests/test_placeholder.py rename to tests/unit/test_placeholder.py From 14b5af5724a852a7abe0f1fd64cdf8e91860580e Mon Sep 17 00:00:00 2001 From: Sam Avis Date: Mon, 9 Jun 2025 13:53:42 +0100 Subject: [PATCH 2/4] Add profiling test --- pyproject.toml | 3 ++- tests/{unit => }/conftest.py | 0 tests/profiling/test_profiling.py | 18 ++++++++++++++++++ 3 files changed, 20 insertions(+), 1 deletion(-) rename tests/{unit => }/conftest.py (100%) create mode 100644 tests/profiling/test_profiling.py diff --git a/pyproject.toml b/pyproject.toml index e90a848..85bd93d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,9 +15,10 @@ dev = [ "ruff>=0.11.4", "pytest>=7.0", "pytest-profiling", + "flameprof", ] [tool.pytest.ini_options] testpaths = ["tests"] python_files = "test_*.py" -addopts = "-v" +addopts = "-v -m 'not profiling'" diff --git a/tests/unit/conftest.py b/tests/conftest.py similarity index 100% rename from tests/unit/conftest.py rename to tests/conftest.py diff --git a/tests/profiling/test_profiling.py b/tests/profiling/test_profiling.py new file mode 100644 index 0000000..5a00973 --- /dev/null +++ b/tests/profiling/test_profiling.py @@ -0,0 +1,18 @@ +"""Test(s) to use with pytest-profiling to identify bottlenecks in the code.""" +import pytest + +@pytest.mark.profiling +def test_profiling(monkeypatch): + + # Define the command line arguments + import sys + argv = ['dementpy.py', 'grassland', 'output', '20250402', 'scrubland'] + monkeypatch.setattr(sys, 'argv', argv) + + # Move to subfolder so input and output folders will be correct + import os + os.chdir('src') + + # Run dementpy + import dementpy + dementpy.main() From 10731c005e8ca846008ee7a7833a51c85139724c Mon Sep 17 00:00:00 2001 From: Sam Avis Date: Mon, 9 Jun 2025 14:13:29 +0100 Subject: [PATCH 3/4] Prevent dementpy from running automatically on import --- src/dementpy.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/dementpy.py b/src/dementpy.py index c04f8b6..daa0148 100644 --- a/src/dementpy.py +++ b/src/dementpy.py @@ -103,4 +103,5 @@ def main(): os.chdir('../'+output_folder) export(Output_init, site, 
 outname)
-main()
\ No newline at end of file
+if __name__ == '__main__':
+    main()

From 905c2f8c491387f33f9bcbfa972df5c52d4a9dbc Mon Sep 17 00:00:00 2001
From: "M. A. Kowalski"
Date: Mon, 16 Jun 2025 13:16:35 +0100
Subject: [PATCH 4/4] test: script to track performance regression

Adapted from pyrealm. Allows two profiled runs of the code to be compared and
highlights the functions whose performance has dropped by more than a
threshold.

Note that at the moment DEMENTpy exhibits high variance in the runtime of
individual functions (observed up to 15%), so the check with default settings
(5% tolerance) will likely fail even when comparing identical code versions.
---
 profiling/run_benchmarking.py | 594 ++++++++++++++++++++++++++++++++++
 1 file changed, 594 insertions(+)
 create mode 100644 profiling/run_benchmarking.py

diff --git a/profiling/run_benchmarking.py b/profiling/run_benchmarking.py
new file mode 100644
index 0000000..b9d311d
--- /dev/null
+++ b/profiling/run_benchmarking.py
@@ -0,0 +1,594 @@
+"""Run profile benchmarking and generate benchmarking graphics.
+
+Shamelessly adapted from `pyrealm `_
+with some modifications:
+ - Removed hardcoded `pyrealm`-specific things (paths, docs etc.)
+ - Added classes to the AST map (it turns out they can show up in the profile)
+ - Refactored reading of `pstats` data. Use the `stats` dictionary of `pstats.Stats`
+   directly to avoid problems with the mis-formatted table and retain more precision.
+
+Original licence
+----------------
+
+MIT License
+
+Copyright (c) 2023 David Orme
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+"""
+
+import ast
+import datetime
+import pstats
+import sys
+import textwrap
+from argparse import (
+    ArgumentDefaultsHelpFormatter,
+    ArgumentParser,
+    RawDescriptionHelpFormatter,
+)
+from pathlib import Path
+
+import matplotlib.pyplot as plt
+import numpy as np
+import pandas as pd
+
+
+def get_function_map(root: Path) -> dict[tuple[str, int, str], str]:
+    """Create an AST function map.
+
+    This function uses the AST parser to scan the codebase and identify all of
+    the function definitions that might be referenced within profiling stats. The AST
+    nodes are then used to build a dictionary that maps call information from profiling
+    - which uses line numbers to identify calls within source files - onto call ids from
+    the package AST structure, which will be more stable across versions.
+
+    The dictionary is keyed using (source file name, line number, call name) and the
+    values are strings that identify functions within source files and methods within
+    classes within source files.
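+
+    For example (an illustrative entry, not taken from a real profile), a method
+    ``update`` of a class ``Grid`` whose ``def`` statement is on line 42 of
+    ``src/grid.py`` would appear in the map as::
+
+        {("src/grid.py", 42, "update"): "src/grid.py:Grid.update"}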
+
+    One added difficulty is that the line numbering differs between the two approaches.
+    The line numbers reported by `ast` nodes are the _actual_ line where the `def` or
+    `class` statement is found. The line numbers reported by `pstats` are offset if the
+    callable has decorators, so this mapping function has to find the lowest line number
+    of the function node and any decorator nodes.
+
+    Warning:
+        In order to separate class methods from functions, this function relies on
+        `ast.walk` walking `ast.ClassDef` nodes before `ast.FunctionDef` nodes and then
+        adds a `class_name` attribute to functions in the class body. That attribute can
+        then be checked when the `ast.FunctionDef` node is revisited later in the walk.
+        This is a bit of a hack.
+
+    Returns:
+        A dictionary mapping profile call ids to AST based call id.
+    """
+
+    ast_map = {}
+
+    # Visit all the python modules below the root and add FunctionDef details to the
+    # ast_map dictionary. This relies on a hack - ClassDef are walked before FunctionDef
+    # and so methods can be discovered and annotated with the class_name attribute
+    # before they are added to the ast_map. https://stackoverflow.com/questions/64225210
+    for src_file in root.rglob("*.py"):
+        parsed_ast = ast.parse(src_file.read_text())
+
+        for node in ast.walk(parsed_ast):
+            if isinstance(node, ast.FunctionDef):
+                # Find the line number used by profiling, which includes any decorators
+                lineno = min([d.lineno for d in [*node.decorator_list, node]])  # type: ignore [attr-defined]
+
+                if hasattr(node, "class_name"):
+                    ast_map[(str(src_file), lineno, node.name)] = (
+                        f"{src_file}:{node.class_name}"  # type: ignore [attr-defined]
+                        f".{node.name}"
+                    )
+                else:
+                    ast_map[(str(src_file), lineno, node.name)] = (
+                        f"{src_file}:{node.name}"
+                    )
+            if isinstance(node, ast.ClassDef):
+                # Class initialisation can be expensive enough to show up in the
+                # profile, so classes need to be kept in the AST map as well.
+                lineno = min([d.lineno for d in [*node.decorator_list, node]])  # type: ignore [attr-defined]
+                ast_map[(str(src_file), lineno, node.name)] = f"{src_file}:{node.name}"
+                for child in node.body:
+                    if isinstance(child, ast.FunctionDef):
+                        child.class_name = node.name  # type: ignore [attr-defined]
+
+    return ast_map
+
+
+def func_name_to_str(file_line_name: tuple[str, int, str]) -> str:
+    """Write the `print_stats` formatted function name.
+
+    Args:
+        file_line_name: `pstats.Stats.stats` function info key. `file` is '~' and
+            `line` is 0 when the function is built-in.
+
+    Returns:
+        The function name in the `pstats.Stats.print_stats` format. This follows
+        the `pstats.func_std_string` helper.
+    """
+    file, line_number, name = file_line_name
+    if (file, line_number) == ("~", 0):
+        if name.startswith("<") and name.endswith(">"):
+            return f"{{{name[1:-1]}}}"
+        else:
+            return name
+    else:
+        return f"{file}:{line_number}({name})"
+
+
+def read_profiling_stats(prof_path: Path) -> pd.DataFrame:
+    """Put the profiling data into a DataFrame.
+
+    The format of the DataFrame mimics pstats.Stats.print_stats().
+    It will have the following format:
+
+    | ncalls | tottime | tottime_percall | cumtime | cumtime_percall | filename:lineno(function) |
+    |--------|---------|-----------------|---------|-----------------|---------------------------|
+    | ...    | ...     | ...             | ...     | ...             | ...                       |
+
+    Note
+    ----
+    The times per call are calculated per "external" call for recursive functions,
+    the same as in the `print_stats` output.
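+
+    Example (illustrative; it assumes a ``prof/combined.prof`` file produced by an
+    earlier ``pytest --profile`` run of the profiling test)::
+
+        df = read_profiling_stats(Path("prof/combined.prof"))
+        print(df.sort_values("cumtime", ascending=False).head())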
+    """
+    stats_dict = pstats.Stats(str(prof_path)).stats  # type: ignore [attr-defined]
+
+    column_names = [
+        "ncalls",
+        "tottime",
+        "tottime_percall",
+        "cumtime",
+        "cumtime_percall",
+        "filename:lineno(function)",
+    ]
+
+    data = list()
+    for key, item in stats_dict.items():
+        # We drop the links to the caller info
+        external_calls, total_calls, total_time, cumulative_time, _ = item
+        name = func_name_to_str(key)
+        data.append(
+            (
+                f"{total_calls}"
+                if external_calls == total_calls
+                else f"{external_calls}/{total_calls}",
+                total_time,
+                total_time / external_calls,
+                cumulative_time,
+                cumulative_time / external_calls,
+                name,
+            )
+        )
+    return pd.DataFrame(data, columns=column_names)
+
+
+def convert_and_filter_prof_file(
+    prof_path: Path,
+    label: str,
+    root_dir: Path,
+    exclude: list[str] = ["{.*}", "<.*>", "/lib/", "/tests/"],
+) -> pd.DataFrame:
+    """Convert profiling output to a standard data frame.
+
+    The function reads the contents of a ``.prof`` file (typically
+    ``prof/combined.prof``) generated by running the profiling test suite and returns
+    the profiling data as a standardised `pandas.DataFrame`.
+
+    The profiling results include a field 'filename:lineno(function)', which identifies
+    each profiled code object. Many of these will be from functions outside of the
+    codebase and these are excluded using a list of regex patterns:
+
+    * '/lib/' excludes standard and site packages,
+    * '{.*}' excludes profiling of '{built-in ...}' and similar,
+    * '<.*>' excludes profiling of '<lambda>', '<listcomp>' and similar,
+    * '/tests/' excludes the test functions and classes calling the code.
+
+    The remaining rows should show the performance of just the codebase.
+
+    Args:
+        prof_path: Path to the profiling output.
+        label: A string used to label a particular profiling run, typically a commit SHA.
+        root_dir: Path to the directory with the package sources (e.g. `src`, `mypackage`).
+        exclude: A list of patterns used to exclude rows from the profiling stats.
+    """
+
+    df = read_profiling_stats(prof_path)
+
+    # Reduce to rows not matching any of the regex exclude patterns
+    exclude_rows = pd.DataFrame(
+        [df["filename:lineno(function)"].str.contains(excl) for excl in exclude]
+    ).any()
+    df = df[~exclude_rows]
+
+    # Add a timestamp from the file creation date
+    m_time = datetime.datetime.fromtimestamp(prof_path.stat().st_mtime)
+    df["timestamp"] = m_time.isoformat(timespec="seconds")
+
+    # Split the filename, lineno, and function
+    df[["filename", "lineno", "function"]] = df.pop(
+        "filename:lineno(function)"
+    ).str.extract(r"(.*):(.*?)\((.*)\)", expand=True)
+
+    # Make all source paths relative to the parent of the root directory,
+    # i.e. root_dir/subfolder/file.py
+    df["filename"] = df["filename"].str.replace(
+        str(root_dir.resolve()) + "/", str(root_dir) + "/"
+    )
+
+    # Map the profiling data onto the AST structure for the package
+    ast_map = get_function_map(root_dir)
+
+    # Use the profile data to look up the AST callable id
+    df["process_id"] = df.apply(
+        lambda rw: ast_map[(rw.filename, int(rw.lineno), rw.function)], axis=1
+    )
+
+    # Add the provided git commit SHA for information
+    df["label"] = label
+
+    # Add fields to ignore particular benchmarks and allow regression
+    df["ignore_result"] = False
+    df["ignore_justification"] = ""
+
+    return df
+
+
+def run_benchmark(
+    incoming: pd.DataFrame,
+    database_path: Path,
+    fail_data_path: Path,
+    plot_path: Path | None = None,
+    tolerance: float = 0.05,
+    n_runs: int = 5,
+    update_on_pass: bool = False,
+) -> bool:
+    """Run benchmark checks on profiling results.
+
+    This function takes new profiling results and benchmarks key performance indicators
+    against the best results from the N most recent runs in the profiling database. The
+    incoming data must have all performance indicators within the given tolerance of the
+    fastest of the past N runs for all functionality that is present in both performance
+    datasets.
+
+    If any functionality fails the performance testing, the failing functions are
+    written to a data file along with the observed ('incoming') and target timings. If
+    the performance benchmarking passes, the database can optionally be updated to
+    include the incoming performance data. This is typically only used when a pull
+    request is merged into the develop or main branches, to extend the test database
+    ready for the next benchmarking run.
+
+    Args:
+        incoming: A data frame of incoming profiling data.
+        database_path: The path to the database of previous profiling data. A new
+            database file is created here if it does not already exist.
+        fail_data_path: A path to write out data on functions that fail profiling.
+        plot_path: An optional path for creating a benchmarking plot.
+        tolerance: Fractional acceptable change in performance.
+        n_runs: The number of most recent runs to use.
+        update_on_pass: Should the incoming data be added to the database.
+    """
+
+    if not database_path.exists():
+        print(f"Creating new database: {database_path}")
+        incoming.to_csv(database_path, index=False)
+        return True
+
+    # Read database and reduce to most recent n runs.
+    database = pd.read_csv(database_path)
+    n_recent_timestamps = sorted(database["timestamp"].unique())[-n_runs:]
+    database = database.loc[database["timestamp"].isin(n_recent_timestamps)]
+
+    # Find the best (minimum) previous indicator values for each process_id from those
+    # runs in the database that have not explicitly been ignored. Note that min() here
+    # takes the minimum value for each column within the group.
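+    # For example (illustrative numbers), if "src/grid.py:Grid.update" has a cumtime of
+    # 0.31, 0.27 and 0.29 in the retained runs, its cumtime target becomes 0.27.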
+    kpis = ["tottime_percall", "cumtime_percall", "tottime", "cumtime"]
+    targets = (
+        database.loc[~database["ignore_result"], ["process_id", *kpis]]
+        .groupby("process_id")
+        .min()
+    )
+    targets.columns = targets.columns + "_target"
+
+    # Combine the incoming and database profiling and merge with targets on process_id
+    # to get the observed and target values for each function
+    incoming_sha = incoming["label"].unique()[0]
+    combined = pd.concat([database, incoming])
+    combined["is_incoming"] = combined["label"] == incoming_sha
+    combined = combined.merge(targets, how="left", on="process_id")
+
+    # Calculate the relative KPIs for each kpi
+    # TODO - handle zeros
+    for kpi in kpis:
+        combined["relative_" + kpi] = combined[kpi] / combined[kpi + "_target"]
+
+    # Find rows where performance change of incoming profiling is outside tolerance
+    threshold = 1 + tolerance
+    kpi_fail = [combined[f"relative_{kpi}"] > threshold for kpi in kpis]
+    failing_rows = combined[pd.DataFrame(kpi_fail).any() & combined["is_incoming"]]
+
+    # Plot before handling failure
+    if plot_path is not None:
+        create_benchmark_plot(
+            plot_path=plot_path, combined=combined, threshold=threshold
+        )
+
+    # If any rows fail then save out the rows for all results on the failing
+    # process_ids and return False
+    if not failing_rows.empty:
+        failure_info = combined[combined["process_id"].isin(failing_rows["process_id"])]
+        failure_info.to_csv(fail_data_path, index=False)
+        return False
+
+    # Should the incoming data be added to the database?
+    if update_on_pass:
+        # Drop the fields added by this function and save the updated combined data
+        combined.drop(
+            columns=["is_incoming"]
+            + [f"relative_{kpi}" for kpi in kpis]
+            + [f"{kpi}_target" for kpi in kpis],
+            inplace=True,
+        )
+        combined.to_csv(database_path, index=False)
+
+    return True
+
+
+def create_benchmark_plot(
+    plot_path: Path,
+    combined: pd.DataFrame,
+    threshold: float = 1.05,
+    kpi: str = "cumtime_percall",
+) -> None:
+    """Plot the benchmarking results.
+
+    This function generates a PNG plot that shows the relative performance of each
+    labelled code object. Performance is always relative to the fastest performance
+    found in previous profiling runs and this is shown in square brackets alongside the
+    label.
+
+    The incoming performance is shown as circles and previous versions are shown as
+    numbers. A vertical line indicates the threshold for failing the benchmarking.
+    Previous values that have been explicitly set as `ignore_result` are shown in light
+    grey for information.
+
+    Args:
+        plot_path: An output path for the plot.
+        combined: The combined profiling data to be plotted.
+        threshold: The upper threshold to pass benchmarking.
+        kpi: A string specifying the performance metric to plot.
+    """
+
+    # Sort labels to display in order from most time consuming to least and split into
+    # previous and incoming values.
+    combined = combined.sort_values(by=f"{kpi}_target")
+    incoming = combined[combined["is_incoming"]]
+    database = combined[~combined["is_incoming"]]
+
+    # Get the code version labels for the previous and incoming versions
+    previous_versions = (
+        database[["timestamp", "label"]]
+        .drop_duplicates()
+        .sort_values(by="timestamp", ascending=False)
+    )
+    incoming_version = incoming["label"].unique()[0]
+
+    # A4 portrait
+    plt.figure(figsize=(11.69, 8.27))
+
+    # Plot each previous version using text 1...n as plot symbols, with lighter grey for
+    # values that have been set as ignore_result.
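+    # (matplotlib renders "$...$" marker strings with mathtext, so each previous run is
+    # drawn using its 1-based run index as the point symbol.)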
+    for idx, label in enumerate(previous_versions["label"]):
+        subset = combined[combined["label"] == label]
+
+        plt.scatter(
+            subset[f"relative_{kpi}"],
+            subset["process_id"]
+            + subset[f"{kpi}_target"].map(lambda x: f" [{x:0.3f}]"),
+            marker=f"${idx + 1}$",
+            color=[
+                "lightgray" if val else "dimgrey" for val in subset["ignore_result"]
+            ],
+            label=label,
+        )
+
+    # Plot the incoming data as open circles, blue points when inside tolerance and red
+    # if outside tolerance.
+    fail = [True if val > threshold else False for val in incoming[f"relative_{kpi}"]]
+    plt.scatter(
+        incoming[f"relative_{kpi}"],
+        incoming["process_id"]
+        + incoming[f"{kpi}_target"].map(lambda x: f" [{x:0.3f}]"),
+        marker="o",
+        facecolors="none",
+        color=["firebrick" if val else "royalblue" for val in fail],
+        label=f"{incoming_version} [incoming]",
+    )
+
+    # Add a vertical line for the threshold value and a legend
+    plt.axvline(threshold, linestyle="--", color="indianred", linewidth=0.3)
+    plt.legend(loc="lower right", bbox_to_anchor=(1.0, 1.05), ncol=6, frameon=False)
+    plt.xlabel(f"Relative {kpi}")
+    plt.tight_layout()
+
+    # Colour the labels of failing rows.
+    labels = plt.gca().get_yticklabels()
+    labels = np.array([label.get_text().split()[0] for label in labels])
+    for idx_incoming, process_id in enumerate(incoming["process_id"]):
+        if fail[idx_incoming]:
+            idx_label = np.argwhere(labels == process_id).squeeze()
+            plt.gca().get_yticklabels()[idx_label].set_color("firebrick")
+
+    # Save to file
+    plt.savefig(plot_path, dpi=600)
+
+
+def generate_test_database() -> None:
+    """Function to create a test database.
+
+    This function takes a single profiling output (`prof/combined.prof`) and duplicates
+    the profiling data 5 times with some minor variation to create a local test database
+    for checking the benchmarking processes.
+
+    The function can be run from the package root at the command line using:
+
+    python -c "from profiling.run_benchmarking import *; generate_test_database()"
+    """
+
+    # Create testing database
+    prof_path = Path("prof/combined.prof")
+    df = convert_and_filter_prof_file(prof_path=prof_path, label="test2")
+    copies: list = []
+
+    for test in range(1, 6):
+        df2 = df.copy()
+        df2["label"] = f"test_{test}"
+        df["timestamp"] = df["timestamp"].astype("datetime64[ns]") + np.timedelta64(
+            1, "D"
+        )
+        kpis = ["tottime_percall", "cumtime_percall", "tottime", "cumtime"]
+
+        for kpi in kpis:
+            df2[kpi] = df[kpi] * np.random.uniform(
+                low=0.98, high=1.03, size=len(df[kpi])
+            )
+        copies.append(df2)
+
+    test_db = pd.concat(copies)
+    test_db.to_csv("profiling/test-profiling-database.csv")
+
+
+class RawDescAndDefaultsHelpFormatter(
+    RawDescriptionHelpFormatter, ArgumentDefaultsHelpFormatter
+):
+    """Combine argparse formatter helpers and restrict width."""
+
+    def __init__(self, prog: str) -> None:
+        RawDescriptionHelpFormatter.__init__(self, prog, width=88, max_help_position=30)
+
+
+def run_benchmarking_cli() -> None:
+    """Run the package benchmarking.
+
+    This function runs the standard benchmarking for the package. The profiling
+    tests in the test suite generate a set of combined profile data across the package
+    functionality. This command then reads in that combined profile data and compares
+    it to previous benchmark data.
+
+    The profiling excludes all profiled code objects matching regex patterns provided
+    using the `--exclude` argument. The defaults exclude standard and site packages,
+    built-in code and various other standard code, and are intended to reduce the
+    benchmarking to only code objects within the package.
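+
+    A typical invocation from the package root looks like this (illustrative file
+    names; the label would normally be a commit SHA)::
+
+        python profiling/run_benchmarking.py prof/combined.prof \
+            profiling/profiling-database.csv profiling/benchmark-fails.csv \
+            $(git rev-parse HEAD) --plot-path profiling/benchmark.png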
+    """
+
+    if run_benchmarking_cli.__doc__ is not None:
+        doc = " " + run_benchmarking_cli.__doc__
+    else:
+        doc = "Python in -OO mode"
+
+    parser = ArgumentParser(
+        description=textwrap.dedent(doc),
+        formatter_class=RawDescAndDefaultsHelpFormatter,
+    )
+    parser.add_argument(
+        "prof_path",
+        type=Path,
+        help="Path to pytest-profiling output",
+    )
+    parser.add_argument(
+        "database_path",
+        type=Path,
+        help="Path to benchmarking database",
+    )
+    parser.add_argument(
+        "fail_data_path",
+        type=Path,
+        help="Output path for data on benchmark fails",
+    )
+    parser.add_argument(
+        "label",
+        help="A text label for the incoming profiling results, typically a commit SHA",
+    )
+    parser.add_argument(
+        "--exclude",
+        action="append",
+        help="Exclude profiled code matching a regex pattern, can be repeated",
+        default=["{.*}", "<.*>", "/lib/", "/tests/"],
+    )
+    parser.add_argument(
+        "--n-runs",
+        help="Number of most recent runs to use in benchmarking",
+        type=int,
+        default=5,
+    )
+    parser.add_argument(
+        "--tolerance",
+        help="Tolerance of time cost increase in benchmarking",
+        type=float,
+        default=0.05,
+    )
+    parser.add_argument(
+        "--update-on-pass",
+        help="Update the profiling database if benchmarking passes",
+        action="store_true",
+    )
+    parser.add_argument(
+        "--plot-path", help="Generate a benchmarking plot to this path", type=Path
+    )
+    parser.add_argument(
+        "--root-dir", help="Path to the package directory", type=Path, default="src"
+    )
+
+    args = parser.parse_args()
+
+    # Check that the profiling results exist before trying to read them
+    if not args.prof_path.exists():
+        raise FileNotFoundError(f"Cannot find the profiling file at {args.prof_path}.")
+
+    incoming = convert_and_filter_prof_file(
+        prof_path=args.prof_path,
+        label=args.label,
+        root_dir=args.root_dir,
+        exclude=args.exclude,
+    )
+
+    success = run_benchmark(
+        incoming=incoming,
+        database_path=args.database_path,
+        fail_data_path=args.fail_data_path,
+        tolerance=args.tolerance,
+        n_runs=args.n_runs,
+        update_on_pass=args.update_on_pass,
+        plot_path=args.plot_path,
+    )
+
+    if not success:
+        print("Benchmarking failed.")
+        sys.exit(1)
+
+    print("Benchmarking passed.")
+    sys.exit(0)
+
+
+if __name__ == "__main__":
+    run_benchmarking_cli()