From 620599f63a2b95d54664d4fcad313e8d8d758906 Mon Sep 17 00:00:00 2001 From: Kevin Boyd Date: Fri, 29 May 2026 14:12:13 -0400 Subject: [PATCH 1/2] Add --skip-nvmolkit (tfd) and --no-rdkit/--no-nvmolkit (cross_similarity) tfd_bench: `--skip-nvmolkit` to mirror the pre-existing `--skip-rdkit`, allowing RDKit-only runs (faster wall time on large sweeps). cross_similarity_bench: `--no-rdkit`/`--no-nvmolkit` plumbed via direct sys.argv inspection at import time, since pyperf's child worker processes re-import this module and re-parse argv (registered argparse flags do not propagate consistently to them). --- benchmarks/cross_similarity_bench.py | 48 ++++++++++----- benchmarks/tfd_bench.py | 89 +++++++++++++++++----------- 2 files changed, 88 insertions(+), 49 deletions(-) diff --git a/benchmarks/cross_similarity_bench.py b/benchmarks/cross_similarity_bench.py index 9a8564ea..578a2132 100644 --- a/benchmarks/cross_similarity_bench.py +++ b/benchmarks/cross_similarity_bench.py @@ -13,6 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +import sys + import pyperf import torch from bench_utils import load_smiles @@ -42,6 +44,21 @@ def nvmolkit_sim_gpu_only(fps, sim_type): torch.cuda.synchronize() +# Strip --no-rdkit / --no-nvmolkit from argv before handing the rest to +# pyperf. pyperf's own argparser passes registered flags through correctly on +# the parent's parse_args() but does not consistently propagate them to its +# child worker processes (each child re-imports this module and re-parses +# sys.argv). Doing the checks here once, at import time, gives every process +# the same answer. +NO_RDKIT = "--no-rdkit" in sys.argv +if NO_RDKIT: + sys.argv = [a for a in sys.argv if a != "--no-rdkit"] +NO_NVMOLKIT = "--no-nvmolkit" in sys.argv +if NO_NVMOLKIT: + sys.argv = [a for a in sys.argv if a != "--no-nvmolkit"] +if NO_RDKIT and NO_NVMOLKIT: + raise SystemExit("cross_similarity_bench: cannot pass both --no-rdkit and --no-nvmolkit") + runner = pyperf.Runner(min_time=0.01, values=3, processes=1, loops=3) runner.metadata["description"] = "Cross Similarity benchmark" runner.argparser.add_argument( @@ -63,20 +80,23 @@ def nvmolkit_sim_gpu_only(fps, sim_type): mols += mols mols = mols[:max_size] -rdkit_fpgen = rdFingerprintGenerator.GetMorganGenerator(radius=3, fpSize=fpsize) -rdkit_fps_all = [rdkit_fpgen.GetFingerprint(mol) for mol in mols] -nvmolkit_fpgen = MorganFingerprintGenerator(radius=3, fpSize=fpsize) -nvmolkit_fps_all = torch.as_tensor(nvmolkit_fpgen.GetFingerprints(mols), device="cuda") +if not NO_RDKIT: + rdkit_fpgen = rdFingerprintGenerator.GetMorganGenerator(radius=3, fpSize=fpsize) + rdkit_fps_all = [rdkit_fpgen.GetFingerprint(mol) for mol in mols] +if not NO_NVMOLKIT: + nvmolkit_fpgen = MorganFingerprintGenerator(radius=3, fpSize=fpsize) + nvmolkit_fps_all = torch.as_tensor(nvmolkit_fpgen.GetFingerprints(mols), device="cuda") for sim_type in sim_types: for molNum in SIZES: - fps = rdkit_fps_all[:molNum] - nvmolkit_fps_cu = nvmolkit_fps_all[:molNum].contiguous() - - runner.args.values = 1 if molNum > CPU_SINGLE_VALUE_ABOVE else default_values - name = f"rdkit_{sim_type}sim_fpsize_{fpsize}_{molNum}mols" - runner.bench_func(name, rdkit_sim, fps, sim_type, metadata={"name": name}) - - runner.args.values = default_values - name2 = f"nvmolkit_gpu-only_{sim_type}sim_fpsize_{fpsize}_{molNum}mols_gpu_only" - runner.bench_func(name2, nvmolkit_sim_gpu_only, nvmolkit_fps_cu, sim_type, metadata={"name": name2}) + if not NO_RDKIT: + fps = rdkit_fps_all[:molNum] + runner.args.values = 1 if molNum > CPU_SINGLE_VALUE_ABOVE else default_values + name = f"rdkit_{sim_type}sim_fpsize_{fpsize}_{molNum}mols" + runner.bench_func(name, rdkit_sim, fps, sim_type, metadata={"name": name}) + + if not NO_NVMOLKIT: + nvmolkit_fps_cu = nvmolkit_fps_all[:molNum].contiguous() + runner.args.values = default_values + name2 = f"nvmolkit_gpu-only_{sim_type}sim_fpsize_{fpsize}_{molNum}mols_gpu_only" + runner.bench_func(name2, nvmolkit_sim_gpu_only, nvmolkit_fps_cu, sim_type, metadata={"name": name2}) diff --git a/benchmarks/tfd_bench.py b/benchmarks/tfd_bench.py index 036fdc79..b0b23979 100644 --- a/benchmarks/tfd_bench.py +++ b/benchmarks/tfd_bench.py @@ -226,6 +226,7 @@ def load_pkl_files(pkl_paths: List[str]) -> List[Chem.Mol]: def run_benchmarks( input_mols: List[Chem.Mol] | None = None, skip_rdkit: bool = False, + skip_nvmolkit: bool = False, output_file: str = "tfd_results.csv", smiles_file: str = None, mol_counts: List[int] = None, @@ -238,6 +239,7 @@ def run_benchmarks( Args: input_mols: Parsed RDKit molecules without conformers (unused when preloaded_mols given). skip_rdkit: If True, skip RDKit benchmarks (faster for large runs) + skip_nvmolkit: If True, skip nvMolKit GPU benchmarks (RDKit-only mode) output_file: Output CSV file path smiles_file: Path to SMILES file (used to locate sibling pickle files) mol_counts: List of molecule counts to benchmark @@ -250,6 +252,9 @@ def run_benchmarks( Returns: DataFrame with benchmark results """ + if skip_rdkit and skip_nvmolkit: + raise ValueError("cannot disable both RDKit and nvMolKit") + if mol_counts is None: mol_counts = [1, 5, 10, 25, 50, 100] @@ -322,45 +327,50 @@ def run_benchmarks( result["rdkit_time_ms"] = None result["rdkit_std_ms"] = None - try: - t, s = time_it(lambda: bench_nvmol_gpu_list(mols)) - result["nvmol_gpu_list_time_ms"] = t - result["nvmol_gpu_list_std_ms"] = s - print(f" nvMolKit (GPU list): {t:8.2f} ms (+/- {s:.2f})") - except Exception as e: - print(f" nvMolKit GPU list failed: {e}") - result["nvmol_gpu_list_time_ms"] = None + if not skip_nvmolkit: + try: + t, s = time_it(lambda: bench_nvmol_gpu_list(mols)) + result["nvmol_gpu_list_time_ms"] = t + result["nvmol_gpu_list_std_ms"] = s + print(f" nvMolKit (GPU list): {t:8.2f} ms (+/- {s:.2f})") + except Exception as e: + print(f" nvMolKit GPU list failed: {e}") + result["nvmol_gpu_list_time_ms"] = None - try: - t, s = time_it(lambda: bench_nvmol_gpu_numpy(mols)) - result["nvmol_gpu_numpy_time_ms"] = t - result["nvmol_gpu_numpy_std_ms"] = s - print(f" nvMolKit (GPU numpy): {t:8.2f} ms (+/- {s:.2f})") - except Exception as e: - print(f" nvMolKit GPU numpy failed: {e}") - result["nvmol_gpu_numpy_time_ms"] = None + try: + t, s = time_it(lambda: bench_nvmol_gpu_numpy(mols)) + result["nvmol_gpu_numpy_time_ms"] = t + result["nvmol_gpu_numpy_std_ms"] = s + print(f" nvMolKit (GPU numpy): {t:8.2f} ms (+/- {s:.2f})") + except Exception as e: + print(f" nvMolKit GPU numpy failed: {e}") + result["nvmol_gpu_numpy_time_ms"] = None - try: - t, s = time_it(lambda: bench_nvmol_gpu_tensor(mols)) - result["nvmol_gpu_tensor_time_ms"] = t - result["nvmol_gpu_tensor_std_ms"] = s - print(f" nvMolKit (GPU ten): {t:8.2f} ms (+/- {s:.2f})") - except Exception as e: - print(f" nvMolKit GPU tensor failed: {e}") + try: + t, s = time_it(lambda: bench_nvmol_gpu_tensor(mols)) + result["nvmol_gpu_tensor_time_ms"] = t + result["nvmol_gpu_tensor_std_ms"] = s + print(f" nvMolKit (GPU ten): {t:8.2f} ms (+/- {s:.2f})") + except Exception as e: + print(f" nvMolKit GPU tensor failed: {e}") + result["nvmol_gpu_tensor_time_ms"] = None + + speedups = {} + for key, label in [ + ("nvmol_gpu_list_time_ms", "GPU list"), + ("nvmol_gpu_numpy_time_ms", "GPU numpy"), + ("nvmol_gpu_tensor_time_ms", "GPU tensor"), + ]: + if result.get("rdkit_time_ms") and result.get(key): + speedups[label] = result["rdkit_time_ms"] / result[key] + + for label, val in speedups.items(): + print(f" Speedup {label:>10s} vs RDKit: {val:.1f}x") + else: + result["nvmol_gpu_list_time_ms"] = None + result["nvmol_gpu_numpy_time_ms"] = None result["nvmol_gpu_tensor_time_ms"] = None - speedups = {} - for key, label in [ - ("nvmol_gpu_list_time_ms", "GPU list"), - ("nvmol_gpu_numpy_time_ms", "GPU numpy"), - ("nvmol_gpu_tensor_time_ms", "GPU tensor"), - ]: - if result.get("rdkit_time_ms") and result.get(key): - speedups[label] = result["rdkit_time_ms"] / result[key] - - for label, val in speedups.items(): - print(f" Speedup {label:>10s} vs RDKit: {val:.1f}x") - results.append(result) # Create DataFrame and save @@ -408,6 +418,11 @@ def main(): action="store_true", help="Skip RDKit benchmarks (faster)", ) + parser.add_argument( + "--skip-nvmolkit", + action="store_true", + help="Skip nvMolKit GPU benchmarks (RDKit-only mode)", + ) parser.add_argument( "--pkl-file", type=str, @@ -434,6 +449,9 @@ def main(): ) args = parser.parse_args() + if args.skip_rdkit and args.skip_nvmolkit: + parser.error("cannot pass both --skip-rdkit and --skip-nvmolkit") + preloaded_mols = None input_mols = None @@ -485,6 +503,7 @@ def main(): run_benchmarks( input_mols=input_mols, skip_rdkit=args.skip_rdkit, + skip_nvmolkit=args.skip_nvmolkit, output_file=args.output, smiles_file=args.smiles_file, mol_counts=args.num_mols, From 3cde28247c17d0c5af11e3ed0f02998f32ef283d Mon Sep 17 00:00:00 2001 From: Kevin Boyd Date: Fri, 29 May 2026 17:35:53 -0400 Subject: [PATCH 2/2] cross_similarity: clarify why --no-rdkit/--no-nvmolkit are stripped from argv --- benchmarks/cross_similarity_bench.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/benchmarks/cross_similarity_bench.py b/benchmarks/cross_similarity_bench.py index 578a2132..6b32f1e7 100644 --- a/benchmarks/cross_similarity_bench.py +++ b/benchmarks/cross_similarity_bench.py @@ -44,12 +44,10 @@ def nvmolkit_sim_gpu_only(fps, sim_type): torch.cuda.synchronize() -# Strip --no-rdkit / --no-nvmolkit from argv before handing the rest to -# pyperf. pyperf's own argparser passes registered flags through correctly on -# the parent's parse_args() but does not consistently propagate them to its -# child worker processes (each child re-imports this module and re-parses -# sys.argv). Doing the checks here once, at import time, gives every process -# the same answer. +# --no-rdkit / --no-nvmolkit gate the module-level fingerprint setup below, +# which runs at import time before pyperf's Runner parses args. Read them +# directly from argv and strip them so pyperf's argparser doesn't reject the +# unknown flags. NO_RDKIT = "--no-rdkit" in sys.argv if NO_RDKIT: sys.argv = [a for a in sys.argv if a != "--no-rdkit"]