Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 32 additions & 14 deletions benchmarks/cross_similarity_bench.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import sys

import pyperf
import torch
from bench_utils import load_smiles
Expand Down Expand Up @@ -42,6 +44,19 @@ def nvmolkit_sim_gpu_only(fps, sim_type):
torch.cuda.synchronize()


# --no-rdkit / --no-nvmolkit gate the module-level fingerprint setup below,
# which runs at import time before pyperf's Runner parses args. Read them
# directly from argv and strip them so pyperf's argparser doesn't reject the
# unknown flags.
NO_RDKIT = "--no-rdkit" in sys.argv
NO_NVMOLKIT = "--no-nvmolkit" in sys.argv
if NO_RDKIT or NO_NVMOLKIT:
    sys.argv = [a for a in sys.argv if a not in ("--no-rdkit", "--no-nvmolkit")]
if NO_RDKIT and NO_NVMOLKIT:
    raise SystemExit("cross_similarity_bench: cannot pass both --no-rdkit and --no-nvmolkit")


def _forward_backend_flags(cmd, args):
    """Re-append the backend-selection flags to a pyperf worker command line.

    pyperf runs every benchmark in a worker subprocess that re-executes this
    script. The worker command line is rebuilt by pyperf from the script path
    and pyperf's own options, so flags that were stripped from ``sys.argv``
    above would otherwise be lost: the worker would build both fingerprint
    sets and register benchmarks in a different order than the parent.

    Args:
        cmd: Worker command line (list of str); modified in place.
        args: Parsed pyperf arguments (unused; the flags live in module
            globals because they are consumed before pyperf parses argv).
    """
    if NO_RDKIT:
        cmd.append("--no-rdkit")
    if NO_NVMOLKIT:
        cmd.append("--no-nvmolkit")


runner = pyperf.Runner(
    min_time=0.01,
    values=3,
    processes=1,
    loops=3,
    add_cmdline_args=_forward_backend_flags,
)
runner.metadata["description"] = "Cross Similarity benchmark"
runner.argparser.add_argument(
Expand All @@ -63,20 +78,23 @@ def nvmolkit_sim_gpu_only(fps, sim_type):
mols += mols
mols = mols[:max_size]

rdkit_fpgen = rdFingerprintGenerator.GetMorganGenerator(radius=3, fpSize=fpsize)
rdkit_fps_all = [rdkit_fpgen.GetFingerprint(mol) for mol in mols]
nvmolkit_fpgen = MorganFingerprintGenerator(radius=3, fpSize=fpsize)
nvmolkit_fps_all = torch.as_tensor(nvmolkit_fpgen.GetFingerprints(mols), device="cuda")
if not NO_RDKIT:
rdkit_fpgen = rdFingerprintGenerator.GetMorganGenerator(radius=3, fpSize=fpsize)
rdkit_fps_all = [rdkit_fpgen.GetFingerprint(mol) for mol in mols]
if not NO_NVMOLKIT:
nvmolkit_fpgen = MorganFingerprintGenerator(radius=3, fpSize=fpsize)
nvmolkit_fps_all = torch.as_tensor(nvmolkit_fpgen.GetFingerprints(mols), device="cuda")

for sim_type in sim_types:
for molNum in SIZES:
fps = rdkit_fps_all[:molNum]
nvmolkit_fps_cu = nvmolkit_fps_all[:molNum].contiguous()

runner.args.values = 1 if molNum > CPU_SINGLE_VALUE_ABOVE else default_values
name = f"rdkit_{sim_type}sim_fpsize_{fpsize}_{molNum}mols"
runner.bench_func(name, rdkit_sim, fps, sim_type, metadata={"name": name})

runner.args.values = default_values
name2 = f"nvmolkit_gpu-only_{sim_type}sim_fpsize_{fpsize}_{molNum}mols_gpu_only"
runner.bench_func(name2, nvmolkit_sim_gpu_only, nvmolkit_fps_cu, sim_type, metadata={"name": name2})
if not NO_RDKIT:
fps = rdkit_fps_all[:molNum]
runner.args.values = 1 if molNum > CPU_SINGLE_VALUE_ABOVE else default_values
name = f"rdkit_{sim_type}sim_fpsize_{fpsize}_{molNum}mols"
runner.bench_func(name, rdkit_sim, fps, sim_type, metadata={"name": name})

if not NO_NVMOLKIT:
nvmolkit_fps_cu = nvmolkit_fps_all[:molNum].contiguous()
runner.args.values = default_values
name2 = f"nvmolkit_gpu-only_{sim_type}sim_fpsize_{fpsize}_{molNum}mols_gpu_only"
runner.bench_func(name2, nvmolkit_sim_gpu_only, nvmolkit_fps_cu, sim_type, metadata={"name": name2})
89 changes: 54 additions & 35 deletions benchmarks/tfd_bench.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,7 @@ def load_pkl_files(pkl_paths: List[str]) -> List[Chem.Mol]:
def run_benchmarks(
input_mols: List[Chem.Mol] | None = None,
skip_rdkit: bool = False,
skip_nvmolkit: bool = False,
output_file: str = "tfd_results.csv",
smiles_file: str = None,
mol_counts: List[int] = None,
Expand All @@ -238,6 +239,7 @@ def run_benchmarks(
Args:
input_mols: Parsed RDKit molecules without conformers (unused when preloaded_mols given).
skip_rdkit: If True, skip RDKit benchmarks (faster for large runs)
skip_nvmolkit: If True, skip nvMolKit GPU benchmarks (RDKit-only mode)
output_file: Output CSV file path
smiles_file: Path to SMILES file (used to locate sibling pickle files)
mol_counts: List of molecule counts to benchmark
Expand All @@ -250,6 +252,9 @@ def run_benchmarks(
Returns:
DataFrame with benchmark results
"""
if skip_rdkit and skip_nvmolkit:
raise ValueError("cannot disable both RDKit and nvMolKit")

if mol_counts is None:
mol_counts = [1, 5, 10, 25, 50, 100]

Expand Down Expand Up @@ -322,45 +327,50 @@ def run_benchmarks(
result["rdkit_time_ms"] = None
result["rdkit_std_ms"] = None

try:
t, s = time_it(lambda: bench_nvmol_gpu_list(mols))
result["nvmol_gpu_list_time_ms"] = t
result["nvmol_gpu_list_std_ms"] = s
print(f" nvMolKit (GPU list): {t:8.2f} ms (+/- {s:.2f})")
except Exception as e:
print(f" nvMolKit GPU list failed: {e}")
result["nvmol_gpu_list_time_ms"] = None
if not skip_nvmolkit:
try:
t, s = time_it(lambda: bench_nvmol_gpu_list(mols))
result["nvmol_gpu_list_time_ms"] = t
result["nvmol_gpu_list_std_ms"] = s
print(f" nvMolKit (GPU list): {t:8.2f} ms (+/- {s:.2f})")
except Exception as e:
print(f" nvMolKit GPU list failed: {e}")
result["nvmol_gpu_list_time_ms"] = None

try:
t, s = time_it(lambda: bench_nvmol_gpu_numpy(mols))
result["nvmol_gpu_numpy_time_ms"] = t
result["nvmol_gpu_numpy_std_ms"] = s
print(f" nvMolKit (GPU numpy): {t:8.2f} ms (+/- {s:.2f})")
except Exception as e:
print(f" nvMolKit GPU numpy failed: {e}")
result["nvmol_gpu_numpy_time_ms"] = None
try:
t, s = time_it(lambda: bench_nvmol_gpu_numpy(mols))
result["nvmol_gpu_numpy_time_ms"] = t
result["nvmol_gpu_numpy_std_ms"] = s
print(f" nvMolKit (GPU numpy): {t:8.2f} ms (+/- {s:.2f})")
except Exception as e:
print(f" nvMolKit GPU numpy failed: {e}")
result["nvmol_gpu_numpy_time_ms"] = None

try:
t, s = time_it(lambda: bench_nvmol_gpu_tensor(mols))
result["nvmol_gpu_tensor_time_ms"] = t
result["nvmol_gpu_tensor_std_ms"] = s
print(f" nvMolKit (GPU ten): {t:8.2f} ms (+/- {s:.2f})")
except Exception as e:
print(f" nvMolKit GPU tensor failed: {e}")
try:
t, s = time_it(lambda: bench_nvmol_gpu_tensor(mols))
result["nvmol_gpu_tensor_time_ms"] = t
result["nvmol_gpu_tensor_std_ms"] = s
print(f" nvMolKit (GPU ten): {t:8.2f} ms (+/- {s:.2f})")
except Exception as e:
print(f" nvMolKit GPU tensor failed: {e}")
result["nvmol_gpu_tensor_time_ms"] = None

speedups = {}
for key, label in [
("nvmol_gpu_list_time_ms", "GPU list"),
("nvmol_gpu_numpy_time_ms", "GPU numpy"),
("nvmol_gpu_tensor_time_ms", "GPU tensor"),
]:
if result.get("rdkit_time_ms") and result.get(key):
speedups[label] = result["rdkit_time_ms"] / result[key]

for label, val in speedups.items():
print(f" Speedup {label:>10s} vs RDKit: {val:.1f}x")
else:
result["nvmol_gpu_list_time_ms"] = None
result["nvmol_gpu_numpy_time_ms"] = None
result["nvmol_gpu_tensor_time_ms"] = None

speedups = {}
for key, label in [
("nvmol_gpu_list_time_ms", "GPU list"),
("nvmol_gpu_numpy_time_ms", "GPU numpy"),
("nvmol_gpu_tensor_time_ms", "GPU tensor"),
]:
if result.get("rdkit_time_ms") and result.get(key):
speedups[label] = result["rdkit_time_ms"] / result[key]

for label, val in speedups.items():
print(f" Speedup {label:>10s} vs RDKit: {val:.1f}x")

results.append(result)

# Create DataFrame and save
Expand Down Expand Up @@ -408,6 +418,11 @@ def main():
action="store_true",
help="Skip RDKit benchmarks (faster)",
)
parser.add_argument(
"--skip-nvmolkit",
action="store_true",
help="Skip nvMolKit GPU benchmarks (RDKit-only mode)",
)
parser.add_argument(
"--pkl-file",
type=str,
Expand All @@ -434,6 +449,9 @@ def main():
)
args = parser.parse_args()

if args.skip_rdkit and args.skip_nvmolkit:
parser.error("cannot pass both --skip-rdkit and --skip-nvmolkit")

preloaded_mols = None
input_mols = None

Expand Down Expand Up @@ -485,6 +503,7 @@ def main():
run_benchmarks(
input_mols=input_mols,
skip_rdkit=args.skip_rdkit,
skip_nvmolkit=args.skip_nvmolkit,
output_file=args.output,
smiles_file=args.smiles_file,
mol_counts=args.num_mols,
Expand Down
Loading