From 971620440617c8e478c08520327007e45361344d Mon Sep 17 00:00:00 2001
From: Anyang Peng <137014849+anyangml@users.noreply.github.com>
Date: Mon, 8 Jun 2026 16:41:17 +0800
Subject: [PATCH 1/2] feat: support rxn39

---
 lambench/metrics/downstream_tasks_metrics.yml |   4 +
 lambench/metrics/post_process.py              |   1 +
 lambench/models/ase_models.py                 |  12 ++
 .../tasks/calculator/calculator_tasks.yml     |   3 +
 .../tasks/calculator/rxn_path39/rxn_path39.py | 105 ++++++++++++++++++
 5 files changed, 125 insertions(+)
 create mode 100644 lambench/tasks/calculator/rxn_path39/rxn_path39.py

diff --git a/lambench/metrics/downstream_tasks_metrics.yml b/lambench/metrics/downstream_tasks_metrics.yml
index caf8cab..eb9b361 100644
--- a/lambench/metrics/downstream_tasks_metrics.yml
+++ b/lambench/metrics/downstream_tasks_metrics.yml
@@ -32,6 +32,10 @@ rxn_barrier:
   domain: Molecules
   metrics: [MAE]
   dummy: {"MAE": 20.975}
+rxn_path39:
+  domain: Molecules
+  metrics: [MAE] # RMSE is not used for calculating metrics
+  dummy: {"MAE": 34.109} # "RMSE": 43.150
 pressure:
   domain: Inorganic Materials
   metrics: [MAE]
diff --git a/lambench/metrics/post_process.py b/lambench/metrics/post_process.py
index 7a8e769..f72092e 100644
--- a/lambench/metrics/post_process.py
+++ b/lambench/metrics/post_process.py
@@ -120,6 +120,7 @@ def process_domain_specific_for_one_model(model: BaseLargeAtomModel):
         "vacancy",
         "binding_energy",
         "rxn_barrier",
+        "rxn_path39",
         "pressure",
         "stacking_fault",
         "interface",
diff --git a/lambench/models/ase_models.py b/lambench/models/ase_models.py
index e1f20b4..bcfdd39 100644
--- a/lambench/models/ase_models.py
+++ b/lambench/models/ase_models.py
@@ -280,6 +280,18 @@ def evaluate(
             elif task.task_name == "wiggle150":
                 from lambench.tasks.calculator.wiggle150.wiggle150 import run_inference
 
+                assert task.test_data is not None
+                return {
+                    "metrics": run_inference(
+                        self,
+                        task.test_data,
+                    )
+                }
+            elif task.task_name == "rxn_path39":
+                from lambench.tasks.calculator.rxn_path39.rxn_path39 import (
+                    run_inference,
+                )
+
                 assert task.test_data is not None
                 return {
                     "metrics": run_inference(
diff --git a/lambench/tasks/calculator/calculator_tasks.yml b/lambench/tasks/calculator/calculator_tasks.yml
index 24bcdb5..39272d9 100644
--- a/lambench/tasks/calculator/calculator_tasks.yml
+++ b/lambench/tasks/calculator/calculator_tasks.yml
@@ -23,6 +23,9 @@ neb:
 wiggle150:
   test_data: /bohr/lambench-wiggle150-yazy/v1/Wiggle150.traj
   calculator_params: null
+rxn_path39:
+  test_data: /bohr/lambench-rxn39-755z/v2/trajs
+  calculator_params: null
 elastic:
   test_data: /bohr/lambench-elastic-9qdt/v1/elastic.json
   calculator_params:
diff --git a/lambench/tasks/calculator/rxn_path39/rxn_path39.py b/lambench/tasks/calculator/rxn_path39/rxn_path39.py
new file mode 100644
index 0000000..6f94174
--- /dev/null
+++ b/lambench/tasks/calculator/rxn_path39/rxn_path39.py
@@ -0,0 +1,105 @@
+"""
+RXN-Path-39: 13 organic reactions (wB97M-V/def2-TZVPD), 3 path-sampling
+trajectories each, 11 arc-length-equidistant frames per trajectory.
+
+For each trajectory the first frame (index 0) is chosen as the reference.
+The task measures how accurately a LAM reproduces the relative energies of all
+other frames with respect to that reference, i.e.
+
+    ΔE_DFT(i) = E_DFT(i) − E_DFT(frame 0) [kcal/mol]
+    ΔE_LAM(i) = E_LAM(i) − E_LAM(frame 0) [kcal/mol]
+
+and reports MAE and RMSE over all 39 × 10 = 390 (reaction, frame) pairs.
+"""
+
+from pathlib import Path
+import logging
+
+import numpy as np
+from ase.io import Trajectory
+from sklearn.metrics import mean_absolute_error, root_mean_squared_error
+
+from lambench.models.ase_models import ASEModel
+
+EV_TO_KCAL = 23.0609  # 1 eV = 23.0609 kcal/mol
+
+
+def run_inference(model: ASEModel, test_data: Path) -> dict[str, float]:
+    """
+    Parameters
+    ----------
+    model : ASEModel
+    test_data : Path
+        Root of the trajectory tree. Expected layout::
+
+            test_data/
+              <reaction>/
+                traj_0.traj
+                traj_1.traj
+                traj_2.traj
+              ...
+
+    Returns
+    -------
+    dict with keys "MAE" and "RMSE" in kcal/mol.
+    """
+    calc = model.calc
+    label_diffs: list[float] = []
+    pred_diffs: list[float] = []
+
+    traj_files = sorted(test_data.rglob("traj_*.traj"))
+    if not traj_files:
+        raise FileNotFoundError(f"No traj_*.traj files found under {test_data}")
+
+    for traj_path in traj_files:
+        frames = list(Trajectory(traj_path))
+
+        # DFT reference energies (eV, stored by SinglePointCalculator)
+        dft_energies = np.array([a.get_potential_energy() for a in frames])
+        ref_dft_kcal = dft_energies[0] * EV_TO_KCAL
+
+        # LAM energy for the first frame (reference)
+        frames[0].calc = calc
+        try:
+            ref_pred_kcal = frames[0].get_potential_energy() * EV_TO_KCAL
+        except Exception as e:
+            logging.error(
+                f"Failed predicting reference frame (idx=0) in {traj_path}: {e}"
+            )
+            continue  # skip this trajectory entirely
+
+        # Relative energies for every non-reference frame
+        for i, atoms in enumerate(frames):
+            if i == 0:
+                continue
+
+            label_diffs.append(dft_energies[i] * EV_TO_KCAL - ref_dft_kcal)
+
+            atoms.calc = calc
+            try:
+                pred_kcal = atoms.get_potential_energy() * EV_TO_KCAL
+            except Exception as e:
+                logging.error(
+                    f"Failed predicting frame {i} of {traj_path}: {e}"
+                )
+                pred_kcal = np.nan
+            pred_diffs.append(pred_kcal - ref_pred_kcal)
+
+    label_arr = np.array(label_diffs)
+    pred_arr = np.array(pred_diffs)
+    valid = np.isfinite(pred_arr)
+
+    if not valid.any():
+        logging.error("All predictions failed; returning NaN metrics.")
+        return {"MAE": np.nan, "RMSE": np.nan}
+
+    if not valid.all():
+        n_failed = int((~valid).sum())
+        logging.warning(
+            f"{n_failed} frame(s) failed inference and were excluded from metrics."
+        )
+
+    return {
+        "MAE": float(mean_absolute_error(label_arr[valid], pred_arr[valid])),
+        "RMSE": float(root_mean_squared_error(label_arr[valid], pred_arr[valid])),
+    }

From 6cbf4b5d53b385b79b15e9920f5c4fa229ef4cf5 Mon Sep 17 00:00:00 2001
From: Anyang Peng <137014849+anyangml@users.noreply.github.com>
Date: Mon, 8 Jun 2026 16:42:58 +0800
Subject: [PATCH 2/2] chore: lint

---
 lambench/tasks/calculator/rxn_path39/rxn_path39.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/lambench/tasks/calculator/rxn_path39/rxn_path39.py b/lambench/tasks/calculator/rxn_path39/rxn_path39.py
index 6f94174..bba8834 100644
--- a/lambench/tasks/calculator/rxn_path39/rxn_path39.py
+++ b/lambench/tasks/calculator/rxn_path39/rxn_path39.py
@@ -79,9 +79,7 @@ def run_inference(model: ASEModel, test_data: Path) -> dict[str, float]:
             try:
                 pred_kcal = atoms.get_potential_energy() * EV_TO_KCAL
             except Exception as e:
-                logging.error(
-                    f"Failed predicting frame {i} of {traj_path}: {e}"
-                )
+                logging.error(f"Failed predicting frame {i} of {traj_path}: {e}")
                 pred_kcal = np.nan
             pred_diffs.append(pred_kcal - ref_pred_kcal)
 