From db1f1f03e6992fcda50b37d99c89170fd93272c1 Mon Sep 17 00:00:00 2001 From: sfluegel Date: Thu, 7 May 2026 12:57:40 +0200 Subject: [PATCH 1/2] if sanitation fails, raise a warning and return none --- chebi_utils/sdf_extractor.py | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/chebi_utils/sdf_extractor.py b/chebi_utils/sdf_extractor.py index 187e1a9..527abaa 100644 --- a/chebi_utils/sdf_extractor.py +++ b/chebi_utils/sdf_extractor.py @@ -3,6 +3,7 @@ from __future__ import annotations import gzip +from typing import Optional import warnings from pathlib import Path @@ -12,21 +13,16 @@ from chebi_utils.obo_extractor import _chebi_id_to_str -def _sanitize_molecule(mol: Chem.Mol) -> Chem.Mol: - """Sanitize molecule, mirroring the ChEBI molecule processing.""" +def _sanitize_molecule(mol: Chem.Mol) -> Optional[Chem.Mol]: + """Sanitize molecule""" from chembl_structure_pipeline.standardizer import update_mol_valences mol = update_mol_valences(mol) - Chem.SanitizeMol( - mol, - sanitizeOps=Chem.SanitizeFlags.SANITIZE_FINDRADICALS - | Chem.SanitizeFlags.SANITIZE_KEKULIZE - | Chem.SanitizeFlags.SANITIZE_SETAROMATICITY - | Chem.SanitizeFlags.SANITIZE_SETCONJUGATION - | Chem.SanitizeFlags.SANITIZE_SETHYBRIDIZATION - | Chem.SanitizeFlags.SANITIZE_SYMMRINGS, - catchErrors=True, - ) + try: + Chem.SanitizeMol(mol) + except Exception as e: + warnings.warn(f"Failed to sanitize molecule: {e}", stacklevel=2) + mol = None return mol @@ -53,6 +49,9 @@ def _parse_molblock(molblock: str, chebi_id: str | None = None) -> Chem.Mol | No warnings.warn(f"Failed to parse molblock for {chebi_id}", stacklevel=2) return None mol = _sanitize_molecule(mol) + if mol is None: + warnings.warn(f"Failed to sanitize molblock for {chebi_id}", stacklevel=2) + return mol From f7d3921a83542be13878be37cb0d7c645ae5ea7d Mon Sep 17 00:00:00 2001 From: sfluegel Date: Thu, 7 May 2026 13:34:59 +0200 Subject: [PATCH 2/2] fix ruff --- chebi_utils/sdf_extractor.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/chebi_utils/sdf_extractor.py b/chebi_utils/sdf_extractor.py index 527abaa..5d961d7 100644 --- a/chebi_utils/sdf_extractor.py +++ b/chebi_utils/sdf_extractor.py @@ -3,9 +3,9 @@ from __future__ import annotations import gzip -from typing import Optional import warnings from pathlib import Path +from typing import Optional import pandas as pd from rdkit import Chem @@ -51,7 +51,7 @@ def _parse_molblock(molblock: str, chebi_id: str | None = None) -> Chem.Mol | No mol = _sanitize_molecule(mol) if mol is None: warnings.warn(f"Failed to sanitize molblock for {chebi_id}", stacklevel=2) - + return mol