From 7ca8c7513fbe5d32112de6dd01f5c8ec12d174b0 Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Tue, 24 Feb 2026 12:08:18 +0000 Subject: [PATCH 1/3] Fix ring-break/change detection logic by using paths only. --- src/BioSimSpace/Align/_merge.py | 157 +++++------------- .../Sandpit/Exscientia/Align/_merge.py | 157 +++++------------- 2 files changed, 84 insertions(+), 230 deletions(-) diff --git a/src/BioSimSpace/Align/_merge.py b/src/BioSimSpace/Align/_merge.py index 835286fb..a556c82f 100644 --- a/src/BioSimSpace/Align/_merge.py +++ b/src/BioSimSpace/Align/_merge.py @@ -1217,11 +1217,15 @@ def merge( return mol -def _is_ring_broken(conn0, conn1, idx0, idy0, idx1, idy1): +def _is_ring_broken(conn0, conn1, idx0, idy0, idx1, idy1, max_path=50): """ Internal function to test whether a perturbation changes the connectivity around two atoms such that a ring is broken. + Two atoms share a ring if and only if there are at least two distinct paths + between them in the connectivity graph. A ring is broken when this changes + between end states. + Parameters ---------- @@ -1242,74 +1246,38 @@ def _is_ring_broken(conn0, conn1, idx0, idy0, idx1, idy1): idy1 : Sire.Mol.AtomIdx The index of the second atom in the second state. - """ - # Have we opened/closed a ring? This means that both atoms are part of a - # ring in one end state (either in it, or on it), whereas at least one - # isn't in the other state. - - # Whether each atom is in a ring in both end states. - in_ring_idx0 = conn0.in_ring(idx0) - in_ring_idy0 = conn0.in_ring(idy0) - in_ring_idx1 = conn1.in_ring(idx1) - in_ring_idy1 = conn1.in_ring(idy1) + max_path : int + Maximum path length used when searching for rings. The default of 50 + covers typical macrocycles. Increase if larger rings need to be + detected. + """ - # Whether each atom is on a ring in both end states. - on_ring_idx0 = _is_on_ring(idx0, conn0) - on_ring_idy0 = _is_on_ring(idy0, conn0) - on_ring_idx1 = _is_on_ring(idx1, conn1) - on_ring_idy1 = _is_on_ring(idy1, conn1) + # Two atoms share a ring iff there are ≥2 distinct paths between them. + # This also handles atoms adjacent to a ring (not members themselves): + # substituents on neighbouring ring atoms have ≥2 paths between them + # through the ring, which collapses to 1 when the ring is broken. + n0 = len(conn0.find_paths(idx0, idy0, max_path)) + n1 = len(conn1.find_paths(idx1, idy1, max_path)) - # Both atoms are in a ring in one end state and at least one isn't in the other. - if (in_ring_idx0 & in_ring_idy0) ^ (in_ring_idx1 & in_ring_idy1): + # Ring break/open: one state has ≥2 paths, the other doesn't. + if (n0 >= 2) != (n1 >= 2): return True - # Both atoms are on a ring in one end state and at least one isn't in the other. - if (on_ring_idx0 & on_ring_idy0 & (conn0.connection_type(idx0, idy0) == 4)) ^ ( - on_ring_idx1 & on_ring_idy1 & (conn1.connection_type(idx1, idy1) == 4) + # Supplementary check for rings larger than max_path: find_paths may only + # find the direct-bond path and miss the long way around the ring, giving + # n=1 instead of n≥2. Sire's in_ring has no path-length limit and + # correctly identifies ring membership in macrocycles. + if (conn0.in_ring(idx0) and conn0.in_ring(idy0)) != ( + conn1.in_ring(idx1) and conn1.in_ring(idy1) ): - # Make sure that the change isn't a result of ring growth, i.e. one of - # the atoms isn't in a ring in one end state, while its "on" ring status - # has changed between states. - if not ( - (in_ring_idx0 | in_ring_idx1) & (on_ring_idx0 ^ on_ring_idx1) - or (in_ring_idy0 | in_ring_idy1) & (on_ring_idy0 ^ on_ring_idy1) - ): - return True - - # Both atoms are in or on a ring in one state and at least one isn't in the other. - if ( - (in_ring_idx0 | on_ring_idx0) - & (in_ring_idy0 | on_ring_idy0) - & (conn0.connection_type(idx0, idy0) == 3) - ) ^ ( - (in_ring_idx1 | on_ring_idx1) - & (in_ring_idy1 | on_ring_idy1) - & (conn1.connection_type(idx1, idy1) == 3) - ): - iscn0 = set(conn0.connections_to(idx0)).intersection( - set(conn0.connections_to(idy0)) - ) - if len(iscn0) != 1: - return True - common_idx = iscn0.pop() - in_ring_bond0 = conn0.in_ring(idx0, common_idx) | conn0.in_ring( - idy0, common_idx - ) - iscn1 = set(conn1.connections_to(idx1)).intersection( - set(conn1.connections_to(idy1)) - ) - if len(iscn1) != 1: - return True - common_idx = iscn1.pop() - in_ring_bond1 = conn1.in_ring(idx1, common_idx) | conn1.in_ring( - idy1, common_idx - ) - if in_ring_bond0 ^ in_ring_bond1: - return True + return True - # If we get this far, then a ring wasn't broken. - return False + # A direct bond was replaced by a ring path (or vice versa), leaving the + # atoms completely disconnected in one topology (0 paths). This occurs + # when a new ring forms and the old direct bond between two atoms is not + # present in the ring topology. + return (n0 == 0) != (n1 == 0) def _is_ring_size_changed(conn0, conn1, idx0, idy0, idx1, idy1, max_ring_size=12): @@ -1317,6 +1285,10 @@ def _is_ring_size_changed(conn0, conn1, idx0, idy0, idx1, idy1, max_ring_size=12 Internal function to test whether a perturbation changes the connectivity around two atoms such that a ring changes size. + The size of the smallest ring containing two atoms equals the length of + the shortest path between them. If the shortest path changes between end + states, the ring has changed size. + Parameters ---------- @@ -1342,69 +1314,24 @@ def _is_ring_size_changed(conn0, conn1, idx0, idy0, idx1, idy1, max_ring_size=12 The maximum size of what is considered to be a ring. """ - # Have a ring changed size? If so, then the minimum path size between - # two atoms will have changed. - # Work out the paths connecting the atoms in the two end states. paths0 = conn0.find_paths(idx0, idy0, max_ring_size) paths1 = conn1.find_paths(idx1, idy1, max_ring_size) - # Initialise the ring size in each end state. - ring0 = None - ring1 = None - - # Determine the minimum path in the lambda = 0 state. - if len(paths0) > 1: - path_lengths0 = [] - for path in paths0: - path_lengths0.append(len(path)) - ring0 = min(path_lengths0) - - if ring0 is None: + # No ring in state 0 — nothing to compare. + if len(paths0) < 2: return False - # Determine the minimum path in the lambda = 1 state. - if len(paths1) > 1: - path_lengths1 = [] - for path in paths1: - path_lengths1.append(len(path)) - ring1 = min(path_lengths1) + ring0 = min(len(p) for p in paths0) - # Return whether the ring has changed size. - if ring1: - return ring0 != ring1 - else: + # No ring in state 1 — the ring was broken rather than resized; let + # _is_ring_broken handle this case. + if len(paths1) < 2: return False + ring1 = min(len(p) for p in paths1) -def _is_on_ring(idx, conn): - """ - Internal function to test whether an atom is adjacent to a ring. - - Parameters - ---------- - - idx : Sire.Mol.AtomIdx - The index of the atom - - conn : Sire.Mol.Connectivity - The connectivity object. - - Returns - ------- - - is_on_ring : bool - Whether the atom is adjacent to a ring. - """ - - # Loop over all atoms connected to this atom. - for x in conn.connections_to(idx): - # The neighbour is in a ring. - if conn.in_ring(x) and (not conn.in_ring(x, idx)): - return True - - # If we get this far, then the atom is not adjacent to a ring. - return False + return ring0 != ring1 def _removeDummies(molecule, is_lambda1): diff --git a/src/BioSimSpace/Sandpit/Exscientia/Align/_merge.py b/src/BioSimSpace/Sandpit/Exscientia/Align/_merge.py index acded140..949f0c3c 100644 --- a/src/BioSimSpace/Sandpit/Exscientia/Align/_merge.py +++ b/src/BioSimSpace/Sandpit/Exscientia/Align/_merge.py @@ -1389,11 +1389,15 @@ def merge( return mol -def _is_ring_broken(conn0, conn1, idx0, idy0, idx1, idy1): +def _is_ring_broken(conn0, conn1, idx0, idy0, idx1, idy1, max_path=50): """ Internal function to test whether a perturbation changes the connectivity around two atoms such that a ring is broken. + Two atoms share a ring if and only if there are at least two distinct paths + between them in the connectivity graph. A ring is broken when this changes + between end states. + Parameters ---------- @@ -1414,74 +1418,38 @@ def _is_ring_broken(conn0, conn1, idx0, idy0, idx1, idy1): idy1 : Sire.Mol.AtomIdx The index of the second atom in the second state. - """ - # Have we opened/closed a ring? This means that both atoms are part of a - # ring in one end state (either in it, or on it), whereas at least one - # isn't in the other state. - - # Whether each atom is in a ring in both end states. - in_ring_idx0 = conn0.in_ring(idx0) - in_ring_idy0 = conn0.in_ring(idy0) - in_ring_idx1 = conn1.in_ring(idx1) - in_ring_idy1 = conn1.in_ring(idy1) + max_path : int + Maximum path length used when searching for rings. The default of 50 + covers typical macrocycles. Increase if larger rings need to be + detected. + """ - # Whether each atom is on a ring in both end states. - on_ring_idx0 = _is_on_ring(idx0, conn0) - on_ring_idy0 = _is_on_ring(idy0, conn0) - on_ring_idx1 = _is_on_ring(idx1, conn1) - on_ring_idy1 = _is_on_ring(idy1, conn1) + # Two atoms share a ring iff there are ≥2 distinct paths between them. + # This also handles atoms adjacent to a ring (not members themselves): + # substituents on neighbouring ring atoms have ≥2 paths between them + # through the ring, which collapses to 1 when the ring is broken. + n0 = len(conn0.find_paths(idx0, idy0, max_path)) + n1 = len(conn1.find_paths(idx1, idy1, max_path)) - # Both atoms are in a ring in one end state and at least one isn't in the other. - if (in_ring_idx0 & in_ring_idy0) ^ (in_ring_idx1 & in_ring_idy1): + # Ring break/open: one state has ≥2 paths, the other doesn't. + if (n0 >= 2) != (n1 >= 2): return True - # Both atoms are on a ring in one end state and at least one isn't in the other. - if (on_ring_idx0 & on_ring_idy0 & (conn0.connection_type(idx0, idy0) == 4)) ^ ( - on_ring_idx1 & on_ring_idy1 & (conn1.connection_type(idx1, idy1) == 4) + # Supplementary check for rings larger than max_path: find_paths may only + # find the direct-bond path and miss the long way around the ring, giving + # n=1 instead of n≥2. Sire's in_ring has no path-length limit and + # correctly identifies ring membership in macrocycles. + if (conn0.in_ring(idx0) and conn0.in_ring(idy0)) != ( + conn1.in_ring(idx1) and conn1.in_ring(idy1) ): - # Make sure that the change isn't a result of ring growth, i.e. one of - # the atoms isn't in a ring in one end state, while its "on" ring status - # has changed between states. - if not ( - (in_ring_idx0 | in_ring_idx1) & (on_ring_idx0 ^ on_ring_idx1) - or (in_ring_idy0 | in_ring_idy1) & (on_ring_idy0 ^ on_ring_idy1) - ): - return True - - # Both atoms are in or on a ring in one state and at least one isn't in the other. - if ( - (in_ring_idx0 | on_ring_idx0) - & (in_ring_idy0 | on_ring_idy0) - & (conn0.connection_type(idx0, idy0) == 3) - ) ^ ( - (in_ring_idx1 | on_ring_idx1) - & (in_ring_idy1 | on_ring_idy1) - & (conn1.connection_type(idx1, idy1) == 3) - ): - iscn0 = set(conn0.connections_to(idx0)).intersection( - set(conn0.connections_to(idy0)) - ) - if len(iscn0) != 1: - return True - common_idx = iscn0.pop() - in_ring_bond0 = conn0.in_ring(idx0, common_idx) | conn0.in_ring( - idy0, common_idx - ) - iscn1 = set(conn1.connections_to(idx1)).intersection( - set(conn1.connections_to(idy1)) - ) - if len(iscn1) != 1: - return True - common_idx = iscn1.pop() - in_ring_bond1 = conn1.in_ring(idx1, common_idx) | conn1.in_ring( - idy1, common_idx - ) - if in_ring_bond0 ^ in_ring_bond1: - return True + return True - # If we get this far, then a ring wasn't broken. - return False + # A direct bond was replaced by a ring path (or vice versa), leaving the + # atoms completely disconnected in one topology (0 paths). This occurs + # when a new ring forms and the old direct bond between two atoms is not + # present in the ring topology. + return (n0 == 0) != (n1 == 0) def _is_ring_size_changed(conn0, conn1, idx0, idy0, idx1, idy1, max_ring_size=12): @@ -1489,6 +1457,10 @@ def _is_ring_size_changed(conn0, conn1, idx0, idy0, idx1, idy1, max_ring_size=12 Internal function to test whether a perturbation changes the connectivity around two atoms such that a ring changes size. + The size of the smallest ring containing two atoms equals the length of + the shortest path between them. If the shortest path changes between end + states, the ring has changed size. + Parameters ---------- @@ -1514,69 +1486,24 @@ def _is_ring_size_changed(conn0, conn1, idx0, idy0, idx1, idy1, max_ring_size=12 The maximum size of what is considered to be a ring. """ - # Have a ring changed size? If so, then the minimum path size between - # two atoms will have changed. - # Work out the paths connecting the atoms in the two end states. paths0 = conn0.find_paths(idx0, idy0, max_ring_size) paths1 = conn1.find_paths(idx1, idy1, max_ring_size) - # Initialise the ring size in each end state. - ring0 = None - ring1 = None - - # Determine the minimum path in the lambda = 0 state. - if len(paths0) > 1: - path_lengths0 = [] - for path in paths0: - path_lengths0.append(len(path)) - ring0 = min(path_lengths0) - - if ring0 is None: + # No ring in state 0 — nothing to compare. + if len(paths0) < 2: return False - # Determine the minimum path in the lambda = 1 state. - if len(paths1) > 1: - path_lengths1 = [] - for path in paths1: - path_lengths1.append(len(path)) - ring1 = min(path_lengths1) + ring0 = min(len(p) for p in paths0) - # Return whether the ring has changed size. - if ring1: - return ring0 != ring1 - else: + # No ring in state 1 — the ring was broken rather than resized; let + # _is_ring_broken handle this case. + if len(paths1) < 2: return False + ring1 = min(len(p) for p in paths1) -def _is_on_ring(idx, conn): - """ - Internal function to test whether an atom is adjacent to a ring. - - Parameters - ---------- - - idx : Sire.Mol.AtomIdx - The index of the atom - - conn : Sire.Mol.Connectivity - The connectivity object. - - Returns - ------- - - is_on_ring : bool - Whether the atom is adjacent to a ring. - """ - - # Loop over all atoms connected to this atom. - for x in conn.connections_to(idx): - # The neighbour is in a ring. - if conn.in_ring(x) and (not conn.in_ring(x, idx)): - return True - - # If we get this far, then the atom is not adjacent to a ring. - return False + return ring0 != ring1 def _removeDummies(molecule, is_lambda1): From 0d46769a91f12de9b3d62094753f7c85dacb021a Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Tue, 24 Feb 2026 13:05:07 +0000 Subject: [PATCH 2/3] Add unit tests for ring break and size change detection. --- tests/Align/test_align.py | 136 ++++++++++++++++++- tests/Sandpit/Exscientia/Align/test_align.py | 134 ++++++++++++++++++ 2 files changed, 269 insertions(+), 1 deletion(-) diff --git a/tests/Align/test_align.py b/tests/Align/test_align.py index b154b5d6..e7b0c490 100644 --- a/tests/Align/test_align.py +++ b/tests/Align/test_align.py @@ -5,7 +5,7 @@ from sire.legacy.Mol import AtomIdx, Element, PartialMolecule import BioSimSpace as BSS -from tests.conftest import has_amber +from tests.conftest import has_amber, has_openff # Store the tutorial URL. url = BSS.tutorialUrl() @@ -695,3 +695,137 @@ def test_ion_merge(system): water_coords = water._sire_object.property("coordinates").to_vector()[0] assert coords0 == coords1 assert coords0 == water_coords + + +@pytest.mark.skipif(has_openff is False, reason="Requires OpenFF to be installed.") +@pytest.mark.parametrize( + "ligands, mapping", + [ + ( + ("Schindler_SYK_CHEMBL3265035", "Schindler_SYK_CHEMBL3265033"), + { + 5: 8, + 6: 32, + 7: 31, + 8: 11, + 9: 12, + 10: 13, + 11: 14, + 12: 15, + 13: 16, + 14: 17, + 15: 18, + 16: 19, + 17: 20, + 18: 21, + 19: 22, + 20: 23, + 21: 24, + 22: 25, + 23: 26, + 24: 27, + 25: 28, + 26: 29, + 27: 30, + 28: 10, + 29: 9, + 38: 53, + 39: 52, + 40: 43, + 41: 44, + 42: 45, + 43: 46, + 44: 47, + 45: 48, + 46: 49, + 47: 50, + 48: 51, + 49: 42, + 50: 41, + 1: 1, + 2: 4, + 34: 36, + 33: 35, + 3: 3, + 4: 34, + 35: 1, + 0: 6, + 32: 37, + 31: 7, + 36: 33, + }, + ), + ( + ("Schindler_SYK_CHEMBL3265035", "Schindler_SYK_CHEMBL3265036"), + { + 5: 7, + 6: 31, + 7: 30, + 8: 10, + 9: 11, + 10: 12, + 11: 13, + 12: 14, + 13: 15, + 14: 16, + 15: 17, + 16: 18, + 17: 19, + 18: 20, + 19: 21, + 20: 22, + 21: 23, + 22: 24, + 23: 25, + 24: 26, + 25: 27, + 26: 28, + 27: 29, + 28: 9, + 29: 8, + 38: 54, + 39: 53, + 40: 44, + 41: 45, + 42: 46, + 43: 47, + 44: 48, + 45: 49, + 46: 50, + 47: 51, + 48: 52, + 49: 43, + 50: 42, + 0: 4, + 1: 5, + 2: 6, + 3: 0, + 4: 1, + 30: 3, + 31: 39, + 32: 38, + 33: 40, + 34: 41, + 35: 32, + 36: 33, + 37: 34, + }, + ), + ], +) +def test_ring_opening_and_size_change(ligands, mapping): + # These perturbations involve ring formation (acyclic atoms in mol0 become + # ring members in mol1) combined with ring size changes in the existing + # fused bicyclic core. Check that the merge succeeds when both flags are + # allowed. + m0 = BSS.IO.readMolecules(f"{url}/{ligands[0]}.sdf.bz2")[0] + m1 = BSS.IO.readMolecules(f"{url}/{ligands[1]}.sdf.bz2")[0] + + m0 = BSS.Parameters.openff_unconstrained_2_1_1(m0).getMolecule() + m1 = BSS.Parameters.openff_unconstrained_2_1_1(m1).getMolecule() + + m0 = BSS.Align.rmsdAlign(m0, m1, mapping) + + BSS.Align.merge( + m0, m1, mapping, allow_ring_breaking=True, allow_ring_size_change=True + ) diff --git a/tests/Sandpit/Exscientia/Align/test_align.py b/tests/Sandpit/Exscientia/Align/test_align.py index 0fe2547d..b97f99d2 100644 --- a/tests/Sandpit/Exscientia/Align/test_align.py +++ b/tests/Sandpit/Exscientia/Align/test_align.py @@ -750,3 +750,137 @@ def test_ion_merge(): water_coords = water._sire_object.property("coordinates").to_vector()[0] assert coords0 == coords1 assert coords0 == water_coords + + +@pytest.mark.skipif(has_openff is False, reason="Requires OpenFF to be installed.") +@pytest.mark.parametrize( + "ligands, mapping", + [ + ( + ("Schindler_SYK_CHEMBL3265035", "Schindler_SYK_CHEMBL3265033"), + { + 5: 8, + 6: 32, + 7: 31, + 8: 11, + 9: 12, + 10: 13, + 11: 14, + 12: 15, + 13: 16, + 14: 17, + 15: 18, + 16: 19, + 17: 20, + 18: 21, + 19: 22, + 20: 23, + 21: 24, + 22: 25, + 23: 26, + 24: 27, + 25: 28, + 26: 29, + 27: 30, + 28: 10, + 29: 9, + 38: 53, + 39: 52, + 40: 43, + 41: 44, + 42: 45, + 43: 46, + 44: 47, + 45: 48, + 46: 49, + 47: 50, + 48: 51, + 49: 42, + 50: 41, + 1: 1, + 2: 4, + 34: 36, + 33: 35, + 3: 3, + 4: 34, + 35: 1, + 0: 6, + 32: 37, + 31: 7, + 36: 33, + }, + ), + ( + ("Schindler_SYK_CHEMBL3265035", "Schindler_SYK_CHEMBL3265036"), + { + 5: 7, + 6: 31, + 7: 30, + 8: 10, + 9: 11, + 10: 12, + 11: 13, + 12: 14, + 13: 15, + 14: 16, + 15: 17, + 16: 18, + 17: 19, + 18: 20, + 19: 21, + 20: 22, + 21: 23, + 22: 24, + 23: 25, + 24: 26, + 25: 27, + 26: 28, + 27: 29, + 28: 9, + 29: 8, + 38: 54, + 39: 53, + 40: 44, + 41: 45, + 42: 46, + 43: 47, + 44: 48, + 45: 49, + 46: 50, + 47: 51, + 48: 52, + 49: 43, + 50: 42, + 0: 4, + 1: 5, + 2: 6, + 3: 0, + 4: 1, + 30: 3, + 31: 39, + 32: 38, + 33: 40, + 34: 41, + 35: 32, + 36: 33, + 37: 34, + }, + ), + ], +) +def test_ring_opening_and_size_change(ligands, mapping): + # These perturbations involve ring formation (acyclic atoms in mol0 become + # ring members in mol1) combined with ring size changes in the existing + # fused bicyclic core. Check that the merge succeeds when both flags are + # allowed. + m0 = BSS.IO.readMolecules(f"{url}/{ligands[0]}.sdf.bz2")[0] + m1 = BSS.IO.readMolecules(f"{url}/{ligands[1]}.sdf.bz2")[0] + + m0 = BSS.Parameters.openff_unconstrained_2_1_1(m0).getMolecule() + m1 = BSS.Parameters.openff_unconstrained_2_1_1(m1).getMolecule() + + m0 = BSS.Align.rmsdAlign(m0, m1, mapping) + + BSS.Align.merge( + m0, m1, mapping, allow_ring_breaking=True, allow_ring_size_change=True + ) From 7ea22b1a5beaa8db65a80e3b1f0d697e58d09452 Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Wed, 25 Feb 2026 09:05:42 +0000 Subject: [PATCH 3/3] Add missing test guards. --- tests/Align/test_align.py | 1 + tests/Sandpit/Exscientia/Align/test_align.py | 3 +++ 2 files changed, 4 insertions(+) diff --git a/tests/Align/test_align.py b/tests/Align/test_align.py index e7b0c490..0e24d972 100644 --- a/tests/Align/test_align.py +++ b/tests/Align/test_align.py @@ -697,6 +697,7 @@ def test_ion_merge(system): assert coords0 == water_coords +@pytest.mark.skipif(has_amber is False, reason="Requires AMBER to be installed.") @pytest.mark.skipif(has_openff is False, reason="Requires OpenFF to be installed.") @pytest.mark.parametrize( "ligands, mapping", diff --git a/tests/Sandpit/Exscientia/Align/test_align.py b/tests/Sandpit/Exscientia/Align/test_align.py index b97f99d2..6b2da75d 100644 --- a/tests/Sandpit/Exscientia/Align/test_align.py +++ b/tests/Sandpit/Exscientia/Align/test_align.py @@ -752,6 +752,9 @@ def test_ion_merge(): assert coords0 == water_coords +@pytest.mark.skipif( + has_antechamber is False, reason="Requires AnteChamber to be installed." +) @pytest.mark.skipif(has_openff is False, reason="Requires OpenFF to be installed.") @pytest.mark.parametrize( "ligands, mapping",