From 3ba12dd59e86f6d3f8784fe81280bd40a5b34648 Mon Sep 17 00:00:00 2001 From: sfluegel Date: Thu, 7 May 2026 13:18:07 +0200 Subject: [PATCH 1/2] add top class option for chebi graph, molecular entity is the default --- chebi_utils/obo_extractor.py | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/chebi_utils/obo_extractor.py b/chebi_utils/obo_extractor.py index 1fc427f..faec97e 100644 --- a/chebi_utils/obo_extractor.py +++ b/chebi_utils/obo_extractor.py @@ -63,7 +63,7 @@ def _term_data(doc: "fastobo.term.TermFrame") -> dict | None: } -def build_chebi_graph(filepath: str | Path) -> nx.DiGraph: +def build_chebi_graph(filepath: str | Path, top_class: str | None = "23367") -> nx.DiGraph: """Parse a ChEBI OBO file and build a directed graph of ontology terms. ``xref:`` lines are stripped before parsing as they can cause fastobo @@ -82,6 +82,9 @@ def build_chebi_graph(filepath: str | Path) -> nx.DiGraph: ---------- filepath : str or Path Path to the ChEBI OBO file. + top_class : str or None + CHEBI ID of the top-class (default "23367" for "molecular entity"). This will only return direct or indirect subclasses of the top-class (excluding the top-class). + If None, the full graph is returned without subgraph extraction. Returns ------- @@ -113,7 +116,13 @@ def build_chebi_graph(filepath: str | Path) -> nx.DiGraph: for part_id in parts: graph.add_edge(node_id, part_id, relation=relation) - return graph + if top_class is None: + return graph + + molecular_entity_subgraph = graph.subgraph(nx.ancestors(get_hierarchy_subgraph(graph), top_class)) + assert isinstance(molecular_entity_subgraph, nx.DiGraph) + + return molecular_entity_subgraph def get_hierarchy_subgraph(chebi_graph: nx.DiGraph) -> nx.DiGraph: @@ -122,3 +131,13 @@ def get_hierarchy_subgraph(chebi_graph: nx.DiGraph) -> nx.DiGraph: return chebi_graph.edge_subgraph( (u, v) for u, v, d in chebi_graph.edges(data=True) if d.get("relation") == "is_a" ) + +if __name__ == "__main__": + import argparse + + parser = argparse.ArgumentParser(description="Build a ChEBI graph from an OBO file.") + parser.add_argument("obo_file", type=Path, help="Path to the ChEBI OBO file.") + args = parser.parse_args() + + graph = build_chebi_graph(args.obo_file) + print(f"Final graph: {len(graph.nodes)} nodes, {len(graph.edges)} edges") \ No newline at end of file From a4642078cd3c1f39b2be0cb54b02e54395fbf988 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 7 May 2026 11:29:52 +0000 Subject: [PATCH 2/2] fix graph build when top class missing in parsed ontology Agent-Logs-Url: https://github.com/ChEB-AI/python-chebi-utils/sessions/5b167856-711a-4140-86d3-17827d69a2d7 Co-authored-by: sfluegel05 <43573433+sfluegel05@users.noreply.github.com> --- chebi_utils/obo_extractor.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/chebi_utils/obo_extractor.py b/chebi_utils/obo_extractor.py index faec97e..fd427ca 100644 --- a/chebi_utils/obo_extractor.py +++ b/chebi_utils/obo_extractor.py @@ -83,7 +83,10 @@ def build_chebi_graph(filepath: str | Path, top_class: str | None = "23367") -> filepath : str or Path Path to the ChEBI OBO file. top_class : str or None - CHEBI ID of the top-class (default "23367" for "molecular entity"). This will only return direct or indirect subclasses of the top-class (excluding the top-class). + CHEBI ID of the top-class (default "23367" for "molecular entity"). + This will only return direct or indirect subclasses of the + top-class (excluding the top-class). If ``top_class`` is not + present in the parsed graph, the full graph is returned. If None, the full graph is returned without subgraph extraction. Returns @@ -119,10 +122,13 @@ def build_chebi_graph(filepath: str | Path, top_class: str | None = "23367") -> if top_class is None: return graph - molecular_entity_subgraph = graph.subgraph(nx.ancestors(get_hierarchy_subgraph(graph), top_class)) - assert isinstance(molecular_entity_subgraph, nx.DiGraph) - - return molecular_entity_subgraph + hierarchy = get_hierarchy_subgraph(graph) + if top_class not in hierarchy: + return graph + + chebi_subgraph = graph.subgraph(nx.ancestors(hierarchy, top_class)) + assert isinstance(chebi_subgraph, nx.DiGraph) + return chebi_subgraph def get_hierarchy_subgraph(chebi_graph: nx.DiGraph) -> nx.DiGraph: @@ -132,6 +138,7 @@ def get_hierarchy_subgraph(chebi_graph: nx.DiGraph) -> nx.DiGraph: (u, v) for u, v, d in chebi_graph.edges(data=True) if d.get("relation") == "is_a" ) + if __name__ == "__main__": import argparse @@ -140,4 +147,4 @@ def get_hierarchy_subgraph(chebi_graph: nx.DiGraph) -> nx.DiGraph: args = parser.parse_args() graph = build_chebi_graph(args.obo_file) - print(f"Final graph: {len(graph.nodes)} nodes, {len(graph.edges)} edges") \ No newline at end of file + print(f"Final graph: {len(graph.nodes)} nodes, {len(graph.edges)} edges")