class ChEBIFromList(_ChEBIDataExtractor):
    """
    A ChEBI dataset where labels are selected from a predefined list of classes.

    The list is read once, at construction time, from a text file holding one
    class ID per line. Surrounding whitespace is stripped and blank lines are
    skipped; the remaining entries keep their file order.

    Attributes:
        class_list (List[str]): The class IDs read from the file, as strings.
    """

    READER = dr.ChemDataReader

    def __init__(
        self,
        class_list,
        **kwargs,
    ):
        """
        Initializes the ChEBIFromList dataset.

        Args:
            class_list: Path to a text file listing the class IDs (one per line)
                to be used as labels in the dataset.
            **kwargs: Additional keyword arguments passed to the superclass initializer.

        Raises:
            OSError: If the file at ``class_list`` cannot be opened.
        """
        # The attribute is assigned before the superclass initializer runs,
        # exactly as before, so it is already available during base-class setup.
        self.class_list = self._read_class_ids(class_list)
        super().__init__(**kwargs)

    @staticmethod
    def _read_class_ids(path) -> List[str]:
        """
        Reads the non-empty, whitespace-stripped lines of a text file.

        Args:
            path: Path to the file to read.

        Returns:
            List[str]: One entry per non-blank line, in file order.
        """
        # An explicit encoding keeps the result independent of the platform's
        # locale settings.
        with open(path, "r", encoding="utf-8") as f:
            stripped_lines = (line.strip() for line in f)
            # NOTE(review): IDs are kept as strings exactly as read. If the
            # graph's nodes are keyed by integers, they may need converting --
            # confirm against the base class before relying on this.
            return [entry for entry in stripped_lines if entry]

    @property
    def _name(self) -> str:
        """
        Returns the name of the dataset.

        Returns:
            str: The dataset name.
        """
        return "ChEBI_from_list"

    def select_classes(self, g: DiGraph, *args, **kwargs) -> List:
        """
        Returns the predefined class list, independent of the graph contents.

        Args:
            g (DiGraph): The ChEBI graph; not inspected by this implementation.
            *args: Ignored.
            **kwargs: Ignored.

        Returns:
            List: The class IDs loaded from the file given at construction time.
        """
        return self.class_list
From eeccaa7dc1b500a35a3bc1e4f0890748a97dc1bf Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 5 May 2026 13:01:52 +0000 Subject: [PATCH 2/3] fix: add List to typing imports in chebi.py to resolve NameError Agent-Logs-Url: https://github.com/ChEB-AI/python-chebai/sessions/01b8ab79-aafe-46c9-9a4c-27e6ae576e91 Co-authored-by: sfluegel05 <43573433+sfluegel05@users.noreply.github.com> --- chebai/preprocessing/datasets/chebi.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chebai/preprocessing/datasets/chebi.py b/chebai/preprocessing/datasets/chebi.py index 6e452d6b..98f2d563 100644 --- a/chebai/preprocessing/datasets/chebi.py +++ b/chebai/preprocessing/datasets/chebi.py @@ -7,7 +7,7 @@ import random from abc import ABC from itertools import cycle, permutations, product -from typing import TYPE_CHECKING, Any, Generator, Literal, Optional +from typing import TYPE_CHECKING, Any, Generator, List, Literal, Optional from networkx import DiGraph import numpy as np From bf51645c962d28aa0d73056e85aab0c83b8dfba8 Mon Sep 17 00:00:00 2001 From: sfluegel Date: Tue, 5 May 2026 15:15:00 +0200 Subject: [PATCH 3/3] fix formatting --- chebai/preprocessing/datasets/chebi.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/chebai/preprocessing/datasets/chebi.py b/chebai/preprocessing/datasets/chebi.py index 98f2d563..698835fd 100644 --- a/chebai/preprocessing/datasets/chebi.py +++ b/chebai/preprocessing/datasets/chebi.py @@ -557,8 +557,8 @@ def _name(self) -> str: def select_classes(self, g: DiGraph, *args, **kwargs) -> List: return self.class_list - - + + class ChEBIOverX(_ChEBIDataExtractor): """ A class for extracting data from the ChEBI dataset with a threshold for selecting classes.