From cec2eb8e377834be3aa5447f37c1364bbd02776a Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 17 Feb 2026 20:39:28 +0000 Subject: [PATCH 1/2] Initial plan From ccd7da6b3f53252103b6cf7cb189c84c5ac0c349 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 17 Feb 2026 20:45:32 +0000 Subject: [PATCH 2/2] Move jsonargparse to main dependencies and implement lazy imports for sklearn/scipy Co-authored-by: aditya0by0 <65857172+aditya0by0@users.noreply.github.com> --- chebai/preprocessing/datasets/pubchem.py | 13 ++++++++++--- pyproject.toml | 2 +- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/chebai/preprocessing/datasets/pubchem.py b/chebai/preprocessing/datasets/pubchem.py index 8cc208b9..db6e1459 100644 --- a/chebai/preprocessing/datasets/pubchem.py +++ b/chebai/preprocessing/datasets/pubchem.py @@ -23,9 +23,6 @@ import tqdm from rdkit import Chem, DataStructs from rdkit.Chem import AllChem -from scipy import spatial -from sklearn.cluster import KMeans -from sklearn.model_selection import train_test_split from chebai.preprocessing import reader as dr from chebai.preprocessing.datasets.base import DataLoader, XYBaseDataModule @@ -150,6 +147,8 @@ def setup_processed(self): """ Prepares processed data and saves them as Torch tensors. """ + from sklearn.model_selection import train_test_split + filename = os.path.join(self.raw_dir, self.raw_file_names[0]) print("Load data from file", filename) data = self._load_data_from_file(filename) @@ -294,6 +293,8 @@ def setup_processed(self): """ Prepares processed data and saves them as Torch tensors. """ + from sklearn.model_selection import train_test_split + filename = os.path.join(self.raw_dir, self.raw_file_names[0]) print("Load data from file", filename) data_not_tokenized = [entry for entry in self._load_dict(filename)] @@ -557,6 +558,8 @@ def _build_clusters(self) -> tuple[pd.DataFrame, pd.DataFrame]: Returns: tuple: Tuple containing cluster centers DataFrame and clustered fingerprints DataFrame. """ + from sklearn.cluster import KMeans + fingerprints_clustered_path = os.path.join( self.raw_dir, "fingerprints_clustered.pkl" ) @@ -603,6 +606,8 @@ def _exclude_clusters(self, cluster_centers: pd.DataFrame) -> pd.DataFrame: Returns: pd.DataFrame: DataFrame of filtered cluster centers. """ + from scipy import spatial + exclusion_data_path = os.path.join(self.raw_dir, "exclusion_data_clustered.pkl") cluster_centers_np = np.array( [ @@ -701,6 +706,8 @@ def cluster_centers_superclustered(self) -> pd.DataFrame: Returns: pd.DataFrame: DataFrame of superclustered cluster centers. """ + from sklearn.cluster import KMeans + cluster_centers_path = os.path.join( self.raw_dir, "cluster_centers_superclustered.pkl" ) diff --git a/pyproject.toml b/pyproject.toml index b3652b00..a4953bd0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -23,6 +23,7 @@ dependencies = [ "pysmiles==1.1.2", "rdkit==2024.3.6", "lightning==2.5.1", + "jsonargparse[signatures]>=4.17", ] [project.optional-dependencies] @@ -36,7 +37,6 @@ dev = [ "scipy", "fastobo", "selfies", - "jsonargparse[signatures]>=4.17", "omegaconf", "deepsmiles", "iterative-stratification",