ChEB-AI · Copilot · Feb 17, 2026 · Feb 17, 2026
diff --git a/chebai/preprocessing/datasets/pubchem.py b/chebai/preprocessing/datasets/pubchem.py
@@ -23,9 +23,6 @@
 import tqdm
 from rdkit import Chem, DataStructs
 from rdkit.Chem import AllChem
-from scipy import spatial
-from sklearn.cluster import KMeans
-from sklearn.model_selection import train_test_split
 
 from chebai.preprocessing import reader as dr
 from chebai.preprocessing.datasets.base import DataLoader, XYBaseDataModule
@@ -150,6 +147,8 @@ def setup_processed(self):
         """
         Prepares processed data and saves them as Torch tensors.
         """
+        from sklearn.model_selection import train_test_split
+
         filename = os.path.join(self.raw_dir, self.raw_file_names[0])
         print("Load data from file", filename)
         data = self._load_data_from_file(filename)
@@ -294,6 +293,8 @@ def setup_processed(self):
         """
         Prepares processed data and saves them as Torch tensors.
         """
+        from sklearn.model_selection import train_test_split
+
         filename = os.path.join(self.raw_dir, self.raw_file_names[0])
         print("Load data from file", filename)
         data_not_tokenized = [entry for entry in self._load_dict(filename)]
@@ -557,6 +558,8 @@ def _build_clusters(self) -> tuple[pd.DataFrame, pd.DataFrame]:
         Returns:
             tuple: Tuple containing cluster centers DataFrame and clustered fingerprints DataFrame.
         """
+        from sklearn.cluster import KMeans
+
         fingerprints_clustered_path = os.path.join(
             self.raw_dir, "fingerprints_clustered.pkl"
         )
@@ -603,6 +606,8 @@ def _exclude_clusters(self, cluster_centers: pd.DataFrame) -> pd.DataFrame:
         Returns:
             pd.DataFrame: DataFrame of filtered cluster centers.
         """
+        from scipy import spatial
+
         exclusion_data_path = os.path.join(self.raw_dir, "exclusion_data_clustered.pkl")
         cluster_centers_np = np.array(
             [
@@ -701,6 +706,8 @@ def cluster_centers_superclustered(self) -> pd.DataFrame:
         Returns:
             pd.DataFrame: DataFrame of superclustered cluster centers.
         """
+        from sklearn.cluster import KMeans
+
         cluster_centers_path = os.path.join(
             self.raw_dir, "cluster_centers_superclustered.pkl"
         )

diff --git a/pyproject.toml b/pyproject.toml
@@ -23,6 +23,7 @@ dependencies = [
     "pysmiles==1.1.2",
     "rdkit==2024.3.6",
     "lightning==2.5.1",
+    "jsonargparse[signatures]>=4.17",
 ]
 
 [project.optional-dependencies]
@@ -36,7 +37,6 @@ dev = [
     "scipy",
     "fastobo",
     "selfies",
-    "jsonargparse[signatures]>=4.17",
     "omegaconf",
     "deepsmiles",
     "iterative-stratification",