add dummy labels to explain smiles

aditya0by0 · aditya0by0 · commit 544aed8fd54a · 2026-02-02T16:57:16.000+01:00
diff --git a/chebifier/prediction_models/electra_predictor.py b/chebifier/prediction_models/electra_predictor.py
@@ -43,16 +43,23 @@ def __init__(self, model_name: str, ckpt_path: str, **kwargs):
     def explain_smiles(self, smiles) -> dict:
         from chebai.preprocessing.reader import EMBEDDING_OFFSET
 
+        # Add dummy labels because the collate function requires them.
+        # Note: If labels are set to `None`, the collator inserts a `non_null_labels` entry into `loss_kwargs`,
+        # which later causes the `_get_prediction_and_labels` method in the prediction pipeline to treat the data as empty.
+        # Note: With the changes from https://github.com/ChEB-AI/python-chebai/pull/130, `None` labels also
+        # cause problems with the `missing_labels` handling. Hence dummy labels are used.
+        dummy_labels: list = list(range(1, self.predictor._model.out_dim + 1))
+
         token_dict = self.predictor._dm.reader.to_data(
-            dict(features=smiles, labels=None)
+            dict(features=smiles, labels=dummy_labels)
         )
         tokens = np.array(token_dict["features"]).astype(int).tolist()
         result = self.calculate_results([token_dict])
 
         token_labels = (
             ["[CLR]"]
             + [None for _ in range(EMBEDDING_OFFSET - 1)]
-            + list(self._predictor._dm.reader.cache.keys())
+            + list(self.predictor._dm.reader.cache.keys())
         )
 
         graphs = [