
Commit d925a89

Add_parameters (#141)
* added early_stopping kwargs
* added data aware batch size
* added device to prediction
* added checkpoint kwargs
* pre-commit fix
* pushing a linting error correction
1 parent 5d962d5 commit d925a89

File tree: 5 files changed, +60 −9 lines


examples/__only_for_dev__/adhoc_scaffold.py

Lines changed: 4 additions & 0 deletions
@@ -85,7 +85,11 @@ def print_metrics(y_true, y_pred, tag):
 tabular_model.fit(train=train, validation=val)
 test.drop(columns=["target"], inplace=True)
 pred_df = tabular_model.predict(test)
+pred_df = tabular_model.predict(test, device="cpu")
+pred_df = tabular_model.predict(test, device="cuda")
+import torch  # noqa: E402

+pred_df = tabular_model.predict(test, device=torch.device("cuda"))
 # tabular_model.fit(train=train, validation=val)
 # tabular_model.fit(train=train, validation=val, max_epochs=5)
 # tabular_model.fit(train=train, validation=val, max_epochs=5, reset=True)

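The scaffold above exercises the new `device` argument of `TabularModel.predict`. A minimal usage sketch, assuming a fitted `TabularModel` named `tabular_model` and a feature-only DataFrame `test` as in the scaffold; the CUDA availability guard is added here for illustration only:

    import torch

    # Force inference onto the CPU regardless of where the model was trained.
    pred_cpu = tabular_model.predict(test, device="cpu")

    # Both strings and torch.device objects are accepted; guard the GPU calls
    # so the sketch also runs on CPU-only machines.
    if torch.cuda.is_available():
        pred_gpu = tabular_model.predict(test, device="cuda")
        pred_gpu = tabular_model.predict(test, device=torch.device("cuda"))
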
src/pytorch_tabular/config/config.py

Lines changed: 36 additions & 0 deletions
@@ -237,6 +237,8 @@ class TrainerConfig:
     Args:
         batch_size (int): Number of samples in each batch of training

+        data_aware_init_batch_size (int): Number of samples in each batch of training for the data-aware initialization, when applicable. Defaults to 2000
+
         fast_dev_run (bool): runs n if set to ``n`` (int) else 1 if set to ``True`` batch(es) of train, val
             and test to find any bugs (ie: a sort of unit test).

@@ -296,6 +298,9 @@ class TrainerConfig:
         early_stopping_patience (int): The number of epochs to wait until there is no further improvements
             in loss/metric

+        early_stopping_kwargs (Optional[Dict]): Additional keyword arguments for the early stopping callback.
+            See the documentation for the PyTorch Lightning EarlyStopping callback for more details.
+
         checkpoints (Optional[str]): The loss/metric that needed to be monitored for checkpoints. If None,
             there will be no checkpoints

@@ -311,6 +316,9 @@ class TrainerConfig:

         checkpoints_save_top_k (int): The number of best models to save

+        checkpoints_kwargs (Optional[Dict]): Additional keyword arguments for the checkpoints callback.
+            See the documentation for the PyTorch Lightning ModelCheckpoint callback for more details.
+
         load_best (bool): Flag to load the best model saved during training

         track_grad_norm (int): Track and Log Gradient Norms in the logger. -1 by default means no tracking.
@@ -328,6 +336,12 @@ class TrainerConfig:
     """

     batch_size: int = field(default=64, metadata={"help": "Number of samples in each batch of training"})
+    data_aware_init_batch_size: int = field(
+        default=2000,
+        metadata={
+            "help": "Number of samples in each batch of training for the data-aware initialization, when applicable. Defaults to 2000"
+        },
+    )
     fast_dev_run: bool = field(
         default=False,
         metadata={
@@ -429,6 +443,12 @@ class TrainerConfig:
         default=3,
         metadata={"help": "The number of epochs to wait until there is no further improvements in loss/metric"},
     )
+    early_stopping_kwargs: Optional[Dict[str, Any]] = field(
+        default_factory=lambda: dict(),
+        metadata={
+            "help": "Additional keyword arguments for the early stopping callback. See the documentation for the PyTorch Lightning EarlyStopping callback for more details."
+        },
+    )
     checkpoints: Optional[str] = field(
         default="valid_loss",
         metadata={
@@ -457,6 +477,12 @@ class TrainerConfig:
         default=1,
         metadata={"help": "The number of best models to save"},
     )
+    checkpoints_kwargs: Optional[Dict[str, Any]] = field(
+        default_factory=lambda: dict(),
+        metadata={
+            "help": "Additional keyword arguments for the checkpoints callback. See the documentation for the PyTorch Lightning ModelCheckpoint callback for more details."
+        },
+    )
     load_best: bool = field(
         default=True,
         metadata={"help": "Flag to load the best model saved during training"},
@@ -508,6 +534,16 @@ def __post_init__(self):
             warnings.warn("Ignoring devices in favor of devices_list")
             self.devices = self.devices_list
             delattr(self, "devices_list")
+        for key in self.early_stopping_kwargs.keys():
+            if key in ["min_delta", "mode", "patience"]:
+                raise ValueError(
+                    f"Cannot override {key} in early_stopping_kwargs. Please use the appropriate argument in `TrainerConfig`"
+                )
+        for key in self.checkpoints_kwargs.keys():
+            if key in ["dirpath", "filename", "monitor", "save_top_k", "mode", "every_n_epochs"]:
+                raise ValueError(
+                    f"Cannot override {key} in checkpoints_kwargs. Please use the appropriate argument in `TrainerConfig`"
+                )


 @dataclass

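Taken together, the TrainerConfig changes add three knobs: `data_aware_init_batch_size`, `early_stopping_kwargs`, and `checkpoints_kwargs`. A minimal sketch of setting them, assuming the usual `TrainerConfig` import; the specific kwargs values (`check_finite`, `save_weights_only`) are only illustrative PyTorch Lightning callback arguments, not defaults:

    from pytorch_tabular.config import TrainerConfig

    trainer_config = TrainerConfig(
        batch_size=1024,
        data_aware_init_batch_size=4096,  # used by the NODE/GATE data-aware initialization
        early_stopping="valid_loss",
        early_stopping_patience=5,
        # Forwarded verbatim to pytorch_lightning.callbacks.EarlyStopping
        early_stopping_kwargs={"check_finite": True},
        checkpoints="valid_loss",
        # Forwarded verbatim to pytorch_lightning.callbacks.ModelCheckpoint
        checkpoints_kwargs={"save_weights_only": True},
    )

Note that `__post_init__` rejects kwargs that shadow first-class fields, so for example `early_stopping_kwargs={"patience": 10}` raises a `ValueError` asking you to use the dedicated `TrainerConfig` argument instead.
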
src/pytorch_tabular/models/gate/gate_model.py

Lines changed: 1 addition & 1 deletion
@@ -220,6 +220,6 @@ def data_aware_initialization(self, datamodule):
         if self.hparams.task == "regression":
             logger.info("Data Aware Initialization of T0")
             # Need a big batch to initialize properly
-            alt_loader = datamodule.train_dataloader(batch_size=2000)
+            alt_loader = datamodule.train_dataloader(batch_size=self.hparams.data_aware_init_batch_size)
             batch = next(iter(alt_loader))
             self.head.T0.data = torch.mean(batch["target"], dim=0)

src/pytorch_tabular/models/node/node_model.py

Lines changed: 1 addition & 1 deletion
@@ -81,7 +81,7 @@ def data_aware_initialization(self, datamodule):
         """Performs data-aware initialization for NODE"""
         logger.info("Data Aware Initialization of NODE using a forward pass with 2000 batch size....")
         # Need a big batch to initialize properly
-        alt_loader = datamodule.train_dataloader(batch_size=2000)
+        alt_loader = datamodule.train_dataloader(batch_size=self.hparams.data_aware_init_batch_size)
         batch = next(iter(alt_loader))
         for k, v in batch.items():
             if isinstance(v, list) and (len(v) == 0):

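Both GATE and NODE previously hard-coded a 2000-row batch for their data-aware initialization; they now read `self.hparams.data_aware_init_batch_size`, which is populated from `TrainerConfig`. A sketch of raising it for a NODE regression model, assuming `data_config`, `optimizer_config`, `train`, and `val` are defined as usual elsewhere:

    from pytorch_tabular import TabularModel
    from pytorch_tabular.config import TrainerConfig
    from pytorch_tabular.models import NodeConfig

    trainer_config = TrainerConfig(data_aware_init_batch_size=8192)
    model_config = NodeConfig(task="regression")

    tabular_model = TabularModel(
        data_config=data_config,            # assumed defined elsewhere
        model_config=model_config,
        optimizer_config=optimizer_config,  # assumed defined elsewhere
        trainer_config=trainer_config,
    )
    # During fit(), data_aware_initialization() now draws one batch of 8192 rows
    # instead of the previously hard-coded 2000.
    tabular_model.fit(train=train, validation=val)
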
src/pytorch_tabular/tabular_model.py

Lines changed: 18 additions & 7 deletions
@@ -225,8 +225,8 @@ def _prepare_callbacks(self, callbacks=None) -> List:
                 monitor=self.config.early_stopping,
                 min_delta=self.config.early_stopping_min_delta,
                 patience=self.config.early_stopping_patience,
-                verbose=False,
                 mode=self.config.early_stopping_mode,
+                **self.config.early_stopping_kwargs,
             )
             callbacks.append(early_stop_callback)
         if self.config.checkpoints:
@@ -239,6 +239,7 @@ def _prepare_callbacks(self, callbacks=None) -> List:
                 save_top_k=self.config.checkpoints_save_top_k,
                 mode=self.config.checkpoints_mode,
                 every_n_epochs=self.config.checkpoints_every_n_epochs,
+                **self.config.checkpoints_kwargs,
             )
             callbacks.append(model_checkpoint)
             self.config.enable_checkpointing = True
@@ -1061,6 +1062,7 @@ def predict(
         n_samples: Optional[int] = 100,
         ret_logits=False,
         include_input_features: bool = True,
+        device: Optional[torch.device] = None,
     ) -> pd.DataFrame:
         """Uses the trained model to predict on new data and return as a dataframe

@@ -1085,26 +1087,35 @@ def predict(
                 DeprecationWarning,
             )
         assert all([q <= 1 and q >= 0 for q in quantiles]), "Quantiles should be a decimal between 0 and 1"
-        self.model.eval()
+        if device is not None:
+            if isinstance(device, str):
+                device = torch.device(device)
+            if self.model.device != device:
+                model = self.model.to(device)
+            else:
+                model = self.model
+        else:
+            model = self.model
+        model.eval()
         inference_dataloader = self.datamodule.prepare_inference_dataloader(test)
         point_predictions = []
         quantile_predictions = []
         logits_predictions = defaultdict(list)
-        is_probabilistic = hasattr(self.model.hparams, "_probabilistic") and self.model.hparams._probabilistic
+        is_probabilistic = hasattr(model.hparams, "_probabilistic") and model.hparams._probabilistic
         for batch in track(inference_dataloader, description="Generating Predictions..."):
             for k, v in batch.items():
                 if isinstance(v, list) and (len(v) == 0):
                     # Skipping empty list
                     continue
-                batch[k] = v.to(self.model.device)
+                batch[k] = v.to(model.device)
             if is_probabilistic:
-                samples, ret_value = self.model.sample(batch, n_samples, ret_model_output=True)
+                samples, ret_value = model.sample(batch, n_samples, ret_model_output=True)
                 y_hat = torch.mean(samples, dim=-1)
                 quantile_preds = []
                 for q in quantiles:
                     quantile_preds.append(torch.quantile(samples, q=q, dim=-1).unsqueeze(1))
             else:
-                y_hat, ret_value = self.model.predict(batch, ret_model_output=True)
+                y_hat, ret_value = model.predict(batch, ret_model_output=True)
             if ret_logits:
                 for k, v in ret_value.items():
                     # if k == "backbone_features":
@@ -1121,7 +1132,7 @@ def predict(
             if quantile_predictions.ndim == 2:
                 quantile_predictions = quantile_predictions.unsqueeze(-1)
         if include_input_features:
-            pred_df = test.copy()  # TODO Add option to switch between including the entire input DF or not.
+            pred_df = test.copy()
         else:
             pred_df = pd.DataFrame(index=test.index)
         if self.config.task == "regression":

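In `_prepare_callbacks`, the new kwargs are unpacked into the Lightning `EarlyStopping` and `ModelCheckpoint` constructors; dropping the hard-coded `verbose=False` means verbosity can now be controlled through `early_stopping_kwargs`. In `predict`, a string `device` is converted with `torch.device`, the model is moved only when it is not already on the requested device, and omitting `device` keeps the previous behaviour of predicting on whatever device the model currently occupies. A small sketch of that resolution logic in isolation, written as a hypothetical standalone helper rather than the library API:

    import torch
    import torch.nn as nn

    def resolve_model_for_device(model: nn.Module, device=None) -> nn.Module:
        # Mirrors the predict() logic above: accept a str or torch.device and
        # only move the model when it sits on a different device.
        if device is None:
            return model
        if isinstance(device, str):
            device = torch.device(device)
        current = next(model.parameters()).device
        return model.to(device) if current != device else model

    # Usage: pin a toy module to the CPU explicitly.
    net = resolve_model_for_device(nn.Linear(4, 1), device="cpu")
    print(next(net.parameters()).device)  # cpu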