rdhakan13 · rdhakan13 · Jul 10, 2025 · Jun 25, 2025 · Jun 25, 2025 · Jul 10, 2025
diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml
@@ -4,6 +4,7 @@ on:
   push:
     branches:
       - main
+      - ec2-experiments
     paths:
       - configs/**
       - src/**
@@ -28,7 +29,25 @@ on:
       - poetry.lock
 
 jobs:
+  reject-ec2:  # Guard job: fails when the push comes from the EC2 deploy identity checked below.
+    if: github.event_name == 'push' && github.ref != 'refs/heads/ec2-experiments'  # push events only; pushes to ec2-experiments skip this job
+    runs-on: ubuntu-latest
+    steps:
+      - name: Check if pushed by EC2 Deploy Key
+        run: |  # reads pusher name/email from the event payload; exits 1 on a match with the EC2 bot name or email
+          AUTHOR_NAME=$(jq -r .pusher.name "$GITHUB_EVENT_PATH")
+          AUTHOR_EMAIL=$(jq -r .pusher.email "$GITHUB_EVENT_PATH")
+
+          echo "Commit by $AUTHOR_NAME <$AUTHOR_EMAIL>"
+
+          if [[ "$AUTHOR_NAME" == "ec2-deploy-bot" ]] || [[ "$AUTHOR_EMAIL" == "ec2@myinfra.local" ]]; then
+            echo "Blocked deploy key push to main."
+            exit 1
+          fi
+
+          echo "Push allowed."
   unit-tests:
+    # if: (github.event_name == 'push' || github.event_name == 'pull_request') && github.ref == 'refs/heads/main'
     runs-on: ubuntu-latest
     env:
       PYTHONPATH: ${{ github.workspace }}/src

diff --git a/src/common/stats.py b/src/common/stats.py
@@ -1,5 +1,6 @@
 import logging
 import pandas as pd
+import numpy as np
 from statsmodels.tsa.stattools import adfuller, grangercausalitytests
 from arch.unitroot import PhillipsPerron
 from typing import Any
@@ -109,3 +110,34 @@ def grangers_causality_test(
         }
 
     return results
+
+def root_mean_squared_percentage_error(y_true:Any, y_pred: Any) -> float:
+    """
+    Calculate the Root Mean Squared Percentage Error (RMSPE), expressed in percent.
+
+    Parameters:
+        y_true (pd.Series, np.ndarray, pd.DataFrame): Actual values; must be non-zero and NaN-free.
+        y_pred (pd.Series, np.ndarray, pd.DataFrame): Predicted values; same shape as y_true.
+
+    Returns:
+        float: sqrt(mean(((y_true - y_pred) / y_true) ** 2)) * 100.
+
+    Raises:
+        TypeError: If either input is not a pandas Series, numpy array, or DataFrame.
+        ValueError: On length or shape mismatch, zeros in y_true, or NaN in either input.
+    """
+    if not isinstance(y_true, (pd.Series, np.ndarray, pd.DataFrame)):
+        raise TypeError("Actual values must be a pandas Series, numpy array, or DataFrame.")
+    if not isinstance(y_pred, (pd.Series, np.ndarray, pd.DataFrame)):
+        raise TypeError("Predicted values must be a pandas Series, numpy array, or DataFrame.")
+    if len(y_true) != len(y_pred):
+        raise ValueError("Actual and predicted series must have the same length.")
+    y_true, y_pred = np.asarray(y_true), np.asarray(y_pred)
+    # Equal lengths are not enough: (n, 1) against (n,) would broadcast to (n, n) and skew the mean.
+    if y_true.shape != y_pred.shape:
+        raise ValueError("Actual and predicted values must have the same shape.")
+    if np.any(y_true == 0):
+        raise ValueError("Actual values must not contain zero to avoid division by zero in percentage error calculation.")
+    if np.any(np.isnan(y_true)) or np.any(np.isnan(y_pred)):
+        raise ValueError("Actual and predicted values must not contain NaN values.")
+    return float(np.sqrt(np.mean(((y_true - y_pred) / y_true) ** 2)) * 100)
diff --git a/src/preprocessing/data_loader.py b/src/preprocessing/data_loader.py
@@ -275,7 +275,7 @@ def split_data(
             logger.error("Please set the train data first.")
             raise ValueError("Please set the train data first.")
 
-    def generate_X_y_tensors(self, df:pd.DataFrame, target_col:str="ETH_D_AvgPrc", lags:int=5, horizon:int=7, data_split:Optional[str]=None, format:str="xarray") -> tuple[np.ndarray, np.ndarray]:
+    def generate_X_y_tensors(self, df:pd.DataFrame, target_col:str="ETH_D_AvgPrc", lags:int=5, horizon:int=7, data_split:Optional[str]=None, tensor_format:str="xarray") -> tuple[np.ndarray, np.ndarray]:
         """
         Generate X and y tensors for time series forecasting.
 
@@ -341,9 +341,9 @@ def generate_X_y_tensors(self, df:pd.DataFrame, target_col:str="ETH_D_AvgPrc", l
             name="X"
         )
 
-        if format == "xarray":
+        if tensor_format == "xarray":
             return y_xr, X_xr
-        elif format == "numpy":
+        elif tensor_format == "numpy":
             return np.array(y), np.array(X)
         else:
             logger.error("Invalid format specified. Use 'xarray' or 'numpy'.")
@@ -356,4 +356,4 @@ def get_scalers(self) -> tuple[Optional[Any], Optional[Any]]:
         Returns:
-            tuple: X scaler and y scaler.
+            tuple: y scaler and X scaler.
         """
-        return self.x_scaler, self.y_scaler
+        return self.y_scaler, self.x_scaler