Merge pull request #344 from KhiopsML/342-eliminate-pandas-warning-in-samples

folmos-at-orange · web-flow · commit 42957ea3d489 · 2025-02-06T09:59:42.000+01:00
Avoid the pandas DtypeWarning (mixed column types) in samples when reading the Places table of the Accidents dataset
diff --git a/doc/samples/samples_sklearn.rst b/doc/samples/samples_sklearn.rst
@@ -218,7 +218,9 @@ Samples
     accidents_df = pd.read_csv(os.path.join(accidents_data_dir, "Accidents.txt"), sep="\t")
     users_df = pd.read_csv(os.path.join(accidents_data_dir, "Users.txt"), sep="\t")
     vehicles_df = pd.read_csv(os.path.join(accidents_data_dir, "Vehicles.txt"), sep="\t")
-    places_df = pd.read_csv(os.path.join(accidents_data_dir, "Places.txt"), sep="\t")
+    places_df = pd.read_csv(
+        os.path.join(accidents_data_dir, "Places.txt"), sep="\t", low_memory=False
+    )
 
     # Build the multi-table dataset spec (drop the target column "Gravity")
     X = {
@@ -588,7 +590,9 @@ Samples
     accidents_df = pd.read_csv(os.path.join(accidents_data_dir, "Accidents.txt"), sep="\t")
     users_df = pd.read_csv(os.path.join(accidents_data_dir, "Users.txt"), sep="\t")
     vehicles_df = pd.read_csv(os.path.join(accidents_data_dir, "Vehicles.txt"), sep="\t")
-    places_df = pd.read_csv(os.path.join(accidents_data_dir, "Places.txt"), sep="\t")
+    places_df = pd.read_csv(
+        os.path.join(accidents_data_dir, "Places.txt"), sep="\t", low_memory=False
+    )
 
     # Build the multi-table dataset spec (drop the target column "Gravity")
     X = {
diff --git a/khiops/samples/samples_sklearn.ipynb b/khiops/samples/samples_sklearn.ipynb
@@ -243,7 +243,9 @@
     "accidents_df = pd.read_csv(os.path.join(accidents_data_dir, \"Accidents.txt\"), sep=\"\\t\")\n",
     "users_df = pd.read_csv(os.path.join(accidents_data_dir, \"Users.txt\"), sep=\"\\t\")\n",
     "vehicles_df = pd.read_csv(os.path.join(accidents_data_dir, \"Vehicles.txt\"), sep=\"\\t\")\n",
-    "places_df = pd.read_csv(os.path.join(accidents_data_dir, \"Places.txt\"), sep=\"\\t\")\n",
+    "places_df = pd.read_csv(\n",
+    "    os.path.join(accidents_data_dir, \"Places.txt\"), sep=\"\\t\", low_memory=False\n",
+    ")\n",
     "\n",
     "# Build the multi-table dataset spec (drop the target column \"Gravity\")\n",
     "X = {\n",
@@ -704,7 +706,9 @@
     "accidents_df = pd.read_csv(os.path.join(accidents_data_dir, \"Accidents.txt\"), sep=\"\\t\")\n",
     "users_df = pd.read_csv(os.path.join(accidents_data_dir, \"Users.txt\"), sep=\"\\t\")\n",
     "vehicles_df = pd.read_csv(os.path.join(accidents_data_dir, \"Vehicles.txt\"), sep=\"\\t\")\n",
-    "places_df = pd.read_csv(os.path.join(accidents_data_dir, \"Places.txt\"), sep=\"\\t\")\n",
+    "places_df = pd.read_csv(\n",
+    "    os.path.join(accidents_data_dir, \"Places.txt\"), sep=\"\\t\", low_memory=False\n",
+    ")\n",
     "\n",
     "# Build the multi-table dataset spec (drop the target column \"Gravity\")\n",
     "X = {\n",
diff --git a/khiops/samples/samples_sklearn.py b/khiops/samples/samples_sklearn.py
@@ -218,7 +218,9 @@ def khiops_classifier_multitable_snowflake():
     vehicles_df = pd.read_csv(
         os.path.join(accidents_data_dir, "Vehicles.txt"), sep="\t"
     )
-    places_df = pd.read_csv(os.path.join(accidents_data_dir, "Places.txt"), sep="\t")
+    places_df = pd.read_csv(
+        os.path.join(accidents_data_dir, "Places.txt"), sep="\t", low_memory=False
+    )
 
     # Build the multi-table dataset spec (drop the target column "Gravity")
     X = {
@@ -614,7 +616,9 @@ def khiops_encoder_multitable_snowflake():
     vehicles_df = pd.read_csv(
         os.path.join(accidents_data_dir, "Vehicles.txt"), sep="\t"
     )
-    places_df = pd.read_csv(os.path.join(accidents_data_dir, "Places.txt"), sep="\t")
+    places_df = pd.read_csv(
+        os.path.join(accidents_data_dir, "Places.txt"), sep="\t", low_memory=False
+    )
 
     # Build the multi-table dataset spec (drop the target column "Gravity")
     X = {
diff --git a/khiops/sklearn/estimators.py b/khiops/sklearn/estimators.py
@@ -677,8 +677,8 @@ def _create_computation_dir(self, method_name):
     def _assert_is_fitted(self):
         try:
             check_is_fitted(self)
-        except NotFittedError:
-            raise AssertionError("Model not fitted")
+        except NotFittedError as exc:
+            raise AssertionError("Model not fitted") from exc
 
 
 # Note: scikit-learn **requires** inherit first the mixins and then other classes
diff --git a/tests/test_remote_access.py b/tests/test_remote_access.py
@@ -129,8 +129,10 @@ def is_in_a_conda_env():
             if not isinstance(kh.get_runner(), KhiopsLocalRunner):
                 return False
 
-            # Get path to the Khiops executable
+            # Get path to the Khiops executable (temporarily disable pylint warning)
+            # pylint: disable=protected-access
             khiops_path = kh.get_runner()._khiops_path
+            # pylint: enable=protected-access
 
             # If $(dirname khiops_path) is identical to $CONDA_PREFIX/bin,
             # then return True
diff --git a/tests/test_sklearn.py b/tests/test_sklearn.py
@@ -16,7 +16,7 @@
 import numpy as np
 from sklearn.exceptions import NotFittedError
 from sklearn.utils.estimator_checks import check_estimator
-from sklearn.utils.validation import NotFittedError, check_is_fitted
+from sklearn.utils.validation import check_is_fitted
 
 import khiops.core as kh
 from khiops.sklearn.estimators import (