Merge pull request #410 from KhiopsML/407-separate-intepretation-dictionary-build-from-the-lever-variables-part

popescu-v · web-flow · commit 9758a3d617cd · 2025-05-28T16:10:31.000+02:00
Update interpretation support to the Khiops Core alpha 10.7.3-a.0
diff --git a/doc/samples/samples.rst b/doc/samples/samples.rst
@@ -128,7 +128,7 @@ Samples
 
     # Set the file paths
     dictionary_file_path = os.path.join(kh.get_samples_dir(), "Adult", "Adult.kdic")
-    output_dir = os.path.join("kh_samples", "export_dictionary_file")
+    output_dir = os.path.join("kh_samples", "export_dictionary_files")
     output_dictionary_file_path = os.path.join(output_dir, "ModifiedAdult.kdic")
     output_dictionary_json_path = os.path.join(output_dir, "ModifiedAdult.kdicj")
     alt_output_dictionary_json_path = os.path.join(output_dir, "AltModifiedAdult.kdicj")
@@ -686,6 +686,37 @@ Samples
     kh.interpret_predictor(predictor_file_path, "SNB_Adult", interpretor_file_path)
 
     print(f"The interpretation model is '{interpretor_file_path}'")
+.. autofunction:: reinforce_predictor
+.. code-block:: python
+
+    # Imports
+    import os
+    from khiops import core as kh
+
+    dictionary_file_path = os.path.join(kh.get_samples_dir(), "Adult", "Adult.kdic")
+    data_table_path = os.path.join(kh.get_samples_dir(), "Adult", "Adult.txt")
+    output_dir = os.path.join("kh_samples", "reinforce_predictor")
+    analysis_report_file_path = os.path.join(output_dir, "AnalysisResults.khj")
+    reinforced_predictor_file_path = os.path.join(output_dir, "ReinforcedAdultModel.kdic")
+
+    # Build prediction model
+    _, predictor_file_path = kh.train_predictor(
+        dictionary_file_path,
+        "Adult",
+        data_table_path,
+        "class",
+        analysis_report_file_path,
+    )
+
+    # Build reinforced predictor
+    kh.reinforce_predictor(
+        predictor_file_path,
+        "SNB_Adult",
+        reinforced_predictor_file_path,
+        reinforcement_lever_variables=["occupation"],
+    )
+
+    print(f"The reinforced predictor is '{reinforced_predictor_file_path}'")
 .. autofunction:: multiple_train_predictor
 .. code-block:: python
 
@@ -1064,7 +1095,7 @@ Samples
     dictionary_file_path = os.path.join(accidents_dir, "Accidents.kdic")
     accidents_table_path = os.path.join(accidents_dir, "Accidents.txt")
     vehicles_table_path = os.path.join(accidents_dir, "Vehicles.txt")
-    output_dir = os.path.join("kh_samples", "deploy_model_mt")
+    output_dir = os.path.join("kh_samples", "deploy_model_mt_with_interpretation")
     report_file_path = os.path.join(output_dir, "AnalysisResults.khj")
     interpretor_file_path = os.path.join(output_dir, "InterpretationModel.kdic")
     output_data_table_path = os.path.join(output_dir, "InterpretedAccidents.txt")
@@ -1088,7 +1119,8 @@ Samples
         model_dictionary_file_path,
         "SNB_Accident",
         interpretor_file_path,
-        reinforcement_target_value="NonLethal",
+        max_variable_importances=3,
+        importance_ranking="Individual",
     )
 
     # Deploy the interpretation model on the database
@@ -1101,6 +1133,53 @@ Samples
         output_data_table_path,
         additional_data_tables={"Vehicles": vehicles_table_path},
     )
+.. autofunction:: deploy_reinforced_model_mt
+.. code-block:: python
+
+    # Imports
+    import os
+    from khiops import core as kh
+
+    # Set the file paths
+    accidents_dir = os.path.join(kh.get_samples_dir(), "AccidentsSummary")
+    dictionary_file_path = os.path.join(accidents_dir, "Accidents.kdic")
+    accidents_table_path = os.path.join(accidents_dir, "Accidents.txt")
+    vehicles_table_path = os.path.join(accidents_dir, "Vehicles.txt")
+    output_dir = os.path.join("kh_samples", "deploy_reinforced_model_mt")
+    report_file_path = os.path.join(output_dir, "AnalysisResults.khj")
+    reinforced_predictor_file_path = os.path.join(output_dir, "ReinforcedModel.kdic")
+    output_data_table_path = os.path.join(output_dir, "ReinforcedAccidents.txt")
+
+    # Train the predictor (see train_predictor_mt for details)
+    _, model_dictionary_file_path = kh.train_predictor(
+        dictionary_file_path,
+        "Accident",
+        accidents_table_path,
+        "Gravity",
+        report_file_path,
+        additional_data_tables={"Vehicles": vehicles_table_path},
+        max_trees=0,
+    )
+
+    # Reinforce the predictor
+    kh.reinforce_predictor(
+        model_dictionary_file_path,
+        "SNB_Accident",
+        reinforced_predictor_file_path,
+        reinforcement_target_value="NonLethal",
+        reinforcement_lever_variables=["InAgglomeration", "CollisionType"],
+    )
+
+    # Deploy the reinforced model on the database
+    # Besides the mandatory parameters, it is specified:
+    # - A python dictionary linking data paths to file paths for non-root tables
+    kh.deploy_model(
+        reinforced_predictor_file_path,
+        "Reinforcement_SNB_Accident",
+        accidents_table_path,
+        output_data_table_path,
+        additional_data_tables={"Vehicles": vehicles_table_path},
+    )
 .. autofunction:: deploy_model_mt_snowflake
 .. code-block:: python
 
diff --git a/khiops/core/api.py b/khiops/core/api.py
@@ -375,16 +375,6 @@ def _preprocess_task_arguments(task_args):
         if isinstance(task_args["selection_value"], (int, float)):
             task_args["selection_value"] = str(task_args["selection_value"])
 
-    # Discard the max_variable_importances interpretation parameters
-    if "max_variable_importances" in task_args:
-        if task_args["max_variable_importances"] is not None:
-            warnings.warn(
-                "The 'max_variable_importances' parameter of the "
-                "'khiops.core.api.interpret_predictor' function is not supported "
-                " yet. All model variables' importances are computed."
-            )
-        del task_args["max_variable_importances"]
-
     # Detect and replace deprecated data-path syntax on additional_data_tables
     # Mutate task_args in the process
     for data_path_task_arg_name in (
@@ -880,9 +870,8 @@ def interpret_predictor(
     dictionary_file_path_or_domain,
     predictor_dictionary_name,
     interpretor_file_path,
-    max_variable_importances=None,
-    reinforcement_target_value="",
-    reinforcement_lever_variables=None,
+    max_variable_importances=100,
+    importance_ranking="Global",
     log_file_path=None,
     output_scenario_path=None,
     task_file_path=None,
@@ -905,18 +894,80 @@ def interpret_predictor(
         Name of the predictor dictionary used while building the interpretation model.
     interpretor_file_path : str
         Path to the interpretor dictionary file.
-    max_variable_importances : int, optional
+    max_variable_importances : int, default 100
         Maximum number of variable importances to be selected in the interpretation
-        model. If not set, then all the variables in the prediction model are
-        considered.
-        ..note:: Not currently supported; not taken into account if set.
+        model. If the predictor contains fewer variables than this number, then
+        all the variables of the predictor are considered.
+    importance_ranking : str, default "Global"
+        Ranking of the Shapley values produced by the interpretor. Can be one of:
+
+        - "Global": predictor variables are ranked by decreasing global importance.
+
+        - "Individual": predictor variables are ranked by decreasing individual
+          Shapley value.
+    ... :
+        See :ref:`core-api-common-params`.
+
+    Raises
+    ------
+    `ValueError`
+        Invalid values of an argument
+    `TypeError`
+        Invalid type of an argument
+
+    Examples
+    --------
+    See the following functions of the ``samples.py`` documentation script:
+        - `samples.interpret_predictor()`
+        - `samples.deploy_model_mt_with_interpretation()`
+    """
+    # Save the task arguments
+    # WARNING: Do not move this line, see the top of the "tasks" section for details
+    task_args = locals()
+
+    # Run the task
+    _run_task("interpret_predictor", task_args)
+
+
+def reinforce_predictor(
+    dictionary_file_path_or_domain,
+    predictor_dictionary_name,
+    reinforced_predictor_file_path,
+    reinforcement_target_value="",
+    reinforcement_lever_variables=None,
+    log_file_path=None,
+    output_scenario_path=None,
+    task_file_path=None,
+    trace=False,
+    stdout_file_path="",
+    stderr_file_path="",
+    max_cores=None,
+    memory_limit_mb=None,
+    temp_dir="",
+    scenario_prologue="",
+    **kwargs,
+):
+    r"""Builds a reinforced predictor from a predictor
+
+    A reinforced predictor is a model which increases the importance of specified lever
+    variables in order to increase the probability of occurrence of the specified target
+    value.
+
+    Parameters
+    ----------
+    dictionary_file_path_or_domain : str or `.DictionaryDomain`
+        Path of a Khiops dictionary file or a DictionaryDomain object.
+    predictor_dictionary_name : str
+        Name of the predictor dictionary used while building the reinforced predictor.
+    reinforced_predictor_file_path : str
+        Path to the reinforced predictor dictionary file.
     reinforcement_target_value : str, default ""
         If this target value is specified, then its probability of occurrence is
         tentatively increased.
-    reinforcement_lever_variables : list of str, optional
+    reinforcement_lever_variables : list of str
         The names of variables to use as lever variables while building the
-        interpretation model. Min length: 0. Max length: the total number of variables
-        in the prediction model. If not specified, all variables are used.
+        reinforced predictor. Min length: 1. Max length: the total number of variables
+        in the prediction model.
     ... :
         See :ref:`core-api-common-params`.
 
@@ -930,14 +981,15 @@ def interpret_predictor(
     Examples
     --------
     See the following functions of the ``samples.py`` documentation script:
-        - `samples.interpret_predictor()`
+        - `samples.reinforce_predictor()`
+        - `samples.deploy_reinforced_model_mt()`
     """
     # Save the task arguments
     # WARNING: Do not move this line, see the top of the "tasks" section for details
     task_args = locals()
 
     # Run the task
-    _run_task("interpret_predictor", task_args)
+    _run_task("reinforce_predictor", task_args)
 
 
 def evaluate_predictor(
diff --git a/khiops/core/internals/tasks/__init__.py b/khiops/core/internals/tasks/__init__.py
@@ -22,6 +22,7 @@
     extract_keys_from_data_table,
     interpret_predictor,
     prepare_coclustering_deployment,
+    reinforce_predictor,
     simplify_coclustering,
     sort_data_table,
     train_coclustering,
@@ -43,6 +44,7 @@
     extract_clusters,
     extract_keys_from_data_table,
     interpret_predictor,
+    reinforce_predictor,
     prepare_coclustering_deployment,
     simplify_coclustering,
     sort_data_table,
diff --git a/khiops/core/internals/tasks/interpret_predictor.py b/khiops/core/internals/tasks/interpret_predictor.py
@@ -6,7 +6,7 @@
 ######################################################################################
 """interpret_predictor task family"""
 from khiops.core.internals import task as tm
-from khiops.core.internals.types import ListType, StringLikeType
+from khiops.core.internals.types import IntType, StringLikeType
 
 # Disable long lines to have readable scenarios
 # pylint: disable=line-too-long
@@ -21,8 +21,8 @@
             ("interpretor_file_path", StringLikeType),
         ],
         [
-            ("reinforcement_target_value", StringLikeType, ""),
-            ("reinforcement_lever_variables", ListType(StringLikeType), None),
+            ("max_variable_importances", IntType, 100),
+            ("importance_ranking", StringLikeType, "Global"),
         ],
         ["dictionary_file_path", "interpretor_file_path"],
         # pylint: disable=line-too-long
@@ -38,14 +38,12 @@
 
         // Interpret model
         LearningTools.InterpretPredictor
-        HowParameter.HowClass __reinforcement_target_value__
 
-        __DICT__
-        __reinforcement_lever_variables__
-        HowParameter.leverVariablesSpecView.UnselectAll
-        HowParameter.leverVariablesSpecView.AttributeSpecs.List.Key
-        HowParameter.leverVariablesSpecView.AttributeSpecs.Used
-        __END_DICT__
+        // Number of predictor variables exploited in the interpretation model
+        ContributionAttributeNumber __max_variable_importances__
+
+        // Ranking of the Shapley value produced by the interpretation model
+        ShapleyValueRanking __importance_ranking__
 
         // Build interpretation dictionary
         BuildInterpretationClass
diff --git a/khiops/core/internals/tasks/reinforce_predictor.py b/khiops/core/internals/tasks/reinforce_predictor.py
@@ -0,0 +1,60 @@
+######################################################################################
+# Copyright (c) 2023-2025 Orange. All rights reserved.                               #
+# This software is distributed under the BSD 3-Clause-clear License, the text of     #
+# which is available at https://spdx.org/licenses/BSD-3-Clause-Clear.html or         #
+# see the "LICENSE.md" file for more details.                                        #
+######################################################################################
+"""reinforce_predictor task family"""
+from khiops.core.internals import task as tm
+from khiops.core.internals.types import ListType, StringLikeType
+
+# Disable long lines to have readable scenarios
+# pylint: disable=line-too-long
+TASKS = [
+    tm.KhiopsTask(
+        "reinforce_predictor",
+        "khiops",
+        "10.7.3-a.0",
+        [
+            ("dictionary_file_path", StringLikeType),
+            ("predictor_dictionary_name", StringLikeType),
+            ("reinforced_predictor_file_path", StringLikeType),
+        ],
+        [
+            ("reinforcement_target_value", StringLikeType, ""),
+            ("reinforcement_lever_variables", ListType(StringLikeType), None),
+        ],
+        ["dictionary_file_path", "reinforced_predictor_file_path"],
+        # pylint: disable=line-too-long
+        # fmt: off
+        """
+        // Dictionary file and class settings
+        ClassManagement.OpenFile
+        ClassFileName __dictionary_file_path__
+        OK
+
+        // Reinforcement settings
+        TrainDatabase.ClassName __predictor_dictionary_name__
+
+        // Reinforce model
+        LearningTools.ReinforcePredictor
+        ReinforcedTargetValue __reinforcement_target_value__
+
+        LeverAttributes.UnselectAll
+        __DICT__
+        __reinforcement_lever_variables__
+        LeverAttributes.List.Key
+        LeverAttributes.Used
+        __END_DICT__
+
+        // Build reinforced predictor
+        BuildReinforcementClass
+
+        // Output settings
+        ClassFileName __reinforced_predictor_file_path__
+        OK
+        Exit
+        """,
+        # fmt: on
+    ),
+]
diff --git a/khiops/samples/samples.ipynb b/khiops/samples/samples.ipynb
diff --git a/khiops/samples/samples.py b/khiops/samples/samples.py