Skip to content

Commit e325efd

Browse files
authored
Update Core API docstring and default values (#412)
- Replace "None" with "none" as an acceptable value for discretization_method and grouping_method, following Khiops Core PR KhiopsML/khiops#695.
- Use "MODL" as the default value instead of Python None for the same two parameters.
- Stop removing the discretization_method and grouping_method arguments in case of supervised analysis: Khiops Core ignores them in the scenarios anyway, and removing them generated spurious scenario entries (when an argument is absent, the templates substitute its default value).
- In train_recoder, fix the documented default value of keep_initial_categorical_variables and keep_initial_numerical_variables to False, in accordance with the function signature.
1 parent 9758a3d commit e325efd

File tree

1 file changed

+20
-41
lines changed

1 file changed

+20
-41
lines changed

khiops/core/api.py

Lines changed: 20 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -321,27 +321,6 @@ def _preprocess_task_arguments(task_args):
321321
task_args["dictionary_file_path_or_domain"], task_args["trace"]
322322
)
323323

324-
# Set the default discretization method for unsupervised analysis
325-
# "target_variable" is mandatory if "discretization_method" or
326-
# "grouping_method" are present
327-
if "discretization_method" in task_args and task_args["target_variable"] == "":
328-
if task_args["discretization_method"] is None:
329-
task_args["discretization_method"] = "MODL"
330-
331-
# Remove discretization method if specified for supervised analysis:
332-
# it is always MODL
333-
if "discretization_method" in task_args and task_args["target_variable"] != "":
334-
del task_args["discretization_method"]
335-
336-
# Set the default grouping method for unsupervised analysis
337-
if "grouping_method" in task_args and task_args["target_variable"] == "":
338-
if task_args["grouping_method"] is None:
339-
task_args["grouping_method"] = "MODL"
340-
341-
# Remove grouping method if specified for supervised analysis: it is always MODL
342-
if "grouping_method" in task_args and task_args["target_variable"] != "":
343-
del task_args["grouping_method"]
344-
345324
# Transform the use_complement_as_test bool parameter to its string counterpart
346325
if "use_complement_as_test" in task_args:
347326
if task_args["use_complement_as_test"]:
@@ -691,8 +670,8 @@ def train_predictor(
691670
all_possible_pairs=True,
692671
specific_pairs=None,
693672
group_target_value=False,
694-
discretization_method=None,
695-
grouping_method=None,
673+
discretization_method="MODL",
674+
grouping_method="MODL",
696675
max_parts=0,
697676
log_file_path=None,
698677
output_scenario_path=None,
@@ -797,13 +776,13 @@ def train_predictor(
797776
group_target_value : bool, default ``False``
798777
Allows grouping of the target variable values in classification. It can
799778
substantially increase the training time.
800-
discretization_method : str
801-
Name of the discretization method, for unsupervised analysis only.
802-
Its valid values are: "MODL" (default), "EqualWidth", "EqualFrequency"
803-
or "None". Ignored for supervised analysis.
804-
grouping_method : str
805-
Name of the grouping method, for unsupervised analysis only.
806-
Its valid values are: "MODL" (default), "BasicGrouping" or "None".
779+
discretization_method : str, default "MODL"
780+
Name of the discretization method in case of unsupervised analysis.
781+
Its valid values are: "MODL", "EqualWidth", "EqualFrequency" or "none".
782+
Ignored for supervised analysis.
783+
grouping_method : str, default "MODL"
784+
Name of the grouping method in case of unsupervised analysis.
785+
Its valid values are: "MODL", "BasicGrouping" or "none".
807786
Ignored for supervised analysis.
808787
max_parts : int, default 0
809788
Maximum number of variable parts produced by preprocessing methods. If equal
@@ -1124,8 +1103,8 @@ def train_recoder(
11241103
numerical_recoding_method="part Id",
11251104
pairs_recoding_method="part Id",
11261105
group_target_value=False,
1127-
discretization_method=None,
1128-
grouping_method=None,
1106+
discretization_method="MODL",
1107+
grouping_method="MODL",
11291108
max_parts=0,
11301109
log_file_path=None,
11311110
output_scenario_path=None,
@@ -1227,9 +1206,9 @@ def train_recoder(
12271206
If ``True`` keeps only informative variables.
12281207
max_variables : int, default 0
12291208
Maximum number of variables to keep. If equal to 0 keeps all variables.
1230-
keep_initial_categorical_variables : bool, default ``True``
1209+
keep_initial_categorical_variables : bool, default ``False``
12311210
If ``True`` keeps the initial categorical variables.
1232-
keep_initial_numerical_variables : bool, default ``True``
1211+
keep_initial_numerical_variables : bool, default ``False``
12331212
If ``True`` keeps initial numerical variables.
12341213
categorical_recoding_method : str
12351214
Type of recoding for categorical variables. Types available:
@@ -1256,13 +1235,13 @@ def train_recoder(
12561235
- "0-1 binarization": A 0's and 1's coding the interval/group id
12571236
- "conditional info": Conditional information of the interval/group
12581237
- "none": Keeps the variable as-is
1259-
discretization_method : str
1260-
Name of the discretization method, for unsupervised analysis only.
1261-
Its valid values are: "MODL" (default), "EqualWidth", "EqualFrequency"
1262-
or "None". Ignored for supervised analysis.
1263-
grouping_method : str
1264-
Name of the grouping method, for unsupervised analysis only.
1265-
Its valid values are: "MODL" (default), "BasicGrouping" or "None".
1238+
discretization_method : str, default "MODL"
1239+
Name of the discretization method in case of unsupervised analysis.
1240+
Its valid values are: "MODL", "EqualWidth", "EqualFrequency" or "none".
1241+
Ignored for supervised analysis.
1242+
grouping_method : str, default "MODL"
1243+
Name of the grouping method in case of unsupervised analysis.
1244+
Its valid values are: "MODL", "BasicGrouping" or "none".
12661245
Ignored for supervised analysis.
12671246
max_parts : int, default 0
12681247
Maximum number of variable parts produced by preprocessing methods. If equal

0 commit comments

Comments
 (0)