Skip to content

Commit 401c465

Browse files
authored
Merge pull request #373 from KhiopsML/370-improve-deprecation-path-for-data-paths
370 improve deprecation path for data paths
2 parents 8c5ae75 + 6142e50 commit 401c465

File tree

9 files changed

+559
-10
lines changed

9 files changed

+559
-10
lines changed

khiops/core/api.py

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -198,6 +198,71 @@ def _preprocess_arguments(args):
198198
return command_line_options, system_settings, task_is_called_with_domain
199199

200200

201+
def _deprecate_legacy_data_path(data_path_task_arg_name, task_args):
202+
"""Detect and replace legacy data path with the current syntax
203+
204+
.. note:: The function mutates task_args.
205+
"""
206+
if (
207+
data_path_task_arg_name in task_args
208+
and task_args[data_path_task_arg_name] is not None
209+
):
210+
assert "dictionary_name" in task_args or "train_dictionary_name" in task_args
211+
if "dictionary_name" in task_args:
212+
current_dictionary_name = task_args["dictionary_name"]
213+
else:
214+
current_dictionary_name = task_args["train_dictionary_name"]
215+
216+
for kdic_path in task_args[data_path_task_arg_name].keys():
217+
if isinstance(kdic_path, str):
218+
deprecated_data_path_separator = "`"
219+
data_path_separator = "/"
220+
kdic_path_for_warning = kdic_path
221+
else:
222+
assert isinstance(kdic_path, bytes)
223+
deprecated_data_path_separator = b"`"
224+
data_path_separator = b"/"
225+
if isinstance(current_dictionary_name, str):
226+
current_dictionary_name = bytes(
227+
current_dictionary_name, encoding="ascii"
228+
)
229+
kdic_path_for_warning = kdic_path.decode("ascii")
230+
231+
# Path split "`" yields non-empty fragments; the first fragment
232+
# starts with the current dictionary name
233+
kdic_path_parts = kdic_path.split(deprecated_data_path_separator)
234+
if all(len(path_part) > 0 for path_part in kdic_path_parts):
235+
source_dictionary_name = kdic_path_parts[0]
236+
if source_dictionary_name == current_dictionary_name:
237+
# Escape any "/" char in the path parts except for the
238+
# current dictionary, which is is skipped from the new path
239+
new_kdic_path_parts = []
240+
for kdic_path_part in kdic_path_parts[1:]:
241+
new_kdic_path_parts.append(
242+
kdic_path_part.replace(
243+
data_path_separator,
244+
deprecated_data_path_separator + data_path_separator,
245+
)
246+
)
247+
248+
# Replace the legacy data path with the current data path
249+
new_kdic_path = data_path_separator.join(new_kdic_path_parts)
250+
kdic_file_path = task_args[data_path_task_arg_name].pop(kdic_path)
251+
task_args[data_path_task_arg_name][new_kdic_path] = kdic_file_path
252+
warnings.warn(
253+
deprecation_message(
254+
"'`'-based dictionary data path: "
255+
f"'{kdic_path_for_warning}'",
256+
"11.0.1",
257+
replacement=(
258+
"'/'-based dictionary data path "
259+
f"convention: '{new_kdic_path}'"
260+
),
261+
quote=False,
262+
)
263+
)
264+
265+
201266
def _preprocess_task_arguments(task_args):
202267
"""Preprocessing of task arguments common to various tasks
203268
@@ -320,6 +385,14 @@ def _preprocess_task_arguments(task_args):
320385
)
321386
del task_args["max_variable_importances"]
322387

388+
# Detect and replace deprecated data-path syntax on additional_data_tables
389+
# Mutate task_args in the process
390+
for data_path_task_arg_name in (
391+
"additional_data_tables",
392+
"output_additional_data_tables",
393+
):
394+
_deprecate_legacy_data_path(data_path_task_arg_name, task_args)
395+
323396
# Flatten kwargs
324397
if "kwargs" in task_args:
325398
task_args.update(task_args["kwargs"])

khiops/core/dictionary.py

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -325,17 +325,21 @@ def get_dictionary_at_data_path(self, data_path):
325325
`ValueError`
326326
If the path is not found.
327327
"""
328-
# If data_path includes "`" but not "/", assume legacy data path
329-
if "`" in data_path and not "/" in data_path:
330-
warnings.warn(
331-
deprecation_message(
332-
"'`'-based dictionary data path convention",
333-
"11.0.1",
334-
replacement="'/'-based dictionary data path convention",
335-
quote=False,
328+
# If data_path includes "`" and starts with an existing dictionary,
329+
# assume legacy data path
330+
if "`" in data_path:
331+
data_path_parts = data_path.split("`")
332+
source_dictionary_name = data_path_parts[0]
333+
if any(kdic.name == source_dictionary_name for kdic in self.dictionaries):
334+
warnings.warn(
335+
deprecation_message(
336+
"'`'-based dictionary data path convention",
337+
"11.0.1",
338+
replacement="'/'-based dictionary data path convention",
339+
quote=False,
340+
)
336341
)
337-
)
338-
return self._get_dictionary_at_data_path_legacy(data_path)
342+
return self._get_dictionary_at_data_path_legacy(data_path)
339343
return self._get_dictionary_at_data_path(data_path)
340344

341345
def _get_dictionary_at_data_path_legacy(self, data_path):
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
// Scenario for task check_database
2+
// Dictionary file and class settings
3+
ClassManagement.OpenFile
4+
ClassFileName Customer.kdic //
5+
OK
6+
7+
// Train database settings
8+
TrainDatabase.ClassName Customer
9+
TrainDatabase.DatabaseSpec.Data.DatabaseFiles.List.Key
10+
TrainDatabase.DatabaseSpec.Data.DatabaseFiles.DataTableName Customer.csv //
11+
TrainDatabase.DatabaseSpec.Data.DatabaseFiles.List.Key Services
12+
TrainDatabase.DatabaseSpec.Data.DatabaseFiles.DataTableName ServicesBidon.csv //
13+
TrainDatabase.DatabaseSpec.Data.DatabaseFiles.List.Key Services/Usages
14+
TrainDatabase.DatabaseSpec.Data.DatabaseFiles.DataTableName UsagesBidon.csv //
15+
TrainDatabase.DatabaseSpec.Data.DatabaseFiles.List.Key Address
16+
TrainDatabase.DatabaseSpec.Data.DatabaseFiles.DataTableName AddressBidon.csv //
17+
TrainDatabase.DatabaseSpec.Data.HeaderLineUsed true
18+
TrainDatabase.DatabaseSpec.Data.FieldSeparator
19+
TrainDatabase.DatabaseSpec.Data.DatabaseFormatDetector.DetectFileFormat
20+
TrainDatabase.DatabaseSpec.Sampling.SampleNumberPercentage 100.0
21+
TrainDatabase.DatabaseSpec.Sampling.SamplingMode Include sample
22+
TrainDatabase.DatabaseSpec.Selection.SelectionAttribute
23+
TrainDatabase.DatabaseSpec.Selection.SelectionValue
24+
25+
// Log messages limit
26+
AnalysisSpec.SystemParameters.MaxErrorMessageNumberInLog 20
27+
28+
// Execute check database
29+
LearningTools.CheckData
30+
31+
// End of scenario for task check_database
32+
33+
// Exit Khiops
34+
ClassManagement.Quit
35+
OK
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
// Scenario for task deploy_model
2+
// Dictionary file settings
3+
ClassManagement.OpenFile
4+
ClassFileName Customer.kdic //
5+
OK
6+
7+
// Deploy settings
8+
LearningTools.TransferDatabase
9+
ClassName Customer
10+
11+
// Input database settings
12+
SourceDatabase.DatabaseSpec.Data.DatabaseFiles.List.Key
13+
SourceDatabase.DatabaseSpec.Data.DatabaseFiles.DataTableName Customer.csv //
14+
SourceDatabase.DatabaseSpec.Data.DatabaseFiles.List.Key Services
15+
SourceDatabase.DatabaseSpec.Data.DatabaseFiles.DataTableName ServicesBidon.csv //
16+
SourceDatabase.DatabaseSpec.Data.DatabaseFiles.List.Key Services/Usages
17+
SourceDatabase.DatabaseSpec.Data.DatabaseFiles.DataTableName UsagesBidon.csv //
18+
SourceDatabase.DatabaseSpec.Data.DatabaseFiles.List.Key Address
19+
SourceDatabase.DatabaseSpec.Data.DatabaseFiles.DataTableName AddressBidon.csv //
20+
SourceDatabase.DatabaseSpec.Data.HeaderLineUsed true
21+
SourceDatabase.DatabaseSpec.Data.FieldSeparator
22+
SourceDatabase.DatabaseSpec.Data.DatabaseFormatDetector.DetectFileFormat
23+
SourceDatabase.DatabaseSpec.Sampling.SampleNumberPercentage 100.0
24+
SourceDatabase.DatabaseSpec.Sampling.SamplingMode Include sample
25+
SourceDatabase.DatabaseSpec.Selection.SelectionAttribute
26+
SourceDatabase.DatabaseSpec.Selection.SelectionValue
27+
28+
// Output database settings
29+
TargetDatabase.DatabaseSpec.Data.DatabaseFiles.List.Key
30+
TargetDatabase.DatabaseSpec.Data.DatabaseFiles.DataTableName ./CustomerDeployed.csv //
31+
TargetDatabase.DatabaseSpec.Data.DatabaseFiles.List.Key Services
32+
TargetDatabase.DatabaseSpec.Data.DatabaseFiles.DataTableName ./TransferServicesBidon.csv //
33+
TargetDatabase.DatabaseSpec.Data.DatabaseFiles.List.Key Services/Usages
34+
TargetDatabase.DatabaseSpec.Data.DatabaseFiles.DataTableName ./TransferUsagesBidon.csv //
35+
TargetDatabase.DatabaseSpec.Data.DatabaseFiles.List.Key Address
36+
TargetDatabase.DatabaseSpec.Data.DatabaseFiles.DataTableName ./TransferAddressBidon.csv //
37+
TargetDatabase.DatabaseSpec.Data.HeaderLineUsed true
38+
TargetDatabase.DatabaseSpec.Data.FieldSeparator
39+
40+
// Transfer
41+
TransferDatabase
42+
Exit
43+
44+
// End of scenario for task deploy_model
45+
46+
// Exit Khiops
47+
ClassManagement.Quit
48+
OK
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
// Scenario for task evaluate_predictor
2+
// Dictionary file settings
3+
ClassManagement.OpenFile
4+
ClassFileName ModelingCustomer.kdic //
5+
OK
6+
7+
// Evaluate predictor settings
8+
LearningTools.EvaluatePredictors
9+
MainTargetModality
10+
EvaluationDatabase.DatabaseSpec.Data.DatabaseFiles.List.Key
11+
EvaluationDatabase.DatabaseSpec.Data.DatabaseFiles.DataTableName Customer.csv //
12+
EvaluationDatabase.DatabaseSpec.Data.DatabaseFiles.List.Key Services
13+
EvaluationDatabase.DatabaseSpec.Data.DatabaseFiles.DataTableName ServicesBidon.csv //
14+
EvaluationDatabase.DatabaseSpec.Data.DatabaseFiles.List.Key Services/Usages
15+
EvaluationDatabase.DatabaseSpec.Data.DatabaseFiles.DataTableName UsagesBidon.csv //
16+
EvaluationDatabase.DatabaseSpec.Data.DatabaseFiles.List.Key Address
17+
EvaluationDatabase.DatabaseSpec.Data.DatabaseFiles.DataTableName AddressBidon.csv //
18+
EvaluationDatabase.DatabaseSpec.Data.HeaderLineUsed true
19+
EvaluationDatabase.DatabaseSpec.Data.FieldSeparator
20+
EvaluationDatabase.DatabaseSpec.Data.DatabaseFormatDetector.DetectFileFormat
21+
EvaluationDatabase.DatabaseSpec.Sampling.SampleNumberPercentage 100.0
22+
EvaluationDatabase.DatabaseSpec.Sampling.SamplingMode Include sample
23+
EvaluatedPredictors.List.Key Customer
24+
EvaluationDatabase.DatabaseSpec.Selection.SelectionAttribute
25+
EvaluationDatabase.DatabaseSpec.Selection.SelectionValue
26+
ExportAsXls false
27+
EvaluationFileName CustomerResults\CustomerAnalysisResults.khj //
28+
29+
// Evaluate predictor
30+
EvaluatePredictors
31+
Exit
32+
33+
// End of scenario for task evaluate_predictor
34+
35+
// Exit Khiops
36+
ClassManagement.Quit
37+
OK
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
// Scenario for task train_coclustering
2+
// Dictionary file and class settings
3+
ClassManagement.OpenFile
4+
ClassFileName Customer.kdic //
5+
OK
6+
7+
// Train database settings
8+
Database.ClassName Customer
9+
Database.DatabaseSpec.Data.DatabaseFiles.List.Key
10+
Database.DatabaseSpec.Data.DatabaseFiles.DataTableName Customer.csv //
11+
Database.DatabaseSpec.Data.DatabaseFiles.List.Key Services
12+
Database.DatabaseSpec.Data.DatabaseFiles.DataTableName ServicesBidon.csv //
13+
Database.DatabaseSpec.Data.DatabaseFiles.List.Key Services/Usages
14+
Database.DatabaseSpec.Data.DatabaseFiles.DataTableName UsagesBidon.csv //
15+
Database.DatabaseSpec.Data.DatabaseFiles.List.Key Address
16+
Database.DatabaseSpec.Data.DatabaseFiles.DataTableName AddressBidon.csv //
17+
Database.DatabaseSpec.Data.HeaderLineUsed true
18+
Database.DatabaseSpec.Data.FieldSeparator
19+
Database.DatabaseSpec.Data.DatabaseFormatDetector.DetectFileFormat
20+
Database.DatabaseSpec.Sampling.SampleNumberPercentage 100.0
21+
Database.DatabaseSpec.Sampling.SamplingMode Include sample
22+
Database.DatabaseSpec.Selection.SelectionAttribute
23+
Database.DatabaseSpec.Selection.SelectionValue
24+
25+
// Coclustering variables settings
26+
AnalysisSpec.CoclusteringParameters.Attributes.InsertItemAfter
27+
AnalysisSpec.CoclusteringParameters.Attributes.Name id_customer
28+
AnalysisSpec.CoclusteringParameters.Attributes.InsertItemAfter
29+
AnalysisSpec.CoclusteringParameters.Attributes.Name Name
30+
AnalysisSpec.CoclusteringParameters.FrequencyAttributeName
31+
32+
// Minimum optimization time
33+
AnalysisSpec.SystemParameters.OptimizationTime 0
34+
35+
// Output settings
36+
AnalysisResults.CoclusteringFileName CustomerResults/CustomerCoclusteringResults._khcj //
37+
38+
// Train
39+
BuildCoclustering
40+
41+
// End of scenario for task train_coclustering
42+
43+
// Exit Khiops
44+
ClassManagement.Quit
45+
OK
Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
// Scenario for task train_predictor
2+
// Dictionary file and class settings
3+
ClassManagement.OpenFile
4+
ClassFileName Customer.kdic //
5+
OK
6+
7+
// Train/test database settings
8+
TrainDatabase.ClassName Customer
9+
TrainDatabase.DatabaseSpec.Data.DatabaseFiles.List.Key
10+
TrainDatabase.DatabaseSpec.Data.DatabaseFiles.DataTableName Customer.csv //
11+
TrainDatabase.DatabaseSpec.Data.DatabaseFiles.List.Key Services
12+
TrainDatabase.DatabaseSpec.Data.DatabaseFiles.DataTableName ServicesBidon.csv //
13+
TrainDatabase.DatabaseSpec.Data.DatabaseFiles.List.Key Services/Usages
14+
TrainDatabase.DatabaseSpec.Data.DatabaseFiles.DataTableName UsagesBidon.csv //
15+
TrainDatabase.DatabaseSpec.Data.DatabaseFiles.List.Key Address
16+
TrainDatabase.DatabaseSpec.Data.DatabaseFiles.DataTableName AddressBidon.csv //
17+
TrainDatabase.DatabaseSpec.Data.HeaderLineUsed true
18+
TrainDatabase.DatabaseSpec.Data.FieldSeparator
19+
TrainDatabase.DatabaseSpec.Data.DatabaseFormatDetector.DetectFileFormat
20+
TrainDatabase.DatabaseSpec.Sampling.SampleNumberPercentage 70.0
21+
TrainDatabase.DatabaseSpec.Sampling.SamplingMode Include sample
22+
TrainDatabase.DatabaseSpec.Selection.SelectionAttribute
23+
TrainDatabase.DatabaseSpec.Selection.SelectionValue
24+
TrainDatabase.TestDatabaseSpecificationMode Complementary
25+
26+
// Target variable
27+
AnalysisSpec.TargetAttributeName
28+
AnalysisSpec.MainTargetModality
29+
30+
// Do data preparation only
31+
AnalysisSpec.PredictorsSpec.AdvancedSpec.DataPreparationOnly false
32+
33+
// Selective Naive Bayes settings
34+
AnalysisSpec.PredictorsSpec.AdvancedSpec.SelectiveNaiveBayesParameters.TrainParameters.MaxEvaluatedAttributeNumber 0
35+
AnalysisSpec.PredictorsSpec.AdvancedSpec.SelectiveNaiveBayesParameters.SelectionParameters.MaxSelectedAttributeNumber 0
36+
37+
// Feature engineering
38+
AnalysisSpec.PredictorsSpec.ConstructionSpec.MaxTextFeatureNumber 10000
39+
AnalysisSpec.PredictorsSpec.ConstructionSpec.MaxTreeNumber 10
40+
AnalysisSpec.PredictorsSpec.ConstructionSpec.MaxAttributePairNumber 0
41+
AnalysisSpec.PredictorsSpec.AdvancedSpec.InspectAttributePairsParameters
42+
AllAttributePairs true
43+
Exit
44+
AnalysisSpec.PredictorsSpec.ConstructionSpec.KeepSelectedAttributesOnly true
45+
AnalysisSpec.PredictorsSpec.ConstructionSpec.MaxConstructedAttributeNumber 1000
46+
AnalysisSpec.PredictorsSpec.AdvancedSpec.InspectConstructionDomain
47+
Exit
48+
49+
// Text feature parameters
50+
AnalysisSpec.PredictorsSpec.AdvancedSpec.InspectTextFeaturesParameters
51+
TextFeatures words
52+
Exit
53+
54+
55+
// Data preparation (discretization & grouping) settings
56+
AnalysisSpec.PreprocessingSpec.TargetGrouped false
57+
AnalysisSpec.PreprocessingSpec.InspectAdvancedParameters
58+
DiscretizerUnsupervisedMethodName MODL
59+
GrouperUnsupervisedMethodName MODL
60+
Exit
61+
62+
// Max parts
63+
AnalysisSpec.PreprocessingSpec.MaxPartNumber 0
64+
65+
// Output settings
66+
AnalysisResults.ReportFileName CustomerResults/CustomerAnalysisResults._khj //
67+
68+
// Build model
69+
ComputeStats
70+
71+
// End of scenario for task train_predictor
72+
73+
// Exit Khiops
74+
ClassManagement.Quit
75+
OK

0 commit comments

Comments
 (0)