Skip to content

Commit 5c93842

Browse files
authored
Add new sklearn attributes to supervised estimators (#190)
1 parent 7da3e71 commit 5c93842

File tree

6 files changed

+569
-11
lines changed

6 files changed

+569
-11
lines changed

CHANGELOG.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,11 @@
66
- Example: 10.2.1.4 is the 5th version that supports khiops 10.2.1.
77
- Internals: Changes in *Internals* sections are unlikely to be of interest for data scientists.
88

9+
## 10.2.2.4 - 2024-08-05
10+
11+
### Added
12+
- (`sklearn`) Sklearn's attributes for supervised estimators.
13+
914
## 10.2.2.3 - 2024-08-02
1015

1116
### Fixed

doc/samples/samples_sklearn.rst

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,14 @@ Samples
6868
# Train the classifier
6969
khc.fit(X_train, y_train)
7070
71+
# Show the feature importance info
72+
print(f"Features evaluated: {khc.n_features_evaluated_}")
73+
print(f"Features selected : {khc.n_features_used_}")
74+
print("Top 3 used features")
75+
for i, feature in enumerate(khc.feature_used_names_[:3]):
76+
print(f"{feature} - Importance: {khc.feature_used_importances_[i][2]}")
77+
print("---")
78+
7179
# Predict the classes on the test dataset
7280
y_test_pred = khc.predict(X_test)
7381
print("Predicted classes (first 10):")
@@ -273,6 +281,14 @@ Samples
273281
khc = KhiopsClassifier(n_trees=0)
274282
khc.fit(X, y)
275283
284+
# Show the feature importance info
285+
print(f"Features evaluated: {khc.n_features_evaluated_}")
286+
print(f"Features selected : {khc.n_features_used_}")
287+
print("Top 3 used features")
288+
for i, feature in enumerate(khc.feature_used_names_[:3]):
289+
print(f"{feature} - Importance: {khc.feature_used_importances_[i][2]}")
290+
print("---")
291+
276292
# Predict the class on the test dataset
277293
y_pred = khc.predict(X)
278294
print("Predicted classes (first 10):")
@@ -420,6 +436,14 @@ Samples
420436
# Train the regressor
421437
khr.fit(X_train, y_train)
422438
439+
# Show the feature importance info
440+
print(f"Features evaluated: {khr.n_features_evaluated_}")
441+
print(f"Features selected : {khr.n_features_used_}")
442+
print("Top 3 used features")
443+
for i, feature in enumerate(khr.feature_used_names_[:3]):
444+
print(f"{feature} - Importance: {khr.feature_used_importances_[i][2]}")
445+
print("---")
446+
423447
# Predict the values on the test dataset
424448
y_test_pred = khr.predict(X_test)
425449
print("Predicted values for 'age' (first 10):")
@@ -561,6 +585,13 @@ Samples
561585
khe = KhiopsEncoder(n_features=10)
562586
khe.fit(X, y)
563587
588+
# Show the feature importance info
589+
print(f"Features evaluated: {khe.n_features_evaluated_}")
590+
print("Top 3 evaluated features")
591+
for i, feature in enumerate(khe.feature_evaluated_names_[:3]):
592+
print(f"{feature} - Level: {khe.feature_evaluated_importances_[i][0]}")
593+
print("---")
594+
564595
# Transform the train dataset
565596
print("Encoded feature names:")
566597
print(khe.feature_names_out_)

khiops/samples/samples_sklearn.ipynb

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,14 @@
5454
"# Train the classifier\n",
5555
"khc.fit(X_train, y_train)\n",
5656
"\n",
57+
"# Show the feature importance info\n",
58+
"print(f\"Features evaluated: {khc.n_features_evaluated_}\")\n",
59+
"print(f\"Features selected : {khc.n_features_used_}\")\n",
60+
"print(\"Top 3 used features\")\n",
61+
"for i, feature in enumerate(khc.feature_used_names_[:3]):\n",
62+
" print(f\"{feature} - Importance: {khc.feature_used_importances_[i][2]}\")\n",
63+
"print(\"---\")\n",
64+
"\n",
5765
"# Predict the classes on the test dataset\n",
5866
"y_test_pred = khc.predict(X_test)\n",
5967
"print(\"Predicted classes (first 10):\")\n",
@@ -298,6 +306,14 @@
298306
"khc = KhiopsClassifier(n_trees=0)\n",
299307
"khc.fit(X, y)\n",
300308
"\n",
309+
"# Show the feature importance info\n",
310+
"print(f\"Features evaluated: {khc.n_features_evaluated_}\")\n",
311+
"print(f\"Features selected : {khc.n_features_used_}\")\n",
312+
"print(\"Top 3 used features\")\n",
313+
"for i, feature in enumerate(khc.feature_used_names_[:3]):\n",
314+
" print(f\"{feature} - Importance: {khc.feature_used_importances_[i][2]}\")\n",
315+
"print(\"---\")\n",
316+
"\n",
301317
"# Predict the class on the test dataset\n",
302318
"y_pred = khc.predict(X)\n",
303319
"print(\"Predicted classes (first 10):\")\n",
@@ -484,6 +500,14 @@
484500
"# Train the regressor\n",
485501
"khr.fit(X_train, y_train)\n",
486502
"\n",
503+
"# Show the feature importance info\n",
504+
"print(f\"Features evaluated: {khr.n_features_evaluated_}\")\n",
505+
"print(f\"Features selected : {khr.n_features_used_}\")\n",
506+
"print(\"Top 3 used features\")\n",
507+
"for i, feature in enumerate(khr.feature_used_names_[:3]):\n",
508+
" print(f\"{feature} - Importance: {khr.feature_used_importances_[i][2]}\")\n",
509+
"print(\"---\")\n",
510+
"\n",
487511
"# Predict the values on the test dataset\n",
488512
"y_test_pred = khr.predict(X_test)\n",
489513
"print(\"Predicted values for 'age' (first 10):\")\n",
@@ -664,6 +688,13 @@
664688
"khe = KhiopsEncoder(n_features=10)\n",
665689
"khe.fit(X, y)\n",
666690
"\n",
691+
"# Show the feature importance info\n",
692+
"print(f\"Features evaluated: {khe.n_features_evaluated_}\")\n",
693+
"print(\"Top 3 evaluated features\")\n",
694+
"for i, feature in enumerate(khe.feature_evaluated_names_[:3]):\n",
695+
" print(f\"{feature} - Level: {khe.feature_evaluated_importances_[i][0]}\")\n",
696+
"print(\"---\")\n",
697+
"\n",
667698
"# Transform the train dataset\n",
668699
"print(\"Encoded feature names:\")\n",
669700
"print(khe.feature_names_out_)\n",

khiops/samples/samples_sklearn.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,14 @@ def khiops_classifier():
5757
# Train the classifier
5858
khc.fit(X_train, y_train)
5959

60+
# Show the feature importance info
61+
print(f"Features evaluated: {khc.n_features_evaluated_}")
62+
print(f"Features selected : {khc.n_features_used_}")
63+
print("Top 3 used features")
64+
for i, feature in enumerate(khc.feature_used_names_[:3]):
65+
print(f"{feature} - Importance: {khc.feature_used_importances_[i][2]}")
66+
print("---")
67+
6068
# Predict the classes on the test dataset
6169
y_test_pred = khc.predict(X_test)
6270
print("Predicted classes (first 10):")
@@ -273,6 +281,14 @@ def khiops_classifier_multitable_snowflake():
273281
khc = KhiopsClassifier(n_trees=0)
274282
khc.fit(X, y)
275283

284+
# Show the feature importance info
285+
print(f"Features evaluated: {khc.n_features_evaluated_}")
286+
print(f"Features selected : {khc.n_features_used_}")
287+
print("Top 3 used features")
288+
for i, feature in enumerate(khc.feature_used_names_[:3]):
289+
print(f"{feature} - Importance: {khc.feature_used_importances_[i][2]}")
290+
print("---")
291+
276292
# Predict the class on the test dataset
277293
y_pred = khc.predict(X)
278294
print("Predicted classes (first 10):")
@@ -423,6 +439,14 @@ def khiops_regressor():
423439
# Train the regressor
424440
khr.fit(X_train, y_train)
425441

442+
# Show the feature importance info
443+
print(f"Features evaluated: {khr.n_features_evaluated_}")
444+
print(f"Features selected : {khr.n_features_used_}")
445+
print("Top 3 used features")
446+
for i, feature in enumerate(khr.feature_used_names_[:3]):
447+
print(f"{feature} - Importance: {khr.feature_used_importances_[i][2]}")
448+
print("---")
449+
426450
# Predict the values on the test dataset
427451
y_test_pred = khr.predict(X_test)
428452
print("Predicted values for 'age' (first 10):")
@@ -581,6 +605,13 @@ def khiops_encoder_multitable_snowflake():
581605
khe = KhiopsEncoder(n_features=10)
582606
khe.fit(X, y)
583607

608+
# Show the feature importance info
609+
print(f"Features evaluated: {khe.n_features_evaluated_}")
610+
print("Top 3 evaluated features")
611+
for i, feature in enumerate(khe.feature_evaluated_names_[:3]):
612+
print(f"{feature} - Level: {khe.feature_evaluated_importances_[i][0]}")
613+
print("---")
614+
584615
# Transform the train dataset
585616
print("Encoded feature names:")
586617
print(khe.feature_names_out_)

0 commit comments

Comments
 (0)