Skip to content

Commit 7da3e71

Browse files
authored
Merge pull request #222 from KhiopsML/dev
Release 10.2.2.3
2 parents fe00d30 + 30bfc09 commit 7da3e71

File tree

9 files changed

+166
-96
lines changed

9 files changed

+166
-96
lines changed

.github/workflows/pip.yml

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -93,8 +93,11 @@ jobs:
9393
kh-samples sklearn -i khiops_classifier -e
9494
9595
# Test that the line containing "MPI command" also contains
96-
# "mpiexec", which means that `mpiexec` has been found
97-
kh-status | grep "MPI command" | grep -wq mpiexec
96+
# an executable name under a /bin directory
97+
# Note: this executable name can be different, depending on the MPI
98+
# backend and OS; for instance, "orterun" for OpenMPI on Ubuntu Linux, but
99+
# "mpiexec" for OpenMPI on Rocky Linux
100+
kh-status | grep "MPI command" | grep -Ewq "(/.+?)/bin/.+"
98101
release:
99102
if: github.ref_type == 'tag'
100103
needs: [build, test]

.github/workflows/unit-tests.yml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,9 @@ jobs:
9999
OMPI_MCA_rmaps_base_oversubscribe: true
100100
PRTE_MCA_rmaps_default_mapping_policy: :oversubscribe
101101
run: |
102+
# Make sure '/bin' is before '/usr/bin' in PATH
103+
PATH=$(echo "/bin:"$PATH | sed 's#:/bin##')
104+
102105
# This is needed so that the Git tag is parsed and the khiops-python
103106
# version is retrieved
104107
git config --global --add safe.directory $(realpath .)
@@ -177,6 +180,9 @@ jobs:
177180
# Force > 2 CPU cores to launch mpiexec
178181
KHIOPS_PROC_NUMBER: 4
179182
run: |-
183+
# Make sure '/bin' is before '/usr/bin' in PATH
184+
PATH=$(echo "/bin:"$PATH | sed 's#:/bin##')
185+
180186
# Make sure MPI support is not loaded through env modules
181187
# Note: As Docker container's shell is non-interactive, environment
182188
# modules are currently not initializing the shell anyway

CHANGELOG.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,14 @@
66
- Example: 10.2.1.4 is the 5th version that supports khiops 10.2.1.
77
- Internals: Changes in *Internals* sections are unlikely to be of interest for data scientists.
88

9+
## 10.2.2.3 - 2024-08-02
10+
11+
### Fixed
12+
- (`core`) Handling of unknown parameters in API functions: they now fail.
13+
- *Internals*:
14+
- Detection of the path to the MPI command: the real path to the executable is
15+
now used.
16+
917
## 10.2.2.2 - 2024-07-19
1018

1119
### Fixed

khiops/core/api.py

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -299,6 +299,11 @@ def _preprocess_task_arguments(task_args):
299299
)
300300
)
301301

302+
# Flatten kwargs
303+
if "kwargs" in task_args:
304+
task_args.update(task_args["kwargs"])
305+
del task_args["kwargs"]
306+
302307
return task_called_with_domain
303308

304309

@@ -336,10 +341,10 @@ def _preprocess_format_spec(detect_format, header_line, field_separator):
336341
def _clean_task_args(task_args):
337342
"""Cleans the task arguments
338343
339-
More precisely:
340-
- It removes command line arguments (they already are in another object).
341-
- It removes parameters removed from the API and warns about it.
342-
- It removes renamed API parameters and warns about it.
344+
More precisely it removes:
345+
- Command line arguments (they already are in another object).
346+
- Parameters removed from the API and warns about it.
347+
- Renamed API parameters and warns about it.
343348
"""
344349
# Remove non-task parameters
345350
command_line_arg_names = [
@@ -353,7 +358,6 @@ def _clean_task_args(task_args):
353358
"trace",
354359
"stdout_file_path",
355360
"stderr_file_path",
356-
"kwargs",
357361
]
358362
for arg_name in command_line_arg_names + other_arg_names:
359363
if arg_name in task_args:

khiops/core/internals/runner.py

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1146,8 +1146,12 @@ def _initialize_mpi_command_args(self):
11461146
installation_method = _infer_khiops_installation_method()
11471147
# In Conda-based, but non-Conda environment, specify mpiexec path
11481148
if installation_method == "conda-based":
1149-
mpiexec_path = os.environ.get("KHIOPS_MPIEXEC_PATH") or os.path.join(
1150-
_infer_env_bin_dir_for_conda_based_installations(), "mpiexec"
1149+
# Python `os.path.realpath` resolves symlinks recursively, like GNU
1150+
# `readlink -f`; Python `os.readlink` does not
1151+
mpiexec_path = os.environ.get("KHIOPS_MPIEXEC_PATH") or os.path.realpath(
1152+
os.path.join(
1153+
_infer_env_bin_dir_for_conda_based_installations(), "mpiexec"
1154+
)
11511155
)
11521156
if platform.system() == "Windows" and not os.path.splitext(mpiexec_path):
11531157
mpiexec_path += ".exe"
@@ -1165,8 +1169,11 @@ def _initialize_mpi_command_args(self):
11651169
)
11661170
# In Conda or local installations, expect mpiexec in the PATH
11671171
else:
1168-
mpiexec_path = os.environ.get("KHIOPS_MPIEXEC_PATH") or shutil.which(
1169-
"mpiexec"
1172+
link_to_mpiexec = shutil.which("mpiexec")
1173+
mpiexec_path = (
1174+
os.environ.get("KHIOPS_MPIEXEC_PATH")
1175+
or link_to_mpiexec
1176+
and os.path.realpath(link_to_mpiexec)
11701177
)
11711178
# If mpiexec is not in the path, and the installation method is local,
11721179
# then try to load MPI environment module so that mpiexec is in the path

khiops/sklearn/estimators.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1464,9 +1464,11 @@ def _fit_prepare_training_function_inputs(self, dataset, computation_dir):
14641464
# Build the optional parameters from a copy of the estimator parameters
14651465
kwargs = self.get_params()
14661466

1467-
# Remove 'key' and 'output_dir'
1467+
# Remove non core.api params
14681468
del kwargs["key"]
14691469
del kwargs["output_dir"]
1470+
del kwargs["auto_sort"]
1471+
del kwargs["internal_sort"]
14701472

14711473
# Set the sampling percentage to a 100%
14721474
kwargs["sample_percentage"] = 100

khiops/sklearn/tables.py

Lines changed: 34 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -166,11 +166,27 @@ def __init__(self, X, y=None, categorical_target=True, key=None):
166166
y,
167167
categorical_target=categorical_target,
168168
)
169-
# A sparse matrix
169+
# A scipy.sparse.spmatrix
170170
elif isinstance(X, sp.spmatrix):
171171
self._init_tables_from_sparse_matrix(
172172
X, y, categorical_target=categorical_target
173173
)
174+
# Special rejection for scipy.sparse.sparray (to pass the sklearn tests)
175+
# Note: We don't use scipy.sparse.sparray because it is not implemented in scipy
176+
# 1.10 which is the latest supporting py3.8
177+
elif isinstance(
178+
X,
179+
(
180+
sp.bsr_array,
181+
sp.coo_array,
182+
sp.csc_array,
183+
sp.csr_array,
184+
sp.dia_array,
185+
sp.dok_array,
186+
sp.lil_array,
187+
),
188+
):
189+
check_array(X, accept_sparse=False)
174190
# A tuple spec
175191
elif isinstance(X, tuple):
176192
warnings.warn(
@@ -1425,32 +1441,23 @@ def _write_sparse_block(self, row_index, stream, target=None):
14251441
assert target in self.target_column, "'target' must be in the target column"
14261442
stream.write(f"{target}\t")
14271443
row = self.matrix.getrow(row_index)
1428-
# Empty row in the sparse matrix: use the first variable as missing data
1429-
# TODO: remove this part once Khiops bug
1430-
# https://github.com/KhiopsML/khiops/issues/235 is solved
1431-
if row.size == 0:
1432-
for variable_index in self.column_ids:
1433-
stream.write(f"{variable_index + 1}: ")
1434-
break
1435-
# Non-empty row in the sparse matrix: get non-missing data
1436-
else:
1437-
# Variable indices are not always sorted in `row.indices`
1438-
# Khiops needs variable indices to be sorted
1439-
sorted_indices = np.sort(row.nonzero()[1], axis=-1, kind="mergesort")
1440-
1441-
# Flatten row for Python < 3.9 scipy.sparse.lil_matrix whose API
1442-
# is not homogeneous with other sparse matrices: it stores
1443-
# opaque Python lists as elements
1444-
# Thus:
1445-
# - if isinstance(self.matrix, sp.lil_matrix) and Python 3.8, then
1446-
# row.data is np.array([list([...])])
1447-
# - else, row.data is np.array([...])
1448-
# TODO: remove this flattening once Python 3.8 support is dropped
1449-
sorted_data = np.fromiter(self._flatten(row.data), row.data.dtype)[
1450-
sorted_indices.argsort()
1451-
]
1452-
for variable_index, variable_value in zip(sorted_indices, sorted_data):
1453-
stream.write(f"{variable_index + 1}:{variable_value} ")
1444+
# Variable indices are not always sorted in `row.indices`
1445+
# Khiops needs variable indices to be sorted
1446+
sorted_indices = np.sort(row.nonzero()[1], axis=-1, kind="mergesort")
1447+
1448+
# Flatten row for Python < 3.9 scipy.sparse.lil_matrix whose API
1449+
# is not homogeneous with other sparse matrices: it stores
1450+
# opaque Python lists as elements
1451+
# Thus:
1452+
# - if isinstance(self.matrix, sp.lil_matrix) and Python 3.8, then
1453+
# row.data is np.array([list([...])])
1454+
# - else, row.data is np.array([...])
1455+
# TODO: remove this flattening once Python 3.8 support is dropped
1456+
sorted_data = np.fromiter(self._flatten(row.data), row.data.dtype)[
1457+
sorted_indices.argsort()
1458+
]
1459+
for variable_index, variable_value in zip(sorted_indices, sorted_data):
1460+
stream.write(f"{variable_index + 1}:{variable_value} ")
14541461
stream.write("\n")
14551462

14561463
def create_table_file_for_khiops(self, output_dir, sort=True):

0 commit comments

Comments
 (0)