@@ -166,11 +166,27 @@ def __init__(self, X, y=None, categorical_target=True, key=None):
166166 y ,
167167 categorical_target = categorical_target ,
168168 )
169- # A sparse matrix
169+ # A scipy.sparse.spmatrix
170170 elif isinstance (X , sp .spmatrix ):
171171 self ._init_tables_from_sparse_matrix (
172172 X , y , categorical_target = categorical_target
173173 )
174+ # Special rejection for scipy.sparse.sparray (to pass the sklearn tests)
175+ # Note: We list the concrete *_array classes instead of scipy.sparse.sparray
176+ # because sparray does not exist in scipy 1.10, the latest supporting Python 3.8
177+ elif isinstance (
178+ X ,
179+ (
180+ sp .bsr_array ,
181+ sp .coo_array ,
182+ sp .csc_array ,
183+ sp .csr_array ,
184+ sp .dia_array ,
185+ sp .dok_array ,
186+ sp .lil_array ,
187+ ),
188+ ):
189+ check_array (X , accept_sparse = False )
174190 # A tuple spec
175191 elif isinstance (X , tuple ):
176192 warnings .warn (
@@ -1425,32 +1441,23 @@ def _write_sparse_block(self, row_index, stream, target=None):
14251441 assert target in self .target_column , "'target' must be in the target column"
14261442 stream .write (f"{ target } \t " )
14271443 row = self .matrix .getrow (row_index )
1428- # Empty row in the sparse matrix: use the first variable as missing data
1429- # TODO: remove this part once Khiops bug
1430- # https://github.com/KhiopsML/khiops/issues/235 is solved
1431- if row .size == 0 :
1432- for variable_index in self .column_ids :
1433- stream .write (f"{ variable_index + 1 } : " )
1434- break
1435- # Non-empty row in the sparse matrix: get non-missing data
1436- else :
1437- # Variable indices are not always sorted in `row.indices`
1438- # Khiops needs variable indices to be sorted
1439- sorted_indices = np .sort (row .nonzero ()[1 ], axis = - 1 , kind = "mergesort" )
1440-
1441- # Flatten row for Python < 3.9 scipy.sparse.lil_matrix whose API
1442- # is not homogeneous with other sparse matrices: it stores
1443- # opaque Python lists as elements
1444- # Thus:
1445- # - if isinstance(self.matrix, sp.lil_matrix) and Python 3.8, then
1446- # row.data is np.array([list([...])])
1447- # - else, row.data is np.array([...])
1448- # TODO: remove this flattening once Python 3.8 support is dropped
1449- sorted_data = np .fromiter (self ._flatten (row .data ), row .data .dtype )[
1450- sorted_indices .argsort ()
1451- ]
1452- for variable_index , variable_value in zip (sorted_indices , sorted_data ):
1453- stream .write (f"{ variable_index + 1 } :{ variable_value } " )
1444+ # Variable indices are not always sorted in `row.indices`
1445+ # Khiops needs variable indices to be sorted
1446+ sorted_indices = np .sort (row .nonzero ()[1 ], axis = - 1 , kind = "mergesort" )
1447+
1448+ # Flatten row for Python < 3.9 scipy.sparse.lil_matrix whose API
1449+ # is not homogeneous with other sparse matrices: it stores
1450+ # opaque Python lists as elements
1451+ # Thus:
1452+ # - if isinstance(self.matrix, sp.lil_matrix) and Python 3.8, then
1453+ # row.data is np.array([list([...])])
1454+ # - else, row.data is np.array([...])
1455+ # TODO: remove this flattening once Python 3.8 support is dropped
1456+ sorted_data = np .fromiter (self ._flatten (row .data ), row .data .dtype )[
1457+ sorted_indices .argsort ()
1458+ ]
1459+ for variable_index , variable_value in zip (sorted_indices , sorted_data ):
1460+ stream .write (f"{ variable_index + 1 } :{ variable_value } " )
14541461 stream .write ("\n " )
14551462
14561463 def create_table_file_for_khiops (self , output_dir , sort = True ):
0 commit comments