@@ -141,7 +141,7 @@ def from_series(s, sort_rows=False, fill_value=nan, meta=None, **kwargs):
141141
142142
143143def from_frame (df , sort_rows = False , sort_columns = False , parse_header = False , unfold_last_axis_name = False ,
144- fill_value = nan , meta = None , ** kwargs ):
144+ fill_value = nan , meta = None , cartesian_prod = True , ** kwargs ):
145145 r"""
146146 Converts Pandas DataFrame into LArray.
147147
@@ -151,9 +151,12 @@ def from_frame(df, sort_rows=False, sort_columns=False, parse_header=False, unfo
151151 Input dataframe. By default, name and labels of the last axis are defined by the name and labels of the
152152 columns Index of the dataframe unless argument unfold_last_axis_name is set to True.
153153 sort_rows : bool, optional
154- Whether or not to sort the rows alphabetically (sorting is more efficient than not sorting). Defaults to False.
154+ Whether or not to sort the rows alphabetically (sorting is more efficient than not sorting).
155+ Must be False if `cartesian_prod` is set to False.
156+ Defaults to False.
155157 sort_columns : bool, optional
156158 Whether or not to sort the columns alphabetically (sorting is more efficient than not sorting).
159+ Must be False if `cartesian_prod` is set to False.
157160 Defaults to False.
158161 parse_header : bool, optional
159162 Whether or not to parse columns labels. Pandas treats column labels as strings.
@@ -167,6 +170,11 @@ def from_frame(df, sort_rows=False, sort_columns=False, parse_header=False, unfo
167170 meta : list of pairs or dict or OrderedDict or Metadata, optional
168171 Metadata (title, description, author, creation_date, ...) associated with the array.
169172 Keys must be strings. Values must be of type string, int, float, date, time or datetime.
173+ cartesian_prod : bool, optional
174+ Whether or not to expand the dataframe to a cartesian product dataframe as needed by LArray.
175+ This is an expensive operation but is absolutely required if you cannot guarantee your dataframe is already
176+ well formed. If False, arguments `sort_rows` and `sort_columns` must be set to False.
177+ Defaults to True.
170178
171179 Returns
172180 -------
@@ -223,8 +231,14 @@ def from_frame(df, sort_rows=False, sort_columns=False, parse_header=False, unfo
223231 else :
224232 axes_names += [df .columns .name ]
225233
226- df , axes_labels = cartesian_product_df (df , sort_rows = sort_rows , sort_columns = sort_columns ,
227- fill_value = fill_value , ** kwargs )
234+ if cartesian_prod :
235+ df , axes_labels = cartesian_product_df (df , sort_rows = sort_rows , sort_columns = sort_columns ,
236+ fill_value = fill_value , ** kwargs )
237+ else :
238+ if sort_rows or sort_columns :
239+ raise ValueError ('sort_rows and sort_columns cannot not be used when cartesian_prod is set to False. '
240+ 'Please call the method sort_axes on the returned array to sort rows or columns' )
241+ axes_labels = index_to_labels (df .index , sort = False )
228242
229243 # Pandas treats column labels as column names (strings) so we need to convert them to values
230244 last_axis_labels = [parse (cell ) for cell in df .columns .values ] if parse_header else list (df .columns .values )
@@ -237,7 +251,8 @@ def from_frame(df, sort_rows=False, sort_columns=False, parse_header=False, unfo
237251 return LArray (data , axes , meta = meta )
238252
239253
240- def df_aslarray (df , sort_rows = False , sort_columns = False , raw = False , parse_header = True , wide = True , ** kwargs ):
254+ def df_aslarray (df , sort_rows = False , sort_columns = False , raw = False , parse_header = True , wide = True , cartesian_prod = True ,
255+ ** kwargs ):
241256 """
242257 Prepare Pandas DataFrame and then convert it into LArray.
243258
@@ -246,9 +261,12 @@ def df_aslarray(df, sort_rows=False, sort_columns=False, raw=False, parse_header
246261 df : Pandas DataFrame
247262 Input dataframe.
248263 sort_rows : bool, optional
249- Whether or not to sort the rows alphabetically (sorting is more efficient than not sorting). Defaults to False.
264+ Whether or not to sort the rows alphabetically (sorting is more efficient than not sorting).
265+ Must be False if `cartesian_prod` is set to False.
266+ Defaults to False.
250267 sort_columns : bool, optional
251268 Whether or not to sort the columns alphabetically (sorting is more efficient than not sorting).
269+ Must be False if `cartesian_prod` is set to False.
252270 Defaults to False.
253271 raw : bool, optional
254272 Whether or not to consider the input dataframe as a raw dataframe, i.e. read without index at all.
@@ -260,6 +278,11 @@ def df_aslarray(df, sort_rows=False, sort_columns=False, raw=False, parse_header
260278 Whether or not to assume the array is stored in "wide" format.
261279 If False, the array is assumed to be stored in "narrow" format: one column per axis plus one value column.
262280 Defaults to True.
281+ cartesian_prod : bool, optional
282+ Whether or not to expand the dataframe to a cartesian product dataframe as needed by LArray.
283+ This is an expensive operation but is absolutely required if you cannot guarantee your dataframe is already
284+ well formed. If False, arguments `sort_rows` and `sort_columns` must be set to False.
285+ Defaults to True.
263286
264287 Returns
265288 -------
@@ -306,7 +329,7 @@ def df_aslarray(df, sort_rows=False, sort_columns=False, raw=False, parse_header
306329 axes_names = [decode (name , 'utf8' ) for name in df .index .names ]
307330 unfold_last_axis_name = isinstance (axes_names [- 1 ], basestring ) and '\\ ' in axes_names [- 1 ]
308331 return from_frame (df , sort_rows = sort_rows , sort_columns = sort_columns , parse_header = parse_header ,
309- unfold_last_axis_name = unfold_last_axis_name , ** kwargs )
332+ unfold_last_axis_name = unfold_last_axis_name , cartesian_prod = cartesian_prod , ** kwargs )
310333
311334
312335# #################################### #
0 commit comments