@@ -141,7 +141,7 @@ def from_series(s, sort_rows=False, fill_value=nan, meta=None, **kwargs):
141
141
142
142
143
143
def from_frame (df , sort_rows = False , sort_columns = False , parse_header = False , unfold_last_axis_name = False ,
144
- fill_value = nan , meta = None , ** kwargs ):
144
+ fill_value = nan , meta = None , cartesian_prod = True , ** kwargs ):
145
145
r"""
146
146
Converts Pandas DataFrame into LArray.
147
147
@@ -151,9 +151,12 @@ def from_frame(df, sort_rows=False, sort_columns=False, parse_header=False, unfo
151
151
Input dataframe. By default, name and labels of the last axis are defined by the name and labels of the
152
152
columns Index of the dataframe unless argument unfold_last_axis_name is set to True.
153
153
sort_rows : bool, optional
154
- Whether or not to sort the rows alphabetically (sorting is more efficient than not sorting). Defaults to False.
154
+ Whether or not to sort the rows alphabetically (sorting is more efficient than not sorting).
155
+ Must be False if `cartesian_prod` is set to False.
156
+ Defaults to False.
155
157
sort_columns : bool, optional
156
158
Whether or not to sort the columns alphabetically (sorting is more efficient than not sorting).
159
+ Must be False if `cartesian_prod` is set to False.
157
160
Defaults to False.
158
161
parse_header : bool, optional
159
162
Whether or not to parse columns labels. Pandas treats column labels as strings.
@@ -167,6 +170,11 @@ def from_frame(df, sort_rows=False, sort_columns=False, parse_header=False, unfo
167
170
meta : list of pairs or dict or OrderedDict or Metadata, optional
168
171
Metadata (title, description, author, creation_date, ...) associated with the array.
169
172
Keys must be strings. Values must be of type string, int, float, date, time or datetime.
173
+ cartesian_prod : bool, optional
174
+ Whether or not to expand the dataframe to a cartesian product dataframe as needed by LArray.
175
+ This is an expensive operation but is absolutely required if you cannot guarantee your dataframe is already
176
+ well formed. If False, arguments `sort_rows` and `sort_columns` must be set to False.
177
+ Defaults to True.
170
178
171
179
Returns
172
180
-------
@@ -223,8 +231,14 @@ def from_frame(df, sort_rows=False, sort_columns=False, parse_header=False, unfo
223
231
else :
224
232
axes_names += [df .columns .name ]
225
233
226
- df , axes_labels = cartesian_product_df (df , sort_rows = sort_rows , sort_columns = sort_columns ,
227
- fill_value = fill_value , ** kwargs )
234
+ if cartesian_prod :
235
+ df , axes_labels = cartesian_product_df (df , sort_rows = sort_rows , sort_columns = sort_columns ,
236
+ fill_value = fill_value , ** kwargs )
237
+ else :
238
+ if sort_rows or sort_columns :
239
+ raise ValueError ('sort_rows and sort_columns cannot not be used when cartesian_prod is set to False. '
240
+ 'Please call the method sort_axes on the returned array to sort rows or columns' )
241
+ axes_labels = index_to_labels (df .index , sort = False )
228
242
229
243
# Pandas treats column labels as column names (strings) so we need to convert them to values
230
244
last_axis_labels = [parse (cell ) for cell in df .columns .values ] if parse_header else list (df .columns .values )
@@ -237,7 +251,8 @@ def from_frame(df, sort_rows=False, sort_columns=False, parse_header=False, unfo
237
251
return LArray (data , axes , meta = meta )
238
252
239
253
240
- def df_aslarray (df , sort_rows = False , sort_columns = False , raw = False , parse_header = True , wide = True , ** kwargs ):
254
+ def df_aslarray (df , sort_rows = False , sort_columns = False , raw = False , parse_header = True , wide = True , cartesian_prod = True ,
255
+ ** kwargs ):
241
256
"""
242
257
Prepare Pandas DataFrame and then convert it into LArray.
243
258
@@ -246,9 +261,12 @@ def df_aslarray(df, sort_rows=False, sort_columns=False, raw=False, parse_header
246
261
df : Pandas DataFrame
247
262
Input dataframe.
248
263
sort_rows : bool, optional
249
- Whether or not to sort the rows alphabetically (sorting is more efficient than not sorting). Defaults to False.
264
+ Whether or not to sort the rows alphabetically (sorting is more efficient than not sorting).
265
+ Must be False if `cartesian_prod` is set to False.
266
+ Defaults to False.
250
267
sort_columns : bool, optional
251
268
Whether or not to sort the columns alphabetically (sorting is more efficient than not sorting).
269
+ Must be False if `cartesian_prod` is set to False.
252
270
Defaults to False.
253
271
raw : bool, optional
254
272
Whether or not to consider the input dataframe as a raw dataframe, i.e. read without index at all.
@@ -260,6 +278,11 @@ def df_aslarray(df, sort_rows=False, sort_columns=False, raw=False, parse_header
260
278
Whether or not to assume the array is stored in "wide" format.
261
279
If False, the array is assumed to be stored in "narrow" format: one column per axis plus one value column.
262
280
Defaults to True.
281
+ cartesian_prod : bool, optional
282
+ Whether or not to expand the dataframe to a cartesian product dataframe as needed by LArray.
283
+ This is an expensive operation but is absolutely required if you cannot guarantee your dataframe is already
284
+ well formed. If False, arguments `sort_rows` and `sort_columns` must be set to False.
285
+ Defaults to True.
263
286
264
287
Returns
265
288
-------
@@ -306,7 +329,7 @@ def df_aslarray(df, sort_rows=False, sort_columns=False, raw=False, parse_header
306
329
axes_names = [decode (name , 'utf8' ) for name in df .index .names ]
307
330
unfold_last_axis_name = isinstance (axes_names [- 1 ], basestring ) and '\\ ' in axes_names [- 1 ]
308
331
return from_frame (df , sort_rows = sort_rows , sort_columns = sort_columns , parse_header = parse_header ,
309
- unfold_last_axis_name = unfold_last_axis_name , ** kwargs )
332
+ unfold_last_axis_name = unfold_last_axis_name , cartesian_prod = cartesian_prod , ** kwargs )
310
333
311
334
312
335
# #################################### #
0 commit comments