Closed
Description
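_flex_binary_moment() sorts the columns of its result alphabetically. I think it should preserve the column order of the input instead. In the example below the input columns are ['B', 'A'], but the result is labelled A, B: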
In [23]: df = DataFrame([[2,4],[1,2],[5,2],[8,1]], columns=['B','A'])
In [24]: expanding_corr(df, df, pairwise=True)[3]
Out[24]:
          A         B
A  1.000000 -0.670166
B -0.670166  1.000000
(I originally mentioned this in #7514, but then broke it out as a separate issue.)
Also, _flex_binary_moment() fails when multiple columns have the same name. It's not obvious to me what the result should be for two distinct DataFrames with pairwise=False, but there's no reason the remaining cases shouldn't work: a DataFrame with itself (with pairwise=True or False), two distinct DataFrames with pairwise=True, and a DataFrame with a Series. For example:
In [13]: df = DataFrame([[2,4],[1,2],[5,2],[8,1]], columns=['C','C'])
In [14]: expanding_corr(df, df, pairwise=True)
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-14-f20d94c2d8dc> in <module>()
----> 1 expanding_corr(df, df, pairwise=True)
C:\Python34\lib\site-packages\pandas\stats\moments.py in expanding_corr(arg1, arg2, min_periods, freq, center, pairwise)
940 return rolling_corr(arg1, arg2, window,
941 min_periods=min_periods,
--> 942 freq=freq, center=center, pairwise=pairwise)
943
944
C:\Python34\lib\site-packages\pandas\stats\moments.py in rolling_corr(arg1, arg2, window, min_periods, freq, center, pairwise, how)
246 return num / den
247
--> 248 return _flex_binary_moment(arg1, arg2, _get_corr, pairwise=bool(pairwise))
249
250
C:\Python34\lib\site-packages\pandas\stats\moments.py in _flex_binary_moment(arg1, arg2, f, pairwise)
278 results[k1][k2] = results[k2][k1]
279 else:
--> 280 results[k1][k2] = f(*_prep_binary(arg1[k1], arg2[k2]))
281 return Panel.from_dict(results).swapaxes('items', 'major')
282 else:
C:\Python34\lib\site-packages\pandas\stats\moments.py in _get_corr(a, b)
239 adj_window = min(window, len(a), len(b))
240 num = rolling_cov(a, b, adj_window, min_periods, freq=freq,
--> 241 center=center)
242 den = (rolling_std(a, adj_window, min_periods, freq=freq,
243 center=center) *
C:\Python34\lib\site-packages\pandas\stats\moments.py in rolling_cov(arg1, arg2, window, min_periods, freq, center, pairwise, how)
217 bias_adj = count / (count - 1)
218 return (mean(X * Y) - mean(X) * mean(Y)) * bias_adj
--> 219 rs = _flex_binary_moment(arg1, arg2, _get_cov, pairwise=bool(pairwise))
220 return rs
221
C:\Python34\lib\site-packages\pandas\stats\moments.py in _flex_binary_moment(arg1, arg2, f, pairwise)
290 results[col] = f(X[col], Y)
291
--> 292 return DataFrame(results, index=X.index, columns=res_columns)
293 else:
294 return _flex_binary_moment(arg2, arg1, f)
C:\Python34\lib\site-packages\pandas\core\frame.py in __init__(self, data, index, columns, dtype, copy)
201 dtype=dtype, copy=copy)
202 elif isinstance(data, dict):
--> 203 mgr = self._init_dict(data, index, columns, dtype=dtype)
204 elif isinstance(data, ma.MaskedArray):
205 import numpy.ma.mrecords as mrecords
C:\Python34\lib\site-packages\pandas\core\frame.py in _init_dict(self, data, index, columns, dtype)
325
326 return _arrays_to_mgr(arrays, data_names, index, columns,
--> 327 dtype=dtype)
328
329 def _init_ndarray(self, values, index, columns, dtype=None,
C:\Python34\lib\site-packages\pandas\core\frame.py in _arrays_to_mgr(arrays, arr_names, index, columns, dtype)
4623
4624 # don't force copy because getting jammed in an ndarray anyway
-> 4625 arrays = _homogenize(arrays, index, dtype)
4626
4627 # from BlockManager perspective
C:\Python34\lib\site-packages\pandas\core\frame.py in _homogenize(data, index, dtype)
4932
4933 v = _sanitize_array(v, index, dtype=dtype, copy=False,
-> 4934 raise_cast_failure=False)
4935
4936 homogenized.append(v)
C:\Python34\lib\site-packages\pandas\core\series.py in _sanitize_array(data, index, dtype, copy, raise_cast_failure)
2507 raise Exception('Data must be 1-dimensional')
2508 else:
-> 2509 subarr = _asarray_tuplesafe(data, dtype=dtype)
2510
2511 # This is to prevent mixed-type Series getting all casted to
C:\Python34\lib\site-packages\pandas\core\common.py in _asarray_tuplesafe(values, dtype)
2191 except ValueError:
2192 # we have a list-of-list
-> 2193 result[:] = [tuple(x) for x in values]
2194
2195 return result
ValueError: cannot copy sequence with size 2 to array axis with dimension 4