diff --git a/doc/source/whatsnew/v0.17.1.txt b/doc/source/whatsnew/v0.17.1.txt index ff45d28802885..1f6b17dac6632 100755 --- a/doc/source/whatsnew/v0.17.1.txt +++ b/doc/source/whatsnew/v0.17.1.txt @@ -123,6 +123,6 @@ Bug Fixes - Bug in ``DataFrame.to_dict()`` produces a ``np.datetime64`` object instead of ``Timestamp`` when only datetime is present in data (:issue:`11327`) - +- Bug in ``DataFrame.corr()`` raising an exception when computing the Kendall correlation for DataFrames with both boolean and non-boolean columns (:issue:`11560`) - Bug in the link-time error caused by C ``inline`` functions on FreeBSD 10+ (with ``clang``) (:issue:`10510`) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index de74b70cdfaac..538b9d3f8e712 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4411,16 +4411,21 @@ def corr(self, method='pearson', min_periods=1): else: if min_periods is None: min_periods = 1 - mat = mat.T + mat = com._ensure_float64(mat).T corrf = nanops.get_corr_func(method) K = len(cols) correl = np.empty((K, K), dtype=float) mask = np.isfinite(mat) for i, ac in enumerate(mat): for j, bc in enumerate(mat): + if i > j: + continue + valid = mask[i] & mask[j] if valid.sum() < min_periods: c = NA + elif i == j: + c = 1. 
elif not valid.all(): c = corrf(ac[valid], bc[valid]) else: diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index 1b57d53a548f3..b290b6b84ad18 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -8002,12 +8002,14 @@ def test_corr_nooverlap(self): # nothing in common for meth in ['pearson', 'kendall', 'spearman']: df = DataFrame({'A': [1, 1.5, 1, np.nan, np.nan, np.nan], - 'B': [np.nan, np.nan, np.nan, 1, 1.5, 1]}) + 'B': [np.nan, np.nan, np.nan, 1, 1.5, 1], + 'C': [np.nan, np.nan, np.nan, np.nan, np.nan, np.nan]}) rs = df.corr(meth) self.assertTrue(isnull(rs.ix['A', 'B'])) self.assertTrue(isnull(rs.ix['B', 'A'])) self.assertEqual(rs.ix['A', 'A'], 1) self.assertEqual(rs.ix['B', 'B'], 1) + self.assertTrue(isnull(rs.ix['C', 'C'])) def test_corr_constant(self): tm._skip_if_no_scipy() @@ -8028,6 +8030,18 @@ def test_corr_int(self): df3.cov() df3.corr() + def test_corr_int_and_boolean(self): + tm._skip_if_no_scipy() + + # when the dtypes of the pandas series differ, + # the resulting ndarray will have dtype=object, + # so it needs to be handled properly + df = DataFrame({"a": [True, False], "b": [1, 0]}) + + expected = DataFrame(np.ones((2, 2)), index=['a', 'b'], columns=['a', 'b']) + for meth in ['pearson', 'kendall', 'spearman']: + assert_frame_equal(df.corr(meth), expected) + def test_cov(self): # min_periods no NAs (corner case) expected = self.frame.cov()