From 192d20dbf95a5f793bc004bb39b705a5f11c6342 Mon Sep 17 00:00:00 2001 From: Luke Manley Date: Wed, 15 Feb 2023 19:44:19 -0500 Subject: [PATCH 1/5] fix Series.corr/cov raising with masked dtype --- pandas/core/nanops.py | 6 ++++++ pandas/core/series.py | 13 +++++++------ pandas/tests/series/methods/test_cov_corr.py | 12 ++++++++---- 3 files changed, 21 insertions(+), 10 deletions(-) diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index 41ed9485643e7..4846685ee20ab 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -1605,6 +1605,9 @@ def nancorr( if len(a) < min_periods: return np.nan + a = _ensure_numeric(a) + b = _ensure_numeric(b) + f = get_corr_func(method) return f(a, b) @@ -1663,6 +1666,9 @@ def nancov( if len(a) < min_periods: return np.nan + a = _ensure_numeric(a) + b = _ensure_numeric(b) + return np.cov(a, b, ddof=ddof)[0, 1] diff --git a/pandas/core/series.py b/pandas/core/series.py index e4c7c4d3b3d73..6377b13ad7efd 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2677,10 +2677,11 @@ def corr( if len(this) == 0: return np.nan + this = np.asarray(this.values) + other = np.asarray(other.values) + if method in ["pearson", "spearman", "kendall"] or callable(method): - return nanops.nancorr( - this.values, other.values, method=method, min_periods=min_periods - ) + return nanops.nancorr(this, other, method=method, min_periods=min_periods) raise ValueError( "method must be either 'pearson', " @@ -2732,9 +2733,9 @@ def cov( this, other = self.align(other, join="inner", copy=False) if len(this) == 0: return np.nan - return nanops.nancov( - this.values, other.values, min_periods=min_periods, ddof=ddof - ) + this = np.asarray(this.values) + other = np.asarray(other.values) + return nanops.nancov(this, other, min_periods=min_periods, ddof=ddof) @doc( klass="Series", diff --git a/pandas/tests/series/methods/test_cov_corr.py b/pandas/tests/series/methods/test_cov_corr.py index be3483c773143..6ab255cfa3d25 100644 --- a/pandas/tests/series/methods/test_cov_corr.py +++ b/pandas/tests/series/methods/test_cov_corr.py @@ -42,13 +42,14 @@ def test_cov(self, datetime_series): assert isna(ts1.cov(ts2, min_periods=12)) @pytest.mark.parametrize("test_ddof", [None, 0, 1, 2, 3]) - def test_cov_ddof(self, test_ddof): + @pytest.mark.parametrize("dtype", ["float64", "Float64"]) + def test_cov_ddof(self, test_ddof, dtype): # GH#34611 np_array1 = np.random.rand(10) np_array2 = np.random.rand(10) - s1 = Series(np_array1) - s2 = Series(np_array2) + s1 = Series(np_array1, dtype=dtype) + s2 = Series(np_array2, dtype=dtype) result = s1.cov(s2, ddof=test_ddof) expected = np.cov(np_array1, np_array2, ddof=test_ddof)[0][1] @@ -57,9 +58,12 @@ def test_cov_ddof(self, test_ddof): class TestSeriesCorr: @td.skip_if_no_scipy - def test_corr(self, datetime_series): + @pytest.mark.parametrize("dtype", ["float64", "Float64"]) + def test_corr(self, datetime_series, dtype): from scipy import stats + datetime_series = datetime_series.astype(dtype) + # full overlap tm.assert_almost_equal(datetime_series.corr(datetime_series), 1) From f9fb8728655be20840c4e9726822e22179c9bc85 Mon Sep 17 00:00:00 2001 From: Luke Manley Date: Wed, 15 Feb 2023 19:46:12 -0500 Subject: [PATCH 2/5] gh refs --- doc/source/whatsnew/v2.0.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 07c7120a8f6c1..261b73fe4b348 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -1227,6 +1227,7 @@ Numeric - Bug in DataFrame reduction methods (e.g. :meth:`DataFrame.sum`) with object dtype, ``axis=1`` and ``numeric_only=False`` would not be coerced to float (:issue:`49551`) - Bug in :meth:`DataFrame.sem` and :meth:`Series.sem` where an erroneous ``TypeError`` would always raise when using data backed by an :class:`ArrowDtype` (:issue:`49759`) - Bug in :meth:`Series.__add__` casting to object for list and masked :class:`Series` (:issue:`22962`) +- Bug in :meth:`Series.corr` and :meth:`Series.cov` raising ``AttributeError`` for masked dtypes (:issue:`51422`) - Bug in :meth:`DataFrame.query` with ``engine="numexpr"`` and column names are ``min`` or ``max`` would raise a ``TypeError`` (:issue:`50937`) Conversion From caab0d733beb3c97c2fba2be930207c2a813514a Mon Sep 17 00:00:00 2001 From: Luke Manley Date: Wed, 15 Feb 2023 20:56:44 -0500 Subject: [PATCH 3/5] update error message --- pandas/tests/frame/methods/test_cov_corr.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/frame/methods/test_cov_corr.py b/pandas/tests/frame/methods/test_cov_corr.py index c4f5b60918e84..250b07f8248ef 100644 --- a/pandas/tests/frame/methods/test_cov_corr.py +++ b/pandas/tests/frame/methods/test_cov_corr.py @@ -356,7 +356,7 @@ def test_corrwith_mixed_dtypes(self, numeric_only): else: with pytest.raises( TypeError, - match=r"unsupported operand type\(s\) for /: 'str' and 'int'", + match=r"Could not convert \['a' 'b' 'c' 'd'\] to numeric", ): df.corrwith(s, numeric_only=numeric_only) From 14e32baad3caab26034f35269bbc79fdc0ba81c6 Mon Sep 17 00:00:00 2001 From: Luke Manley Date: Thu, 16 Feb 2023 05:50:01 -0500 Subject: [PATCH 4/5] mypy --- pandas/core/series.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 6377b13ad7efd..e5c58a03e0c61 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2677,11 +2677,13 @@ def corr( if len(this) == 0: return np.nan - this = np.asarray(this.values) - other = np.asarray(other.values) + this_values = np.asarray(this.values) + other_values = np.asarray(other.values) if method in ["pearson", "spearman", "kendall"] or callable(method): - return nanops.nancorr(this, other, method=method, min_periods=min_periods) + return nanops.nancorr( + this_values, other_values, method=method, min_periods=min_periods + ) raise ValueError( "method must be either 'pearson', " @@ -2733,9 +2735,11 @@ def cov( this, other = self.align(other, join="inner", copy=False) if len(this) == 0: return np.nan - this = np.asarray(this.values) - other = np.asarray(other.values) - return nanops.nancov(this, other, min_periods=min_periods, ddof=ddof) + this_values = np.asarray(this.values) + other_values = np.asarray(other.values) + return nanops.nancov( + this_values, other_values, min_periods=min_periods, ddof=ddof + ) @doc( klass="Series", From 1a94a2ce472a06b645faa7eeace8c37e93c2662d Mon Sep 17 00:00:00 2001 From: Luke Manley Date: Wed, 22 Feb 2023 17:50:42 -0500 Subject: [PATCH 5/5] move whatsnew, use ._values --- doc/source/whatsnew/v2.0.0.rst | 1 - doc/source/whatsnew/v2.1.0.rst | 2 +- pandas/core/series.py | 8 ++++---- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 75e1cd4c36a87..eff79dda821a0 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -1233,7 +1233,6 @@ Numeric - Bug in arithmetic operations on :class:`Series` not propagating mask when combining masked dtypes and numpy dtypes (:issue:`45810`, :issue:`42630`) - Bug in :meth:`DataFrame.sem` and :meth:`Series.sem` where an erroneous ``TypeError`` would always raise when using data backed by an :class:`ArrowDtype` (:issue:`49759`) - Bug in :meth:`Series.__add__` casting to object for list and masked :class:`Series` (:issue:`22962`) -- Bug in :meth:`Series.corr` and :meth:`Series.cov` raising ``AttributeError`` for masked dtypes (:issue:`51422`) - Bug in :meth:`~arrays.ArrowExtensionArray.mode` where ``dropna=False`` was not respected when there was ``NA`` values (:issue:`50982`) - Bug in :meth:`DataFrame.query` with ``engine="numexpr"`` and column names are ``min`` or ``max`` would raise a ``TypeError`` (:issue:`50937`) - Bug in :meth:`DataFrame.min` and :meth:`DataFrame.max` with tz-aware data containing ``pd.NaT`` and ``axis=1`` would return incorrect results (:issue:`51242`) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index aeaafbc4c125d..7a72547156779 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -131,7 +131,7 @@ Timezones Numeric ^^^^^^^ -- +- Bug in :meth:`Series.corr` and :meth:`Series.cov` raising ``AttributeError`` for masked dtypes (:issue:`51422`) - Conversion diff --git a/pandas/core/series.py b/pandas/core/series.py index 1ddd50e50df28..badde08c3820d 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2643,8 +2643,8 @@ def corr( if len(this) == 0: return np.nan - this_values = np.asarray(this.values) - other_values = np.asarray(other.values) + this_values = np.asarray(this._values) + other_values = np.asarray(other._values) if method in ["pearson", "spearman", "kendall"] or callable(method): return nanops.nancorr( @@ -2701,8 +2701,8 @@ def cov( this, other = self.align(other, join="inner", copy=False) if len(this) == 0: return np.nan - this_values = np.asarray(this.values) - other_values = np.asarray(other.values) + this_values = np.asarray(this._values) + other_values = np.asarray(other._values) return nanops.nancov( this_values, other_values, min_periods=min_periods, ddof=ddof )