From b1fb8549d46a5ad1923a412dc177706d3bc36a4d Mon Sep 17 00:00:00 2001 From: taytzehao Date: Fri, 16 Apr 2021 12:11:50 +0800 Subject: [PATCH 01/21] Updated qcut for Float64DType --- pandas/core/reshape/tile.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/reshape/tile.py b/pandas/core/reshape/tile.py index 41e1ff41d9ba2..cba1a15934bb8 100644 --- a/pandas/core/reshape/tile.py +++ b/pandas/core/reshape/tile.py @@ -24,7 +24,7 @@ is_datetime_or_timedelta_dtype, is_extension_array_dtype, is_integer, - is_integer_dtype, + is_numeric_dtype, is_list_like, is_scalar, is_timedelta64_dtype, @@ -488,7 +488,7 @@ def _coerce_to_type(x): # Will properly support in the future. # https://github.com/pandas-dev/pandas/pull/31290 # https://github.com/pandas-dev/pandas/issues/31389 - elif is_extension_array_dtype(x.dtype) and is_integer_dtype(x.dtype): + elif is_extension_array_dtype(x.dtype) and is_numeric_dtype(x.dtype): x = x.to_numpy(dtype=np.float64, na_value=np.nan) if dtype is not None: From 7af63a220497541d05c51f7d3f92d541acf3a130 Mon Sep 17 00:00:00 2001 From: taytzehao Date: Fri, 16 Apr 2021 09:18:37 +0000 Subject: [PATCH 02/21] Fixes from pre-commit [automated commit] --- pandas/core/reshape/tile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/reshape/tile.py b/pandas/core/reshape/tile.py index cba1a15934bb8..7b9c3883d74e3 100644 --- a/pandas/core/reshape/tile.py +++ b/pandas/core/reshape/tile.py @@ -24,8 +24,8 @@ is_datetime_or_timedelta_dtype, is_extension_array_dtype, is_integer, - is_numeric_dtype, is_list_like, + is_numeric_dtype, is_scalar, is_timedelta64_dtype, ) From c4259aa25ebac57b4774bbfd4284f34640d70d29 Mon Sep 17 00:00:00 2001 From: taytzehao Date: Sun, 18 Apr 2021 16:23:11 +0800 Subject: [PATCH 03/21] Added test and documentation for qcut Float64DType support --- doc/source/whatsnew/v1.3.0.rst | 2 +- pandas/tests/reshape/test_qcut.py | 16 ++++++++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 2b0b62ab7facf..690557421f024 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -663,7 +663,7 @@ Conversion - Bug in :class:`Index` construction silently ignoring a passed ``dtype`` when the data cannot be cast to that dtype (:issue:`21311`) - Bug in :meth:`StringArray.astype` falling back to numpy and raising when converting to ``dtype='categorical'`` (:issue:`40450`) - Bug in :class:`DataFrame` construction with a dictionary containing an arraylike with ``ExtensionDtype`` and ``copy=True`` failing to make a copy (:issue:`38939`) -- +- Bug in :func:`_coerce_to_type` failing to convert ``Float64DType`` input into numpy array (:issue:`40730`) Strings ^^^^^^^ diff --git a/pandas/tests/reshape/test_qcut.py b/pandas/tests/reshape/test_qcut.py index 7996c15ae8e64..ae19ceb71e3ac 100644 --- a/pandas/tests/reshape/test_qcut.py +++ b/pandas/tests/reshape/test_qcut.py @@ -301,3 +301,19 @@ def test_qcut_nullable_integer(q, any_nullable_int_dtype): expected = qcut(arr.astype(float), q) tm.assert_categorical_equal(result, expected) + + +@pytest.mark.parametrize("Data_type,Data_type_string", + [ + (pd.Float64Dtype(),"Float64Dtype"), + (pd.Int64Dtype(),"Int64Dtype") + ] + ) +def test_qcut_numeric_dtype(Data_type,Data_type_string): + series = pd.Series([1.0, 2.0, 3.0, 4.0], dtype=Data_type) + + try: + pd.qcut(series,2) + except: + Fail_string=Data_type_string+" is not supported" + pytest.fail(msg=Fail_string ) \ No newline at end of file From 29fddabedff6bfcd5c409160a57cabd9010db719 Mon Sep 17 00:00:00 2001 From: taytzehao Date: Sun, 18 Apr 2021 16:37:56 +0800 Subject: [PATCH 04/21] Updated qcut test formatting --- pandas/tests/reshape/test_qcut.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/pandas/tests/reshape/test_qcut.py b/pandas/tests/reshape/test_qcut.py index ae19ceb71e3ac..c03d0ff3bc9e3 100644 --- a/pandas/tests/reshape/test_qcut.py +++ b/pandas/tests/reshape/test_qcut.py @@ -303,17 +303,20 @@ def test_qcut_nullable_integer(q, any_nullable_int_dtype): tm.assert_categorical_equal(result, expected) -@pytest.mark.parametrize("Data_type,Data_type_string", - [ - (pd.Float64Dtype(),"Float64Dtype"), - (pd.Int64Dtype(),"Int64Dtype") - ] - ) +@pytest.mark.parametrize( + "Data_type,Data_type_string", + [ + (pd.Float64Dtype(),"Float64Dtype"), + (pd.Int64Dtype(),"Int64Dtype") + ] +) def test_qcut_numeric_dtype(Data_type,Data_type_string): series = pd.Series([1.0, 2.0, 3.0, 4.0], dtype=Data_type) try: pd.qcut(series,2) except: - Fail_string=Data_type_string+" is not supported" - pytest.fail(msg=Fail_string ) \ No newline at end of file + Fail_string = Data_type_string + " is not supported" + pytest.fail( msg = Fail_string ) + + \ No newline at end of file From e0f425301b57a50beff816505a5d9f689a666ea0 Mon Sep 17 00:00:00 2001 From: taytzehao Date: Mon, 26 Apr 2021 14:34:22 +0800 Subject: [PATCH 05/21] Update sparse array minmax method --- doc/source/whatsnew/v1.3.0.rst | 2 +- pandas/core/arrays/sparse/array.py | 20 ++++++++++++++++++++ pandas/tests/arrays/sparse/test_array.py | 14 ++++++++++++++ 3 files changed, 35 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index c05711ef839c8..d8aeba6e6b28f 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -874,7 +874,7 @@ Sparse - Bug in :meth:`DataFrame.sparse.to_coo` raising ``KeyError`` with columns that are a numeric :class:`Index` without a 0 (:issue:`18414`) - Bug in :meth:`SparseArray.astype` with ``copy=False`` producing incorrect results when going from integer dtype to floating dtype (:issue:`34456`) -- +- Bug in :meth:`max` and `min` for SparseArrat type do not exist (:issue:`40921`) ExtensionArray ^^^^^^^^^^^^^^ diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py index 37898ce682e4f..8894ee2a05b3d 100644 --- a/pandas/core/arrays/sparse/array.py +++ b/pandas/core/arrays/sparse/array.py @@ -1391,6 +1391,26 @@ def mean(self, axis=0, *args, **kwargs): else: nsparse = self.sp_index.ngaps return (sp_sum + self.fill_value * nsparse) / (ct + nsparse) + + + def max(self, axis=0, *args, **kwargs): + + nv.validate_max(args, kwargs) + + if self.sp_index.ngaps > 0 and np.all(self._valid_sp_values < 0): + return 0 + else: + return np.amax(self._valid_sp_values, axis) + + + def min(self, axis=0, *args, **kwargs): + + nv.validate_min(args, kwargs) + + if self.sp_index.ngaps > 0 and np.all(self._valid_sp_values > 0): + return 0 + else: + return np.amin(self._valid_sp_values, axis) # ------------------------------------------------------------------------ # Ufuncs diff --git a/pandas/tests/arrays/sparse/test_array.py b/pandas/tests/arrays/sparse/test_array.py index e073871f96bb4..35ce3caf96414 100644 --- a/pandas/tests/arrays/sparse/test_array.py +++ b/pandas/tests/arrays/sparse/test_array.py @@ -1311,3 +1311,17 @@ def test_dropna(fill_value): df = pd.DataFrame({"a": [0, 1], "b": arr}) expected_df = pd.DataFrame({"a": [1], "b": exp}, index=pd.Int64Index([1])) tm.assert_equal(df.dropna(), expected_df) + + +def test_maxmin(): + data = np.arange(10).astype(float) + max_out = SparseArray(data).max() + min_out = SparseArray(data).min() + assert max_out == 9 + assert min_out == 0 + + data = data*(-1) + max_out = SparseArray(data).max() + min_out = SparseArray(data).min() + assert max_out == 0 + assert min_out == -9 From 7b3960703758d9cff433204b9c26b0256084eb6b Mon Sep 17 00:00:00 2001 From: taytzehao Date: Mon, 26 Apr 2021 14:43:24 +0800 Subject: [PATCH 06/21] Update sparse array minmax method 2 --- doc/source/whatsnew/v1.3.0.rst | 1 - pandas/core/reshape/tile.py | 4 ++-- pandas/tests/reshape/test_qcut.py | 19 +------------------ 3 files changed, 3 insertions(+), 21 deletions(-) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index d8aeba6e6b28f..6416cd7f12ec3 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -706,7 +706,6 @@ Conversion - Bug in :class:`Index` construction silently ignoring a passed ``dtype`` when the data cannot be cast to that dtype (:issue:`21311`) - Bug in :meth:`StringArray.astype` falling back to numpy and raising when converting to ``dtype='categorical'`` (:issue:`40450`) - Bug in :class:`DataFrame` construction with a dictionary containing an arraylike with ``ExtensionDtype`` and ``copy=True`` failing to make a copy (:issue:`38939`) -- Bug in :func:`_coerce_to_type` failing to convert ``Float64DType`` input into numpy array (:issue:`40730`) Strings ^^^^^^^ diff --git a/pandas/core/reshape/tile.py b/pandas/core/reshape/tile.py index 7b9c3883d74e3..41e1ff41d9ba2 100644 --- a/pandas/core/reshape/tile.py +++ b/pandas/core/reshape/tile.py @@ -24,8 +24,8 @@ is_datetime_or_timedelta_dtype, is_extension_array_dtype, is_integer, + is_integer_dtype, is_list_like, - is_numeric_dtype, is_scalar, is_timedelta64_dtype, ) @@ -488,7 +488,7 @@ def _coerce_to_type(x): # Will properly support in the future. # https://github.com/pandas-dev/pandas/pull/31290 # https://github.com/pandas-dev/pandas/issues/31389 - elif is_extension_array_dtype(x.dtype) and is_numeric_dtype(x.dtype): + elif is_extension_array_dtype(x.dtype) and is_integer_dtype(x.dtype): x = x.to_numpy(dtype=np.float64, na_value=np.nan) if dtype is not None: diff --git a/pandas/tests/reshape/test_qcut.py b/pandas/tests/reshape/test_qcut.py index c03d0ff3bc9e3..20aa51ce39533 100644 --- a/pandas/tests/reshape/test_qcut.py +++ b/pandas/tests/reshape/test_qcut.py @@ -301,22 +301,5 @@ def test_qcut_nullable_integer(q, any_nullable_int_dtype): expected = qcut(arr.astype(float), q) tm.assert_categorical_equal(result, expected) - - -@pytest.mark.parametrize( - "Data_type,Data_type_string", - [ - (pd.Float64Dtype(),"Float64Dtype"), - (pd.Int64Dtype(),"Int64Dtype") - ] -) -def test_qcut_numeric_dtype(Data_type,Data_type_string): - series = pd.Series([1.0, 2.0, 3.0, 4.0], dtype=Data_type) - - try: - pd.qcut(series,2) - except: - Fail_string = Data_type_string + " is not supported" - pytest.fail( msg = Fail_string ) - + \ No newline at end of file From 3b22ab25e04968cd18e957af89bdbe1c73fde01e Mon Sep 17 00:00:00 2001 From: taytzehao Date: Mon, 26 Apr 2021 14:45:34 +0800 Subject: [PATCH 07/21] Update sparse array minmax method 3 --- doc/source/whatsnew/v1.3.0.rst | 1 + pandas/tests/reshape/test_qcut.py | 4 +--- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 6416cd7f12ec3..4e04de2e12d20 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -706,6 +706,7 @@ Conversion - Bug in :class:`Index` construction silently ignoring a passed ``dtype`` when the data cannot be cast to that dtype (:issue:`21311`) - Bug in :meth:`StringArray.astype` falling back to numpy and raising when converting to ``dtype='categorical'`` (:issue:`40450`) - Bug in :class:`DataFrame` construction with a dictionary containing an arraylike with ``ExtensionDtype`` and ``copy=True`` failing to make a copy (:issue:`38939`) +- - Strings ^^^^^^^ diff --git a/pandas/tests/reshape/test_qcut.py b/pandas/tests/reshape/test_qcut.py index 20aa51ce39533..b3184fbcfdf9d 100644 --- a/pandas/tests/reshape/test_qcut.py +++ b/pandas/tests/reshape/test_qcut.py @@ -300,6 +300,4 @@ def test_qcut_nullable_integer(q, any_nullable_int_dtype): result = qcut(arr, q) expected = qcut(arr.astype(float), q) - tm.assert_categorical_equal(result, expected) - - \ No newline at end of file + tm.assert_categorical_equal(result, expected) \ No newline at end of file From 02d7f323438fccda187d3cc07e3e150d021dfc5b Mon Sep 17 00:00:00 2001 From: taytzehao Date: Mon, 26 Apr 2021 14:46:46 +0800 Subject: [PATCH 08/21] Update sparse array minmax method 4 --- doc/source/whatsnew/v1.3.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 4e04de2e12d20..a3032667e4579 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -706,7 +706,7 @@ Conversion - Bug in :class:`Index` construction silently ignoring a passed ``dtype`` when the data cannot be cast to that dtype (:issue:`21311`) - Bug in :meth:`StringArray.astype` falling back to numpy and raising when converting to ``dtype='categorical'`` (:issue:`40450`) - Bug in :class:`DataFrame` construction with a dictionary containing an arraylike with ``ExtensionDtype`` and ``copy=True`` failing to make a copy (:issue:`38939`) -- - +-- Strings ^^^^^^^ From c6fe7e213a08a06385e839f9dfdf7fc730f2feea Mon Sep 17 00:00:00 2001 From: taytzehao Date: Mon, 26 Apr 2021 14:49:57 +0800 Subject: [PATCH 09/21] Update sparse array minmax method 5 --- doc/source/whatsnew/v1.3.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index a3032667e4579..bb9d013407367 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -706,7 +706,7 @@ Conversion - Bug in :class:`Index` construction silently ignoring a passed ``dtype`` when the data cannot be cast to that dtype (:issue:`21311`) - Bug in :meth:`StringArray.astype` falling back to numpy and raising when converting to ``dtype='categorical'`` (:issue:`40450`) - Bug in :class:`DataFrame` construction with a dictionary containing an arraylike with ``ExtensionDtype`` and ``copy=True`` failing to make a copy (:issue:`38939`) --- +- Strings ^^^^^^^ From dddb6f61c42f4ecbfc698bd7443c24f5d549b9f3 Mon Sep 17 00:00:00 2001 From: taytzehao Date: Mon, 26 Apr 2021 14:51:19 +0800 Subject: [PATCH 10/21] Update sparse array minmax method 6 --- pandas/tests/reshape/test_qcut.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/reshape/test_qcut.py b/pandas/tests/reshape/test_qcut.py index b3184fbcfdf9d..864f62af56c2e 100644 --- a/pandas/tests/reshape/test_qcut.py +++ b/pandas/tests/reshape/test_qcut.py @@ -300,4 +300,5 @@ def test_qcut_nullable_integer(q, any_nullable_int_dtype): result = qcut(arr, q) expected = qcut(arr.astype(float), q) - tm.assert_categorical_equal(result, expected) \ No newline at end of file + tm.assert_categorical_equal(result, expected) + \ No newline at end of file From 9c03b5265dd531484b75b9912caf379c2247ef11 Mon Sep 17 00:00:00 2001 From: taytzehao Date: Mon, 26 Apr 2021 15:02:23 +0800 Subject: [PATCH 11/21] Update sparse array minmax method 6 --- pandas/tests/reshape/test_qcut.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/tests/reshape/test_qcut.py b/pandas/tests/reshape/test_qcut.py index 864f62af56c2e..7212d3794510f 100644 --- a/pandas/tests/reshape/test_qcut.py +++ b/pandas/tests/reshape/test_qcut.py @@ -300,5 +300,4 @@ def test_qcut_nullable_integer(q, any_nullable_int_dtype): result = qcut(arr, q) expected = qcut(arr.astype(float), q) - tm.assert_categorical_equal(result, expected) - \ No newline at end of file + tm.assert_categorical_equal(result, expected) From 227c28207fbc383475ade0f6d9584853364a109e Mon Sep 17 00:00:00 2001 From: taytzehao Date: Mon, 26 Apr 2021 15:04:01 +0800 Subject: [PATCH 12/21] Update sparse array minmax method 7 --- pandas/tests/reshape/test_qcut.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/reshape/test_qcut.py b/pandas/tests/reshape/test_qcut.py index 7212d3794510f..7996c15ae8e64 100644 --- a/pandas/tests/reshape/test_qcut.py +++ b/pandas/tests/reshape/test_qcut.py @@ -300,4 +300,4 @@ def test_qcut_nullable_integer(q, any_nullable_int_dtype): result = qcut(arr, q) expected = qcut(arr.astype(float), q) - tm.assert_categorical_equal(result, expected) + tm.assert_categorical_equal(result, expected) From 5dabba3b7572d85fcc5211b7c8cdadc40140cc8f Mon Sep 17 00:00:00 2001 From: taytzehao Date: Mon, 26 Apr 2021 15:21:10 +0800 Subject: [PATCH 13/21] Update sparse array minmax method 8 --- pandas/core/arrays/sparse/array.py | 6 ++---- pandas/tests/arrays/sparse/test_array.py | 6 +++--- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py index 8894ee2a05b3d..8145e66176314 100644 --- a/pandas/core/arrays/sparse/array.py +++ b/pandas/core/arrays/sparse/array.py @@ -1391,10 +1391,9 @@ def mean(self, axis=0, *args, **kwargs): else: nsparse = self.sp_index.ngaps return (sp_sum + self.fill_value * nsparse) / (ct + nsparse) - - - def max(self, axis=0, *args, **kwargs): + + def max(self, axis=0, *args, **kwargs): nv.validate_max(args, kwargs) if self.sp_index.ngaps > 0 and np.all(self._valid_sp_values < 0): @@ -1404,7 +1403,6 @@ def max(self, axis=0, *args, **kwargs): def min(self, axis=0, *args, **kwargs): - nv.validate_min(args, kwargs) if self.sp_index.ngaps > 0 and np.all(self._valid_sp_values > 0): diff --git a/pandas/tests/arrays/sparse/test_array.py b/pandas/tests/arrays/sparse/test_array.py index 35ce3caf96414..c2c47daa374c5 100644 --- a/pandas/tests/arrays/sparse/test_array.py +++ b/pandas/tests/arrays/sparse/test_array.py @@ -1311,8 +1311,8 @@ def test_dropna(fill_value): df = pd.DataFrame({"a": [0, 1], "b": arr}) expected_df = pd.DataFrame({"a": [1], "b": exp}, index=pd.Int64Index([1])) tm.assert_equal(df.dropna(), expected_df) - - + + def test_maxmin(): data = np.arange(10).astype(float) max_out = SparseArray(data).max() @@ -1320,7 +1320,7 @@ def test_maxmin(): assert max_out == 9 assert min_out == 0 - data = data*(-1) + data = data * (-1) max_out = SparseArray(data).max() min_out = SparseArray(data).min() assert max_out == 0 From 85a99b4a16c3851141d4bea6cac74a7e27f5d6b1 Mon Sep 17 00:00:00 2001 From: taytzehao Date: Tue, 27 Apr 2021 03:01:55 +0800 Subject: [PATCH 14/21] Added test case and function to support NaN --- doc/source/whatsnew/v1.3.0.rst | 2 +- pandas/core/arrays/sparse/array.py | 10 ++++++++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index bb9d013407367..55d7864c8a951 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -874,7 +874,7 @@ Sparse - Bug in :meth:`DataFrame.sparse.to_coo` raising ``KeyError`` with columns that are a numeric :class:`Index` without a 0 (:issue:`18414`) - Bug in :meth:`SparseArray.astype` with ``copy=False`` producing incorrect results when going from integer dtype to floating dtype (:issue:`34456`) -- Bug in :meth:`max` and `min` for SparseArrat type do not exist (:issue:`40921`) +- Bug in :meth:`max` and `min` for :class:`SparseArray` type do not exist (:issue:`40921`) ExtensionArray ^^^^^^^^^^^^^^ diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py index 8145e66176314..c866ef070719d 100644 --- a/pandas/core/arrays/sparse/array.py +++ b/pandas/core/arrays/sparse/array.py @@ -1392,20 +1392,26 @@ def mean(self, axis=0, *args, **kwargs): nsparse = self.sp_index.ngaps return (sp_sum + self.fill_value * nsparse) / (ct + nsparse) - def max(self, axis=0, *args, **kwargs): nv.validate_max(args, kwargs) if self.sp_index.ngaps > 0 and np.all(self._valid_sp_values < 0): + + if self.size>0 and self._valid_sp_values.size==0: + return np.nan + return 0 else: return np.amax(self._valid_sp_values, axis) - def min(self, axis=0, *args, **kwargs): nv.validate_min(args, kwargs) if self.sp_index.ngaps > 0 and np.all(self._valid_sp_values > 0): + + if self.size>0 and self._valid_sp_values.size==0: + return np.nan + return 0 else: return np.amin(self._valid_sp_values, axis) From 0e6a3842f2a0ae5d8a7b3f571276ff2068ccbc03 Mon Sep 17 00:00:00 2001 From: taytzehao Date: Tue, 27 Apr 2021 14:26:58 +0800 Subject: [PATCH 15/21] Resolve precommit issue --- pandas/core/arrays/sparse/array.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py index c866ef070719d..06000d133f526 100644 --- a/pandas/core/arrays/sparse/array.py +++ b/pandas/core/arrays/sparse/array.py @@ -1396,10 +1396,10 @@ def max(self, axis=0, *args, **kwargs): nv.validate_max(args, kwargs) if self.sp_index.ngaps > 0 and np.all(self._valid_sp_values < 0): - - if self.size>0 and self._valid_sp_values.size==0: + + if self.size > 0 and self._valid_sp_values.size == 0: return np.nan - + return 0 else: return np.amax(self._valid_sp_values, axis) @@ -1408,10 +1408,10 @@ def min(self, axis=0, *args, **kwargs): nv.validate_min(args, kwargs) if self.sp_index.ngaps > 0 and np.all(self._valid_sp_values > 0): - - if self.size>0 and self._valid_sp_values.size==0: + + if self.size > 0 and self._valid_sp_values.size == 0: return np.nan - + return 0 else: return np.amin(self._valid_sp_values, axis) From 10c40e54a3f011d8e527a79d1f6aad774c1acda1 Mon Sep 17 00:00:00 2001 From: taytzehao Date: Tue, 27 Apr 2021 14:56:59 +0800 Subject: [PATCH 16/21] Resolve precommit issue rst precommit issue resolved --- doc/source/whatsnew/v1.3.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 0325fa1734e3b..3a1dd04b6c6e2 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -877,7 +877,7 @@ Sparse - Bug in :meth:`DataFrame.sparse.to_coo` raising ``KeyError`` with columns that are a numeric :class:`Index` without a 0 (:issue:`18414`) - Bug in :meth:`SparseArray.astype` with ``copy=False`` producing incorrect results when going from integer dtype to floating dtype (:issue:`34456`) -- Bug in :meth:`max` and `min` for :class:`SparseArray` type do not exist (:issue:`40921`) +- Bug in :class:`SparseArray` type as :meth:`max` and :meth:`min` do not exist (:issue:`40921`) ExtensionArray ^^^^^^^^^^^^^^ From 13545b47c14c10d1f7b7f4a33f03f93f77fe89cf Mon Sep 17 00:00:00 2001 From: taytzehao Date: Tue, 27 Apr 2021 16:12:26 +0800 Subject: [PATCH 17/21] Add test coverage percentage --- pandas/tests/arrays/sparse/test_array.py | 32 +++++++++++++++--------- 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/pandas/tests/arrays/sparse/test_array.py b/pandas/tests/arrays/sparse/test_array.py index c2c47daa374c5..a96e5b07b7f7e 100644 --- a/pandas/tests/arrays/sparse/test_array.py +++ b/pandas/tests/arrays/sparse/test_array.py @@ -1313,15 +1313,23 @@ def test_dropna(fill_value): tm.assert_equal(df.dropna(), expected_df) -def test_maxmin(): - data = np.arange(10).astype(float) - max_out = SparseArray(data).max() - min_out = SparseArray(data).min() - assert max_out == 9 - assert min_out == 0 - - data = data * (-1) - max_out = SparseArray(data).max() - min_out = SparseArray(data).min() - assert max_out == 0 - assert min_out == -9 +class TestMinMax: + plain_data = np.arange(5).astype(float) + data_neg = plain_data * (-1) + data_NaN = SparseArray(np.array([0, 1, 2, np.nan, 4])) + data_all_NaN = SparseArray(np.array([np.nan, np.nan, np.nan, np.nan, np.nan])) + + @pytest.mark.parametrize( + "raw_data,max_expected,min_expected", + [ + (plain_data, [4], [0]), + (data_neg, [0], [-4]), + (data_NaN, [4], [0]), + (data_all_NaN, [np.nan], [np.nan]), + ], + ) + def test_maxmin(self, raw_data, max_expected, min_expected): + max_result = SparseArray(raw_data).max() + min_result = SparseArray(raw_data).min() + assert max_result in max_expected + assert min_result in min_expected From ff8337331bf9056fc66490b8a66da0a7dd375dfe Mon Sep 17 00:00:00 2001 From: taytzehao Date: Wed, 28 Apr 2021 11:11:13 +0800 Subject: [PATCH 18/21] Updated what's new and SparseArray minmax logic --- doc/source/whatsnew/v1.3.0.rst | 2 +- pandas/core/arrays/sparse/array.py | 22 ++++++++-------------- 2 files changed, 9 insertions(+), 15 deletions(-) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 3a1dd04b6c6e2..6f169b2f73242 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -877,7 +877,7 @@ Sparse - Bug in :meth:`DataFrame.sparse.to_coo` raising ``KeyError`` with columns that are a numeric :class:`Index` without a 0 (:issue:`18414`) - Bug in :meth:`SparseArray.astype` with ``copy=False`` producing incorrect results when going from integer dtype to floating dtype (:issue:`34456`) -- Bug in :class:`SparseArray` type as :meth:`max` and :meth:`min` do not exist (:issue:`40921`) +- Implemented :meth:`SparseArray.max` and :meth:`SparseArray.min` (:issue:`40921`) ExtensionArray ^^^^^^^^^^^^^^ diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py index 06000d133f526..091efa68c67da 100644 --- a/pandas/core/arrays/sparse/array.py +++ b/pandas/core/arrays/sparse/array.py @@ -1395,26 +1395,20 @@ def mean(self, axis=0, *args, **kwargs): def max(self, axis=0, *args, **kwargs): nv.validate_max(args, kwargs) - if self.sp_index.ngaps > 0 and np.all(self._valid_sp_values < 0): - - if self.size > 0 and self._valid_sp_values.size == 0: - return np.nan - - return 0 + # This condition returns a nan if there are no valid values in the array. + if self.size > 0 and self._valid_sp_values.size == 0: + return np.nan else: - return np.amax(self._valid_sp_values, axis) + return np.nanmax(self, axis) def min(self, axis=0, *args, **kwargs): nv.validate_min(args, kwargs) - if self.sp_index.ngaps > 0 and np.all(self._valid_sp_values > 0): - - if self.size > 0 and self._valid_sp_values.size == 0: - return np.nan - - return 0 + # This condition returns a nan if there are no valid values in the array. + if self.size > 0 and self._valid_sp_values.size == 0: + return np.nan else: - return np.amin(self._valid_sp_values, axis) + return np.nanmin(self, axis) # ------------------------------------------------------------------------ # Ufuncs From 5d134719d7e02958b8c7d3c66e644ebeeef5a091 Mon Sep 17 00:00:00 2001 From: taytzehao Date: Fri, 28 May 2021 00:52:41 +0800 Subject: [PATCH 19/21] Add test for fill_na --- pandas/tests/arrays/sparse/test_array.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pandas/tests/arrays/sparse/test_array.py b/pandas/tests/arrays/sparse/test_array.py index b29855caf6c1d..d7e69107e68bb 100644 --- a/pandas/tests/arrays/sparse/test_array.py +++ b/pandas/tests/arrays/sparse/test_array.py @@ -1326,6 +1326,8 @@ class TestMinMax: data_neg = plain_data * (-1) data_NaN = SparseArray(np.array([0, 1, 2, np.nan, 4])) data_all_NaN = SparseArray(np.array([np.nan, np.nan, np.nan, np.nan, np.nan])) + data_NA_filled = SparseArray(np.array([np.nan, np.nan, np.nan, np.nan, np.nan])) + data_NA_filled.fill_value = 5 @pytest.mark.parametrize( "raw_data,max_expected,min_expected", @@ -1334,6 +1336,7 @@ class TestMinMax: (data_neg, [0], [-4]), (data_NaN, [4], [0]), (data_all_NaN, [np.nan], [np.nan]), + (data_NA_filled, [5], [5]), ], ) def test_maxmin(self, raw_data, max_expected, min_expected): From 15ac6d1dbebfb57fab2b01b37f180fbaeebdf9e0 Mon Sep 17 00:00:00 2001 From: taytzehao Date: Fri, 28 May 2021 01:15:56 +0800 Subject: [PATCH 20/21] Add whatsnew for fill_na --- doc/source/whatsnew/v1.3.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 4aed4aebc4341..52cdbca9c27b8 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -1081,6 +1081,7 @@ Sparse - Bug in :meth:`DataFrame.sparse.to_coo` raising ``KeyError`` with columns that are a numeric :class:`Index` without a 0 (:issue:`18414`) - Bug in :meth:`SparseArray.astype` with ``copy=False`` producing incorrect results when going from integer dtype to floating dtype (:issue:`34456`) - Implemented :meth:`SparseArray.max` and :meth:`SparseArray.min` (:issue:`40921`) +- Return ``fill_value`` for :meth:`SparseArray.max` and :meth:`SparseArray.min` (:issue:`41552`) ExtensionArray ^^^^^^^^^^^^^^ From 818985a1357db79654c9bbcba202e80a6ba0aed7 Mon Sep 17 00:00:00 2001 From: taytzehao Date: Fri, 28 May 2021 10:28:29 +0800 Subject: [PATCH 21/21] Resolved comments --- doc/source/whatsnew/v1.3.0.rst | 1 - pandas/tests/arrays/sparse/test_array.py | 5 +++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 52cdbca9c27b8..4aed4aebc4341 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -1081,7 +1081,6 @@ Sparse - Bug in :meth:`DataFrame.sparse.to_coo` raising ``KeyError`` with columns that are a numeric :class:`Index` without a 0 (:issue:`18414`) - Bug in :meth:`SparseArray.astype` with ``copy=False`` producing incorrect results when going from integer dtype to floating dtype (:issue:`34456`) - Implemented :meth:`SparseArray.max` and :meth:`SparseArray.min` (:issue:`40921`) -- Return ``fill_value`` for :meth:`SparseArray.max` and :meth:`SparseArray.min` (:issue:`41552`) ExtensionArray ^^^^^^^^^^^^^^ diff --git a/pandas/tests/arrays/sparse/test_array.py b/pandas/tests/arrays/sparse/test_array.py index d7e69107e68bb..1cc8a2df44812 100644 --- a/pandas/tests/arrays/sparse/test_array.py +++ b/pandas/tests/arrays/sparse/test_array.py @@ -1326,8 +1326,9 @@ class TestMinMax: data_neg = plain_data * (-1) data_NaN = SparseArray(np.array([0, 1, 2, np.nan, 4])) data_all_NaN = SparseArray(np.array([np.nan, np.nan, np.nan, np.nan, np.nan])) - data_NA_filled = SparseArray(np.array([np.nan, np.nan, np.nan, np.nan, np.nan])) - data_NA_filled.fill_value = 5 + data_NA_filled = SparseArray( + np.array([np.nan, np.nan, np.nan, np.nan, np.nan]), fill_value=5 + ) @pytest.mark.parametrize( "raw_data,max_expected,min_expected",