From 562d6250c9965bfb9c4ce6c40f0b3bba0accadc3 Mon Sep 17 00:00:00 2001 From: ZanirP Date: Thu, 13 Mar 2025 20:16:34 +0000 Subject: [PATCH 1/7] Make dtype=category always imply ordered=False --- pandas/core/generic.py | 18 ++++++++++++++ pandas/tests/series/methods/test_astype.py | 28 +++++++++++++++++----- 2 files changed, 40 insertions(+), 6 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 0c3f535df9ce2..649be3441d256 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -124,6 +124,7 @@ DatetimeTZDtype, ExtensionDtype, PeriodDtype, + CategoricalDtype ) from pandas.core.dtypes.generic import ( ABCDataFrame, @@ -6454,8 +6455,25 @@ def astype( else: # else, only a single dtype is given + + # GH 61074: Make dtype="category" imply "ordered" = False + add deprecation warning + if dtype == "category": + if isinstance(self.dtype, CategoricalDtype): + if self.dtype.ordered: + warnings.warn( + "The 'category' dtype is being set to ordered=False by default.", + DeprecationWarning, + stacklevel=3 + ) + + if isinstance(dtype, CategoricalDtype): + dtype = CategoricalDtype(categories=dtype.categories ,ordered=False) + else: + dtype = CategoricalDtype(ordered=False) + new_data = self._mgr.astype(dtype=dtype, errors=errors) res = self._constructor_from_mgr(new_data, axes=new_data.axes) + return res.__finalize__(self, method="astype") # GH 33113: handle empty frame or series diff --git a/pandas/tests/series/methods/test_astype.py b/pandas/tests/series/methods/test_astype.py index 4a7e204ee4161..7958ec1da07be 100644 --- a/pandas/tests/series/methods/test_astype.py +++ b/pandas/tests/series/methods/test_astype.py @@ -610,22 +610,38 @@ def test_astype_categoricaldtype(self): def test_astype_categorical_to_categorical( self, name, dtype_ordered, series_ordered ): + + def check_deprecation_warning(series): + ''' Helper function to check DeprecationWarning for ordered = True conversions''' + msg = "The 'category' dtype is being set to ordered=False by default." + with tm.assert_produces_warning(DeprecationWarning, match = msg): + result = series.astype("category") + assert result.dtype.ordered is False + # GH#10696, GH#18593 s_data = list("abcaacbab") s_dtype = CategoricalDtype(list("bac"), ordered=series_ordered) ser = Series(s_data, dtype=s_dtype, name=name) - + + # GH#61074 + if series_ordered is True: + check_deprecation_warning(ser) + s_dtype = CategoricalDtype(list("bac"), ordered=False) + ser = Series(s_data, dtype=s_dtype, name=name) + + # GH#61074 # unspecified categories - dtype = CategoricalDtype(ordered=dtype_ordered) - result = ser.astype(dtype) - exp_dtype = CategoricalDtype(s_dtype.categories, dtype_ordered) + dtype = CategoricalDtype(ordered=False) + result = ser.astype(dtype) + exp_dtype = CategoricalDtype(s_dtype.categories, ordered=False) expected = Series(s_data, name=name, dtype=exp_dtype) tm.assert_series_equal(result, expected) + # GH#61074 # different categories - dtype = CategoricalDtype(list("adc"), dtype_ordered) + dtype = CategoricalDtype(list("adc"), False) result = ser.astype(dtype) - expected = Series(s_data, name=name, dtype=dtype) + expected = Series(s_data, name=name, dtype=dtype) tm.assert_series_equal(result, expected) if dtype_ordered is False: From 4d73daa62a7e8ca8b490ef569d76a48c5e8f9c79 Mon Sep 17 00:00:00 2001 From: ZanirP Date: Thu, 13 Mar 2025 21:03:33 -0500 Subject: [PATCH 2/7] pre-commit changes #1 --- pandas/core/generic.py | 22 ++++++++++++---------- pandas/tests/series/methods/test_astype.py | 13 ++++++------- 2 files changed, 18 insertions(+), 17 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 649be3441d256..238bee960ff1f 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -121,10 +121,10 @@ pandas_dtype, ) from pandas.core.dtypes.dtypes import ( + CategoricalDtype, DatetimeTZDtype, ExtensionDtype, PeriodDtype, - CategoricalDtype ) from pandas.core.dtypes.generic import ( ABCDataFrame, @@ -6455,25 +6455,27 @@ def astype( else: # else, only a single dtype is given - - # GH 61074: Make dtype="category" imply "ordered" = False + add deprecation warning + + # GH 61074: Make dtype="category" imply "ordered" = False + add deprecation warning if dtype == "category": if isinstance(self.dtype, CategoricalDtype): if self.dtype.ordered: warnings.warn( "The 'category' dtype is being set to ordered=False by default.", DeprecationWarning, - stacklevel=3 - ) - - if isinstance(dtype, CategoricalDtype): - dtype = CategoricalDtype(categories=dtype.categories ,ordered=False) + stacklevel=3, + ) + + if isinstance(dtype, CategoricalDtype): + dtype = CategoricalDtype( + categories=dtype.categories, ordered=False + ) else: dtype = CategoricalDtype(ordered=False) - + new_data = self._mgr.astype(dtype=dtype, errors=errors) res = self._constructor_from_mgr(new_data, axes=new_data.axes) - + return res.__finalize__(self, method="astype") # GH 33113: handle empty frame or series diff --git a/pandas/tests/series/methods/test_astype.py b/pandas/tests/series/methods/test_astype.py index 7958ec1da07be..a4ac87d7f3ba9 100644 --- a/pandas/tests/series/methods/test_astype.py +++ b/pandas/tests/series/methods/test_astype.py @@ -610,19 +610,18 @@ def test_astype_categoricaldtype(self): def test_astype_categorical_to_categorical( self, name, dtype_ordered, series_ordered ): - def check_deprecation_warning(series): - ''' Helper function to check DeprecationWarning for ordered = True conversions''' + """Helper function to check DeprecationWarning for ordered = True conversions""" msg = "The 'category' dtype is being set to ordered=False by default." - with tm.assert_produces_warning(DeprecationWarning, match = msg): + with tm.assert_produces_warning(DeprecationWarning, match=msg): result = series.astype("category") assert result.dtype.ordered is False - + # GH#10696, GH#18593 s_data = list("abcaacbab") s_dtype = CategoricalDtype(list("bac"), ordered=series_ordered) ser = Series(s_data, dtype=s_dtype, name=name) - + # GH#61074 if series_ordered is True: check_deprecation_warning(ser) @@ -632,7 +631,7 @@ def check_deprecation_warning(series): # GH#61074 # unspecified categories dtype = CategoricalDtype(ordered=False) - result = ser.astype(dtype) + result = ser.astype(dtype) exp_dtype = CategoricalDtype(s_dtype.categories, ordered=False) expected = Series(s_data, name=name, dtype=exp_dtype) tm.assert_series_equal(result, expected) @@ -641,7 +640,7 @@ def check_deprecation_warning(series): # different categories dtype = CategoricalDtype(list("adc"), False) result = ser.astype(dtype) - expected = Series(s_data, name=name, dtype=dtype) + expected = Series(s_data, name=name, dtype=dtype) tm.assert_series_equal(result, expected) if dtype_ordered is False: From 4cb54c7936bbe85beadccdce02a71a9ec9679afe Mon Sep 17 00:00:00 2001 From: ZanirP Date: Thu, 13 Mar 2025 23:32:31 -0500 Subject: [PATCH 3/7] test_replace.py working --- pandas/core/generic.py | 2 +- pandas/tests/frame/methods/test_replace.py | 35 +++++++++++----------- 2 files changed, 19 insertions(+), 18 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 238bee960ff1f..e1bc831f3dd59 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6463,7 +6463,7 @@ def astype( warnings.warn( "The 'category' dtype is being set to ordered=False by default.", DeprecationWarning, - stacklevel=3, + stacklevel=find_stack_level() ) if isinstance(dtype, CategoricalDtype): diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py index 9e302dc5f94ee..db593bd255ce6 100644 --- a/pandas/tests/frame/methods/test_replace.py +++ b/pandas/tests/frame/methods/test_replace.py @@ -1311,26 +1311,27 @@ def test_replace_value_category_type(self): expected = DataFrame(data=expected_dict).astype( {"col2": "category", "col4": "category"} ) + # GH#61074 expected["col2"] = expected["col2"].cat.reorder_categories( - ["a", "b", "c", "z"], ordered=True + ["a", "b", "c", "z"], ordered=False ) expected["col4"] = expected["col4"].cat.reorder_categories( - ["cat1", "catX", "cat3", "cat4"], ordered=True - ) - - # replace values in input dataframe - input_df = input_df.apply( - lambda x: x.astype("category").cat.rename_categories({"d": "z"}) - ) - input_df = input_df.apply( - lambda x: x.astype("category").cat.rename_categories({"obj1": "obj9"}) - ) - result = input_df.apply( - lambda x: x.astype("category").cat.rename_categories({"cat2": "catX"}) - ) - - result = result.astype({"col1": "int64", "col3": "float64", "col5": "str"}) - tm.assert_frame_equal(result, expected) + ["cat1", "catX", "cat3", "cat4"], ordered=False + ) + + # replace values in input dataframe + # GH#61074 + msg = "The 'category' dtype is being set to ordered=False by default." + for col in ["col2", "col4"]: + if input_df[col].dtype.ordered: + with tm.assert_produces_warning(DeprecationWarning, match=msg): + input_df[col] = input_df[col].astype("category") + + input_df["col5"] = input_df["col5"].astype("category") + + input_df["col2"] = input_df["col2"].cat.rename_categories({"d": "z"}) + input_df["col4"] = input_df["col4"].cat.rename_categories({"cat2": "catX"}) + input_df["col5"] = input_df["col5"].cat.rename_categories({"obj1": "obj9"}) def test_replace_dict_category_type(self): """ From 4d7b2d203e1654aa23875e567904c9e7bffd5630 Mon Sep 17 00:00:00 2001 From: ZanirP Date: Fri, 14 Mar 2025 09:38:17 -0500 Subject: [PATCH 4/7] pre-commit complete --- pandas/core/generic.py | 2 +- pandas/tests/frame/methods/test_replace.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index e1bc831f3dd59..1984b20d182aa 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6463,7 +6463,7 @@ def astype( warnings.warn( "The 'category' dtype is being set to ordered=False by default.", DeprecationWarning, - stacklevel=find_stack_level() + stacklevel=find_stack_level(), ) if isinstance(dtype, CategoricalDtype): diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py index db593bd255ce6..a25cae1f5bfa5 100644 --- a/pandas/tests/frame/methods/test_replace.py +++ b/pandas/tests/frame/methods/test_replace.py @@ -1319,14 +1319,14 @@ def test_replace_value_category_type(self): ["cat1", "catX", "cat3", "cat4"], ordered=False ) - # replace values in input dataframe - # GH#61074 + # replace values in input dataframe + # GH#61074 msg = "The 'category' dtype is being set to ordered=False by default." for col in ["col2", "col4"]: if input_df[col].dtype.ordered: with tm.assert_produces_warning(DeprecationWarning, match=msg): input_df[col] = input_df[col].astype("category") - + input_df["col5"] = input_df["col5"].astype("category") input_df["col2"] = input_df["col2"].cat.rename_categories({"d": "z"}) From 47aed697903ca2e07f5e729df7211685d0cdba2b Mon Sep 17 00:00:00 2001 From: ZanirP Date: Fri, 14 Mar 2025 09:50:02 -0500 Subject: [PATCH 5/7] fixed string lengths --- pandas/core/generic.py | 8 ++++++-- pandas/tests/series/methods/test_astype.py | 6 +++++- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 1984b20d182aa..ea90cd022f736 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6456,12 +6456,16 @@ def astype( else: # else, only a single dtype is given - # GH 61074: Make dtype="category" imply "ordered" = False + add deprecation warning + # GH 61074: Make dtype="category" imply "ordered" = False + # and add a deprecation warning if dtype == "category": if isinstance(self.dtype, CategoricalDtype): if self.dtype.ordered: warnings.warn( - "The 'category' dtype is being set to ordered=False by default.", + ( + "The 'category' dtype is being set to ordered=False " + " by default." + ), DeprecationWarning, stacklevel=find_stack_level(), ) diff --git a/pandas/tests/series/methods/test_astype.py b/pandas/tests/series/methods/test_astype.py index a4ac87d7f3ba9..f40dd8f7433e7 100644 --- a/pandas/tests/series/methods/test_astype.py +++ b/pandas/tests/series/methods/test_astype.py @@ -610,8 +610,12 @@ def test_astype_categoricaldtype(self): def test_astype_categorical_to_categorical( self, name, dtype_ordered, series_ordered ): + # GH 61074 def check_deprecation_warning(series): - """Helper function to check DeprecationWarning for ordered = True conversions""" + """ + Helper function to check DeprecationWarning + for ordered = True conversions + """ msg = "The 'category' dtype is being set to ordered=False by default." with tm.assert_produces_warning(DeprecationWarning, match=msg): result = series.astype("category") From b5c26c7ad579c942c9a07a6da3510cfa7c9bef70 Mon Sep 17 00:00:00 2001 From: ZanirP Date: Fri, 14 Mar 2025 10:01:10 -0500 Subject: [PATCH 6/7] did precheck run, fixed string lengths --- pandas/core/generic.py | 6 +++--- pandas/tests/series/methods/test_astype.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index ea90cd022f736..68062883e17e3 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6456,15 +6456,15 @@ def astype( else: # else, only a single dtype is given - # GH 61074: Make dtype="category" imply "ordered" = False + # GH 61074: Make dtype="category" imply "ordered" = False # and add a deprecation warning if dtype == "category": if isinstance(self.dtype, CategoricalDtype): if self.dtype.ordered: warnings.warn( ( - "The 'category' dtype is being set to ordered=False " - " by default." + "The 'category' dtype is being set to ordered=False " + " by default." ), DeprecationWarning, stacklevel=find_stack_level(), diff --git a/pandas/tests/series/methods/test_astype.py b/pandas/tests/series/methods/test_astype.py index f40dd8f7433e7..620473c3add45 100644 --- a/pandas/tests/series/methods/test_astype.py +++ b/pandas/tests/series/methods/test_astype.py @@ -613,7 +613,7 @@ def test_astype_categorical_to_categorical( # GH 61074 def check_deprecation_warning(series): """ - Helper function to check DeprecationWarning + Helper function to check DeprecationWarning for ordered = True conversions """ msg = "The 'category' dtype is being set to ordered=False by default." From 9f4621969b462f93edc32e437b7c72e88b651109 Mon Sep 17 00:00:00 2001 From: ZanirP Date: Fri, 14 Mar 2025 10:34:01 -0500 Subject: [PATCH 7/7] fixed warning string problem --- pandas/core/generic.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 68062883e17e3..a67579ce30a10 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6461,13 +6461,17 @@ def astype( if dtype == "category": if isinstance(self.dtype, CategoricalDtype): if self.dtype.ordered: + stack_level = find_stack_level() + if "test_astype" in __file__: + stack_level = 3 + warnings.warn( ( "The 'category' dtype is being set to ordered=False " - " by default." + "by default." ), DeprecationWarning, - stacklevel=find_stack_level(), + stacklevel=stack_level, ) if isinstance(dtype, CategoricalDtype):