From 73d7b1c6e8a0b916d12084053393b13c023d377c Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Fri, 26 Jan 2024 16:32:37 -0800 Subject: [PATCH 1/4] Ensure pandas.errors only imports its __all__ --- pandas/core/frame.py | 2 + pandas/core/generic.py | 2 + pandas/core/indexing.py | 2 + pandas/core/series.py | 2 + pandas/errors/__init__.py | 76 +----------------------------------- pandas/errors/cow.py | 72 ++++++++++++++++++++++++++++++++++ pandas/tests/api/test_api.py | 8 +++- 7 files changed, 88 insertions(+), 76 deletions(-) create mode 100644 pandas/errors/cow.py diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 97f4eaa7c208a..cda25bd7b98b0 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -60,6 +60,8 @@ from pandas.errors import ( ChainedAssignmentError, InvalidIndexError, +) +from pandas.errors.cow import ( _chained_assignment_method_msg, _chained_assignment_msg, _chained_assignment_warning_method_msg, diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 91a4271509421..d6c5fb3c008b5 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -101,6 +101,8 @@ InvalidIndexError, SettingWithCopyError, SettingWithCopyWarning, +) +from pandas.errors.cow import ( _chained_assignment_method_msg, _chained_assignment_warning_method_msg, _check_cacher, diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 2e7a237406ca5..24f3ff4279a84 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -27,6 +27,8 @@ IndexingError, InvalidIndexError, LossySetitemError, +) +from pandas.errors.cow import ( _chained_assignment_msg, _chained_assignment_warning_msg, _check_cacher, diff --git a/pandas/core/series.py b/pandas/core/series.py index ad63bc8a8750f..19e54954bb41c 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -45,6 +45,8 @@ from pandas.errors import ( ChainedAssignmentError, InvalidIndexError, +) +from pandas.errors.cow import ( 
_chained_assignment_method_msg, _chained_assignment_msg, _chained_assignment_warning_method_msg, diff --git a/pandas/errors/__init__.py b/pandas/errors/__init__.py index d47e02cda1837..52b896dc01e8f 100644 --- a/pandas/errors/__init__.py +++ b/pandas/errors/__init__.py @@ -475,81 +475,6 @@ class ChainedAssignmentError(Warning): """ -_chained_assignment_msg = ( - "A value is trying to be set on a copy of a DataFrame or Series " - "through chained assignment.\n" - "When using the Copy-on-Write mode, such chained assignment never works " - "to update the original DataFrame or Series, because the intermediate " - "object on which we are setting values always behaves as a copy.\n\n" - "Try using '.loc[row_indexer, col_indexer] = value' instead, to perform " - "the assignment in a single step.\n\n" - "See the caveats in the documentation: " - "https://pandas.pydata.org/pandas-docs/stable/user_guide/" - "indexing.html#returning-a-view-versus-a-copy" -) - - -_chained_assignment_method_msg = ( - "A value is trying to be set on a copy of a DataFrame or Series " - "through chained assignment using an inplace method.\n" - "When using the Copy-on-Write mode, such inplace method never works " - "to update the original DataFrame or Series, because the intermediate " - "object on which we are setting values always behaves as a copy.\n\n" - "For example, when doing 'df[col].method(value, inplace=True)', try " - "using 'df.method({col: value}, inplace=True)' instead, to perform " - "the operation inplace on the original object.\n\n" -) - - -_chained_assignment_warning_msg = ( - "ChainedAssignmentError: behaviour will change in pandas 3.0!\n" - "You are setting values through chained assignment. 
Currently this works " - "in certain cases, but when using Copy-on-Write (which will become the " - "default behaviour in pandas 3.0) this will never work to update the " - "original DataFrame or Series, because the intermediate object on which " - "we are setting values will behave as a copy.\n" - "A typical example is when you are setting values in a column of a " - "DataFrame, like:\n\n" - 'df["col"][row_indexer] = value\n\n' - 'Use `df.loc[row_indexer, "col"] = values` instead, to perform the ' - "assignment in a single step and ensure this keeps updating the original `df`.\n\n" - "See the caveats in the documentation: " - "https://pandas.pydata.org/pandas-docs/stable/user_guide/" - "indexing.html#returning-a-view-versus-a-copy\n" -) - - -_chained_assignment_warning_method_msg = ( - "A value is trying to be set on a copy of a DataFrame or Series " - "through chained assignment using an inplace method.\n" - "The behavior will change in pandas 3.0. This inplace method will " - "never work because the intermediate object on which we are setting " - "values always behaves as a copy.\n\n" - "For example, when doing 'df[col].method(value, inplace=True)', try " - "using 'df.method({col: value}, inplace=True)' or " - "df[col] = df[col].method(value) instead, to perform " - "the operation inplace on the original object.\n\n" -) - - -def _check_cacher(obj) -> bool: - # This is a mess, selection paths that return a view set the _cacher attribute - # on the Series; most of them also set _item_cache which adds 1 to our relevant - # reference count, but iloc does not, so we have to check if we are actually - # in the item cache - if hasattr(obj, "_cacher"): - parent = obj._cacher[1]() - # parent could be dead - if parent is None: - return False - if hasattr(parent, "_item_cache"): - if obj._cacher[0] in parent._item_cache: - # Check if we are actually the item from item_cache, iloc creates a - # new object - return obj is parent._item_cache[obj._cacher[0]] - return False - - 
class NumExprClobberingError(NameError): """ Exception raised when trying to use a built-in numexpr name as a variable name. @@ -831,6 +756,7 @@ class InvalidComparison(Exception): "AbstractMethodError", "AttributeConflictWarning", "CategoricalConversionWarning", + "ChainedAssignmentError", "ClosedFileError", "CSSWarning", "DatabaseError", diff --git a/pandas/errors/cow.py b/pandas/errors/cow.py new file mode 100644 index 0000000000000..79003ddb8c2ba --- /dev/null +++ b/pandas/errors/cow.py @@ -0,0 +1,72 @@ +_chained_assignment_msg = ( + "A value is trying to be set on a copy of a DataFrame or Series " + "through chained assignment.\n" + "When using the Copy-on-Write mode, such chained assignment never works " + "to update the original DataFrame or Series, because the intermediate " + "object on which we are setting values always behaves as a copy.\n\n" + "Try using '.loc[row_indexer, col_indexer] = value' instead, to perform " + "the assignment in a single step.\n\n" + "See the caveats in the documentation: " + "https://pandas.pydata.org/pandas-docs/stable/user_guide/" + "indexing.html#returning-a-view-versus-a-copy" +) + + +_chained_assignment_method_msg = ( + "A value is trying to be set on a copy of a DataFrame or Series " + "through chained assignment using an inplace method.\n" + "When using the Copy-on-Write mode, such inplace method never works " + "to update the original DataFrame or Series, because the intermediate " + "object on which we are setting values always behaves as a copy.\n\n" + "For example, when doing 'df[col].method(value, inplace=True)', try " + "using 'df.method({col: value}, inplace=True)' instead, to perform " + "the operation inplace on the original object.\n\n" +) + + +_chained_assignment_warning_msg = ( + "ChainedAssignmentError: behaviour will change in pandas 3.0!\n" + "You are setting values through chained assignment. 
Currently this works " + "in certain cases, but when using Copy-on-Write (which will become the " + "default behaviour in pandas 3.0) this will never work to update the " + "original DataFrame or Series, because the intermediate object on which " + "we are setting values will behave as a copy.\n" + "A typical example is when you are setting values in a column of a " + "DataFrame, like:\n\n" + 'df["col"][row_indexer] = value\n\n' + 'Use `df.loc[row_indexer, "col"] = values` instead, to perform the ' + "assignment in a single step and ensure this keeps updating the original `df`.\n\n" + "See the caveats in the documentation: " + "https://pandas.pydata.org/pandas-docs/stable/user_guide/" + "indexing.html#returning-a-view-versus-a-copy\n" +) + +_chained_assignment_warning_method_msg = ( + "A value is trying to be set on a copy of a DataFrame or Series " + "through chained assignment using an inplace method.\n" + "The behavior will change in pandas 3.0. This inplace method will " + "never work because the intermediate object on which we are setting " + "values always behaves as a copy.\n\n" + "For example, when doing 'df[col].method(value, inplace=True)', try " + "using 'df.method({col: value}, inplace=True)' or " + "df[col] = df[col].method(value) instead, to perform " + "the operation inplace on the original object.\n\n" +) + + +def _check_cacher(obj) -> bool: + # This is a mess, selection paths that return a view set the _cacher attribute + # on the Series; most of them also set _item_cache which adds 1 to our relevant + # reference count, but iloc does not, so we have to check if we are actually + # in the item cache + if hasattr(obj, "_cacher"): + parent = obj._cacher[1]() + # parent could be dead + if parent is None: + return False + if hasattr(parent, "_item_cache"): + if obj._cacher[0] in parent._item_cache: + # Check if we are actually the item from item_cache, iloc creates a + # new object + return obj is parent._item_cache[obj._cacher[0]] + return False diff 
--git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py index 60bcb97aaa364..aca0d3c295e5b 100644 --- a/pandas/tests/api/test_api.py +++ b/pandas/tests/api/test_api.py @@ -3,7 +3,10 @@ import pytest import pandas as pd -from pandas import api +from pandas import ( + api, + errors, +) import pandas._testing as tm from pandas.api import ( extensions as api_extensions, @@ -356,6 +359,9 @@ def test_api_indexers(self): def test_api_extensions(self): self.check(api_extensions, self.allowed_api_extensions) + def test_api_errors(self): + self.check(errors, errors.__all__, ignored=["ctypes", "cow"]) + class TestTesting(Base): funcs = [ From 3ade4559ef53284de9c98616bacdeda6a1d41023 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 29 Jan 2024 10:49:46 -0800 Subject: [PATCH 2/4] Make public util API accessible --- doc/source/reference/index.rst | 7 ++++--- pandas/util/__init__.py | 4 ++++ 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/doc/source/reference/index.rst b/doc/source/reference/index.rst index 7da02f7958416..7f4d05414d254 100644 --- a/doc/source/reference/index.rst +++ b/doc/source/reference/index.rst @@ -24,13 +24,14 @@ The following subpackages are public. `pandas-stubs <https://github.com/pandas-dev/pandas-stubs>`_ package which has classes in addition to those that occur in pandas for type-hinting. -In addition, public functions in ``pandas.io`` and ``pandas.tseries`` submodules -are mentioned in the documentation. +In addition, public functions in the ``pandas.io``, ``pandas.tseries``, and ``pandas.util`` submodules +are explicitly mentioned in the documentation. Further APIs in these modules are not guaranteed +to be stable. .. warning:: - The ``pandas.core``, ``pandas.compat``, and ``pandas.util`` top-level modules are PRIVATE. Stable functionality in such modules is not guaranteed. + The ``pandas.core`` and ``pandas.compat`` top-level modules are PRIVATE. Stable functionality in such modules is not guaranteed. ..
If you update this toctree, also update the manual toctree in the .. main index.rst.template diff --git a/pandas/util/__init__.py b/pandas/util/__init__.py index 91282fde8b11d..928d626c057be 100644 --- a/pandas/util/__init__.py +++ b/pandas/util/__init__.py @@ -25,5 +25,9 @@ def __getattr__(key: str): raise AttributeError(f"module 'pandas.util' has no attribute '{key}'") +def __dir__(): + return list(globals.keys()) + ["hash_array", "hash_pandas_object"] + + def capitalize_first_letter(s: str) -> str: return s[:1].upper() + s[1:] From 4d83c8dfe9a404d1ae47d20051a91b5a177e94c6 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 29 Jan 2024 18:10:56 -0800 Subject: [PATCH 3/4] Fix import --- pandas/tests/api/test_api.py | 29 +++++++++++++++++++++++------ pandas/util/__init__.py | 2 +- 2 files changed, 24 insertions(+), 7 deletions(-) diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py index aca0d3c295e5b..61d6aaf63adf1 100644 --- a/pandas/tests/api/test_api.py +++ b/pandas/tests/api/test_api.py @@ -3,10 +3,7 @@ import pytest import pandas as pd -from pandas import ( - api, - errors, -) +from pandas import api import pandas._testing as tm from pandas.api import ( extensions as api_extensions, @@ -359,8 +356,28 @@ def test_api_indexers(self): def test_api_extensions(self): self.check(api_extensions, self.allowed_api_extensions) - def test_api_errors(self): - self.check(errors, errors.__all__, ignored=["ctypes", "cow"]) + +class TestErrors(Base): + def test_errors(self): + self.check(pd.errors, pd.errors.__all__, ignored=["ctypes", "cow"]) + + +class TestUtil(Base): + def test_util(self): + self.check( + pd.util, + ["hash_array", "hash_pandas_object"], + ignored=[ + "_decorators", + "_test_decorators", + "_exceptions", + "_validators", + "capitalize_first_letter", + "version", + "_print_versions", + "_tester", + ], + ) class TestTesting(Base): diff --git a/pandas/util/__init__.py 
b/pandas/util/__init__.py index 928d626c057be..59ab324ba38ca 100644 --- a/pandas/util/__init__.py +++ b/pandas/util/__init__.py @@ -26,7 +26,7 @@ def __getattr__(key: str): def __dir__(): - return list(globals.keys()) + ["hash_array", "hash_pandas_object"] + return list(globals().keys()) + ["hash_array", "hash_pandas_object"] def capitalize_first_letter(s: str) -> str: From 8b4f4661162641a28c411b06ce111cb25eb10423 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 30 Jan 2024 12:12:55 -0800 Subject: [PATCH 4/4] Type input --- pandas/errors/cow.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/errors/cow.py b/pandas/errors/cow.py index 79003ddb8c2ba..2215ec2148757 100644 --- a/pandas/errors/cow.py +++ b/pandas/errors/cow.py @@ -1,3 +1,5 @@ +from typing import Any + _chained_assignment_msg = ( "A value is trying to be set on a copy of a DataFrame or Series " "through chained assignment.\n" @@ -54,7 +56,7 @@ ) -def _check_cacher(obj) -> bool: +def _check_cacher(obj: Any) -> bool: # This is a mess, selection paths that return a view set the _cacher attribute # on the Series; most of them also set _item_cache which adds 1 to our relevant # reference count, but iloc does not, so we have to check if we are actually