From a61a0d081c4ff809ea3da0f6c88bc45865b7ff60 Mon Sep 17 00:00:00 2001 From: luke <2736230899@qq.com> Date: Fri, 23 Dec 2022 16:37:49 +0800 Subject: [PATCH 1/8] a demo for confirm --- doc/source/reference/testing.rst | 1 + pandas/_libs/reduction.pyx | 5 +++-- pandas/core/resample.py | 17 +++++++---------- pandas/errors/__init__.py | 9 +++++++++ 4 files changed, 20 insertions(+), 12 deletions(-) diff --git a/doc/source/reference/testing.rst b/doc/source/reference/testing.rst index 07624e87d82e0..633450507e62c 100644 --- a/doc/source/reference/testing.rst +++ b/doc/source/reference/testing.rst @@ -45,6 +45,7 @@ Exceptions and warnings errors.LossySetitemError errors.MergeError errors.NoBufferPresent + errors.NotObjectError errors.NullFrequencyError errors.NumbaUtilError errors.NumExprClobberingError diff --git a/pandas/_libs/reduction.pyx b/pandas/_libs/reduction.pyx index 7ff0842678d7f..abbf5d38ade2e 100644 --- a/pandas/_libs/reduction.pyx +++ b/pandas/_libs/reduction.pyx @@ -6,6 +6,8 @@ cnp.import_array() from pandas._libs.util cimport is_array +from pandas.errors import NotObjectError + cdef cnp.dtype _dtype_obj = np.dtype("object") @@ -18,8 +20,7 @@ cpdef check_result_array(object obj, object dtype): if dtype != _dtype_obj: # If it is object dtype, the function can be a reduction/aggregation # and still return an ndarray e.g. 
test_agg_over_numpy_arrays - raise ValueError("Must produce aggregated value") - + raise NotObjectError("Must produce aggregated value") cpdef inline extract_result(object res): """ extract the result object, it might be a 0-dim ndarray diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 555b47f5e2304..58b48e25981aa 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -42,6 +42,7 @@ from pandas.errors import ( AbstractMethodError, DataError, + NotObjectError, ) from pandas.util._decorators import ( Appender, @@ -459,18 +460,14 @@ def _groupby_and_aggregate(self, how, *args, **kwargs): # on Series, raising AttributeError or KeyError # (depending on whether the column lookup uses getattr/__getitem__) result = grouped.apply(how, *args, **kwargs) + except NotObjectError: + # we have a non-reducing function; try to evaluate - except ValueError as err: - if "Must produce aggregated value" in str(err): - # raised in _aggregate_named - # see test_apply_without_aggregation, test_apply_with_mutated_index - pass - else: - raise - - # we have a non-reducing function - # try to evaluate + # raised in _aggregate_named + # see test_apply_without_aggregation, test_apply_with_mutated_index result = grouped.apply(how, *args, **kwargs) + except ValueError: + raise return self._wrap_result(result) diff --git a/pandas/errors/__init__.py b/pandas/errors/__init__.py index 89ac1c10254cb..696f6fbc0e065 100644 --- a/pandas/errors/__init__.py +++ b/pandas/errors/__init__.py @@ -555,6 +555,14 @@ class InvalidComparison(Exception): """ +class NotObjectError(ValueError): + """ + Exception is raised by check_result_array to indicate not object dtype. + + Instead of raising a ValueError("Must produce aggregated value"). 
+ """ + + __all__ = [ "AbstractMethodError", "AccessorRegistrationWarning", @@ -577,6 +585,7 @@ class InvalidComparison(Exception): "LossySetitemError", "MergeError", "NoBufferPresent", + "NotObjectError", "NullFrequencyError", "NumbaUtilError", "NumExprClobberingError", From d45d4b911d0a87b9f43e5a755f6fdd16100041ca Mon Sep 17 00:00:00 2001 From: luke <2736230899@qq.com> Date: Sat, 24 Dec 2022 19:46:42 +0800 Subject: [PATCH 2/8] fix core/dtypes/cast.py --- doc/source/reference/testing.rst | 1 + pandas/core/arrays/datetimes.py | 6 ++---- pandas/core/dtypes/cast.py | 18 ++++++++++-------- pandas/errors/__init__.py | 10 ++++++++++ .../indexes/datetimes/test_constructors.py | 7 ++----- 5 files changed, 25 insertions(+), 17 deletions(-) diff --git a/doc/source/reference/testing.rst b/doc/source/reference/testing.rst index 633450507e62c..4aab7ce8053ea 100644 --- a/doc/source/reference/testing.rst +++ b/doc/source/reference/testing.rst @@ -62,6 +62,7 @@ Exceptions and warnings errors.SettingWithCopyError errors.SettingWithCopyWarning errors.SpecificationError + errors.SupplyTzDetypeError errors.UndefinedVariableError errors.UnsortedIndexError errors.UnsupportedFunctionCall diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 0162f54bf5225..1897d2b91d995 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -53,6 +53,7 @@ from pandas.errors import ( OutOfBoundsDatetime, PerformanceWarning, + SupplyTzDetypeError, ) from pandas.util._exceptions import find_stack_level from pandas.util._validators import validate_inclusive @@ -2381,10 +2382,7 @@ def _validate_tz_from_dtype( # We also need to check for the case where the user passed a # tz-naive dtype (i.e. datetime64[ns]) if tz is not None and not timezones.tz_compare(tz, dtz): - raise ValueError( - "cannot supply both a tz and a " - "timezone-naive dtype (i.e. 
datetime64[ns])" - ) + raise SupplyTzDetypeError return tz diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index f3ce104aa4a3e..22e9e27f12010 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -41,6 +41,7 @@ from pandas.errors import ( IntCastingNaNError, LossySetitemError, + SupplyTzDetypeError, ) from pandas.core.dtypes.common import ( @@ -1177,14 +1178,15 @@ def maybe_cast_to_datetime( else: try: dta = DatetimeArray._from_sequence(value, dtype=dtype) - except ValueError as err: - # We can give a Series-specific exception message. - if "cannot supply both a tz and a timezone-naive dtype" in str(err): - raise ValueError( - "Cannot convert timezone-aware data to " - "timezone-naive dtype. Use " - "pd.Series(values).dt.tz_localize(None) instead." - ) from err + + except SupplyTzDetypeError as err: + raise ValueError( + "Cannot convert timezone-aware data to " + "timezone-naive dtype. Use " + "pd.Series(values).dt.tz_localize(None) instead." + ) from err + + except ValueError: raise return dta diff --git a/pandas/errors/__init__.py b/pandas/errors/__init__.py index 696f6fbc0e065..2c3bbf31fb5f7 100644 --- a/pandas/errors/__init__.py +++ b/pandas/errors/__init__.py @@ -563,6 +563,15 @@ class NotObjectError(ValueError): """ +class SupplyTzDetypeError(ValueError): + """ + Exception is raised by _validate_tz_from_dtype. + + Instead of ValueError("cannot supply both a tz and a timezone-naive dtype + (i.e. datetime64[ns])"). 
+ """ + + __all__ = [ "AbstractMethodError", "AccessorRegistrationWarning", @@ -602,6 +611,7 @@ class NotObjectError(ValueError): "SettingWithCopyError", "SettingWithCopyWarning", "SpecificationError", + "SupplyTzDetypeError", "UndefinedVariableError", "UnsortedIndexError", "UnsupportedFunctionCall", diff --git a/pandas/tests/indexes/datetimes/test_constructors.py b/pandas/tests/indexes/datetimes/test_constructors.py index 246de06a04de2..a904e6945e026 100644 --- a/pandas/tests/indexes/datetimes/test_constructors.py +++ b/pandas/tests/indexes/datetimes/test_constructors.py @@ -16,6 +16,7 @@ astype_overflowsafe, ) from pandas.compat import PY39 +from pandas.errors import SupplyTzDetypeError import pandas as pd from pandas import ( @@ -713,11 +714,7 @@ def test_constructor_dtype_tz_mismatch_raises(self): ["2013-01-01", "2013-01-02"], dtype="datetime64[ns, US/Eastern]" ) - msg = ( - "cannot supply both a tz and a timezone-naive dtype " - r"\(i\.e\. datetime64\[ns\]\)" - ) - with pytest.raises(ValueError, match=msg): + with pytest.raises(SupplyTzDetypeError, match=None): DatetimeIndex(idx, dtype="datetime64[ns]") # this is effectively trying to convert tz's From 9c7e88ddddb39222356e92ec3127dcb0d01930d0 Mon Sep 17 00:00:00 2001 From: luke <2736230899@qq.com> Date: Sat, 24 Dec 2022 19:52:03 +0800 Subject: [PATCH 3/8] more fix core/dtypes/cast.py --- pandas/core/arrays/datetimes.py | 4 +++- pandas/tests/indexes/datetimes/test_constructors.py | 3 ++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 1897d2b91d995..bebb875e42828 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -2382,7 +2382,9 @@ def _validate_tz_from_dtype( # We also need to check for the case where the user passed a # tz-naive dtype (i.e. 
datetime64[ns]) if tz is not None and not timezones.tz_compare(tz, dtz): - raise SupplyTzDetypeError + raise SupplyTzDetypeError( + "cannot supply both a tz and a timezone-naive dtype" + ) return tz diff --git a/pandas/tests/indexes/datetimes/test_constructors.py b/pandas/tests/indexes/datetimes/test_constructors.py index a904e6945e026..c5eb3e19786cc 100644 --- a/pandas/tests/indexes/datetimes/test_constructors.py +++ b/pandas/tests/indexes/datetimes/test_constructors.py @@ -714,7 +714,8 @@ def test_constructor_dtype_tz_mismatch_raises(self): ["2013-01-01", "2013-01-02"], dtype="datetime64[ns, US/Eastern]" ) - with pytest.raises(SupplyTzDetypeError, match=None): + msg = "cannot supply both a tz and a timezone-naive dtype" + with pytest.raises(SupplyTzDetypeError, match=msg): DatetimeIndex(idx, dtype="datetime64[ns]") # this is effectively trying to convert tz's From 2ae08fffea26d7411f019303acb5d454c9f94c7a Mon Sep 17 00:00:00 2001 From: luke <2736230899@qq.com> Date: Sun, 25 Dec 2022 10:32:37 +0800 Subject: [PATCH 4/8] fix core/dtypes/missing.py --- doc/source/reference/testing.rst | 1 + pandas/_libs/missing.pyx | 4 +++- pandas/core/dtypes/missing.py | 7 ++++--- pandas/errors/__init__.py | 9 +++++++++ pandas/tests/frame/methods/test_compare.py | 3 ++- pandas/tests/groupby/test_any_all.py | 4 +++- pandas/tests/scalar/test_na_scalar.py | 5 +++-- 7 files changed, 25 insertions(+), 8 deletions(-) diff --git a/doc/source/reference/testing.rst b/doc/source/reference/testing.rst index 4aab7ce8053ea..77b951288ffde 100644 --- a/doc/source/reference/testing.rst +++ b/doc/source/reference/testing.rst @@ -44,6 +44,7 @@ Exceptions and warnings errors.IntCastingNaNError errors.LossySetitemError errors.MergeError + errors.NATypeError errors.NoBufferPresent errors.NotObjectError errors.NullFrequencyError diff --git a/pandas/_libs/missing.pyx b/pandas/_libs/missing.pyx index a3b0451381ad2..7bb8c2ce611a2 100644 --- a/pandas/_libs/missing.pyx +++ b/pandas/_libs/missing.pyx @@ -4,6 
+4,7 @@ from sys import maxsize cimport cython from cython cimport Py_ssize_t + import numpy as np cimport numpy as cnp @@ -30,6 +31,7 @@ from pandas._libs.tslibs.np_datetime cimport ( ) from pandas._libs.ops_dispatch import maybe_dispatch_ufunc_to_dunder_op +from pandas.errors import NATypeError cdef: float64_t INF = np.inf @@ -410,7 +412,7 @@ class NAType(C_NAType): return self.__repr__() def __bool__(self): - raise TypeError("boolean value of NA is ambiguous") + raise NATypeError("boolean value of NA is ambiguous") def __hash__(self): # GH 30013: Ensure hash is large enough to avoid hash collisions with integers diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py index 000b5ebbdd2f7..15db8cee92e49 100644 --- a/pandas/core/dtypes/missing.py +++ b/pandas/core/dtypes/missing.py @@ -20,6 +20,7 @@ NaT, iNaT, ) +from pandas.errors import NATypeError from pandas.core.dtypes.common import ( DT64NS_DTYPE, @@ -580,9 +581,9 @@ def _array_equivalent_object(left: np.ndarray, right: np.ndarray, strict_nan: bo try: if np.any(np.asarray(left_value != right_value)): return False - except TypeError as err: - if "boolean value of NA is ambiguous" in str(err): - return False + except NATypeError: + return False + except TypeError: raise return True diff --git a/pandas/errors/__init__.py b/pandas/errors/__init__.py index 2c3bbf31fb5f7..b5249f090027f 100644 --- a/pandas/errors/__init__.py +++ b/pandas/errors/__init__.py @@ -572,6 +572,14 @@ class SupplyTzDetypeError(ValueError): """ +class NATypeError(TypeError): + """ + Exception is raised by NAType.__bool__(). + + Instead of TypeError("boolean value of NA is ambiguous"). 
+ """ + + __all__ = [ "AbstractMethodError", "AccessorRegistrationWarning", @@ -593,6 +601,7 @@ class SupplyTzDetypeError(ValueError): "IndexingError", "LossySetitemError", "MergeError", + "NATypeError", "NoBufferPresent", "NotObjectError", "NullFrequencyError", diff --git a/pandas/tests/frame/methods/test_compare.py b/pandas/tests/frame/methods/test_compare.py index 455acde1af684..592beef710106 100644 --- a/pandas/tests/frame/methods/test_compare.py +++ b/pandas/tests/frame/methods/test_compare.py @@ -2,6 +2,7 @@ import pytest from pandas.compat import is_numpy_dev +from pandas.errors import NATypeError import pandas as pd import pandas._testing as tm @@ -261,7 +262,7 @@ def test_compare_ea_and_np_dtype(val1, val2): ) if val1 is pd.NA and is_numpy_dev: # can't compare with numpy array if it contains pd.NA - with pytest.raises(TypeError, match="boolean value of NA is ambiguous"): + with pytest.raises(NATypeError, match="boolean value of NA is ambiguous"): result = df1.compare(df2, keep_shape=True) else: result = df1.compare(df2, keep_shape=True) diff --git a/pandas/tests/groupby/test_any_all.py b/pandas/tests/groupby/test_any_all.py index e49238a9e6656..15a3042a976da 100644 --- a/pandas/tests/groupby/test_any_all.py +++ b/pandas/tests/groupby/test_any_all.py @@ -3,6 +3,8 @@ import numpy as np import pytest +from pandas.errors import NATypeError + import pandas as pd from pandas import ( DataFrame, @@ -175,7 +177,7 @@ def test_object_type_missing_vals(bool_agg_func, data, expected_res, frame_or_se def test_object_NA_raises_with_skipna_false(bool_agg_func): # GH#37501 ser = Series([pd.NA], dtype=object) - with pytest.raises(TypeError, match="boolean value of NA is ambiguous"): + with pytest.raises(NATypeError, match="boolean value of NA is ambiguous"): ser.groupby([1]).agg(bool_agg_func, skipna=False) diff --git a/pandas/tests/scalar/test_na_scalar.py b/pandas/tests/scalar/test_na_scalar.py index a77316cbc0ea6..61108f0a4388e 100644 --- 
a/pandas/tests/scalar/test_na_scalar.py +++ b/pandas/tests/scalar/test_na_scalar.py @@ -4,6 +4,7 @@ import pytest from pandas._libs.missing import NA +from pandas.errors import NATypeError from pandas.core.dtypes.common import is_scalar @@ -36,10 +37,10 @@ def test_format(): def test_truthiness(): msg = "boolean value of NA is ambiguous" - with pytest.raises(TypeError, match=msg): + with pytest.raises(NATypeError, match=msg): bool(NA) - with pytest.raises(TypeError, match=msg): + with pytest.raises(NATypeError, match=msg): not NA From d1c6bcc9eade23789b1b4e8e3803ba45f8019099 Mon Sep 17 00:00:00 2001 From: luke <2736230899@qq.com> Date: Sun, 25 Dec 2022 10:48:37 +0800 Subject: [PATCH 5/8] fix core/groupby/generic.py --- doc/source/reference/testing.rst | 1 + pandas/core/groupby/generic.py | 13 +++++++------ pandas/core/reshape/concat.py | 3 ++- pandas/errors/__init__.py | 9 +++++++++ pandas/tests/reshape/concat/test_concat.py | 3 ++- 5 files changed, 21 insertions(+), 8 deletions(-) diff --git a/doc/source/reference/testing.rst b/doc/source/reference/testing.rst index 77b951288ffde..fd2002d461c19 100644 --- a/doc/source/reference/testing.rst +++ b/doc/source/reference/testing.rst @@ -46,6 +46,7 @@ Exceptions and warnings errors.MergeError errors.NATypeError errors.NoBufferPresent + errors.NoObjectConcatenateError errors.NotObjectError errors.NullFrequencyError errors.NumbaUtilError diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 955f65585963d..cb08374cb14ba 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -44,7 +44,10 @@ SingleManager, TakeIndexer, ) -from pandas.errors import SpecificationError +from pandas.errors import ( + NoObjectConcatenateError, + SpecificationError, +) from pandas.util._decorators import ( Appender, Substitution, @@ -1178,12 +1181,10 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs) gba = GroupByApply(self, [func], args=(), kwargs={}) try: 
result = gba.agg() - - except ValueError as err: - if "No objects to concatenate" not in str(err): - raise + except NoObjectConcatenateError: result = self._aggregate_frame(func) - + except ValueError: + raise else: sobj = self._selected_obj diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index aced5a73a1f02..c4bfd18f9eb81 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -22,6 +22,7 @@ AxisInt, HashableT, ) +from pandas.errors import NoObjectConcatenateError from pandas.util._decorators import cache_readonly from pandas.core.dtypes.concat import concat_compat @@ -420,7 +421,7 @@ def __init__( objs = list(objs) if len(objs) == 0: - raise ValueError("No objects to concatenate") + raise NoObjectConcatenateError("No objects to concatenate") if keys is None: objs = list(com.not_none(*objs)) diff --git a/pandas/errors/__init__.py b/pandas/errors/__init__.py index b5249f090027f..f8eb05278eea5 100644 --- a/pandas/errors/__init__.py +++ b/pandas/errors/__init__.py @@ -580,6 +580,14 @@ class NATypeError(TypeError): """ +class NoObjectConcatenateError(ValueError): + """ + Exception is raised by _Concatenator.__init__(). + + Instead of ValueError("No objects to concatenate"). 
+ """ + + __all__ = [ "AbstractMethodError", "AccessorRegistrationWarning", @@ -603,6 +611,7 @@ class NATypeError(TypeError): "MergeError", "NATypeError", "NoBufferPresent", + "NoObjectConcatenateError", "NotObjectError", "NullFrequencyError", "NumbaUtilError", diff --git a/pandas/tests/reshape/concat/test_concat.py b/pandas/tests/reshape/concat/test_concat.py index ea526c95f20e0..2007dc54221c4 100644 --- a/pandas/tests/reshape/concat/test_concat.py +++ b/pandas/tests/reshape/concat/test_concat.py @@ -14,6 +14,7 @@ from pandas.errors import ( InvalidIndexError, + NoObjectConcatenateError, PerformanceWarning, ) import pandas.util._test_decorators as td @@ -364,7 +365,7 @@ def test_concat_single_with_key(self): tm.assert_frame_equal(result, expected[:10]) def test_concat_no_items_raises(self): - with pytest.raises(ValueError, match="No objects to concatenate"): + with pytest.raises(NoObjectConcatenateError, match="No objects to concatenate"): concat([]) def test_concat_exclude_none(self): From 00dbee267e49ece2ae55998332669a7d254773ac Mon Sep 17 00:00:00 2001 From: luke <2736230899@qq.com> Date: Sun, 25 Dec 2022 11:35:24 +0800 Subject: [PATCH 6/8] fix core/frame.py --- doc/source/reference/testing.rst | 1 + pandas/core/frame.py | 13 ++++++++----- pandas/core/series.py | 7 +++++-- pandas/errors/__init__.py | 11 +++++++++++ pandas/tests/frame/methods/test_dot.py | 6 ++++-- pandas/tests/series/methods/test_matmul.py | 6 ++++-- 6 files changed, 33 insertions(+), 11 deletions(-) diff --git a/doc/source/reference/testing.rst b/doc/source/reference/testing.rst index fd2002d461c19..d2a0201e19a36 100644 --- a/doc/source/reference/testing.rst +++ b/doc/source/reference/testing.rst @@ -32,6 +32,7 @@ Exceptions and warnings errors.CSSWarning errors.DatabaseError errors.DataError + errors.DotMismatchShapeError errors.DtypeWarning errors.DuplicateLabelError errors.EmptyDataError diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 21b3a0c033702..21f8942d70eaf 100644 --- 
a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -92,7 +92,10 @@ function as nv, np_percentile_argname, ) -from pandas.errors import InvalidIndexError +from pandas.errors import ( + DotMismatchShapeError, + InvalidIndexError, +) from pandas.util._decorators import ( Appender, Substitution, @@ -1570,7 +1573,7 @@ def dot(self, other: AnyArrayLike | DataFrame) -> DataFrame | Series: lvals = self.values rvals = np.asarray(other) if lvals.shape[1] != rvals.shape[0]: - raise ValueError( + raise DotMismatchShapeError( f"Dot product shape mismatch, {lvals.shape} vs {rvals.shape}" ) @@ -1609,12 +1612,12 @@ def __rmatmul__(self, other) -> DataFrame: """ try: return self.T.dot(np.transpose(other)).T - except ValueError as err: - if "shape mismatch" not in str(err): - raise + except DotMismatchShapeError as err: # GH#21581 give exception message for original shapes msg = f"shapes {np.shape(other)} and {self.shape} not aligned" raise ValueError(msg) from err + except ValueError: + raise # ---------------------------------------------------------------------- # IO methods (to / from other formats) diff --git a/pandas/core/series.py b/pandas/core/series.py index 1bdf92e1dcf02..b824602c27bb4 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -62,7 +62,10 @@ npt, ) from pandas.compat.numpy import function as nv -from pandas.errors import InvalidIndexError +from pandas.errors import ( + DotMismatchShapeError, + InvalidIndexError, +) from pandas.util._decorators import ( Appender, Substitution, @@ -2881,7 +2884,7 @@ def dot(self, other: AnyArrayLike) -> Series | np.ndarray: lvals = self.values rvals = np.asarray(other) if lvals.shape[0] != rvals.shape[0]: - raise Exception( + raise DotMismatchShapeError( f"Dot product shape mismatch, {lvals.shape} vs {rvals.shape}" ) diff --git a/pandas/errors/__init__.py b/pandas/errors/__init__.py index f8eb05278eea5..775c1668f5542 100644 --- a/pandas/errors/__init__.py +++ b/pandas/errors/__init__.py @@ -588,6 +588,16 @@ class 
NoObjectConcatenateError(ValueError): """ +class DotMismatchShapeError(ValueError): + """ + Exception is raised by dot(). + + Instead of ValueError(f"Dot product shape mismatch, + {lvals.shape} vs {rvals.shape}") + . + """ + + __all__ = [ "AbstractMethodError", "AccessorRegistrationWarning", @@ -597,6 +607,7 @@ class NoObjectConcatenateError(ValueError): "CSSWarning", "DatabaseError", "DataError", + "DotMismatchShapeError", "DtypeWarning", "DuplicateLabelError", "EmptyDataError", diff --git a/pandas/tests/frame/methods/test_dot.py b/pandas/tests/frame/methods/test_dot.py index 555e5f0e26eaf..721200a54db1e 100644 --- a/pandas/tests/frame/methods/test_dot.py +++ b/pandas/tests/frame/methods/test_dot.py @@ -1,6 +1,8 @@ import numpy as np import pytest +from pandas.errors import DotMismatchShapeError + from pandas import ( DataFrame, Series, @@ -70,8 +72,8 @@ def test_dot_aligns(self, obj, other, expected): def test_dot_shape_mismatch(self, obj): msg = "Dot product shape mismatch" - # exception raised is of type Exception - with pytest.raises(Exception, match=msg): + # exception raised is of DotMismatchShapeError + with pytest.raises(DotMismatchShapeError, match=msg): obj.dot(obj.values[:3]) def test_dot_misaligned(self, obj, other): diff --git a/pandas/tests/series/methods/test_matmul.py b/pandas/tests/series/methods/test_matmul.py index b944395bff29f..21eb49de290df 100644 --- a/pandas/tests/series/methods/test_matmul.py +++ b/pandas/tests/series/methods/test_matmul.py @@ -3,6 +3,8 @@ import numpy as np import pytest +from pandas.errors import DotMismatchShapeError + from pandas import ( DataFrame, Series, @@ -70,8 +72,8 @@ def test_matmul(self): tm.assert_series_equal(result, expected) msg = r"Dot product shape mismatch, \(4,\) vs \(3,\)" - # exception raised is of type Exception - with pytest.raises(Exception, match=msg): + # exception raised is of DotMismatchShapeError + with pytest.raises(DotMismatchShapeError, match=msg): a.dot(a.values[:3]) msg = "matrices are 
not aligned" with pytest.raises(ValueError, match=msg): From c5d874f6c273dc7bfbb894c5ed3db3456bcf2d2e Mon Sep 17 00:00:00 2001 From: luke <2736230899@qq.com> Date: Sun, 25 Dec 2022 12:35:57 +0800 Subject: [PATCH 7/8] fix core/apply.py --- doc/source/reference/testing.rst | 1 + pandas/core/apply.py | 22 ++++++++++++---------- pandas/core/internals/construction.py | 3 ++- pandas/errors/__init__.py | 16 ++++++++++++---- pandas/tests/frame/test_constructors.py | 9 +++++++-- 5 files changed, 34 insertions(+), 17 deletions(-) diff --git a/doc/source/reference/testing.rst b/doc/source/reference/testing.rst index d2a0201e19a36..b3ac87f57bbfc 100644 --- a/doc/source/reference/testing.rst +++ b/doc/source/reference/testing.rst @@ -32,6 +32,7 @@ Exceptions and warnings errors.CSSWarning errors.DatabaseError errors.DataError + errors.DiffArrayLengthError errors.DotMismatchShapeError errors.DtypeWarning errors.DuplicateLabelError diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 722de91ba5246..8fd0b6de21248 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -33,7 +33,10 @@ NDFrameT, npt, ) -from pandas.errors import SpecificationError +from pandas.errors import ( + DiffArrayLengthError, + SpecificationError, +) from pandas.util._decorators import cache_readonly from pandas.core.dtypes.cast import is_nested_object @@ -864,15 +867,14 @@ def wrap_results_for_axis( try: result = self.obj._constructor(data=results) - except ValueError as err: - if "All arrays must be of the same length" in str(err): - # e.g. result = [[2, 3], [1.5], ['foo', 'bar']] - # see test_agg_listlike_result GH#29587 - res = self.obj._constructor_sliced(results) - res.index = res_index - return res - else: - raise + except DiffArrayLengthError: + # e.g. 
result = [[2, 3], [1.5], ['foo', 'bar']] + # see test_agg_listlike_result GH#29587 + res = self.obj._constructor_sliced(results) + res.index = res_index + return res + except ValueError: + raise if not isinstance(results[0], ABCSeries): if len(result.index) == len(self.res_columns): diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index 9bdfd7991689b..a9993971c5c04 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -21,6 +21,7 @@ Manager, npt, ) +from pandas.errors import DiffArrayLengthError from pandas.core.dtypes.cast import ( construct_1d_arraylike_from_scalar, @@ -623,7 +624,7 @@ def _extract_index(data) -> Index: if have_raw_arrays: lengths = list(set(raw_lengths)) if len(lengths) > 1: - raise ValueError("All arrays must be of the same length") + raise DiffArrayLengthError("All arrays must be of the same length") if have_dicts: raise ValueError( diff --git a/pandas/errors/__init__.py b/pandas/errors/__init__.py index 775c1668f5542..d1a48c4410f99 100644 --- a/pandas/errors/__init__.py +++ b/pandas/errors/__init__.py @@ -590,11 +590,18 @@ class NoObjectConcatenateError(ValueError): class DotMismatchShapeError(ValueError): """ - Exception is raised by dot(). + Exception is raised by dot(). - Instead of ValueError(f"Dot product shape mismatch, - {lvals.shape} vs {rvals.shape}") - . + Instead of ValueError(f"Dot product shape mismatch, + {lvals.shape} vs {rvals.shape}"). + """ + + +class DiffArrayLengthError(ValueError): + """ + Exception is raised by _extract_index(). + + Instead of ValueError("All arrays must be of the same length"). 
""" @@ -607,6 +614,7 @@ class DotMismatchShapeError(ValueError): "CSSWarning", "DatabaseError", "DataError", + "DiffArrayLengthError", "DotMismatchShapeError", "DtypeWarning", "DuplicateLabelError", diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 25f82eb7ff4b3..485a9b7b00914 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -18,7 +18,10 @@ import pytest import pytz -from pandas.errors import IntCastingNaNError +from pandas.errors import ( + DiffArrayLengthError, + IntCastingNaNError, +) import pandas.util._test_decorators as td from pandas.core.dtypes.common import is_integer_dtype @@ -1482,7 +1485,9 @@ class CustomDict(dict): def test_constructor_ragged(self): data = {"A": np.random.randn(10), "B": np.random.randn(8)} - with pytest.raises(ValueError, match="All arrays must be of the same length"): + with pytest.raises( + DiffArrayLengthError, match="All arrays must be of the same length" + ): DataFrame(data) def test_constructor_scalar(self): From 09c4ece51245859aa7d02616fa874240f36f4201 Mon Sep 17 00:00:00 2001 From: luke <2736230899@qq.com> Date: Sun, 25 Dec 2022 13:01:01 +0800 Subject: [PATCH 8/8] fix core/indexes/base.py --- doc/source/reference/testing.rst | 2 ++ pandas/core/construction.py | 10 ++++++++-- pandas/core/indexes/base.py | 12 +++++++----- pandas/errors/__init__.py | 18 ++++++++++++++++++ pandas/tests/indexing/test_indexing.py | 14 +++++++++++--- pandas/tests/series/test_constructors.py | 7 +++++-- 6 files changed, 51 insertions(+), 12 deletions(-) diff --git a/doc/source/reference/testing.rst b/doc/source/reference/testing.rst index b3ac87f57bbfc..be996540cf0ec 100644 --- a/doc/source/reference/testing.rst +++ b/doc/source/reference/testing.rst @@ -32,6 +32,7 @@ Exceptions and warnings errors.CSSWarning errors.DatabaseError errors.DataError + errors.DataOneDimensionalError errors.DiffArrayLengthError errors.DotMismatchShapeError 
errors.DtypeWarning @@ -39,6 +40,7 @@ Exceptions and warnings errors.EmptyDataError errors.IncompatibilityWarning errors.IndexingError + errors.IndexSpecifiedError errors.InvalidColumnName errors.InvalidComparison errors.InvalidIndexError diff --git a/pandas/core/construction.py b/pandas/core/construction.py index 5a80fdb6d9e0e..70cee7bf8b069 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -27,6 +27,10 @@ DtypeObj, T, ) +from pandas.errors import ( + DataOneDimensionalError, + IndexSpecifiedError, +) from pandas.core.dtypes.base import ( ExtensionDtype, @@ -540,7 +544,9 @@ def sanitize_array( if not is_list_like(data): if index is None: - raise ValueError("index must be specified when data is not list-like") + raise IndexSpecifiedError( + "index must be specified when data is not list-like" + ) data = construct_1d_arraylike_from_scalar(data, len(index), dtype) return data @@ -666,7 +672,7 @@ def _sanitize_ndim( if isinstance(data, np.ndarray): if allow_2d: return result - raise ValueError("Data must be 1-dimensional") + raise DataOneDimensionalError("Data must be 1-dimensional") if is_object_dtype(dtype) and isinstance(dtype, ExtensionDtype): # i.e. 
PandasDtype("O") diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 1938dc6d5c7b4..63c4d34c0a54c 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -60,7 +60,9 @@ ) from pandas.compat.numpy import function as nv from pandas.errors import ( + DataOneDimensionalError, DuplicateLabelError, + IndexSpecifiedError, InvalidIndexError, ) from pandas.util._decorators import ( @@ -500,11 +502,11 @@ def __new__( try: arr = sanitize_array(data, None, dtype=dtype, copy=copy) - except ValueError as err: - if "index must be specified when data is not list-like" in str(err): - raise cls._raise_scalar_data_error(data) from err - if "Data must be 1-dimensional" in str(err): - raise ValueError("Index data must be 1-dimensional") from err + except IndexSpecifiedError as err: + raise cls._raise_scalar_data_error(data) from err + except DataOneDimensionalError as err: + raise ValueError("Index data must be 1-dimensional") from err + except ValueError: raise arr = ensure_wrapped_if_datetimelike(arr) diff --git a/pandas/errors/__init__.py b/pandas/errors/__init__.py index d1a48c4410f99..4d319deaf3dae 100644 --- a/pandas/errors/__init__.py +++ b/pandas/errors/__init__.py @@ -605,6 +605,22 @@ class DiffArrayLengthError(ValueError): """ +class IndexSpecifiedError(ValueError): + """ + Exception is raised by sanitize_array(). + + Instead of ValueError("index must be specified when data is not list-like"). + """ + + +class DataOneDimensionalError(ValueError): + """ + Exception is raised by _sanitize_ndim(). + + Instead of ValueError("Data must be 1-dimensional"). 
+ """ + + __all__ = [ "AbstractMethodError", "AccessorRegistrationWarning", @@ -614,12 +630,14 @@ class DiffArrayLengthError(ValueError): "CSSWarning", "DatabaseError", "DataError", + "DataOneDimensionalError", "DiffArrayLengthError", "DotMismatchShapeError", "DtypeWarning", "DuplicateLabelError", "EmptyDataError", "IncompatibilityWarning", + "IndexSpecifiedError", "IntCastingNaNError", "InvalidColumnName", "InvalidComparison", diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index 35da972dd1a81..673a77dd6a21c 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -8,7 +8,10 @@ import numpy as np import pytest -from pandas.errors import IndexingError +from pandas.errors import ( + DataOneDimensionalError, + IndexingError, +) from pandas.core.dtypes.common import ( is_float_dtype, @@ -110,7 +113,12 @@ def test_getitem_ndarray_3d( msg = "|".join(msgs) - potential_errors = (IndexError, ValueError, NotImplementedError) + potential_errors = ( + IndexError, + ValueError, + NotImplementedError, + DataOneDimensionalError, + ) with pytest.raises(potential_errors, match=msg): idxr[nd3] @@ -124,7 +132,7 @@ def test_setitem_ndarray_3d(self, index, frame_or_series, indexer_sli): err = ValueError msg = f"Cannot set values with ndim > {obj.ndim}" else: - err = ValueError + err = (ValueError, DataOneDimensionalError) msg = "|".join( [ r"Buffer has wrong number of dimensions \(expected 1, got 3\)", diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index a4e82838b61d3..a4030d33234e3 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -15,7 +15,10 @@ lib, ) from pandas.compat import is_numpy_dev -from pandas.errors import IntCastingNaNError +from pandas.errors import ( + DataOneDimensionalError, + IntCastingNaNError, +) import pandas.util._test_decorators as td from pandas.core.dtypes.common import 
( @@ -172,7 +175,7 @@ def test_constructor(self, datetime_series): assert not Series().index._is_all_dates # exception raised is of type ValueError GH35744 - with pytest.raises(ValueError, match="Data must be 1-dimensional"): + with pytest.raises(DataOneDimensionalError, match="Data must be 1-dimensional"): Series(np.random.randn(3, 3), index=np.arange(3)) mixed.name = "Series"