From 22d017e7ac287b612f7d4f45bb1357bec11f8c36 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 3 Oct 2022 09:10:49 -0700 Subject: [PATCH 1/7] start script to check exceptions only raise --- .pre-commit-config.yaml | 6 ++++ pandas/core/indexes/base.py | 13 ++++++-- scripts/no_return_exception.py | 59 ++++++++++++++++++++++++++++++++++ 3 files changed, 76 insertions(+), 2 deletions(-) create mode 100644 scripts/no_return_exception.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 039b3c4e4d86c..8bbdf87bf850d 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -226,6 +226,12 @@ repos: entry: python scripts/no_bool_in_generic.py language: python files: ^pandas/core/generic\.py$ + - id: no-return-exception + name: Raise Exceptions instead of Return + entry: python scripts/no_return_exception.py + language: python + files: ^pandas/ + types: [python] - id: pandas-errors-documented name: Ensure pandas errors are documented in doc/source/reference/testing.rst entry: python scripts/pandas_errors_documented.py diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 47e3314c6d9c3..014a52ccfd86e 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4386,14 +4386,23 @@ def is_int(v): return indexer @final - def _invalid_indexer(self, form: str_t, key) -> TypeError: + def _invalid_indexer( + self, + form: str_t, + key, + reraise: lib.no_default | None | Exception = lib.no_default, + ) -> None: """ Consistent invalid indexer message. """ - return TypeError( + msg = ( f"cannot do {form} indexing on {type(self).__name__} with these " f"indexers [{key}] of type {type(key).__name__}" ) + if reraise is not lib.no_default: + raise TypeError(msg) from reraise + else: + raise TypeError(msg) # -------------------------------------------------------------------- # Reindex Methods diff --git a/scripts/no_return_exception.py b/scripts/no_return_exception.py new file mode 100644 index 0000000000000..40eec5dd7dee4 --- /dev/null +++ b/scripts/no_return_exception.py @@ -0,0 +1,59 @@ +""" +Check that functions return isn't use for Exception or its subclasses. + +This ensures public methods don't accidentally return exceptions instead of raising. + +This is meant to be run as a pre-commit hook - to run it manually, you can do: + + pre-commit run no-return-exception --all-files + +""" +from __future__ import annotations + +import argparse +import ast +import sys +from typing import Sequence + + +ERROR_MESSAGE = ( + "{path}:{lineno}:{col_offset}: " + "Don't return an Exception subclass, raise it instead\n" +) + + +class Visitor(ast.NodeVisitor): + def __init__(self, path: str) -> None: + self.path = path + + def visit_FunctionDef(self, node) -> None: + returns = node.returns + name = getattr(returns, "id", None) + exception_ends = ("Exit", "Interrupt", "Exception", "Error", "Iteration") + if name is not None and any(name.endswith(end) for end in exception_ends): + msg = ERROR_MESSAGE.format( + path=self.path, lineno=node.lineno, col_offset=node.col_offset + ) + sys.stdout.write(msg) + sys.exit(1) + + +def no_return_exception(content: str, path: str) -> None: + tree = ast.parse(content) + visitor = Visitor(path) + visitor.visit(tree) + + +def main(argv: Sequence[str] | None = None) -> None: + parser = argparse.ArgumentParser() + parser.add_argument("paths", nargs="*") + args = parser.parse_args(argv) + + for path in args.paths: + with open(path, encoding="utf-8") as fd: + content = fd.read() + no_return_exception(content, path) + + +if __name__ == "__main__": + main() From af8699932f18113841d6bc92268ce194a09e1b88 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 3 Oct 2022 09:11:07 -0700 Subject: [PATCH 2/7] start script to check exceptions only raise --- scripts/no_return_exception.py | 1 - 1 file changed, 1 deletion(-) diff --git a/scripts/no_return_exception.py b/scripts/no_return_exception.py index 40eec5dd7dee4..b9bf61fba5404 100644 --- a/scripts/no_return_exception.py +++ b/scripts/no_return_exception.py @@ -15,7 +15,6 @@ import sys from typing import Sequence - ERROR_MESSAGE = ( "{path}:{lineno}:{col_offset}: " "Don't return an Exception subclass, raise it instead\n" From fe5a0acec34c16728f279383cc043cce0250c5d0 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 4 Oct 2022 16:16:00 -0700 Subject: [PATCH 3/7] Refactor function to raise --- pandas/core/indexes/base.py | 8 ++++---- pandas/core/indexes/datetimelike.py | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 014a52ccfd86e..333d5a0c3246f 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4386,14 +4386,14 @@ def is_int(v): return indexer @final - def _invalid_indexer( + def _raise_invalid_indexer( self, form: str_t, key, reraise: lib.no_default | None | Exception = lib.no_default, ) -> None: """ - Consistent invalid indexer message. + Raise consistent invalid indexer message. """ msg = ( f"cannot do {form} indexing on {type(self).__name__} with these " @@ -6691,7 +6691,7 @@ def _validate_indexer(self, form: str_t, key, kind: str_t): assert kind in ["getitem", "iloc"] if key is not None and not is_integer(key): - raise self._invalid_indexer(form, key) + self._raise_invalid_indexer(form, key) def _maybe_cast_slice_bound(self, label, side: str_t, kind=no_default): """ @@ -6723,7 +6723,7 @@ def _maybe_cast_slice_bound(self, label, side: str_t, kind=no_default): # datetimelike Indexes # reject them, if index does not contain label if (is_float(label) or is_integer(label)) and label not in self: - raise self._invalid_indexer("slice", label) + self._raise_invalid_indexer("slice", label) return label diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 147d1649e5e1b..e3d766709f52f 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -314,12 +314,12 @@ def _maybe_cast_slice_bound(self, label, side: str, kind=lib.no_default): # DTI -> parsing.DateParseError # TDI -> 'unit abbreviation w/o a number' # PI -> string cannot be parsed as datetime-like - raise self._invalid_indexer("slice", label) from err + self._raise_invalid_indexer("slice", label, err) lower, upper = self._parsed_string_to_bounds(reso, parsed) return lower if side == "left" else upper elif not isinstance(label, self._data._recognized_scalars): - raise self._invalid_indexer("slice", label) + self._raise_invalid_indexer("slice", label) return label From 1f7bb4342c3038c493aa811b507edc6dc8f0c197 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 4 Oct 2022 17:07:38 -0700 Subject: [PATCH 4/7] Use pygrep instead of script --- .pre-commit-config.yaml | 7 ++-- pandas/core/indexes/base.py | 8 ++--- pandas/core/indexes/category.py | 2 +- pandas/core/indexes/datetimes.py | 2 +- pandas/core/indexes/numeric.py | 2 +- pandas/core/indexes/period.py | 2 +- pandas/core/indexes/timedeltas.py | 2 +- pandas/core/internals/managers.py | 14 ++++---- pandas/io/pytables.py | 17 +++++---- scripts/no_return_exception.py | 58 ------------------------------- 10 files changed, 28 insertions(+), 86 deletions(-) delete mode 100644 scripts/no_return_exception.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 157f753a1c83b..8d1d45a1f0909 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -227,11 +227,12 @@ repos: language: python files: ^pandas/core/generic\.py$ - id: no-return-exception - name: Raise Exceptions instead of Return - entry: python scripts/no_return_exception.py - language: python + name: Use raise instead of return for exceptions + language: pygrep + entry: 'return [A-Za-z]+(Error|Exit|Interrupt|Exception|Iteration)' files: ^pandas/ types: [python] + exclude: ^pandas/tests/ - id: pandas-errors-documented name: Ensure pandas errors are documented in doc/source/reference/testing.rst entry: python scripts/pandas_errors_documented.py diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 333d5a0c3246f..583d8a1fd37e9 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -554,7 +554,7 @@ def __new__( return klass._simple_new(arr, name) elif is_scalar(data): - raise cls._scalar_data_error(data) + cls._raise_scalar_data_error(data) elif hasattr(data, "__array__"): return Index(np.asarray(data), dtype=dtype, copy=copy, name=name, **kwargs) else: @@ -5288,10 +5288,10 @@ def where(self, cond, other=None) -> Index: # construction helpers @final @classmethod - def _scalar_data_error(cls, data): + def _raise_scalar_data_error(cls, data): # We return the TypeError so that we can raise it from the constructor # in order to keep mypy happy - return TypeError( + raise TypeError( f"{cls.__name__}(...) must be called with a collection of some " f"kind, {repr(data)} was passed" ) @@ -6683,7 +6683,7 @@ def _maybe_cast_listlike_indexer(self, target) -> Index: return ensure_index(target) @final - def _validate_indexer(self, form: str_t, key, kind: str_t): + def _validate_indexer(self, form: str_t, key, kind: str_t) -> None: """ If we are positional indexer, validate that we have appropriate typed bounds must be an integer. diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index d1bdedee5caa0..92c9cc1500962 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -230,7 +230,7 @@ def __new__( data = [] if is_scalar(data): - raise cls._scalar_data_error(data) + cls._raise_scalar_data_error(data) data = Categorical( data, categories=categories, ordered=ordered, dtype=dtype, copy=copy diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 99571de85561e..c0e3507dbab0b 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -334,7 +334,7 @@ def __new__( ) -> DatetimeIndex: if is_scalar(data): - raise cls._scalar_data_error(data) + cls._raise_scalar_data_error(data) # - Cases checked above all return/raise before reaching here - # diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index 946a6b818d4d4..186e45e6f4932 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -140,7 +140,7 @@ def _ensure_array(cls, data, dtype, copy: bool): if not isinstance(data, (np.ndarray, Index)): # Coerce to ndarray if not already ndarray or Index if is_scalar(data): - raise cls._scalar_data_error(data) + cls._raise_scalar_data_error(data) # other iterable of some kind if not isinstance(data, (ABCSeries, list, tuple)): diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 5fb98b3bcd234..41a9fc8fa8e21 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -240,7 +240,7 @@ def __new__( # range-based. if not fields: # test_pickle_compat_construction - raise cls._scalar_data_error(None) + cls._raise_scalar_data_error(None) data, freq2 = PeriodArray._generate_range(None, None, None, freq, fields) # PeriodArray._generate range does validation that fields is diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index f41e82897c208..eba7d79132e0b 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -128,7 +128,7 @@ def __new__( name = maybe_extract_name(name, data, cls) if is_scalar(data): - raise cls._scalar_data_error(data) + cls._raise_scalar_data_error(data) if unit in {"Y", "y", "M"}: raise ValueError( diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 5a398533e1510..f3b7af0ea819d 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1038,7 +1038,7 @@ def _verify_integrity(self) -> None: tot_items = sum(len(x.mgr_locs) for x in self.blocks) for block in self.blocks: if block.shape[1:] != mgr_shape[1:]: - raise construction_error(tot_items, block.shape[1:], self.axes) + raise_construction_error(tot_items, block.shape[1:], self.axes) if len(self.items) != tot_items: raise AssertionError( "Number of manager items must equal union of " @@ -2145,7 +2145,7 @@ def create_block_manager_from_blocks( except ValueError as err: arrays = [blk.values for blk in blocks] tot_items = sum(arr.shape[0] for arr in arrays) - raise construction_error(tot_items, arrays[0].shape[1:], axes, err) + raise_construction_error(tot_items, arrays[0].shape[1:], axes, err) if consolidate: mgr._consolidate_inplace() @@ -2172,13 +2172,13 @@ def create_block_manager_from_column_arrays( blocks = _form_blocks(arrays, consolidate) mgr = BlockManager(blocks, axes, verify_integrity=False) except ValueError as e: - raise construction_error(len(arrays), arrays[0].shape, axes, e) + raise_construction_error(len(arrays), arrays[0].shape, axes, e) if consolidate: mgr._consolidate_inplace() return mgr -def construction_error( +def raise_construction_error( tot_items: int, block_shape: Shape, axes: list[Index], @@ -2198,10 +2198,10 @@ def construction_error( # We return the exception object instead of raising it so that we # can raise it in the caller; mypy plays better with that if passed == implied and e is not None: - return e + raise e if block_shape[0] == 0: - return ValueError("Empty data passed with indices specified.") - return ValueError(f"Shape of passed values is {passed}, indices imply {implied}") + raise ValueError("Empty data passed with indices specified.") + raise ValueError(f"Shape of passed values is {passed}, indices imply {implied}") # ----------------------------------------------------------------------- diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 6836420483a24..d93590e750c85 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -1658,13 +1658,6 @@ def _create_storer( if value is not None and not isinstance(value, (Series, DataFrame)): raise TypeError("value must be None, Series, or DataFrame") - def error(t): - # return instead of raising so mypy can tell where we are raising - return TypeError( - f"cannot properly create the storer for: [{t}] [group->" - f"{group},value->{type(value)},format->{format}" - ) - pt = _ensure_decoded(getattr(group._v_attrs, "pandas_type", None)) tt = _ensure_decoded(getattr(group._v_attrs, "table_type", None)) @@ -1699,7 +1692,10 @@ def error(t): try: cls = _STORER_MAP[pt] except KeyError as err: - raise error("_STORER_MAP") from err + raise TypeError( + f"cannot properly create the storer for: [_STORER_MAP] [group->" + f"{group},value->{type(value)},format->{format}" + ) from err return cls(self, group, encoding=encoding, errors=errors) # existing node (and must be a table) @@ -1732,7 +1728,10 @@ def error(t): try: cls = _TABLE_MAP[tt] except KeyError as err: - raise error("_TABLE_MAP") from err + raise TypeError( + f"cannot properly create the storer for: [_TABLE_MAP] [group->" + f"{group},value->{type(value)},format->{format}" + ) from err return cls(self, group, encoding=encoding, errors=errors) diff --git a/scripts/no_return_exception.py b/scripts/no_return_exception.py deleted file mode 100644 index b9bf61fba5404..0000000000000 --- a/scripts/no_return_exception.py +++ /dev/null @@ -1,58 +0,0 @@ -""" -Check that functions return isn't use for Exception or its subclasses. - -This ensures public methods don't accidentally return exceptions instead of raising. - -This is meant to be run as a pre-commit hook - to run it manually, you can do: - - pre-commit run no-return-exception --all-files - -""" -from __future__ import annotations - -import argparse -import ast -import sys -from typing import Sequence - -ERROR_MESSAGE = ( - "{path}:{lineno}:{col_offset}: " - "Don't return an Exception subclass, raise it instead\n" -) - - -class Visitor(ast.NodeVisitor): - def __init__(self, path: str) -> None: - self.path = path - - def visit_FunctionDef(self, node) -> None: - returns = node.returns - name = getattr(returns, "id", None) - exception_ends = ("Exit", "Interrupt", "Exception", "Error", "Iteration") - if name is not None and any(name.endswith(end) for end in exception_ends): - msg = ERROR_MESSAGE.format( - path=self.path, lineno=node.lineno, col_offset=node.col_offset - ) - sys.stdout.write(msg) - sys.exit(1) - - -def no_return_exception(content: str, path: str) -> None: - tree = ast.parse(content) - visitor = Visitor(path) - visitor.visit(tree) - - -def main(argv: Sequence[str] | None = None) -> None: - parser = argparse.ArgumentParser() - parser.add_argument("paths", nargs="*") - args = parser.parse_args(argv) - - for path in args.paths: - with open(path, encoding="utf-8") as fd: - content = fd.read() - no_return_exception(content, path) - - -if __name__ == "__main__": - main() From 53e0f72e12e328c3025412fbdb4ec9359728a331 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 5 Oct 2022 09:59:12 -0700 Subject: [PATCH 5/7] Fix typing --- pandas/core/indexes/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 583d8a1fd37e9..0a0ede0242e81 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4390,7 +4390,7 @@ def _raise_invalid_indexer( self, form: str_t, key, - reraise: lib.no_default | None | Exception = lib.no_default, + reraise: lib.NoDefault | None | Exception = lib.no_default, ) -> None: """ Raise consistent invalid indexer message. From 3e2c54b77de0b8e795b7808f32c4a9dd6f688659 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 5 Oct 2022 11:40:30 -0700 Subject: [PATCH 6/7] Rase --- pandas/core/indexes/base.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 0a0ede0242e81..eed1d648865bb 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -554,7 +554,7 @@ def __new__( return klass._simple_new(arr, name) elif is_scalar(data): - cls._raise_scalar_data_error(data) + raise cls._raise_scalar_data_error(data) elif hasattr(data, "__array__"): return Index(np.asarray(data), dtype=dtype, copy=copy, name=name, **kwargs) else: @@ -5288,7 +5288,7 @@ def where(self, cond, other=None) -> Index: # construction helpers @final @classmethod - def _raise_scalar_data_error(cls, data): + def _raise_scalar_data_error(cls, data) -> None: # We return the TypeError so that we can raise it from the constructor # in order to keep mypy happy raise TypeError( From 0efdede0902e2bf0d91019419811c27b095e7edb Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 5 Oct 2022 15:07:06 -0700 Subject: [PATCH 7/7] Fix typing --- pandas/core/indexes/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index eed1d648865bb..5bbdca9b71ee3 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -5288,7 +5288,7 @@ def where(self, cond, other=None) -> Index: # construction helpers @final @classmethod - def _raise_scalar_data_error(cls, data) -> None: + def _raise_scalar_data_error(cls, data): # We return the TypeError so that we can raise it from the constructor # in order to keep mypy happy raise TypeError(