diff --git a/Makefile b/Makefile index 4a9a48992f92f..b915d8840cd8d 100644 --- a/Makefile +++ b/Makefile @@ -32,3 +32,9 @@ check: --included-file-extensions="py" \ --excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored \ pandas/ + + python3 scripts/validate_unwanted_patterns.py \ + --validation-type="private_import_across_module" \ + --included-file-extensions="py" \ + --excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored,doc/ + pandas/ diff --git a/ci/code_checks.sh b/ci/code_checks.sh index 875f1dbb83ce3..54aa830379c07 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -116,11 +116,19 @@ if [[ -z "$CHECK" || "$CHECK" == "lint" ]]; then fi RET=$(($RET + $?)) ; echo $MSG "DONE" - MSG='Check for use of private module attribute access' ; echo $MSG + MSG='Check for import of private attributes across modules' ; echo $MSG if [[ "$GITHUB_ACTIONS" == "true" ]]; then - $BASE_DIR/scripts/validate_unwanted_patterns.py --validation-type="private_function_across_module" --included-file-extensions="py" --excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored --format="##[error]{source_path}:{line_number}:{msg}" pandas/ + $BASE_DIR/scripts/validate_unwanted_patterns.py --validation-type="private_import_across_module" --included-file-extensions="py" --excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored --format="##[error]{source_path}:{line_number}:{msg}" pandas/ else - $BASE_DIR/scripts/validate_unwanted_patterns.py --validation-type="private_function_across_module" --included-file-extensions="py" --excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored pandas/ + $BASE_DIR/scripts/validate_unwanted_patterns.py --validation-type="private_import_across_module" --included-file-extensions="py" --excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored pandas/ + fi + RET=$(($RET + $?)) ; echo $MSG "DONE" + + MSG='Check for use of private functions across modules' ; echo $MSG + if [[ "$GITHUB_ACTIONS" == "true" ]]; then + $BASE_DIR/scripts/validate_unwanted_patterns.py --validation-type="private_function_across_module" --included-file-extensions="py" --excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored,doc/ --format="##[error]{source_path}:{line_number}:{msg}" pandas/ + else + $BASE_DIR/scripts/validate_unwanted_patterns.py --validation-type="private_function_across_module" --included-file-extensions="py" --excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored,doc/ pandas/ fi RET=$(($RET + $?)) ; echo $MSG "DONE" diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 6302b48cb1978..b013246e724de 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -54,7 +54,7 @@ from pandas.core import missing, nanops, ops from pandas.core.algorithms import checked_add_with_arr, unique1d, value_counts -from pandas.core.arrays._mixins import _T, NDArrayBackedExtensionArray +from pandas.core.arrays._mixins import NDArrayBackedExtensionArray from pandas.core.arrays.base import ExtensionOpsMixin import pandas.core.common as com from pandas.core.construction import array, extract_array @@ -472,11 +472,11 @@ class DatetimeLikeArrayMixin( def _ndarray(self) -> np.ndarray: return self._data - def _from_backing_data(self: _T, arr: np.ndarray) -> _T: + def _from_backing_data( + self: DatetimeLikeArrayT, arr: np.ndarray + ) -> DatetimeLikeArrayT: # Note: we do not retain `freq` - return type(self)._simple_new( # type: ignore[attr-defined] - arr, dtype=self.dtype - ) + return type(self)._simple_new(arr, dtype=self.dtype) # ------------------------------------------------------------------ diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index d83ff91a1315f..dc08e018397bc 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -106,7 +106,7 @@ def _get_common_dtype(self, dtypes: List[DtypeObj]) -> Optional[DtypeObj]: [t.numpy_dtype if isinstance(t, BaseMaskedDtype) else t for t in dtypes], [] ) if np.issubdtype(np_dtype, np.integer): - return _dtypes[str(np_dtype)] + return STR_TO_DTYPE[str(np_dtype)] return None def __from_arrow__( @@ -214,7 +214,7 @@ def coerce_to_array( if not issubclass(type(dtype), _IntegerDtype): try: - dtype = _dtypes[str(np.dtype(dtype))] + dtype = STR_TO_DTYPE[str(np.dtype(dtype))] except KeyError as err: raise ValueError(f"invalid dtype specified {dtype}") from err @@ -354,7 +354,7 @@ class IntegerArray(BaseMaskedArray): @cache_readonly def dtype(self) -> _IntegerDtype: - return _dtypes[str(self._data.dtype)] + return STR_TO_DTYPE[str(self._data.dtype)] def __init__(self, values: np.ndarray, mask: np.ndarray, copy: bool = False): if not (isinstance(values, np.ndarray) and values.dtype.kind in ["i", "u"]): @@ -735,7 +735,7 @@ class UInt64Dtype(_IntegerDtype): __doc__ = _dtype_docstring.format(dtype="uint64") -_dtypes: Dict[str, _IntegerDtype] = { +STR_TO_DTYPE: Dict[str, _IntegerDtype] = { "int8": Int8Dtype(), "int16": Int16Dtype(), "int32": Int32Dtype(), diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index ba1b0b075936d..64ccc0be0a25d 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1151,9 +1151,11 @@ def convert_dtypes( target_int_dtype = "Int64" if is_integer_dtype(input_array.dtype): - from pandas.core.arrays.integer import _dtypes + from pandas.core.arrays.integer import STR_TO_DTYPE - inferred_dtype = _dtypes.get(input_array.dtype.name, target_int_dtype) + inferred_dtype = STR_TO_DTYPE.get( + input_array.dtype.name, target_int_dtype + ) if not is_integer_dtype(input_array.dtype) and is_numeric_dtype( input_array.dtype ): diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 1e3e56f4ff09f..8a55d438cf8d4 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -459,7 +459,7 @@ def f(self): @contextmanager -def group_selection_context(groupby: "_GroupBy"): +def group_selection_context(groupby: "BaseGroupBy"): """ Set / reset the group_selection_context. """ @@ -479,7 +479,7 @@ def group_selection_context(groupby: "_GroupBy"): ] -class _GroupBy(PandasObject, SelectionMixin, Generic[FrameOrSeries]): +class BaseGroupBy(PandasObject, SelectionMixin, Generic[FrameOrSeries]): _group_selection = None _apply_allowlist: FrozenSet[str] = frozenset() @@ -1212,7 +1212,7 @@ def _apply_filter(self, indices, dropna): OutputFrameOrSeries = TypeVar("OutputFrameOrSeries", bound=NDFrame) -class GroupBy(_GroupBy[FrameOrSeries]): +class GroupBy(BaseGroupBy[FrameOrSeries]): """ Class for grouping and aggregating relational data. diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index f0b80c2852bd5..f269495f6011a 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -312,9 +312,9 @@ def _is_dates_only(self) -> bool: ------- bool """ - from pandas.io.formats.format import _is_dates_only + from pandas.io.formats.format import is_dates_only - return self.tz is None and _is_dates_only(self._values) + return self.tz is None and is_dates_only(self._values) def __reduce__(self): diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 7b5154756e613..44848e4d43909 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -26,7 +26,12 @@ from pandas.core.generic import NDFrame, _shared_docs from pandas.core.groupby.base import GroupByMixin from pandas.core.groupby.generic import SeriesGroupBy -from pandas.core.groupby.groupby import GroupBy, _GroupBy, _pipe_template, get_groupby +from pandas.core.groupby.groupby import ( + BaseGroupBy, + GroupBy, + _pipe_template, + get_groupby, +) from pandas.core.groupby.grouper import Grouper from pandas.core.groupby.ops import BinGrouper from pandas.core.indexes.api import Index @@ -40,7 +45,7 @@ _shared_docs_kwargs: Dict[str, str] = dict() -class Resampler(_GroupBy, ShallowMixin): +class Resampler(BaseGroupBy, ShallowMixin): """ Class for resampling datetimelike data, a groupby-like operation. See aggregate, transform, and apply functions on this object. diff --git a/pandas/core/window/ewm.py b/pandas/core/window/ewm.py index 2bd36d8bff155..4282cb41c4e91 100644 --- a/pandas/core/window/ewm.py +++ b/pandas/core/window/ewm.py @@ -15,7 +15,7 @@ import pandas.core.common as common from pandas.core.window.common import _doc_template, _shared_docs, zsqrt -from pandas.core.window.rolling import _Rolling, flex_binary_moment +from pandas.core.window.rolling import RollingMixin, flex_binary_moment _bias_template = """ Parameters @@ -60,7 +60,7 @@ def get_center_of_mass( return float(comass) -class ExponentialMovingWindow(_Rolling): +class ExponentialMovingWindow(RollingMixin): r""" Provide exponential weighted (EW) functions. diff --git a/pandas/core/window/expanding.py b/pandas/core/window/expanding.py index ce4ab2f98c23d..46e002324ec75 100644 --- a/pandas/core/window/expanding.py +++ b/pandas/core/window/expanding.py @@ -5,10 +5,10 @@ from pandas.util._decorators import Appender, Substitution, doc from pandas.core.window.common import WindowGroupByMixin, _doc_template, _shared_docs -from pandas.core.window.rolling import _Rolling_and_Expanding +from pandas.core.window.rolling import RollingAndExpandingMixin -class Expanding(_Rolling_and_Expanding): +class Expanding(RollingAndExpandingMixin): """ Provide expanding transformations. diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 5a7482076903c..648ab4d25be83 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -1214,13 +1214,13 @@ def std(self, ddof=1, *args, **kwargs): return zsqrt(self.var(ddof=ddof, name="std", **kwargs)) -class _Rolling(_Window): +class RollingMixin(_Window): @property def _constructor(self): return Rolling -class _Rolling_and_Expanding(_Rolling): +class RollingAndExpandingMixin(RollingMixin): _shared_docs["count"] = dedent( r""" @@ -1917,7 +1917,7 @@ def _get_corr(a, b): ) -class Rolling(_Rolling_and_Expanding): +class Rolling(RollingAndExpandingMixin): @cache_readonly def is_datetimelike(self) -> bool: return isinstance( diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 6781d98ded41d..77f2a53fc7fab 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -1586,7 +1586,7 @@ def format_percentiles( return [i + "%" for i in out] -def _is_dates_only( +def is_dates_only( values: Union[np.ndarray, DatetimeArray, Index, DatetimeIndex] ) -> bool: # return a boolean if we are only dates (and don't have a timezone) @@ -1658,8 +1658,8 @@ def get_format_datetime64_from_values( # only accepts 1D values values = values.ravel() - is_dates_only = _is_dates_only(values) - if is_dates_only: + ido = is_dates_only(values) + if ido: return date_format or "%Y-%m-%d" return date_format @@ -1668,9 +1668,9 @@ class Datetime64TZFormatter(Datetime64Formatter): def _format_strings(self) -> List[str]: """ we by definition have a TZ """ values = self.values.astype(object) - is_dates_only = _is_dates_only(values) + ido = is_dates_only(values) formatter = self.formatter or get_format_datetime64( - is_dates_only, date_format=self.date_format + ido, date_format=self.date_format ) fmt_values = [formatter(x) for x in values] diff --git a/scripts/validate_unwanted_patterns.py b/scripts/validate_unwanted_patterns.py index 2add2b8c62a4e..4a0e859535215 100755 --- a/scripts/validate_unwanted_patterns.py +++ b/scripts/validate_unwanted_patterns.py @@ -18,6 +18,39 @@ import tokenize from typing import IO, Callable, FrozenSet, Iterable, List, Set, Tuple +PRIVATE_IMPORTS_TO_IGNORE: Set[str] = { + "_extension_array_shared_docs", + "_index_shared_docs", + "_interval_shared_docs", + "_merge_doc", + "_shared_docs", + "_apply_docs", + "_new_Index", + "_new_PeriodIndex", + "_doc_template", + "_agg_template", + "_pipe_template", + "_get_version", + "__main__", + "_transform_template", + "_arith_doc_FRAME", + "_flex_comp_doc_FRAME", + "_make_flex_doc", + "_op_descriptions", + "_IntegerDtype", + "_use_inf_as_na", + "_get_plot_backend", + "_matplotlib", + "_arrow_utils", + "_registry", + "_get_offset", # TODO: remove after get_offset deprecation enforced + "_test_parse_iso8601", + "_json_normalize", # TODO: remove after deprecation is enforced + "_testing", + "_test_decorators", + "__version__", # check np.__version__ in compat.numpy.function +} + def _get_literal_string_prefix_len(token_string: str) -> int: """ @@ -164,6 +197,36 @@ def private_function_across_module(file_obj: IO[str]) -> Iterable[Tuple[int, str yield (node.lineno, f"Private function '{module_name}.{function_name}'") +def private_import_across_module(file_obj: IO[str]) -> Iterable[Tuple[int, str]]: + """ + Checking that a private function is not imported across modules. + Parameters + ---------- + file_obj : IO + File-like object containing the Python code to validate. + Yields + ------ + line_number : int + Line number of import statement, that imports the private function. + msg : str + Explenation of the error. + """ + contents = file_obj.read() + tree = ast.parse(contents) + + for node in ast.walk(tree): + if not (isinstance(node, ast.Import) or isinstance(node, ast.ImportFrom)): + continue + + for module in node.names: + module_name = module.name.split(".")[-1] + if module_name in PRIVATE_IMPORTS_TO_IGNORE: + continue + + if module_name.startswith("_"): + yield (node.lineno, f"Import of internal function {repr(module_name)}") + + def strings_to_concatenate(file_obj: IO[str]) -> Iterable[Tuple[int, str]]: """ This test case is necessary after 'Black' (https://github.com/psf/black), @@ -419,6 +482,7 @@ def main( available_validation_types: List[str] = [ "bare_pytest_raises", "private_function_across_module", + "private_import_across_module", "strings_to_concatenate", "strings_with_wrong_placed_whitespace", ] @@ -449,7 +513,7 @@ def main( parser.add_argument( "--excluded-file-paths", default="asv_bench/env", - help="Comma separated file extensions to check.", + help="Comma separated file paths to exclude.", ) args = parser.parse_args()