From c7dfaf5ada31dc5ad646ac8e683a220e133c4bda Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 14 Sep 2021 19:02:34 -0700 Subject: [PATCH 1/3] CLN: typos --- asv_bench/benchmarks/groupby.py | 2 +- pandas/_libs/internals.pyi | 2 +- pandas/_libs/reduction.pyx | 13 ------------- pandas/_libs/tslibs/strptime.pyi | 2 +- pandas/core/groupby/grouper.py | 2 +- pandas/core/indexes/base.py | 8 +++++--- pandas/core/internals/array_manager.py | 2 +- pandas/core/internals/blocks.py | 1 + pandas/core/internals/managers.py | 2 +- 9 files changed, 12 insertions(+), 22 deletions(-) diff --git a/asv_bench/benchmarks/groupby.py b/asv_bench/benchmarks/groupby.py index a4ae35ef34a9c..cf1ce09c74e27 100644 --- a/asv_bench/benchmarks/groupby.py +++ b/asv_bench/benchmarks/groupby.py @@ -480,7 +480,7 @@ def setup(self, dtype, method, application, ncols): if len(cols) == 1: cols = cols[0] - if application == "transform": + if application == "transformation": if method == "describe": raise NotImplementedError diff --git a/pandas/_libs/internals.pyi b/pandas/_libs/internals.pyi index 6542b7a251644..da18084da92f9 100644 --- a/pandas/_libs/internals.pyi +++ b/pandas/_libs/internals.pyi @@ -51,7 +51,7 @@ class BlockPlacement: def __len__(self) -> int: ... def delete(self, loc) -> BlockPlacement: ... def append(self, others: list[BlockPlacement]) -> BlockPlacement: ... - def tile_for_unstack(self, factor: int) -> np.ndarray: ... + def tile_for_unstack(self, factor: int) -> npt.NDArray[np.intp]: ... class SharedBlock: _mgr_locs: BlockPlacement diff --git a/pandas/_libs/reduction.pyx b/pandas/_libs/reduction.pyx index f6c404c07c7e4..1331fc07386fb 100644 --- a/pandas/_libs/reduction.pyx +++ b/pandas/_libs/reduction.pyx @@ -1,24 +1,11 @@ - -from libc.stdlib cimport ( - free, - malloc, -) - import numpy as np cimport numpy as cnp -from numpy cimport ( - int64_t, - intp_t, - ndarray, -) cnp.import_array() from pandas._libs.util cimport is_array -from pandas._libs.lib import is_scalar - cdef cnp.dtype _dtype_obj = np.dtype("object") diff --git a/pandas/_libs/tslibs/strptime.pyi b/pandas/_libs/tslibs/strptime.pyi index cf7ae8508a45f..fd88bc6938294 100644 --- a/pandas/_libs/tslibs/strptime.pyi +++ b/pandas/_libs/tslibs/strptime.pyi @@ -9,4 +9,4 @@ def array_strptime( errors: str = "raise", ) -> tuple[np.ndarray, np.ndarray]: ... -# first ndarray is M8[ns], second is object ndarray of tzinfo | None +# first ndarray is M8[ns], second is object ndarray of tzinfo | None diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index c79dadcadc8cd..d02df6a65d359 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -617,7 +617,7 @@ def codes(self) -> np.ndarray: def group_arraylike(self) -> ArrayLike: """ Analogous to result_index, but holding an ArrayLike to ensure - we can can retain ExtensionDtypes. + we can retain ExtensionDtypes. """ if self._group_index is not None: # _group_index is set in __init__ for MultiIndex cases diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index a1c411031a465..6887b919cc7d6 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -3060,7 +3060,7 @@ def _union(self, other: Index, sort): try: return self._outer_indexer(other)[0] except (TypeError, IncompatibleFrequency): - # incomparable objects + # incomparable objects; should only be for object dtype value_list = list(lvals) # worth making this faster? a very unusual case @@ -3074,7 +3074,7 @@ def _union(self, other: Index, sort): result = algos.union_with_duplicates(lvals, rvals) return _maybe_try_sort(result, sort) - # Self may have duplicates + # Self may have duplicates; other already checked as unique # find indexes of things in "other" that are not in "self" if self._index_as_unique: indexer = self.get_indexer(other) @@ -3089,6 +3089,7 @@ def _union(self, other: Index, sort): result = lvals if not self.is_monotonic or not other.is_monotonic: + # if both are monotonic then result should already be sorted result = _maybe_try_sort(result, sort) return result @@ -3194,6 +3195,7 @@ def _intersection(self, other: Index, sort=False): try: result = self._inner_indexer(other)[0] except TypeError: + # non-comparable; should only be for object dtype pass else: # TODO: algos.unique1d should preserve DTA/TDA @@ -4485,7 +4487,7 @@ def _get_leaf_sorter(labels: list[np.ndarray]) -> npt.NDArray[np.intp]: def _join_monotonic( self, other: Index, how: str_t = "left" ) -> tuple[Index, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]: - # We only get here with matching dtypes + # We only get here with matching dtypes and both monotonic increasing assert other.dtype == self.dtype if self.equals(other): diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py index 86087dc321bac..583a22d09b110 100644 --- a/pandas/core/internals/array_manager.py +++ b/pandas/core/internals/array_manager.py @@ -1035,7 +1035,7 @@ def quantile( def unstack(self, unstacker, fill_value) -> ArrayManager: """ - Return a BlockManager with all blocks unstacked.. + Return a BlockManager with all blocks unstacked. Parameters ---------- diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index da0f8d2549a8b..bcb4dd284465b 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1291,6 +1291,7 @@ def _unstack(self, unstacker, fill_value, new_placement, allow_fill: bool): unstacker : reshape._Unstacker fill_value : int Only used in ExtensionBlock._unstack + new_placement : np.ndarray[np.intp] allow_fill : bool Returns diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 386a4ef12e6b5..bb4d1f96e1405 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1376,7 +1376,7 @@ def quantile( def unstack(self, unstacker, fill_value) -> BlockManager: """ - Return a BlockManager with all blocks unstacked.. + Return a BlockManager with all blocks unstacked. Parameters ---------- From 24ac00fc84365d1d66b93c8adb3a63cc8543cd6d Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 15 Sep 2021 10:27:10 -0700 Subject: [PATCH 2/3] asv fixup --- asv_bench/benchmarks/groupby.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/asv_bench/benchmarks/groupby.py b/asv_bench/benchmarks/groupby.py index cf1ce09c74e27..d1182fafa423a 100644 --- a/asv_bench/benchmarks/groupby.py +++ b/asv_bench/benchmarks/groupby.py @@ -454,6 +454,14 @@ def setup(self, dtype, method, application, ncols): # DataFrameGroupBy doesn't have these methods raise NotImplementedError + if ( + ncols != 1 + and application == "transformation" + and method in ["head", "tail", "unique", "value_counts", "size"] + ): + # DataFrameGroupBy doesn't have these methods + raise NotImplementedError + ngroups = 1000 size = ngroups * 2 rng = np.arange(ngroups).reshape(-1, 1) From 72485aae1caab82cbc6494e16be9e4428df40416 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 15 Sep 2021 14:41:10 -0700 Subject: [PATCH 3/3] asv fixup --- asv_bench/benchmarks/groupby.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/asv_bench/benchmarks/groupby.py b/asv_bench/benchmarks/groupby.py index d1182fafa423a..e41f185e08443 100644 --- a/asv_bench/benchmarks/groupby.py +++ b/asv_bench/benchmarks/groupby.py @@ -454,11 +454,13 @@ def setup(self, dtype, method, application, ncols): # DataFrameGroupBy doesn't have these methods raise NotImplementedError - if ( - ncols != 1 - and application == "transformation" - and method in ["head", "tail", "unique", "value_counts", "size"] - ): + if application == "transformation" and method in [ + "head", + "tail", + "unique", + "value_counts", + "size", + ]: # DataFrameGroupBy doesn't have these methods raise NotImplementedError