
Commit e7f5bf2

Merge remote-tracking branch 'upstream/master' into bump_numpy
2 parents: 9dc846a + 12a0dc4


55 files changed: +573, -327 lines

.pep8speaks.yml

Lines changed: 1 addition & 0 deletions
@@ -11,6 +11,7 @@ pycodestyle:
     max-line-length: 79
     ignore:
         - W503,  # line break before binary operator
+        - W504,  # line break after binary operator
         - E402,  # module level import not at top of file
         - E722,  # do not use bare except
         - E731,  # do not assign a lambda expression, use a def
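For context, W503 and W504 are pycodestyle's complementary warnings about where a line may break around a binary operator; ignoring both lets either style pass the check. A small illustration (not taken from the repository):

    first_value, second_value = 1, 2

    # W503 flags a break *before* a binary operator:
    total = (first_value
             + second_value)

    # W504 flags a break *after* a binary operator:
    total = (first_value +
             second_value)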

asv_bench/benchmarks/period.py

Lines changed: 3 additions & 0 deletions
@@ -119,3 +119,6 @@ def time_align(self):

     def time_intersection(self):
         self.index[:750].intersection(self.index[250:])
+
+    def time_unique(self):
+        self.index.unique()
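The new benchmark covers PeriodIndex.unique(), whose speedup is listed in the whatsnew below. A rough standalone timing sketch (the index size and repetition factor are illustrative, not the benchmark suite's setup):

    import timeit

    import pandas as pd

    # A PeriodIndex with many repeated values, so unique() has real work to do.
    idx = pd.period_range('2000-01-01', periods=1000, freq='D').repeat(100)

    elapsed = timeit.timeit(lambda: idx.unique(), number=100)
    print('PeriodIndex.unique(): {:.6f}s per call'.format(elapsed / 100))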

doc/source/api.rst

Lines changed: 9 additions & 0 deletions
@@ -245,6 +245,15 @@ Top-level evaluation

    eval

+Hashing
+~~~~~~~
+
+.. autosummary::
+   :toctree: generated/
+
+   util.hash_array
+   util.hash_pandas_object
+
 Testing
 ~~~~~~~
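Both helpers are importable from pandas.util. A small usage sketch (the hashes are deterministic uint64 values; the exact numbers are omitted here):

    import numpy as np
    import pandas as pd

    s = pd.Series(['a', 'b', 'c'])

    # Hash a pandas object element-wise; returns a uint64 Series aligned to s,
    # optionally mixing in the index.
    print(pd.util.hash_pandas_object(s, index=True))

    # Hash a raw 1-D array of values.
    print(pd.util.hash_array(np.array(['a', 'b', 'c'], dtype=object)))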

doc/source/whatsnew/v0.24.0.txt

Lines changed: 27 additions & 1 deletion
@@ -48,7 +48,7 @@ Pandas has gained the ability to hold integer dtypes with missing values. This l
 Here is an example of the usage.

 We can construct a ``Series`` with the specified dtype. The dtype string ``Int64`` is a pandas ``ExtensionDtype``. Specifying a list or array using the traditional missing value
-marker of ``np.nan`` will infer to integer dtype. The display of the ``Series`` will also use the ``NaN`` to indicate missing values in string outputs. (:issue:`20700`, :issue:`20747`, :issue:`22441`)
+marker of ``np.nan`` will infer to integer dtype. The display of the ``Series`` will also use the ``NaN`` to indicate missing values in string outputs. (:issue:`20700`, :issue:`20747`, :issue:`22441`, :issue:`21789`, :issue:`22346`)

 .. ipython:: python
@@ -91,6 +91,13 @@ These dtypes can be merged & reshaped & casted.
     pd.concat([df[['A']], df[['B', 'C']]], axis=1).dtypes
     df['A'].astype(float)

+Reduction and groupby operations such as 'sum' work.
+
+.. ipython:: python
+
+    df.sum()
+    df.groupby('B').A.sum()
+
 .. warning::

     The Integer NA support currently uses the capitalized dtype version, e.g. ``Int8`` as compared to the traditional ``int8``. This may be changed at a future date.
@@ -397,6 +404,22 @@ is the case with :attr:`Period.end_time`, for example

     p.end_time

+.. _whatsnew_0240.api_breaking.frame_to_dict_index_orient:
+
+Raise ValueError in ``DataFrame.to_dict(orient='index')``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Bug in :func:`DataFrame.to_dict` raises ``ValueError`` when used with
+``orient='index'`` and a non-unique index instead of losing data (:issue:`22801`)
+
+.. ipython:: python
+    :okexcept:
+
+    df = pd.DataFrame({'a': [1, 2], 'b': [0.5, 0.75]}, index=['A', 'A'])
+    df
+
+    df.to_dict(orient='index')
+
 .. _whatsnew_0240.api.datetimelike.normalize:

 Tick DateOffset Normalize Restrictions
@@ -572,7 +595,9 @@ update the ``ExtensionDtype._metadata`` tuple to match the signature of your
 - :meth:`Series.astype` and :meth:`DataFrame.astype` now dispatch to :meth:`ExtensionArray.astype` (:issue:`21185`).
 - Slicing a single row of a ``DataFrame`` with multiple ExtensionArrays of the same type now preserves the dtype, rather than coercing to object (:issue:`22784`)
 - Added :meth:`pandas.api.types.register_extension_dtype` to register an extension type with pandas (:issue:`22664`)
+- Series backed by an ``ExtensionArray`` now work with :func:`util.hash_pandas_object` (:issue:`23066`)
 - Updated the ``.type`` attribute for ``PeriodDtype``, ``DatetimeTZDtype``, and ``IntervalDtype`` to be instances of the dtype (``Period``, ``Timestamp``, and ``Interval`` respectively) (:issue:`22938`)
+- Support for reduction operations such as ``sum``, ``mean`` via opt-in base class method override (:issue:`22762`)

 .. _whatsnew_0240.api.incompatibilities:

@@ -708,6 +733,7 @@ Performance Improvements
   (:issue:`21372`)
 - Improved the performance of :func:`pandas.get_dummies` with ``sparse=True`` (:issue:`21997`)
 - Improved performance of :func:`IndexEngine.get_indexer_non_unique` for sorted, non-unique indexes (:issue:`9466`)
+- Improved performance of :func:`PeriodIndex.unique` (:issue:`23083`)


 .. _whatsnew_0240.docs:
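The Integer NA entries above advertise reductions and groupby reductions on the nullable Int64 dtype. A brief sketch of that behavior, assuming pandas 0.24 or later (the frame below is illustrative, not the whatsnew example verbatim):

    import pandas as pd

    df = pd.DataFrame({'A': pd.Series([1, None, 3], dtype='Int64'),
                       'B': [1, 1, 2]})

    # Column reductions skip the missing value by default.
    print(df.sum())

    # Groupby reductions on the nullable integer column work as well.
    print(df.groupby('B').A.sum())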

pandas/conftest.py

Lines changed: 24 additions & 0 deletions
@@ -131,6 +131,30 @@ def all_arithmetic_operators(request):
     return request.param


+_all_numeric_reductions = ['sum', 'max', 'min',
+                           'mean', 'prod', 'std', 'var', 'median',
+                           'kurt', 'skew']
+
+
+@pytest.fixture(params=_all_numeric_reductions)
+def all_numeric_reductions(request):
+    """
+    Fixture for numeric reduction names
+    """
+    return request.param
+
+
+_all_boolean_reductions = ['all', 'any']
+
+
+@pytest.fixture(params=_all_boolean_reductions)
+def all_boolean_reductions(request):
+    """
+    Fixture for boolean reduction names
+    """
+    return request.param
+
+
 _cython_table = pd.core.base.SelectionMixin._cython_table.items()
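These fixtures feed reduction names to tests one at a time. A hypothetical test using the numeric fixture might look like this (the test itself is illustrative and not part of the diff):

    import pandas as pd


    def test_numeric_reduction_matches_float(all_numeric_reductions):
        # pytest injects one name per run: 'sum', 'max', ..., 'skew'.
        s = pd.Series([1, 2, 3, 4, 5], dtype='Int64')
        result = getattr(s, all_numeric_reductions)()
        expected = getattr(s.astype('float64'), all_numeric_reductions)()
        assert result == expected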

pandas/core/algorithms.py

Lines changed: 2 additions & 2 deletions
@@ -274,8 +274,8 @@ def match(to_match, values, na_sentinel=-1):
         # replace but return a numpy array
         # use a Series because it handles dtype conversions properly
         from pandas import Series
-        result = Series(result.ravel()).replace(-1, na_sentinel).values.\
-            reshape(result.shape)
+        result = Series(result.ravel()).replace(-1, na_sentinel)
+        result = result.values.reshape(result.shape)

     return result

pandas/core/arrays/base.py

Lines changed: 36 additions & 0 deletions
@@ -63,6 +63,10 @@ class ExtensionArray(object):
     as they only compose abstract methods. Still, a more efficient
     implementation may be available, and these methods can be overridden.

+    One can implement methods to handle array reductions.
+
+    * _reduce
+
     This class does not inherit from 'abc.ABCMeta' for performance reasons.
     Methods and properties required by the interface raise
     ``pandas.errors.AbstractMethodError`` and no ``register`` method is

@@ -466,6 +470,11 @@ def _values_for_factorize(self):
         as NA in the factorization routines, so it will be coded as
         `na_sentinel` and not included in `uniques`. By default,
         ``np.nan`` is used.
+
+        Notes
+        -----
+        The values returned by this method are also used in
+        :func:`pandas.util.hash_pandas_object`.
         """
         return self.astype(object), np.nan

@@ -670,6 +679,33 @@ def _ndarray_values(self):
         """
         return np.array(self)

+    def _reduce(self, name, skipna=True, **kwargs):
+        """
+        Return a scalar result of performing the reduction operation.
+
+        Parameters
+        ----------
+        name : str
+            Name of the function, supported values are:
+            { any, all, min, max, sum, mean, median, prod,
+            std, var, sem, kurt, skew }.
+        skipna : bool, default True
+            If True, skip NaN values.
+        **kwargs
+            Additional keyword arguments passed to the reduction function.
+            Currently, `ddof` is the only supported kwarg.
+
+        Returns
+        -------
+        scalar
+
+        Raises
+        ------
+        TypeError : subclass does not define reductions
+        """
+        raise TypeError("cannot perform {name} with type {dtype}".format(
+            name=name, dtype=self.dtype))
+

 class ExtensionOpsMixin(object):
     """

pandas/core/arrays/categorical.py

Lines changed: 2 additions & 4 deletions
@@ -2069,14 +2069,12 @@ def _reverse_indexer(self):
         return result

     # reduction ops #
-    def _reduce(self, op, name, axis=0, skipna=True, numeric_only=None,
-                filter_type=None, **kwds):
-        """ perform the reduction type operation """
+    def _reduce(self, name, axis=0, skipna=True, **kwargs):
         func = getattr(self, name, None)
         if func is None:
             msg = 'Categorical cannot perform the operation {op}'
             raise TypeError(msg.format(op=name))
-        return func(numeric_only=numeric_only, **kwds)
+        return func(**kwargs)

     def min(self, numeric_only=None, **kwargs):
         """ The minimum value of the object.

pandas/core/arrays/datetimelike.py

Lines changed: 6 additions & 16 deletions
@@ -41,7 +41,7 @@
 from pandas.util._decorators import deprecate_kwarg


-def _make_comparison_op(op, cls):
+def _make_comparison_op(cls, op):
     # TODO: share code with indexes.base version? Main difference is that
     # the block for MultiIndex was removed here.
     def cmp_method(self, other):
@@ -740,6 +740,9 @@ def __isub__(self, other):
     # --------------------------------------------------------------
     # Comparison Methods

+    # Called by _add_comparison_methods defined in ExtensionOpsMixin
+    _create_comparison_method = classmethod(_make_comparison_op)
+
     def _evaluate_compare(self, other, op):
         """
         We have been called because a comparison between
@@ -773,21 +776,8 @@ def _evaluate_compare(self, other, op):
             result[mask] = filler
         return result

-    # TODO: get this from ExtensionOpsMixin
-    @classmethod
-    def _add_comparison_methods(cls):
-        """ add in comparison methods """
-        # DatetimeArray and TimedeltaArray comparison methods will
-        # call these as their super(...) methods
-        cls.__eq__ = _make_comparison_op(operator.eq, cls)
-        cls.__ne__ = _make_comparison_op(operator.ne, cls)
-        cls.__lt__ = _make_comparison_op(operator.lt, cls)
-        cls.__gt__ = _make_comparison_op(operator.gt, cls)
-        cls.__le__ = _make_comparison_op(operator.le, cls)
-        cls.__ge__ = _make_comparison_op(operator.ge, cls)
-
-
-DatetimeLikeArrayMixin._add_comparison_methods()
+
+DatetimeLikeArrayMixin._add_comparison_ops()


 # -------------------------------------------------------------------
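The deleted _add_comparison_methods is replaced by the hook that ExtensionOpsMixin._add_comparison_ops consumes: the class exposes _create_comparison_method, and the mixin attaches the dunder methods. A stripped-down sketch of that wiring with toy classes (not the pandas internals):

    import operator


    class OpsMixin:
        @classmethod
        def _add_comparison_ops(cls):
            # The mixin only knows the factory hook; the subclass decides
            # how each comparison method is built.
            cls.__eq__ = cls._create_comparison_method(operator.eq)
            cls.__ne__ = cls._create_comparison_method(operator.ne)


    def _make_comparison_op(cls, op):
        def cmp_method(self, other):
            return op(self.value, other.value)
        cmp_method.__name__ = '__{}__'.format(op.__name__)
        return cmp_method


    class Wrapped(OpsMixin):
        # Same wiring as the diff: the module-level factory becomes the hook.
        _create_comparison_method = classmethod(_make_comparison_op)

        def __init__(self, value):
            self.value = value


    Wrapped._add_comparison_ops()
    print(Wrapped(1) == Wrapped(1))  # True
    print(Wrapped(1) != Wrapped(2))  # True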

pandas/core/arrays/integer.py

Lines changed: 26 additions & 0 deletions
@@ -8,6 +8,7 @@
 from pandas.compat import u, range, string_types
 from pandas.compat import set_function_name

+from pandas.core import nanops
 from pandas.core.dtypes.cast import astype_nansafe
 from pandas.core.dtypes.generic import ABCSeries, ABCIndexClass
 from pandas.core.dtypes.common import (
@@ -529,6 +530,31 @@ def cmp_method(self, other):
         name = '__{name}__'.format(name=op.__name__)
         return set_function_name(cmp_method, name, cls)

+    def _reduce(self, name, skipna=True, **kwargs):
+        data = self._data
+        mask = self._mask
+
+        # coerce to a nan-aware float if needed
+        if mask.any():
+            data = self._data.astype('float64')
+            data[mask] = self._na_value
+
+        op = getattr(nanops, 'nan' + name)
+        result = op(data, axis=0, skipna=skipna, mask=mask)
+
+        # if we have a boolean op, don't coerce
+        if name in ['any', 'all']:
+            pass
+
+        # if we have a preservable numeric op,
+        # provide coercion back to an integer type if possible
+        elif name in ['sum', 'min', 'max', 'prod'] and notna(result):
+            int_result = int(result)
+            if int_result == result:
+                result = int_result
+
+        return result
+
     def _maybe_mask_result(self, result, mask, other, op_name):
         """
         Parameters
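Given this _reduce, reductions on the nullable integer dtype come back as plain ints when the lossless-coercion branch applies, and as floats otherwise. A short illustration, assuming pandas 0.24+:

    import pandas as pd

    s = pd.Series([1, 2, None], dtype='Int64')

    print(s.sum())   # 3 -- 'sum' is coerced back to an int when lossless
    print(s.mean())  # 1.5 -- 'mean' is not in the preservable list, stays float
    print(s.max())   # 2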

pandas/core/base.py

Lines changed: 2 additions & 2 deletions
@@ -395,8 +395,8 @@ def nested_renaming_depr(level=4):

                 elif isinstance(obj, ABCSeries):
                     nested_renaming_depr()
-                elif isinstance(obj, ABCDataFrame) and \
-                        k not in obj.columns:
+                elif (isinstance(obj, ABCDataFrame) and
+                        k not in obj.columns):
                     raise KeyError(
                         "Column '{col}' does not exist!".format(col=k))

pandas/core/frame.py

Lines changed: 4 additions & 0 deletions
@@ -1224,6 +1224,10 @@ def to_dict(self, orient='dict', into=dict):
                          for k, v in zip(self.columns, np.atleast_1d(row)))
                     for row in self.values]
         elif orient.lower().startswith('i'):
+            if not self.index.is_unique:
+                raise ValueError(
+                    "DataFrame index must be unique for orient='index'."
+                )
             return into_c((t[0], dict(zip(self.columns, t[1:])))
                           for t in self.itertuples())
         else:
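With this check, a frame whose index has duplicate labels now fails loudly instead of silently keeping only the last row per label. A quick sketch of the new behavior plus one possible workaround (the workaround is a suggestion, not part of the change):

    import pandas as pd

    df = pd.DataFrame({'a': [1, 2], 'b': [0.5, 0.75]}, index=['A', 'A'])

    try:
        df.to_dict(orient='index')
    except ValueError as err:
        print(err)  # DataFrame index must be unique for orient='index'.

    # Workaround: keep every row by switching to a list-of-records layout.
    print(df.to_dict(orient='records'))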

pandas/core/generic.py

Lines changed: 2 additions & 2 deletions
@@ -5651,8 +5651,8 @@ def fillna(self, value=None, method=None, axis=None, inplace=False,
                 # fill in 2d chunks
                 result = {col: s.fillna(method=method, value=value)
                           for col, s in self.iteritems()}
-                new_obj = self._constructor.\
-                    from_dict(result).__finalize__(self)
+                prelim_obj = self._constructor.from_dict(result)
+                new_obj = prelim_obj.__finalize__(self)
                 new_data = new_obj._data

             else:

pandas/core/groupby/generic.py

Lines changed: 3 additions & 2 deletions
@@ -1027,8 +1027,9 @@ def nunique(self, dropna=True):
         try:
             sorter = np.lexsort((val, ids))
         except TypeError:  # catches object dtypes
-            assert val.dtype == object, \
-                'val.dtype must be object, got %s' % val.dtype
+            msg = ('val.dtype must be object, got {dtype}'
+                   .format(dtype=val.dtype))
+            assert val.dtype == object, msg
             val, _ = algorithms.factorize(val, sort=False)
             sorter = np.lexsort((val, ids))
             _isna = lambda a: a == -1

pandas/core/groupby/groupby.py

Lines changed: 8 additions & 6 deletions
@@ -578,8 +578,8 @@ def wrapper(*args, **kwargs):
             # a little trickery for aggregation functions that need an axis
             # argument
             kwargs_with_axis = kwargs.copy()
-            if 'axis' not in kwargs_with_axis or \
-                    kwargs_with_axis['axis'] is None:
+            if ('axis' not in kwargs_with_axis or
+                    kwargs_with_axis['axis'] is None):
                 kwargs_with_axis['axis'] = self.axis

             def curried_with_axis(x):
@@ -1490,8 +1490,10 @@ def nth(self, n, dropna=None):
         self._set_group_selection()

         if not dropna:
-            mask = np.in1d(self._cumcount_array(), nth_values) | \
-                np.in1d(self._cumcount_array(ascending=False) + 1, -nth_values)
+            mask_left = np.in1d(self._cumcount_array(), nth_values)
+            mask_right = np.in1d(self._cumcount_array(ascending=False) + 1,
+                                 -nth_values)
+            mask = mask_left | mask_right

             out = self._selected_obj[mask]
             if not self.as_index:
@@ -1552,8 +1554,8 @@ def nth(self, n, dropna=None):
             result.loc[mask] = np.nan

             # reset/reindex to the original groups
-            if len(self.obj) == len(dropped) or \
-                    len(result) == len(self.grouper.result_index):
+            if (len(self.obj) == len(dropped) or
+                    len(result) == len(self.grouper.result_index)):
                 result.index = self.grouper.result_index
             else:
                 result = result.reindex(self.grouper.result_index)
