pandas-dev
diff --git a/‎asv_bench/benchmarks/indexing.py
Lines changed: 9 additions & 0 deletions b/‎asv_bench/benchmarks/indexing.py
Lines changed: 9 additions & 0 deletions
diff --git a/‎ci/build_docs.sh
Lines changed: 9 additions & 0 deletions b/‎ci/build_docs.sh
Lines changed: 9 additions & 0 deletions
diff --git a/‎doc/source/api.rst
Lines changed: 4 additions & 0 deletions b/‎doc/source/api.rst
Lines changed: 4 additions & 0 deletions
diff --git a/‎doc/source/groupby.rst
Lines changed: 57 additions & 6 deletions b/‎doc/source/groupby.rst
Lines changed: 57 additions & 6 deletions
diff --git a/‎doc/source/io.rst
Lines changed: 2 additions & 7 deletions b/‎doc/source/io.rst
Lines changed: 2 additions & 7 deletions
diff --git a/‎doc/source/reshaping.rst
Lines changed: 1 addition & 1 deletion b/‎doc/source/reshaping.rst
Lines changed: 1 addition & 1 deletion
diff --git a/‎doc/source/whatsnew/v0.20.0.txt
Lines changed: 0 additions & 1 deletion b/‎doc/source/whatsnew/v0.20.0.txt
Lines changed: 0 additions & 1 deletion
diff --git a/‎doc/source/whatsnew/v0.20.2.txt
Lines changed: 24 additions & 2 deletions b/‎doc/source/whatsnew/v0.20.2.txt
Lines changed: 24 additions & 2 deletions
diff --git a/‎doc/source/whatsnew/v0.21.0.txt
Lines changed: 4 additions & 0 deletions b/‎doc/source/whatsnew/v0.21.0.txt
Lines changed: 4 additions & 0 deletions
diff --git a/‎pandas/_libs/index.pyx
Lines changed: 2 additions & 2 deletions b/‎pandas/_libs/index.pyx
Lines changed: 2 additions & 2 deletions
diff --git a/‎pandas/_libs/parsers.pyx
Lines changed: 1 addition & 1 deletion b/‎pandas/_libs/parsers.pyx
Lines changed: 1 addition & 1 deletion
diff --git a/‎pandas/conftest.py
Lines changed: 10 additions & 0 deletions b/‎pandas/conftest.py
Lines changed: 10 additions & 0 deletions
diff --git a/‎pandas/core/algorithms.py
Lines changed: 6 additions & 1 deletion b/‎pandas/core/algorithms.py
Lines changed: 6 additions & 1 deletion
diff --git a/‎pandas/core/base.py
Lines changed: 10 additions & 2 deletions b/‎pandas/core/base.py
Lines changed: 10 additions & 2 deletions
diff --git a/‎pandas/core/categorical.py
Lines changed: 7 additions & 0 deletions b/‎pandas/core/categorical.py
Lines changed: 7 additions & 0 deletions
@@ -204,6 +204,12 @@ def setup(self):
             [np.arange(100), list('A'), list('A')],
             names=['one', 'two', 'three'])
 
+        rng = np.random.RandomState(4)
+        size = 1 << 16
+        self.mi_unused_levels = pd.MultiIndex.from_arrays([
+            rng.randint(0, 1 << 13, size),
+            rng.randint(0, 1 << 10, size)])[rng.rand(size) < 0.1]
+
     def time_series_xs_mi_ix(self):
         self.s.ix[999]
 
@@ -248,6 +254,9 @@ def time_multiindex_small_get_loc_warm(self):
     def time_is_monotonic(self):
         self.miint.is_monotonic
 
+    def time_remove_unused_levels(self):
+        self.mi_unused_levels.remove_unused_levels()
+
 
 class IntervalIndexing(object):
     goal_time = 0.2
 
@@ -59,6 +59,15 @@ if [ "$DOC" ]; then
     git remote -v
 
     git push origin gh-pages -f
+
+    echo "Running doctests"
+    cd "$TRAVIS_BUILD_DIR"
+    pytest --doctest-modules \
+           pandas/core/reshape/concat.py \
+           pandas/core/reshape/pivot.py \
+           pandas/core/reshape/reshape.py \
+           pandas/core/reshape/tile.py
+
 fi
 
 exit 0
@@ -724,6 +724,7 @@ Serialization / IO / Conversion
    Series.to_dense
    Series.to_string
    Series.to_clipboard
+   Series.to_latex
 
 Sparse
 ~~~~~~
@@ -1285,6 +1286,8 @@ Attributes
    Index.is_monotonic
    Index.is_monotonic_increasing
    Index.is_monotonic_decreasing
+   Index.is_strictly_monotonic_increasing
+   Index.is_strictly_monotonic_decreasing
    Index.is_unique
    Index.has_duplicates
    Index.dtype
@@ -1704,6 +1707,7 @@ Computations / Descriptive Stats
    GroupBy.mean
    GroupBy.median
    GroupBy.min
+   GroupBy.ngroup
    GroupBy.nth
    GroupBy.ohlc
    GroupBy.prod
 
@@ -1122,12 +1122,36 @@ To see the order in which each row appears within its group, use the
 
 .. ipython:: python
 
-   df = pd.DataFrame(list('aaabba'), columns=['A'])
-   df
+   dfg = pd.DataFrame(list('aaabba'), columns=['A'])
+   dfg
+
+   dfg.groupby('A').cumcount()
+
+   dfg.groupby('A').cumcount(ascending=False)
+
+.. _groupby.ngroup:
+
+Enumerate groups
+~~~~~~~~~~~~~~~~
+
+.. versionadded:: 0.20.2
+
+To see the ordering of the groups (as opposed to the order of rows
+within a group given by ``cumcount``) you can use the ``ngroup``
+method.
+
+Note that the numbers given to the groups match the order in which the
+groups would be seen when iterating over the groupby object, not the
+order they are first observed.
+
+.. ipython:: python
 
-   df.groupby('A').cumcount()
+   dfg = pd.DataFrame(list('aaabba'), columns=['A'])
+   dfg
 
-   df.groupby('A').cumcount(ascending=False)  # kwarg only
+   dfg.groupby('A').ngroup()
+
+   dfg.groupby('A').ngroup(ascending=False)
 
 Plotting
 ~~~~~~~~
@@ -1176,14 +1200,41 @@ Regroup columns of a DataFrame according to their sum, and sum the aggregated on
    df
    df.groupby(df.sum(), axis=1).sum()
 
+.. _groupby.multicolumn_factorization:
+
+Multi-column factorization
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+By using ``.ngroup()``, we can extract information about the groups in
+a way similar to :func:`factorize` (as described further in the
+:ref:`reshaping API <reshaping.factorize>`) but which applies
+naturally to multiple columns of mixed type and different
+sources. This can be useful as an intermediate categorical-like step
+in processing, when the relationships between the group rows are more
+important than their content, or as input to an algorithm which only
+accepts the integer encoding. (For more information about support in
+pandas for full categorical data, see the :ref:`Categorical
+introduction <categorical>` and the
+:ref:`API documentation <api.categorical>`.)
+
+.. ipython:: python
+
+    dfg = pd.DataFrame({"A": [1, 1, 2, 3, 2], "B": list("aaaba")})
+
+    dfg
+
+    dfg.groupby(["A", "B"]).ngroup()
+
+    dfg.groupby(["A", [0, 0, 0, 1, 1]]).ngroup()
+
 Groupby by Indexer to 'resample' data
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-Resampling produces new hypothetical samples(resamples) from already existing observed data or from a model that generates data. These new samples are similar to the pre-existing samples.
+Resampling produces new hypothetical samples (resamples) from already existing observed data or from a model that generates data. These new samples are similar to the pre-existing samples.
 
 In order for resample to work on indices that are non-datetimelike, the following procedure can be utilized.
 
-In the following examples, **df.index // 5** returns a binary array which is used to determine what get's selected for the groupby operation.
+In the following examples, **df.index // 5** returns an integer array which is used to determine what gets selected for the groupby operation.
 
 .. note:: The below example shows how we can downsample by consolidation of samples into fewer samples. Here by using **df.index // 5**, we are aggregating the samples in bins. By applying **std()** function, we aggregate the information contained in many samples into a small subset of values which is their standard deviation thereby reducing the number of samples.
 
 
@@ -226,8 +226,8 @@ NA and Missing Data Handling
 na_values : scalar, str, list-like, or dict, default ``None``
   Additional strings to recognize as NA/NaN. If dict passed, specific per-column
   NA values. By default the following values are interpreted as NaN:
-  ``'-1.#IND', '1.#QNAN', '1.#IND', '-1.#QNAN', '#N/A N/A', '#N/A', 'N/A', 'NA',
-  '#NA', 'NULL', 'NaN', '-NaN', 'nan', '-nan', ''``.
+  ``'-1.#IND', '1.#QNAN', '1.#IND', '-1.#QNAN', '#N/A N/A', '#N/A', 'N/A', 'n/a', 'NA',
+  '#NA', 'NULL', 'null', 'NaN', '-NaN', 'nan', '-nan', ''``.
 keep_default_na : boolean, default ``True``
   If na_values are specified and keep_default_na is ``False`` the default NaN
   values are overridden, otherwise they're appended to.
@@ -2739,11 +2739,6 @@ should be passed to ``index_col`` and ``header``
    import os
    os.remove('path_to_file.xlsx')
 
-.. warning::
-
-   Excel files saved in version 0.16.2 or prior that had index names will still able to be read in,
-   but the ``has_index_names`` argument must specified to ``True``.
-
 
 Parsing Specific Columns
 ++++++++++++++++++++++++
 
@@ -636,7 +636,7 @@ When a column contains only one level, it will be omitted in the result.
 
     pd.get_dummies(df, drop_first=True)
 
-
+.. _reshaping.factorize:
 
 Factorizing values
 ------------------
 
@@ -515,7 +515,6 @@ Other Enhancements
 - Options added to allow one to turn on/off using ``bottleneck`` and ``numexpr``, see :ref:`here <basics.accelerate>` (:issue:`16157`)
 - ``DataFrame.style.bar()`` now accepts two more options to further customize the bar chart. Bar alignment is set with ``align='left'|'mid'|'zero'``, the default is "left", which is backward compatible; You can now pass a list of ``color=[color_negative, color_positive]``. (:issue:`14757`)
 
-
 .. _ISO 8601 duration: https://en.wikipedia.org/wiki/ISO_8601#Durations
 
 
 
@@ -20,6 +20,13 @@ Enhancements
 ~~~~~~~~~~~~
 
 - Unblocked access to additional compression types supported in pytables: 'blosc:blosclz', 'blosc:lz4', 'blosc:lz4hc', 'blosc:snappy', 'blosc:zlib', 'blosc:zstd' (:issue:`14478`)
+- ``Series`` provides a ``to_latex`` method (:issue:`16180`)
+- Added :attr:`Index.is_strictly_monotonic_increasing` and :attr:`Index.is_strictly_monotonic_decreasing` properties (:issue:`16515`)
+
+- A new groupby method :meth:`~pandas.core.groupby.GroupBy.ngroup`,
+  parallel to the existing :meth:`~pandas.core.groupby.GroupBy.cumcount`,
+  has been added to return the group order (:issue:`11642`); see
+  :ref:`here <groupby.ngroup>`.
 
 .. _whatsnew_0202.performance:
 
@@ -30,15 +37,23 @@ Performance Improvements
 - Performance regression fix for MultiIndexes (:issue:`16319`, :issue:`16346`)
 - Improved performance of ``.clip()`` with scalar arguments (:issue:`15400`)
 - Improved performance of groupby with categorical groupers (:issue:`16413`)
+- Improved performance of ``MultiIndex.remove_unused_levels()`` (:issue:`16556`)
 
 .. _whatsnew_0202.bug_fixes:
 
 Bug Fixes
 ~~~~~~~~~
 
+- Silenced a warning on some Windows environments about "tput: terminal attributes: No such device or address" when
+  detecting the terminal size. This fix only applies to python 3 (:issue:`16496`)
 - Bug in using ``pathlib.Path`` or ``py.path.local`` objects with io functions (:issue:`16291`)
+- Bug in ``Index.symmetric_difference()`` on two equal MultiIndex's, results in a TypeError (:issue:`13490`)
 - Bug in ``DataFrame.update()`` with ``overwrite=False`` and ``NaN values`` (:issue:`15593`)
-
+- Passing an invalid engine to :func:`read_csv` now raises an informative
+  ``ValueError`` rather than ``UnboundLocalError``. (:issue:`16511`)
+- Bug in :func:`unique` on an array of tuples (:issue:`16519`)
+- Bug in :func:`cut` when ``labels`` are set, resulting in incorrect label ordering (:issue:`16459`)
+- Fixed a compatibility issue with IPython 6.0's tab completion showing deprecation warnings on Categoricals (:issue:`16409`)
 
 Conversion
 ^^^^^^^^^^
@@ -51,15 +66,19 @@ Indexing
 ^^^^^^^^
 
 - Bug in ``DataFrame.reset_index(level=)`` with single level index (:issue:`16263`)
-
+- Bug in partial string indexing with a monotonic, but not strictly-monotonic, index incorrectly reversing the slice bounds (:issue:`16515`)
+- Bug in ``MultiIndex.remove_unused_levels()`` (:issue:`16556`)
 
 I/O
 ^^^
 
 - Bug in ``pd.read_csv()`` when comment is passed in space delimited text files (:issue:`16472`)
 - Bug that would force importing of the clipboard routines unnecessarily, potentially causing an import error on startup (:issue:`16288`)
 - Bug that raised IndexError HTML-rendering an empty DataFrame (:issue:`15953`)
+- Bug in ``pd.read_csv()`` in which tarfile object inputs were raising an error in Python 2.x for the C engine (:issue:`16530`)
+- Bug where ``DataFrame.to_html()`` ignored the ``index_names`` parameter (:issue:`16493`)
 
+- Bug in ``HDFStore.select_as_multiple()`` where start/stop arguments were not respected (:issue:`16209`)
 
 Plotting
 ^^^^^^^^
@@ -75,6 +94,8 @@ Groupby/Resample/Rolling
 ^^^^^^^^^^^^^^^^^^^^^^^^
 
 - Bug creating datetime rolling window on an empty DataFrame (:issue:`15819`)
+- Bug in ``rolling.cov()`` with offset window (:issue:`16058`)
+- Bug in ``.resample()`` and ``.groupby()`` when aggregating on integers (:issue:`16361`)
 
 
 Sparse
@@ -89,6 +110,7 @@ Reshaping
 - Bug in ``pd.wide_to_long()`` where no error was raised when ``i`` was not a unique identifier (:issue:`16382`)
 - Bug in ``Series.isin(..)`` with a list of tuples (:issue:`16394`)
 - Bug in construction of a ``DataFrame`` with mixed dtypes including an all-NaT column. (:issue:`16395`)
+- Bug in ``DataFrame.agg()`` and ``Series.agg()`` when aggregating on non-callable attributes (:issue:`16405`)
 
 
 Numeric
 
@@ -37,6 +37,7 @@ Other Enhancements
 - :func:`api.types.infer_dtype` now infers decimals. (:issue:`15690`)
 - :func:`read_feather` has gained the ``nthreads`` parameter for multi-threaded operations (:issue:`16359`)
 - :func:`DataFrame.clip()` and :func:`Series.clip()` have gained an inplace argument. (:issue:`15388`)
+- :func:`crosstab` has gained a ``margins_name`` parameter to define the name of the row / column that will contain the totals when margins=True. (:issue:`15972`)
 
 .. _whatsnew_0210.api_breaking:
 
@@ -48,6 +49,8 @@ Backwards incompatible API changes
 
 - Accessing a non-existent attribute on a closed :class:`HDFStore` will now
   raise an ``AttributeError`` rather than a ``ClosedFileError`` (:issue:`16301`)
+- :func:`read_csv` now treats ``'null'`` strings as missing values by default (:issue:`16471`)
+- :func:`read_csv` now treats ``'n/a'`` strings as missing values by default (:issue:`16078`)
 
 .. _whatsnew_0210.api:
 
@@ -69,6 +72,7 @@ Deprecations
 Removal of prior version deprecations/changes
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
+- ``pd.read_excel()`` has dropped the ``has_index_names`` parameter (:issue:`10967`)
 
 
 .. _whatsnew_0210.performance:
 
@@ -152,7 +152,7 @@ cdef class IndexEngine:
 
         try:
             return self.mapping.get_item(val)
-        except TypeError:
+        except (TypeError, ValueError):
             raise KeyError(val)
 
     cdef inline _get_loc_duplicates(self, object val):
@@ -470,7 +470,7 @@ cdef class DatetimeEngine(Int64Engine):
         try:
             val = _to_i8(val)
             return self.mapping.get_item(val)
-        except TypeError:
+        except (TypeError, ValueError):
             self._date_check_type(val)
             raise KeyError(val)
 
 
@@ -277,7 +277,7 @@ DEFAULT_CHUNKSIZE = 256 * 1024
 # no longer excluding inf representations
 # '1.#INF','-1.#INF', '1.#INF000000',
 _NA_VALUES = [b'-1.#IND', b'1.#QNAN', b'1.#IND', b'-1.#QNAN',
-              b'#N/A N/A', b'NA', b'#NA', b'NULL', b'NaN',
+              b'#N/A N/A', b'n/a', b'NA', b'#NA', b'NULL', b'null', b'NaN',
               b'nan', b'']
 
 
 
@@ -45,3 +45,13 @@ def spmatrix(request):
     tm._skip_if_no_scipy()
     from scipy import sparse
     return getattr(sparse, request.param + '_matrix')
+
+
+@pytest.fixture
+def ip():
+    """An instance of IPython.InteractiveShell.
+    Skips the requesting test unless IPython >= 6.0.0 can be imported.
+    """
+    pytest.importorskip('IPython', minversion="6.0.0")
+    from IPython.core.interactiveshell import InteractiveShell
+    return InteractiveShell()
@@ -163,7 +163,7 @@ def _ensure_arraylike(values):
                                ABCIndexClass, ABCSeries)):
         inferred = lib.infer_dtype(values)
         if inferred in ['mixed', 'string', 'unicode']:
-            values = np.asarray(values, dtype=object)
+            values = lib.list_to_object_array(values)
         else:
             values = np.asarray(values)
     return values
@@ -328,6 +328,11 @@ def unique(values):
     [b, a, c]
     Categories (3, object): [a < b < c]
 
+    An array of tuples
+
+    >>> pd.unique([('a', 'b'), ('b', 'a'), ('a', 'c'), ('b', 'a')])
+    array([('a', 'b'), ('b', 'a'), ('a', 'c')], dtype=object)
+
     See Also
     --------
     pandas.Index.unique
 
@@ -378,7 +378,7 @@ def aggregate(self, func, *args, **kwargs):
     def _try_aggregate_string_function(self, arg, *args, **kwargs):
         """
         if arg is a string, then try to operate on it:
-        - try to find a function on ourselves
+        - try to find a function (or attribute) on ourselves
         - try to find a numpy function
         - raise
 
@@ -387,7 +387,15 @@ def _try_aggregate_string_function(self, arg, *args, **kwargs):
 
         f = getattr(self, arg, None)
         if f is not None:
-            return f(*args, **kwargs)
+            if callable(f):
+                return f(*args, **kwargs)
+
+            # people may try to aggregate on a non-callable attribute
+            # but don't let them think they can pass args to it
+            assert len(args) == 0
+            assert len([kwarg for kwarg in kwargs
+                        if kwarg not in ['axis', '_level']]) == 0
+            return f
 
         f = getattr(np, arg, None)
         if f is not None:
 
@@ -342,6 +342,13 @@ def __init__(self, values, categories=None, ordered=False, fastpath=False):
         self._categories = categories
         self._codes = coerce_indexer_dtype(codes, categories)
 
+    def __dir__(self):
+        # Omit deprecated ``labels`` so IPython completion does not warn, see
+        # https://github.com/pandas-dev/pandas/issues/16409
+        rv = set(dir(type(self)))
+        rv.discard("labels")
+        return sorted(rv)
+
     @property
     def _constructor(self):
         return Categorical