Commit 5ac40ff

Merge remote-tracking branch 'upstream/master' into multi-index-join

2 parents f0ac24d + d78bd7a

29 files changed: +323 −178 lines

doc/source/conf.py

Lines changed: 3 additions & 3 deletions
@@ -99,7 +99,7 @@
 # JP: added from sphinxdocs
 autosummary_generate = False

-if any(re.match("\s*api\s*", l) for l in index_rst_lines):
+if any(re.match(r"\s*api\s*", l) for l in index_rst_lines):
     autosummary_generate = True

 # numpydoc

@@ -341,8 +341,8 @@
 # file, target name, title, author, documentclass [howto/manual]).
 latex_documents = [
     ('index', 'pandas.tex',
-     u'pandas: powerful Python data analysis toolkit',
-     u'Wes McKinney\n\& PyData Development Team', 'manual'),
+     'pandas: powerful Python data analysis toolkit',
+     r'Wes McKinney\n\& PyData Development Team', 'manual'),
 ]

 # The name of an image file (relative to this directory) to place at the top of

doc/source/cookbook.rst

Lines changed: 11 additions & 0 deletions
@@ -1226,6 +1226,17 @@ Computation
 Correlation
 ***********

+Often it's useful to obtain the lower (or upper) triangular form of a correlation matrix calculated from :func:`DataFrame.corr`. This can be achieved by passing a boolean mask to ``where`` as follows:
+
+.. ipython:: python
+
+   df = pd.DataFrame(np.random.random(size=(100, 5)))
+
+   corr_mat = df.corr()
+   mask = np.tril(np.ones_like(corr_mat, dtype=np.bool), k=-1)
+
+   corr_mat.where(mask)
+
 The `method` argument within `DataFrame.corr` can accept a callable in addition to the named correlation types. Here we compute the `distance correlation <https://en.wikipedia.org/wiki/Distance_correlation>`__ matrix for a `DataFrame` object.

 .. code-block:: python
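
A quick standalone sanity check of the recipe added above, runnable outside the Sphinx build (this sketch uses the builtin ``bool`` in place of ``np.bool``, which newer NumPy versions remove):

import numpy as np
import pandas as pd

df = pd.DataFrame(np.random.random(size=(100, 5)))

corr_mat = df.corr()
# Ones strictly below the diagonal only: k=-1 excludes the diagonal itself.
mask = np.tril(np.ones_like(corr_mat, dtype=bool), k=-1)

# where() keeps entries where the mask is True and NaNs out the rest,
# leaving the lower triangle of the correlation matrix.
print(corr_mat.where(mask))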

doc/source/groupby.rst

Lines changed: 10 additions & 0 deletions
@@ -125,6 +125,16 @@ We could naturally group by either the ``A`` or ``B`` columns, or both:
 grouped = df.groupby('A')
 grouped = df.groupby(['A', 'B'])

+.. versionadded:: 0.24
+
+If we also have a MultiIndex on columns ``A`` and ``B``, we can group by all
+but the specified columns:
+
+.. ipython:: python
+
+   df2 = df.set_index(['A', 'B'])
+   grouped = df2.groupby(level=df2.index.names.difference(['B']))
+
 These will split the DataFrame on its index (rows). We could also split by the
 columns:
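
A minimal sketch of the new recipe with a concrete frame (the column names and values here are made up for illustration):

import numpy as np
import pandas as pd

df = pd.DataFrame({'A': ['foo', 'bar'] * 3,
                   'B': ['one', 'two', 'three'] * 2,
                   'C': np.arange(6.0)})
df2 = df.set_index(['A', 'B'])

# df2.index.names is a FrozenList; .difference(['B']) drops 'B' from it,
# so the groupby runs over every remaining index level (here, just 'A').
grouped = df2.groupby(level=df2.index.names.difference(['B']))
print(grouped.sum())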

doc/source/whatsnew/v0.24.0.txt

Lines changed: 2 additions & 3 deletions
@@ -13,10 +13,9 @@ v0.24.0 (Month XX, 2018)
 New features
 ~~~~~~~~~~~~
 - :func:`merge` now directly allows merge between objects of type ``DataFrame`` and named ``Series``, without the need to convert the ``Series`` object into a ``DataFrame`` beforehand (:issue:`21220`)
-
-
 - ``ExcelWriter`` now accepts ``mode`` as a keyword argument, enabling append to existing workbooks when using the ``openpyxl`` engine (:issue:`3441`)
-
+- ``FrozenList`` has gained the ``.union()`` and ``.difference()`` methods. This functionality greatly simplifies groupby's that rely on explicitly excluding certain columns. See :ref:`Splitting an object into groups
+  <groupby.split>` for more information (:issue:`15475`, :issue:`15506`)
 - :func:`DataFrame.to_parquet` now accepts ``index`` as an argument, allowing
   the user to override the engine's default behavior to include or omit the
   dataframe's indexes from the resulting Parquet file. (:issue:`20768`)
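
Hedged usage sketches for three of the entries above; the file paths are placeholders, and the ``ExcelWriter`` call assumes ``report.xlsx`` already exists (``mode='a'`` appends to an existing workbook):

import pandas as pd

df = pd.DataFrame({'x': [1, 2, 3]})
ser = pd.Series([10, 20, 30], name='y')

# merge() now accepts a named Series directly, without .to_frame().
merged = pd.merge(df, ser, left_index=True, right_index=True)

# Append a sheet to an existing workbook; requires the openpyxl engine.
with pd.ExcelWriter('report.xlsx', engine='openpyxl', mode='a') as writer:
    df.to_excel(writer, sheet_name='new_sheet')

# index=False omits the frame's index from the Parquet file,
# overriding the engine's default behavior.
df.to_parquet('report.parquet', index=False)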

pandas/_libs/algos.pyx

Lines changed: 3 additions & 1 deletion
@@ -77,6 +77,8 @@ class NegInfinity(object):
     __ge__ = lambda self, other: isinstance(other, NegInfinity)


+@cython.wraparound(False)
+@cython.boundscheck(False)
 cpdef ndarray[int64_t, ndim=1] unique_deltas(ndarray[int64_t] arr):
     """
     Efficiently find the unique first-differences of the given array.

@@ -793,7 +795,7 @@ arrmap_bool = arrmap["uint8_t"]

 @cython.boundscheck(False)
 @cython.wraparound(False)
-def is_monotonic(ndarray[algos_t] arr, bint timelike):
+def is_monotonic(ndarray[algos_t, ndim=1] arr, bint timelike):
     """
     Returns
     -------
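
For readers skimming the diff, ``unique_deltas`` returns the sorted unique first differences of an int64 array; a rough pure-NumPy sketch of the result (the Cython version builds it via a hash table instead):

import numpy as np

def unique_deltas_py(arr):
    # Mirrors the result of unique_deltas, not its implementation.
    arr = np.asarray(arr, dtype=np.int64)
    return np.unique(np.diff(arr))

unique_deltas_py([1, 3, 5, 6, 8])  # -> array([1, 2])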

pandas/_libs/groupby.pyx

Lines changed: 1 addition & 1 deletion
@@ -353,7 +353,7 @@ def group_any_all(ndarray[uint8_t] out,
     The returned values will either be 0 or 1 (False or True, respectively).
     """
     cdef:
-        Py_ssize_t i, N=len(labels)
+        Py_ssize_t i, N = len(labels)
         int64_t lab
         uint8_t flag_val

pandas/_libs/groupby_helper.pxi.in

Lines changed: 0 additions & 20 deletions
@@ -667,11 +667,6 @@ def group_max(ndarray[groupby_t, ndim=2] out,
                     out[i, j] = maxx[i, j]


-group_max_float64 = group_max["float64_t"]
-group_max_float32 = group_max["float32_t"]
-group_max_int64 = group_max["int64_t"]
-
-
 @cython.wraparound(False)
 @cython.boundscheck(False)
 def group_min(ndarray[groupby_t, ndim=2] out,

@@ -734,11 +729,6 @@ def group_min(ndarray[groupby_t, ndim=2] out,
                     out[i, j] = minx[i, j]


-group_min_float64 = group_min["float64_t"]
-group_min_float32 = group_min["float32_t"]
-group_min_int64 = group_min["int64_t"]
-
-
 @cython.boundscheck(False)
 @cython.wraparound(False)
 def group_cummin(ndarray[groupby_t, ndim=2] out,

@@ -787,11 +777,6 @@ def group_cummin(ndarray[groupby_t, ndim=2] out,
                 out[i, j] = mval


-group_cummin_float64 = group_cummin["float64_t"]
-group_cummin_float32 = group_cummin["float32_t"]
-group_cummin_int64 = group_cummin["int64_t"]
-
-
 @cython.boundscheck(False)
 @cython.wraparound(False)
 def group_cummax(ndarray[groupby_t, ndim=2] out,

@@ -837,8 +822,3 @@ def group_cummax(ndarray[groupby_t, ndim=2] out,
             if val > mval:
                 accum[lab, j] = mval = val
             out[i, j] = mval
-
-
-group_cummax_float64 = group_cummax["float64_t"]
-group_cummax_float32 = group_cummax["float32_t"]
-group_cummax_int64 = group_cummax["int64_t"]

pandas/_libs/hashtable_class_helper.pxi.in

Lines changed: 13 additions & 13 deletions
@@ -86,12 +86,12 @@ cdef class {{name}}Vector:
         self.data.n = 0
         self.data.m = _INIT_VEC_CAP
         self.ao = np.empty(self.data.m, dtype={{idtype}})
-        self.data.data = <{{arg}}*> self.ao.data
+        self.data.data = <{{arg}}*>self.ao.data

     cdef resize(self):
         self.data.m = max(self.data.m * 4, _INIT_VEC_CAP)
         self.ao.resize(self.data.m, refcheck=False)
-        self.data.data = <{{arg}}*> self.ao.data
+        self.data.data = <{{arg}}*>self.ao.data

     def __dealloc__(self):
         if self.data is not NULL:

@@ -140,7 +140,7 @@ cdef class StringVector:
         self.external_view_exists = False
         self.data.n = 0
         self.data.m = _INIT_VEC_CAP
-        self.data.data = <char **> malloc(self.data.m * sizeof(char *))
+        self.data.data = <char **>malloc(self.data.m * sizeof(char *))
         if not self.data.data:
             raise MemoryError()

@@ -153,7 +153,7 @@ cdef class StringVector:
         self.data.m = max(self.data.m * 4, _INIT_VEC_CAP)

         orig_data = self.data.data
-        self.data.data = <char **> malloc(self.data.m * sizeof(char *))
+        self.data.data = <char **>malloc(self.data.m * sizeof(char *))
         if not self.data.data:
             raise MemoryError()
         for i in range(m):

@@ -208,22 +208,22 @@ cdef class ObjectVector:
         self.n = 0
         self.m = _INIT_VEC_CAP
         self.ao = np.empty(_INIT_VEC_CAP, dtype=object)
-        self.data = <PyObject**> self.ao.data
+        self.data = <PyObject**>self.ao.data

     def __len__(self):
         return self.n

-    cdef inline append(self, object o):
+    cdef inline append(self, object obj):
         if self.n == self.m:
             if self.external_view_exists:
                 raise ValueError("external reference but "
                                  "Vector.resize() needed")
             self.m = max(self.m * 2, _INIT_VEC_CAP)
             self.ao.resize(self.m, refcheck=False)
-            self.data = <PyObject**> self.ao.data
+            self.data = <PyObject**>self.ao.data

-        Py_INCREF(o)
-        self.data[self.n] = <PyObject*> o
+        Py_INCREF(obj)
+        self.data[self.n] = <PyObject*>obj
         self.n += 1

     def to_array(self):

@@ -768,7 +768,7 @@ cdef class StringHashTable(HashTable):
         use_na_value = na_value is not None

         # assign pointers and pre-filter out missing
-        vecs = <const char **> malloc(n * sizeof(char *))
+        vecs = <const char **>malloc(n * sizeof(char *))
         for i in range(n):
             val = values[i]

@@ -844,9 +844,9 @@ cdef class PyObjectHashTable(HashTable):

     def sizeof(self, deep=False):
         """ return the size of my table in bytes """
-        return self.table.n_buckets * (sizeof(PyObject *) + # keys
-                                       sizeof(Py_ssize_t) + # vals
-                                       sizeof(uint32_t)) # flags
+        return self.table.n_buckets * (sizeof(PyObject *) +  # keys
+                                       sizeof(Py_ssize_t) +  # vals
+                                       sizeof(uint32_t))     # flags

     cpdef get_item(self, object val):
         cdef khiter_t k
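
The casts touched above all sit inside the vectors' growth paths; a loose Python sketch of that growth policy (the real ``_INIT_VEC_CAP`` constant lives elsewhere in the hashtable sources — 128 here is an assumption):

_INIT_VEC_CAP = 128  # assumed value of the real constant

class VectorSketch:
    # Typed vectors grow capacity to max(4 * m, _INIT_VEC_CAP) when full
    # (ObjectVector doubles instead), giving amortized O(1) appends.
    def __init__(self):
        self.m = _INIT_VEC_CAP          # capacity
        self.n = 0                      # number of stored elements
        self.buf = [None] * self.m

    def append(self, obj):
        if self.n == self.m:            # buffer full: grow, then store
            self.m = max(self.m * 4, _INIT_VEC_CAP)
            self.buf.extend([None] * (self.m - len(self.buf)))
        self.buf[self.n] = obj
        self.n += 1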

pandas/_libs/hashtable_func_helper.pxi.in

Lines changed: 11 additions & 11 deletions
@@ -45,11 +45,11 @@ cdef build_count_table_{{dtype}}({{dtype}}_t[:] values,
        val = values[i]

        if not checknull(val) or not dropna:
-            k = kh_get_{{ttype}}(table, <PyObject*> val)
+            k = kh_get_{{ttype}}(table, <PyObject*>val)
            if k != table.n_buckets:
                table.vals[k] += 1
            else:
-                k = kh_put_{{ttype}}(table, <PyObject*> val, &ret)
+                k = kh_put_{{ttype}}(table, <PyObject*>val, &ret)
                table.vals[k] = 1
    {{else}}
    with nogil:

@@ -103,7 +103,7 @@ cpdef value_count_{{dtype}}({{scalar}}[:] values, bint dropna):
    {{if dtype == 'object'}}
    for k in range(table.n_buckets):
        if kh_exist_{{ttype}}(table, k):
-            result_keys[i] = <{{dtype}}> table.keys[k]
+            result_keys[i] = <{{dtype}}>table.keys[k]
            result_counts[i] = table.vals[k]
            i += 1
    {{else}}

@@ -152,7 +152,7 @@ def duplicated_{{dtype}}({{scalar}}[:] values, object keep='first'):
    if keep == 'last':
        {{if dtype == 'object'}}
        for i from n > i >= 0:
-            kh_put_{{ttype}}(table, <PyObject*> values[i], &ret)
+            kh_put_{{ttype}}(table, <PyObject*>values[i], &ret)
            out[i] = ret == 0
        {{else}}
        with nogil:

@@ -163,7 +163,7 @@ def duplicated_{{dtype}}({{scalar}}[:] values, object keep='first'):
    elif keep == 'first':
        {{if dtype == 'object'}}
        for i in range(n):
-            kh_put_{{ttype}}(table, <PyObject*> values[i], &ret)
+            kh_put_{{ttype}}(table, <PyObject*>values[i], &ret)
            out[i] = ret == 0
        {{else}}
        with nogil:

@@ -175,13 +175,13 @@ def duplicated_{{dtype}}({{scalar}}[:] values, object keep='first'):
        {{if dtype == 'object'}}
        for i in range(n):
            value = values[i]
-            k = kh_get_{{ttype}}(table, <PyObject*> value)
+            k = kh_get_{{ttype}}(table, <PyObject*>value)
            if k != table.n_buckets:
                out[table.vals[k]] = 1
                out[i] = 1
            else:
-                k = kh_put_{{ttype}}(table, <PyObject*> value, &ret)
-                table.keys[k] = <PyObject*> value
+                k = kh_put_{{ttype}}(table, <PyObject*>value, &ret)
+                table.keys[k] = <PyObject*>value
                table.vals[k] = i
                out[i] = 0
        {{else}}

@@ -245,7 +245,7 @@ def ismember_{{dtype}}({{scalar}}[:] arr, {{scalar}}[:] values):

    {{if dtype == 'object'}}
    for i in range(n):
-        kh_put_{{ttype}}(table, <PyObject*> values[i], &ret)
+        kh_put_{{ttype}}(table, <PyObject*>values[i], &ret)
    {{else}}
    with nogil:
        for i in range(n):

@@ -259,7 +259,7 @@ def ismember_{{dtype}}({{scalar}}[:] arr, {{scalar}}[:] values):
    {{if dtype == 'object'}}
    for i in range(n):
        val = arr[i]
-        k = kh_get_{{ttype}}(table, <PyObject*> val)
+        k = kh_get_{{ttype}}(table, <PyObject*>val)
        result[i] = (k != table.n_buckets)
    {{else}}
    with nogil:

@@ -342,7 +342,7 @@ def mode_{{dtype}}({{ctype}}[:] values, bint dropna):
        else:
            continue

-        modes[j] = <object> table.keys[k]
+        modes[j] = <object>table.keys[k]
    {{endif}}

    kh_destroy_{{table_type}}(table)
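
The templated ``duplicated_{{dtype}}`` above is easier to follow without the khash plumbing; a plain-Python sketch of its ``keep`` modes (a set/dict stands in for the hash table):

def duplicated_py(values, keep='first'):
    n = len(values)
    out = [False] * n
    if keep in ('first', 'last'):
        # Scan toward keep's end; anything already seen is a duplicate.
        seen = set()
        order = range(n) if keep == 'first' else range(n - 1, -1, -1)
        for i in order:
            out[i] = values[i] in seen
            seen.add(values[i])
    else:
        # keep=False: flag every member of any duplicated group.
        first_index = {}
        for i, val in enumerate(values):
            if val in first_index:
                out[first_index[val]] = True
                out[i] = True
            else:
                first_index[val] = i
    return out

duplicated_py([1, 2, 1, 3, 2])              # [False, False, True, False, True]
duplicated_py([1, 2, 1, 3, 2], keep=False)  # [True, True, True, False, True]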

pandas/_libs/join.pyx

Lines changed: 2 additions & 2 deletions
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-

-cimport cython
-from cython cimport Py_ssize_t
+import cython
+from cython import Py_ssize_t

 import numpy as np
 cimport numpy as cnp
