From 49f06ed19d07ff598ef181e524e7a91c2487fd64 Mon Sep 17 00:00:00 2001
From: Brock Mendel <jbrockmendel@gmail.com>
Date: Sat, 6 Oct 2018 08:20:31 -0700
Subject: [PATCH 01/12] use fused types for some sparse functions

---
 pandas/_libs/sparse_op_helper.pxi.in | 86 ++++++++++------------------
 1 file changed, 29 insertions(+), 57 deletions(-)

diff --git a/pandas/_libs/sparse_op_helper.pxi.in b/pandas/_libs/sparse_op_helper.pxi.in
index 2843a3cf7dd28..9e60dbf495393 100644
--- a/pandas/_libs/sparse_op_helper.pxi.in
+++ b/pandas/_libs/sparse_op_helper.pxi.in
@@ -8,18 +8,12 @@ WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in
 # Sparse op
 #----------------------------------------------------------------------
 
-{{py:
-
-# dtype, float_group
-dtypes = [('float64', True), ('int64', False)]
-
-}}
+ctypedef fused sparse_t:
+    float64_t
+    int64_t
 
-{{for dtype, float_group in dtypes}}
 
-{{if float_group}}
-
-cdef inline {{dtype}}_t __div_{{dtype}}({{dtype}}_t a, {{dtype}}_t b):
+cdef inline float64_t __div(sparse_t a, sparse_t b):
     if b == 0:
         if a > 0:
             return INF
@@ -30,63 +24,41 @@ cdef inline {{dtype}}_t __div_{{dtype}}({{dtype}}_t a, {{dtype}}_t b):
     else:
         return float(a) / b
 
-cdef inline {{dtype}}_t __truediv_{{dtype}}({{dtype}}_t a, {{dtype}}_t b):
-    return __div_{{dtype}}(a, b)
 
-cdef inline {{dtype}}_t __floordiv_{{dtype}}({{dtype}}_t a, {{dtype}}_t b):
-    if b == 0:
-        # numpy >= 1.11 returns NaN
-        # for a // 0, rather than +-inf
-        if _np_version_under1p11:
-            if a > 0:
-                return INF
-            elif a < 0:
-                return -INF
-        return NaN
-    else:
-        return a // b
+cdef inline float64_t __truediv(sparse_t a, sparse_t b):
+    return __div(a, b)
 
-cdef inline {{dtype}}_t __mod_{{dtype}}({{dtype}}_t a, {{dtype}}_t b):
-    if b == 0:
-        return NaN
-    else:
-        return a % b
-
-{{else}}
 
-cdef inline float64_t __div_{{dtype}}({{dtype}}_t a, {{dtype}}_t b):
+cdef inline sparse_t __mod(sparse_t a, sparse_t b):
     if b == 0:
-        if a > 0:
-            return INF
-        elif a < 0:
-            return -INF
-        else:
+        if sparse_t is float64_t:
             return NaN
+        else:
+            return 0
     else:
-        return float(a) / b
+        return a % b
 
-cdef inline float64_t __truediv_{{dtype}}({{dtype}}_t a, {{dtype}}_t b):
-    return __div_{{dtype}}(a, b)
 
-cdef inline {{dtype}}_t __floordiv_{{dtype}}({{dtype}}_t a, {{dtype}}_t b):
+cdef inline sparse_t __floordiv(sparse_t a, sparse_t b):
     if b == 0:
-        return 0
+        if sparse_t is float64_t:
+            # numpy >= 1.11 returns NaN
+            # for a // 0, rather than +-inf
+            if _np_version_under1p11:
+                if a > 0:
+                    return INF
+                elif a < 0:
+                    return -INF
+            return NaN
+        else:
+            return 0
     else:
         return a // b
 
-cdef inline {{dtype}}_t __mod_{{dtype}}({{dtype}}_t a, {{dtype}}_t b):
-    if b == 0:
-        return 0
-    else:
-        return a % b
 
-{{endif}}
-
-{{endfor}}
-
-#----------------------------------------------------------------------
+# ----------------------------------------------------------------------
 # sparse array op
-#----------------------------------------------------------------------
+# ----------------------------------------------------------------------
 
 {{py:
 
@@ -106,10 +78,10 @@ def get_op(tup):
     ops_dict = {'add': '{0} + {1}',
                 'sub': '{0} - {1}',
                 'mul': '{0} * {1}',
-                'div': '__div_{2}({0}, {1})',
-                'mod': '__mod_{2}({0}, {1})',
-                'truediv': '__truediv_{2}({0}, {1})',
-                'floordiv': '__floordiv_{2}({0}, {1})',
+                'div': '__div({0}, {1})',
+                'mod': '__mod({0}, {1})',
+                'truediv': '__truediv({0}, {1})',
+                'floordiv': '__floordiv({0}, {1})',
                 'pow': '{0} ** {1}',
                 'eq': '{0} == {1}',
                 'ne': '{0} != {1}',

From d24ec562ae2a6fd405f3d13c7ca7ace6f33abf77 Mon Sep 17 00:00:00 2001
From: Brock Mendel <jbrockmendel@gmail.com>
Date: Sat, 6 Oct 2018 10:36:28 -0700
Subject: [PATCH 02/12] use fused types in groupby_helper

---
 pandas/_libs/groupby_helper.pxi.in | 55 +++++++++++++++++++-----------
 1 file changed, 36 insertions(+), 19 deletions(-)

diff --git a/pandas/_libs/groupby_helper.pxi.in b/pandas/_libs/groupby_helper.pxi.in
index 765381d89705d..e6ee8d345df55 100644
--- a/pandas/_libs/groupby_helper.pxi.in
+++ b/pandas/_libs/groupby_helper.pxi.in
@@ -600,7 +600,7 @@ def group_rank_{{name}}(ndarray[float64_t, ndim=2] out,
 
 {{py:
 
-# name, c_type, dest_type2, nan_val
+# name, dest_type2, nan_val, inf_val
 dtypes = [('float64', 'float64_t', 'NAN', 'np.inf'),
           ('float32', 'float32_t', 'NAN', 'np.inf'),
           ('int64', 'int64_t', 'iNaT', '_int64_max')]
@@ -766,25 +766,36 @@ def group_cummin_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
                         accum[lab, j] = mval = val
                     out[i, j] = mval
 
+{{endfor}}
+
+
+ctypedef fused groupby_t:
+    float64_t
+    float32_t
+    int64_t
+
 
 @cython.boundscheck(False)
 @cython.wraparound(False)
-def group_cummax_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
-                          ndarray[{{dest_type2}}, ndim=2] values,
-                          ndarray[int64_t] labels,
-                          bint is_datetimelike):
+def group_cummax(ndarray[groupby_t, ndim=2] out,
+                 ndarray[groupby_t, ndim=2] values,
+                 ndarray[int64_t] labels,
+                 bint is_datetimelike):
     """
     Only transforms on axis=0
     """
     cdef:
         Py_ssize_t i, j, N, K, size
-        {{dest_type2}} val, mval
-        ndarray[{{dest_type2}}, ndim=2] accum
+        groupby_t val, mval
+        ndarray[groupby_t, ndim=2] accum
         int64_t lab
 
     N, K = (<object> values).shape
     accum = np.empty_like(values)
-    accum.fill(-{{inf_val}})
+    if groupby_t is int64_t:
+        accum.fill(-_int64_max)
+    else:
+        accum.fill(-np.inf)
 
     with nogil:
         for i in range(N):
@@ -795,16 +806,22 @@ def group_cummax_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
             for j in range(K):
                 val = values[i, j]
 
-                {{if name == 'int64'}}
-                if is_datetimelike and val == {{nan_val}}:
-                    out[i, j] = {{nan_val}}
+                if groupby_t is int64_t:
+                    if is_datetimelike and val == iNaT:
+                        out[i, j] = iNaT
+                    else:
+                        mval = accum[lab, j]
+                        if val > mval:
+                            accum[lab, j] = mval = val
+                        out[i, j] = mval
                 else:
-                {{else}}
-                if val == val:
-                {{endif}}
-                    mval = accum[lab, j]
-                    if val > mval:
-                        accum[lab, j] = mval = val
-                    out[i, j] = mval
+                    if val == val:
+                        mval = accum[lab, j]
+                        if val > mval:
+                            accum[lab, j] = mval = val
+                        out[i, j] = mval
 
-{{endfor}}
+
+group_cummax_float64 = group_cummax["float64_t"]
+group_cummax_float32 = group_cummax["float32_t"]
+group_cummax_int64 = group_cummax["int64_t"]

From 54520e2a565e484f36c8503816eeee96ec4e5be1 Mon Sep 17 00:00:00 2001
From: Brock Mendel <jbrockmendel@gmail.com>
Date: Sat, 6 Oct 2018 11:38:59 -0700
Subject: [PATCH 03/12] Use fused types for more of groupby_helper

---
 pandas/_libs/groupby_helper.pxi.in | 59 ++++++++++++++++++------------
 1 file changed, 36 insertions(+), 23 deletions(-)

diff --git a/pandas/_libs/groupby_helper.pxi.in b/pandas/_libs/groupby_helper.pxi.in
index e6ee8d345df55..3042affa63960 100644
--- a/pandas/_libs/groupby_helper.pxi.in
+++ b/pandas/_libs/groupby_helper.pxi.in
@@ -725,24 +725,37 @@ def group_min_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
                     out[i, j] = minx[i, j]
 
 
+
+{{endfor}}
+
+
+ctypedef fused groupby_t:
+    float64_t
+    float32_t
+    int64_t
+
+
 @cython.boundscheck(False)
 @cython.wraparound(False)
-def group_cummin_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
-                          ndarray[{{dest_type2}}, ndim=2] values,
-                          ndarray[int64_t] labels,
-                          bint is_datetimelike):
+def group_cummin(ndarray[groupby_t, ndim=2] out,
+                 ndarray[groupby_t, ndim=2] values,
+                 ndarray[int64_t] labels,
+                 bint is_datetimelike):
     """
     Only transforms on axis=0
     """
     cdef:
         Py_ssize_t i, j, N, K, size
-        {{dest_type2}} val, mval
-        ndarray[{{dest_type2}}, ndim=2] accum
+        groupby_t val, mval
+        ndarray[groupby_t, ndim=2] accum
         int64_t lab
 
     N, K = (<object> values).shape
     accum = np.empty_like(values)
-    accum.fill({{inf_val}})
+    if groupby_t is int64_t:
+        accum.fill(_int64_max)
+    else:
+        accum.fill(np.inf)
 
     with nogil:
         for i in range(N):
@@ -754,25 +767,25 @@ def group_cummin_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
                 val = values[i, j]
 
                 # val = nan
-                {{if name == 'int64'}}
-                if is_datetimelike and val == {{nan_val}}:
-                    out[i, j] = {{nan_val}}
+                if groupby_t is int64_t:
+                    if is_datetimelike and val == iNaT:
+                        out[i, j] = iNaT
+                    else:
+                        mval = accum[lab, j]
+                        if val < mval:
+                            accum[lab, j] = mval = val
+                        out[i, j] = mval
                 else:
-                {{else}}
-                if val == val:
-                {{endif}}
-                    mval = accum[lab, j]
-                    if val < mval:
-                        accum[lab, j] = mval = val
-                    out[i, j] = mval
-
-{{endfor}}
+                    if val == val:
+                        mval = accum[lab, j]
+                        if val < mval:
+                            accum[lab, j] = mval = val
+                        out[i, j] = mval
 
 
-ctypedef fused groupby_t:
-    float64_t
-    float32_t
-    int64_t
+group_cummin_float64 = group_cummin["float64_t"]
+group_cummin_float32 = group_cummin["float32_t"]
+group_cummin_int64 = group_cummin["int64_t"]
 
 
 @cython.boundscheck(False)

From 1c7995804ee223d2d10280df46671e3a13460972 Mon Sep 17 00:00:00 2001
From: Brock Mendel <jbrockmendel@gmail.com>
Date: Sat, 6 Oct 2018 12:24:56 -0700
Subject: [PATCH 04/12] drop unused dest_dtype/dest_type2 from algos_common_helper template

---
 pandas/_libs/algos_common_helper.pxi.in | 32 +++++++++++--------------
 1 file changed, 14 insertions(+), 18 deletions(-)

diff --git a/pandas/_libs/algos_common_helper.pxi.in b/pandas/_libs/algos_common_helper.pxi.in
index 9f531f36d1a64..92e8e9d61d251 100644
--- a/pandas/_libs/algos_common_helper.pxi.in
+++ b/pandas/_libs/algos_common_helper.pxi.in
@@ -16,33 +16,29 @@ WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in
 
 {{py:
 
-# name, c_type, dest_type, dest_dtype
-dtypes = [('float64', 'float64_t', 'float64_t', 'np.float64'),
-          ('float32', 'float32_t', 'float32_t', 'np.float32'),
-          ('int8',  'int8_t',  'float32_t', 'np.float32'),
-          ('int16', 'int16_t', 'float32_t', 'np.float32'),
-          ('int32', 'int32_t', 'float64_t', 'np.float64'),
-          ('int64', 'int64_t', 'float64_t', 'np.float64')]
+# name, c_type, dest_type
+dtypes = [('float64', 'float64_t', 'float64_t'),
+          ('float32', 'float32_t', 'float32_t'),
+          ('int8',  'int8_t',  'float32_t'),
+          ('int16', 'int16_t', 'float32_t'),
+          ('int32', 'int32_t', 'float64_t'),
+          ('int64', 'int64_t', 'float64_t')]
 
 def get_dispatch(dtypes):
 
-    for name, c_type, dest_type, dest_dtype, in dtypes:
-
-        dest_type2 = dest_type
-        dest_type = dest_type.replace('_t', '')
-
-        yield name, c_type, dest_type, dest_type2, dest_dtype
+    for name, c_type, dest_type, in dtypes:
+        yield name, c_type, dest_type
 
 }}
 
-{{for name, c_type, dest_type, dest_type2, dest_dtype
+{{for name, c_type, dest_type
       in get_dispatch(dtypes)}}
 
 
 @cython.boundscheck(False)
 @cython.wraparound(False)
 def diff_2d_{{name}}(ndarray[{{c_type}}, ndim=2] arr,
-                     ndarray[{{dest_type2}}, ndim=2] out,
+                     ndarray[{{dest_type}}, ndim=2] out,
                      Py_ssize_t periods, int axis):
     cdef:
         Py_ssize_t i, j, sx, sy
@@ -84,9 +80,9 @@ def diff_2d_{{name}}(ndarray[{{c_type}}, ndim=2] arr,
                     out[i, j] = arr[i, j] - arr[i, j - periods]
 
 
-def put2d_{{name}}_{{dest_type}}(ndarray[{{c_type}}, ndim=2, cast=True] values,
-                                 ndarray[int64_t] indexer, Py_ssize_t loc,
-                                 ndarray[{{dest_type2}}] out):
+def put2d_{{name}}_{{dest_type[:-2]}}(ndarray[{{c_type}}, ndim=2, cast=True] values,
+                                      ndarray[int64_t] indexer, Py_ssize_t loc,
+                                      ndarray[{{dest_type}}] out):
     cdef:
         Py_ssize_t i, j, k
 

From e6002779e10a3bc738d9905bf415ee2f433e20bb Mon Sep 17 00:00:00 2001
From: Brock Mendel <jbrockmendel@gmail.com>
Date: Sat, 6 Oct 2018 12:26:19 -0700
Subject: [PATCH 05/12] remove unnecessary arg

---
 pandas/core/internals/blocks.py | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
index 0e57dd33b1c4e..2b9f583cd2ebd 100644
--- a/pandas/core/internals/blocks.py
+++ b/pandas/core/internals/blocks.py
@@ -1153,7 +1153,7 @@ def check_int_bool(self, inplace):
                                                inplace=inplace, limit=limit,
                                                fill_value=fill_value,
                                                coerce=coerce,
-                                               downcast=downcast, mgr=mgr)
+                                               downcast=downcast)
         # try an interp method
         try:
             m = missing.clean_interp_method(method, **kwargs)
@@ -1169,13 +1169,14 @@ def check_int_bool(self, inplace):
                                      limit_direction=limit_direction,
                                      limit_area=limit_area,
                                      fill_value=fill_value, inplace=inplace,
-                                     downcast=downcast, mgr=mgr, **kwargs)
+                                     downcast=downcast, **kwargs)
 
-        raise ValueError("invalid method '{0}' to interpolate.".format(method))
+        raise ValueError("invalid method '{method}' to interpolate."
+                         .format(method=method))
 
     def _interpolate_with_fill(self, method='pad', axis=0, inplace=False,
                                limit=None, fill_value=None, coerce=False,
-                               downcast=None, mgr=None):
+                               downcast=None):
         """ fillna but using the interpolate machinery """
 
         inplace = validate_bool_kwarg(inplace, 'inplace')
@@ -1202,7 +1203,7 @@ def _interpolate_with_fill(self, method='pad', axis=0, inplace=False,
     def _interpolate(self, method=None, index=None, values=None,
                      fill_value=None, axis=0, limit=None,
                      limit_direction='forward', limit_area=None,
-                     inplace=False, downcast=None, mgr=None, **kwargs):
+                     inplace=False, downcast=None, **kwargs):
         """ interpolate using scipy wrappers """
 
         inplace = validate_bool_kwarg(inplace, 'inplace')
@@ -1219,8 +1220,8 @@ def _interpolate(self, method=None, index=None, values=None,
 
         if method in ('krogh', 'piecewise_polynomial', 'pchip'):
             if not index.is_monotonic:
-                raise ValueError("{0} interpolation requires that the "
-                                 "index be monotonic.".format(method))
+                raise ValueError("{method} interpolation requires that the "
+                                 "index be monotonic.".format(method=method))
         # process 1-d slices in the axis direction
 
         def func(x):

From 89997ee412e12bd741893572d682ad52c86ba083 Mon Sep 17 00:00:00 2001
From: Brock Mendel <jbrockmendel@gmail.com>
Date: Sat, 6 Oct 2018 19:14:23 -0700
Subject: [PATCH 06/12] cleanup and fuse

---
 pandas/_libs/groupby_helper.pxi.in   | 102 +++++++++++++--------------
 pandas/_libs/join_func_helper.pxi.in |  68 ++++++++++++------
 2 files changed, 95 insertions(+), 75 deletions(-)

diff --git a/pandas/_libs/groupby_helper.pxi.in b/pandas/_libs/groupby_helper.pxi.in
index 3042affa63960..ccc5aad425cec 100644
--- a/pandas/_libs/groupby_helper.pxi.in
+++ b/pandas/_libs/groupby_helper.pxi.in
@@ -14,26 +14,22 @@ _int64_max = np.iinfo(np.int64).max
 
 {{py:
 
-# name, c_type, dest_type, dest_dtype
-dtypes = [('float64', 'float64_t', 'float64_t', 'np.float64'),
-          ('float32', 'float32_t', 'float32_t', 'np.float32')]
+# name, c_type
+dtypes = [('float64', 'float64_t'),
+          ('float32', 'float32_t')]
 
 def get_dispatch(dtypes):
 
-    for name, c_type, dest_type, dest_dtype in dtypes:
-
-        dest_type2 = dest_type
-        dest_type = dest_type.replace('_t', '')
-
-        yield name, c_type, dest_type, dest_type2, dest_dtype
+    for name, c_type in dtypes:
+        yield name, c_type
 }}
 
-{{for name, c_type, dest_type, dest_type2, dest_dtype in get_dispatch(dtypes)}}
+{{for name, c_type in get_dispatch(dtypes)}}
 
 
 @cython.wraparound(False)
 @cython.boundscheck(False)
-def group_add_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
+def group_add_{{name}}(ndarray[{{c_type}}, ndim=2] out,
                        ndarray[int64_t] counts,
                        ndarray[{{c_type}}, ndim=2] values,
                        ndarray[int64_t] labels,
@@ -43,8 +39,8 @@ def group_add_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
     """
     cdef:
         Py_ssize_t i, j, N, K, lab, ncounts = len(counts)
-        {{dest_type2}} val, count
-        ndarray[{{dest_type2}}, ndim=2] sumx, nobs
+        {{c_type}} val, count
+        ndarray[{{c_type}}, ndim=2] sumx, nobs
 
     if not len(values) == len(labels):
         raise AssertionError("len(index) != len(labels)")
@@ -80,7 +76,7 @@ def group_add_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
 
 @cython.wraparound(False)
 @cython.boundscheck(False)
-def group_prod_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
+def group_prod_{{name}}(ndarray[{{c_type}}, ndim=2] out,
                         ndarray[int64_t] counts,
                         ndarray[{{c_type}}, ndim=2] values,
                         ndarray[int64_t] labels,
@@ -90,8 +86,8 @@ def group_prod_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
     """
     cdef:
         Py_ssize_t i, j, N, K, lab, ncounts = len(counts)
-        {{dest_type2}} val, count
-        ndarray[{{dest_type2}}, ndim=2] prodx, nobs
+        {{c_type}} val, count
+        ndarray[{{c_type}}, ndim=2] prodx, nobs
 
     if not len(values) == len(labels):
         raise AssertionError("len(index) != len(labels)")
@@ -127,15 +123,15 @@ def group_prod_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
 @cython.wraparound(False)
 @cython.boundscheck(False)
 @cython.cdivision(True)
-def group_var_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
+def group_var_{{name}}(ndarray[{{c_type}}, ndim=2] out,
                        ndarray[int64_t] counts,
-                       ndarray[{{dest_type2}}, ndim=2] values,
+                       ndarray[{{c_type}}, ndim=2] values,
                        ndarray[int64_t] labels,
                        Py_ssize_t min_count=-1):
     cdef:
         Py_ssize_t i, j, N, K, lab, ncounts = len(counts)
-        {{dest_type2}} val, ct, oldmean
-        ndarray[{{dest_type2}}, ndim=2] nobs, mean
+        {{c_type}} val, ct, oldmean
+        ndarray[{{c_type}}, ndim=2] nobs, mean
 
     assert min_count == -1, "'min_count' only used in add and prod"
 
@@ -179,15 +175,15 @@ def group_var_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
 
 @cython.wraparound(False)
 @cython.boundscheck(False)
-def group_mean_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
+def group_mean_{{name}}(ndarray[{{c_type}}, ndim=2] out,
                         ndarray[int64_t] counts,
-                        ndarray[{{dest_type2}}, ndim=2] values,
+                        ndarray[{{c_type}}, ndim=2] values,
                         ndarray[int64_t] labels,
                         Py_ssize_t min_count=-1):
     cdef:
         Py_ssize_t i, j, N, K, lab, ncounts = len(counts)
-        {{dest_type2}} val, count
-        ndarray[{{dest_type2}}, ndim=2] sumx, nobs
+        {{c_type}} val, count
+        ndarray[{{c_type}}, ndim=2] sumx, nobs
 
     assert min_count == -1, "'min_count' only used in add and prod"
 
@@ -224,9 +220,9 @@ def group_mean_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
 
 @cython.wraparound(False)
 @cython.boundscheck(False)
-def group_ohlc_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
+def group_ohlc_{{name}}(ndarray[{{c_type}}, ndim=2] out,
                   ndarray[int64_t] counts,
-                  ndarray[{{dest_type2}}, ndim=2] values,
+                  ndarray[{{c_type}}, ndim=2] values,
                   ndarray[int64_t] labels,
                   Py_ssize_t min_count=-1):
     """
@@ -234,7 +230,7 @@ def group_ohlc_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
     """
     cdef:
         Py_ssize_t i, j, N, K, lab
-        {{dest_type2}} val, count
+        {{c_type}} val, count
         Py_ssize_t ngroups = len(counts)
 
     assert min_count == -1, "'min_count' only used in add and prod"
@@ -278,26 +274,26 @@ def group_ohlc_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
 
 {{py:
 
-# name, c_type, dest_type2, nan_val
-dtypes = [('float64', 'float64_t', 'float64_t', 'NAN'),
-          ('float32', 'float32_t', 'float32_t', 'NAN'),
-          ('int64', 'int64_t', 'int64_t', 'iNaT'),
-          ('object', 'object', 'object', 'NAN')]
+# name, c_type, nan_val
+dtypes = [('float64', 'float64_t', 'NAN'),
+          ('float32', 'float32_t', 'NAN'),
+          ('int64', 'int64_t', 'iNaT'),
+          ('object', 'object', 'NAN')]
 
 def get_dispatch(dtypes):
 
-    for name, c_type, dest_type2, nan_val in dtypes:
+    for name, c_type, nan_val in dtypes:
 
-        yield name, c_type, dest_type2, nan_val
+        yield name, c_type, nan_val
 }}
 
 
-{{for name, c_type, dest_type2, nan_val in get_dispatch(dtypes)}}
+{{for name, c_type, nan_val in get_dispatch(dtypes)}}
 
 
 @cython.wraparound(False)
 @cython.boundscheck(False)
-def group_last_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
+def group_last_{{name}}(ndarray[{{c_type}}, ndim=2] out,
                         ndarray[int64_t] counts,
                         ndarray[{{c_type}}, ndim=2] values,
                         ndarray[int64_t] labels,
@@ -307,8 +303,8 @@ def group_last_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
     """
     cdef:
         Py_ssize_t i, j, N, K, lab, ncounts = len(counts)
-        {{dest_type2}} val
-        ndarray[{{dest_type2}}, ndim=2] resx
+        {{c_type}} val
+        ndarray[{{c_type}}, ndim=2] resx
         ndarray[int64_t, ndim=2] nobs
 
     assert min_count == -1, "'min_count' only used in add and prod"
@@ -353,7 +349,7 @@ def group_last_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
 
 @cython.wraparound(False)
 @cython.boundscheck(False)
-def group_nth_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
+def group_nth_{{name}}(ndarray[{{c_type}}, ndim=2] out,
                        ndarray[int64_t] counts,
                        ndarray[{{c_type}}, ndim=2] values,
                        ndarray[int64_t] labels, int64_t rank,
@@ -363,8 +359,8 @@ def group_nth_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
     """
     cdef:
         Py_ssize_t i, j, N, K, lab, ncounts = len(counts)
-        {{dest_type2}} val
-        ndarray[{{dest_type2}}, ndim=2] resx
+        {{c_type}} val
+        ndarray[{{c_type}}, ndim=2] resx
         ndarray[int64_t, ndim=2] nobs
 
     assert min_count == -1, "'min_count' only used in add and prod"
@@ -600,26 +596,26 @@ def group_rank_{{name}}(ndarray[float64_t, ndim=2] out,
 
 {{py:
 
-# name, dest_type2, nan_val, inf_val
+# name, c_type, nan_val, inf_val
 dtypes = [('float64', 'float64_t', 'NAN', 'np.inf'),
           ('float32', 'float32_t', 'NAN', 'np.inf'),
           ('int64', 'int64_t', 'iNaT', '_int64_max')]
 
 def get_dispatch(dtypes):
 
-    for name, dest_type2, nan_val, inf_val in dtypes:
-        yield name, dest_type2, nan_val, inf_val
+    for name, c_type, nan_val, inf_val in dtypes:
+        yield name, c_type, nan_val, inf_val
 }}
 
 
-{{for name, dest_type2, nan_val, inf_val in get_dispatch(dtypes)}}
+{{for name, c_type, nan_val, inf_val in get_dispatch(dtypes)}}
 
 
 @cython.wraparound(False)
 @cython.boundscheck(False)
-def group_max_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
+def group_max_{{name}}(ndarray[{{c_type}}, ndim=2] out,
                        ndarray[int64_t] counts,
-                       ndarray[{{dest_type2}}, ndim=2] values,
+                       ndarray[{{c_type}}, ndim=2] values,
                        ndarray[int64_t] labels,
                        Py_ssize_t min_count=-1):
     """
@@ -627,8 +623,8 @@ def group_max_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
     """
     cdef:
         Py_ssize_t i, j, N, K, lab, ncounts = len(counts)
-        {{dest_type2}} val, count
-        ndarray[{{dest_type2}}, ndim=2] maxx, nobs
+        {{c_type}} val, count
+        ndarray[{{c_type}}, ndim=2] maxx, nobs
 
     assert min_count == -1, "'min_count' only used in add and prod"
 
@@ -672,9 +668,9 @@ def group_max_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
 
 @cython.wraparound(False)
 @cython.boundscheck(False)
-def group_min_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
+def group_min_{{name}}(ndarray[{{c_type}}, ndim=2] out,
                        ndarray[int64_t] counts,
-                       ndarray[{{dest_type2}}, ndim=2] values,
+                       ndarray[{{c_type}}, ndim=2] values,
                        ndarray[int64_t] labels,
                        Py_ssize_t min_count=-1):
     """
@@ -682,8 +678,8 @@ def group_min_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
     """
     cdef:
         Py_ssize_t i, j, N, K, lab, ncounts = len(counts)
-        {{dest_type2}} val, count
-        ndarray[{{dest_type2}}, ndim=2] minx, nobs
+        {{c_type}} val, count
+        ndarray[{{c_type}}, ndim=2] minx, nobs
 
     assert min_count == -1, "'min_count' only used in add and prod"
 
diff --git a/pandas/_libs/join_func_helper.pxi.in b/pandas/_libs/join_func_helper.pxi.in
index a72b113a6fdb6..e579f43715315 100644
--- a/pandas/_libs/join_func_helper.pxi.in
+++ b/pandas/_libs/join_func_helper.pxi.in
@@ -211,34 +211,34 @@ def asof_join_nearest_{{on_dtype}}_by_{{by_dtype}}(
 {{endfor}}
 
 
-#----------------------------------------------------------------------
+# ----------------------------------------------------------------------
 # asof_join
-#----------------------------------------------------------------------
-
-{{py:
-
-# on_dtype
-dtypes = ['uint8_t', 'uint16_t', 'uint32_t', 'uint64_t',
-          'int8_t', 'int16_t', 'int32_t', 'int64_t',
-          'float', 'double']
-
-}}
-
-{{for on_dtype in dtypes}}
-
-
-def asof_join_backward_{{on_dtype}}(
-        ndarray[{{on_dtype}}] left_values,
-        ndarray[{{on_dtype}}] right_values,
-        bint allow_exact_matches=1,
-        tolerance=None):
+# ----------------------------------------------------------------------
+
+ctypedef fused asof_t:
+    uint8_t
+    uint16_t
+    uint32_t
+    uint64_t
+    int8_t
+    int16_t
+    int32_t
+    int64_t
+    float
+    double
+
+
+def asof_join_backward(ndarray[asof_t] left_values,
+                       ndarray[asof_t] right_values,
+                       bint allow_exact_matches=1,
+                       tolerance=None):
 
     cdef:
         Py_ssize_t left_pos, right_pos, left_size, right_size
         ndarray[int64_t] left_indexer, right_indexer
         bint has_tolerance = 0
-        {{on_dtype}} tolerance_ = 0
-        {{on_dtype}} diff = 0
+        asof_t tolerance_ = 0
+        asof_t diff = 0
 
     # if we are using tolerance, set our objects
     if tolerance is not None:
@@ -281,6 +281,30 @@ def asof_join_backward_{{on_dtype}}(
     return left_indexer, right_indexer
 
 
+asof_join_backward_uint8_t = asof_join_backward["uint8_t"]
+asof_join_backward_uint16_t = asof_join_backward["uint16_t"]
+asof_join_backward_uint32_t = asof_join_backward["uint32_t"]
+asof_join_backward_uint64_t = asof_join_backward["uint64_t"]
+asof_join_backward_int8_t = asof_join_backward["int8_t"]
+asof_join_backward_int16_t = asof_join_backward["int16_t"]
+asof_join_backward_int32_t = asof_join_backward["int32_t"]
+asof_join_backward_int64_t = asof_join_backward["int64_t"]
+asof_join_backward_float = asof_join_backward["float"]
+asof_join_backward_double = asof_join_backward["double"]
+
+
+{{py:
+
+# on_dtype
+dtypes = ['uint8_t', 'uint16_t', 'uint32_t', 'uint64_t',
+          'int8_t', 'int16_t', 'int32_t', 'int64_t',
+          'float', 'double']
+
+}}
+
+{{for on_dtype in dtypes}}
+
+
 def asof_join_forward_{{on_dtype}}(
         ndarray[{{on_dtype}}] left_values,
         ndarray[{{on_dtype}}] right_values,

From b13317b8ac32ce515a9b7dad212b391f72e1a60d Mon Sep 17 00:00:00 2001
From: Brock Mendel <jbrockmendel@gmail.com>
Date: Sat, 6 Oct 2018 20:13:10 -0700
Subject: [PATCH 07/12] revert non-central changes

---
 pandas/core/internals/blocks.py | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
index 2b9f583cd2ebd..0e57dd33b1c4e 100644
--- a/pandas/core/internals/blocks.py
+++ b/pandas/core/internals/blocks.py
@@ -1153,7 +1153,7 @@ def check_int_bool(self, inplace):
                                                inplace=inplace, limit=limit,
                                                fill_value=fill_value,
                                                coerce=coerce,
-                                               downcast=downcast)
+                                               downcast=downcast, mgr=mgr)
         # try an interp method
         try:
             m = missing.clean_interp_method(method, **kwargs)
@@ -1169,14 +1169,13 @@ def check_int_bool(self, inplace):
                                      limit_direction=limit_direction,
                                      limit_area=limit_area,
                                      fill_value=fill_value, inplace=inplace,
-                                     downcast=downcast, **kwargs)
+                                     downcast=downcast, mgr=mgr, **kwargs)
 
-        raise ValueError("invalid method '{method}' to interpolate."
-                         .format(method=method))
+        raise ValueError("invalid method '{0}' to interpolate.".format(method))
 
     def _interpolate_with_fill(self, method='pad', axis=0, inplace=False,
                                limit=None, fill_value=None, coerce=False,
-                               downcast=None):
+                               downcast=None, mgr=None):
         """ fillna but using the interpolate machinery """
 
         inplace = validate_bool_kwarg(inplace, 'inplace')
@@ -1203,7 +1202,7 @@ def _interpolate_with_fill(self, method='pad', axis=0, inplace=False,
     def _interpolate(self, method=None, index=None, values=None,
                      fill_value=None, axis=0, limit=None,
                      limit_direction='forward', limit_area=None,
-                     inplace=False, downcast=None, **kwargs):
+                     inplace=False, downcast=None, mgr=None, **kwargs):
         """ interpolate using scipy wrappers """
 
         inplace = validate_bool_kwarg(inplace, 'inplace')
@@ -1220,8 +1219,8 @@ def _interpolate(self, method=None, index=None, values=None,
 
         if method in ('krogh', 'piecewise_polynomial', 'pchip'):
             if not index.is_monotonic:
-                raise ValueError("{method} interpolation requires that the "
-                                 "index be monotonic.".format(method=method))
+                raise ValueError("{0} interpolation requires that the "
+                                 "index be monotonic.".format(method))
         # process 1-d slices in the axis direction
 
         def func(x):

From db9d796f9d670bef642f142997700f29f23b2db2 Mon Sep 17 00:00:00 2001
From: Brock Mendel <jbrockmendel@gmail.com>
Date: Sun, 7 Oct 2018 09:14:35 -0700
Subject: [PATCH 08/12] fuse more things

---
 pandas/_libs/algos_rank_helper.pxi.in |  56 +++++-------
 pandas/_libs/groupby_helper.pxi.in    | 123 +++++++++++++-------------
 pandas/_libs/join_func_helper.pxi.in  |  68 +++++++-------
 3 files changed, 121 insertions(+), 126 deletions(-)

diff --git a/pandas/_libs/algos_rank_helper.pxi.in b/pandas/_libs/algos_rank_helper.pxi.in
index 130276ae0e73c..d7b08b0548810 100644
--- a/pandas/_libs/algos_rank_helper.pxi.in
+++ b/pandas/_libs/algos_rank_helper.pxi.in
@@ -131,45 +131,20 @@ def rank_1d_{{dtype}}(object in_arr, ties_method='average',
     argsorted = _as.astype('i8')
 
     {{if dtype == 'object'}}
-    for i in range(n):
-        sum_ranks += i + 1
-        dups += 1
-        isnan = sorted_mask[i]
-        val = util.get_value_at(sorted_data, i)
-
-        if isnan and keep_na:
-            ranks[argsorted[i]] = nan
-            continue
-        count += 1.0
-
-        if (i == n - 1 or
-                are_diff(util.get_value_at(sorted_data, i + 1), val) or
-                i == non_na_idx):
-            if tiebreak == TIEBREAK_AVERAGE:
-                for j in range(i - dups + 1, i + 1):
-                    ranks[argsorted[j]] = sum_ranks / dups
-            elif tiebreak == TIEBREAK_MIN:
-                for j in range(i - dups + 1, i + 1):
-                    ranks[argsorted[j]] = i - dups + 2
-            elif tiebreak == TIEBREAK_MAX:
-                for j in range(i - dups + 1, i + 1):
-                    ranks[argsorted[j]] = i + 1
-            elif tiebreak == TIEBREAK_FIRST:
-                raise ValueError('first not supported for non-numeric data')
-            elif tiebreak == TIEBREAK_FIRST_DESCENDING:
-                for j in range(i - dups + 1, i + 1):
-                    ranks[argsorted[j]] = 2 * i - j - dups + 2
-            elif tiebreak == TIEBREAK_DENSE:
-                total_tie_count += 1
-                for j in range(i - dups + 1, i + 1):
-                    ranks[argsorted[j]] = total_tie_count
-            sum_ranks = dups = 0
+    if True:
     {{else}}
     with nogil:
+    {{endif}}
+        # TODO: why does the 2d version not have a nogil block?
         for i in range(n):
             sum_ranks += i + 1
             dups += 1
+
+            {{if dtype == 'object'}}
+            val = util.get_value_at(sorted_data, i)
+            {{else}}
             val = sorted_data[i]
+            {{endif}}
 
             {{if dtype != 'uint64'}}
             isnan = sorted_mask[i]
@@ -180,8 +155,14 @@ def rank_1d_{{dtype}}(object in_arr, ties_method='average',
 
             count += 1.0
 
-            if (i == n - 1 or sorted_data[i + 1] != val or
-                i == non_na_idx):
+            {{if dtype == 'object'}}
+            if (i == n - 1 or
+                    are_diff(util.get_value_at(sorted_data, i + 1), val) or
+                    i == non_na_idx):
+            {{else}}
+            if (i == n - 1 or sorted_data[i + 1] != val or i == non_na_idx):
+            {{endif}}
+
                 if tiebreak == TIEBREAK_AVERAGE:
                     for j in range(i - dups + 1, i + 1):
                         ranks[argsorted[j]] = sum_ranks / dups
@@ -192,8 +173,12 @@ def rank_1d_{{dtype}}(object in_arr, ties_method='average',
                     for j in range(i - dups + 1, i + 1):
                         ranks[argsorted[j]] = i + 1
                 elif tiebreak == TIEBREAK_FIRST:
+                    {{if dtype == 'object'}}
+                    raise ValueError('first not supported for non-numeric data')
+                    {{else}}
                     for j in range(i - dups + 1, i + 1):
                         ranks[argsorted[j]] = j + 1
+                    {{endif}}
                 elif tiebreak == TIEBREAK_FIRST_DESCENDING:
                     for j in range(i - dups + 1, i + 1):
                         ranks[argsorted[j]] = 2 * i - j - dups + 2
@@ -202,7 +187,6 @@ def rank_1d_{{dtype}}(object in_arr, ties_method='average',
                     for j in range(i - dups + 1, i + 1):
                         ranks[argsorted[j]] = total_tie_count
                 sum_ranks = dups = 0
-    {{endif}}
     if pct:
         if tiebreak == TIEBREAK_DENSE:
             return ranks / total_tie_count
diff --git a/pandas/_libs/groupby_helper.pxi.in b/pandas/_libs/groupby_helper.pxi.in
index ccc5aad425cec..859bb66249c3b 100644
--- a/pandas/_libs/groupby_helper.pxi.in
+++ b/pandas/_libs/groupby_helper.pxi.in
@@ -466,7 +466,7 @@ def group_rank_{{name}}(ndarray[float64_t, ndim=2] out,
     # with mask, without obfuscating location of missing data
     # in values array
     masked_vals = np.array(values[:, 0], copy=True)
-    {{if name=='int64'}}
+    {{if name == 'int64'}}
     mask = (masked_vals == {{nan_val}}).astype(np.uint8)
     {{else}}
     mask = np.isnan(masked_vals).astype(np.uint8)
@@ -590,41 +590,30 @@ def group_rank_{{name}}(ndarray[float64_t, ndim=2] out,
 {{endfor}}
 
 
-#----------------------------------------------------------------------
+# ----------------------------------------------------------------------
 # group_min, group_max
-#----------------------------------------------------------------------
-
-{{py:
-
-# name, c_type, nan_val, inf_val
-dtypes = [('float64', 'float64_t', 'NAN', 'np.inf'),
-          ('float32', 'float32_t', 'NAN', 'np.inf'),
-          ('int64', 'int64_t', 'iNaT', '_int64_max')]
-
-def get_dispatch(dtypes):
-
-    for name, c_type, nan_val, inf_val in dtypes:
-        yield name, c_type, nan_val, inf_val
-}}
+# ----------------------------------------------------------------------
 
-
-{{for name, c_type, nan_val, inf_val in get_dispatch(dtypes)}}
+ctypedef fused groupby_t:
+    float64_t
+    float32_t
+    int64_t
 
 
 @cython.wraparound(False)
 @cython.boundscheck(False)
-def group_max_{{name}}(ndarray[{{c_type}}, ndim=2] out,
-                       ndarray[int64_t] counts,
-                       ndarray[{{c_type}}, ndim=2] values,
-                       ndarray[int64_t] labels,
-                       Py_ssize_t min_count=-1):
+def group_max(ndarray[groupby_t, ndim=2] out,
+              ndarray[int64_t] counts,
+              ndarray[groupby_t, ndim=2] values,
+              ndarray[int64_t] labels,
+              Py_ssize_t min_count=-1):
     """
     Only aggregates on axis=0
     """
     cdef:
         Py_ssize_t i, j, N, K, lab, ncounts = len(counts)
-        {{c_type}} val, count
-        ndarray[{{c_type}}, ndim=2] maxx, nobs
+        groupby_t val, count, nan_val
+        ndarray[groupby_t, ndim=2] maxx, nobs
 
     assert min_count == -1, "'min_count' only used in add and prod"
 
@@ -634,7 +623,12 @@ def group_max_{{name}}(ndarray[{{c_type}}, ndim=2] out,
     nobs = np.zeros_like(out)
 
     maxx = np.empty_like(out)
-    maxx.fill(-{{inf_val}})
+    if groupby_t is int64_t:
+        maxx.fill(-_int64_max)
+        nan_val = iNaT
+    else:
+        maxx.fill(-np.inf)
+        nan_val = NAN
 
     N, K = (<object> values).shape
 
@@ -649,37 +643,44 @@ def group_max_{{name}}(ndarray[{{c_type}}, ndim=2] out,
                 val = values[i, j]
 
                 # not nan
-                {{if name == 'int64'}}
-                if val != {{nan_val}}:
-                {{else}}
-                if val == val and val != {{nan_val}}:
-                {{endif}}
-                    nobs[lab, j] += 1
-                    if val > maxx[lab, j]:
-                        maxx[lab, j] = val
+                if groupby_t is int64_t:
+                    if val != nan_val:
+                        nobs[lab, j] += 1
+                        if val > maxx[lab, j]:
+                            maxx[lab, j] = val
+                else:
+                    if val == val and val != nan_val:
+                        nobs[lab, j] += 1
+                        if val > maxx[lab, j]:
+                            maxx[lab, j] = val
 
         for i in range(ncounts):
             for j in range(K):
                 if nobs[i, j] == 0:
-                    out[i, j] = {{nan_val}}
+                    out[i, j] = nan_val
                 else:
                     out[i, j] = maxx[i, j]
 
 
+group_max_float64 = group_max["float64_t"]
+group_max_float32 = group_max["float32_t"]
+group_max_int64 = group_max["int64_t"]
+
+
 @cython.wraparound(False)
 @cython.boundscheck(False)
-def group_min_{{name}}(ndarray[{{c_type}}, ndim=2] out,
-                       ndarray[int64_t] counts,
-                       ndarray[{{c_type}}, ndim=2] values,
-                       ndarray[int64_t] labels,
-                       Py_ssize_t min_count=-1):
+def group_min(ndarray[groupby_t, ndim=2] out,
+              ndarray[int64_t] counts,
+              ndarray[groupby_t, ndim=2] values,
+              ndarray[int64_t] labels,
+              Py_ssize_t min_count=-1):
     """
     Only aggregates on axis=0
     """
     cdef:
         Py_ssize_t i, j, N, K, lab, ncounts = len(counts)
-        {{c_type}} val, count
-        ndarray[{{c_type}}, ndim=2] minx, nobs
+        groupby_t val, count, nan_val
+        ndarray[groupby_t, ndim=2] minx, nobs
 
     assert min_count == -1, "'min_count' only used in add and prod"
 
@@ -689,7 +690,12 @@ def group_min_{{name}}(ndarray[{{c_type}}, ndim=2] out,
     nobs = np.zeros_like(out)
 
     minx = np.empty_like(out)
-    minx.fill({{inf_val}})
+    if groupby_t is int64_t:
+        minx.fill(_int64_max)
+        nan_val = iNaT
+    else:
+        minx.fill(np.inf)
+        nan_val = NAN
 
     N, K = (<object> values).shape
 
@@ -704,31 +710,28 @@ def group_min_{{name}}(ndarray[{{c_type}}, ndim=2] out,
                 val = values[i, j]
 
                 # not nan
-                {{if name == 'int64'}}
-                if val != {{nan_val}}:
-                {{else}}
-                if val == val and val != {{nan_val}}:
-                {{endif}}
-                    nobs[lab, j] += 1
-                    if val < minx[lab, j]:
-                        minx[lab, j] = val
+                if groupby_t is int64_t:
+                    if val != nan_val:
+                        nobs[lab, j] += 1
+                        if val < minx[lab, j]:
+                            minx[lab, j] = val
+                else:
+                    if val == val and val != nan_val:
+                        nobs[lab, j] += 1
+                        if val < minx[lab, j]:
+                            minx[lab, j] = val
 
         for i in range(ncounts):
             for j in range(K):
                 if nobs[i, j] == 0:
-                    out[i, j] = {{nan_val}}
+                    out[i, j] = nan_val
                 else:
                     out[i, j] = minx[i, j]
 
 
-
-{{endfor}}
-
-
-ctypedef fused groupby_t:
-    float64_t
-    float32_t
-    int64_t
+group_min_float64 = group_min["float64_t"]
+group_min_float32 = group_min["float32_t"]
+group_min_int64 = group_min["int64_t"]
 
 
 @cython.boundscheck(False)
diff --git a/pandas/_libs/join_func_helper.pxi.in b/pandas/_libs/join_func_helper.pxi.in
index e579f43715315..c7369c6b18093 100644
--- a/pandas/_libs/join_func_helper.pxi.in
+++ b/pandas/_libs/join_func_helper.pxi.in
@@ -293,30 +293,17 @@ asof_join_backward_float = asof_join_backward["float"]
 asof_join_backward_double = asof_join_backward["double"]
 
 
-{{py:
-
-# on_dtype
-dtypes = ['uint8_t', 'uint16_t', 'uint32_t', 'uint64_t',
-          'int8_t', 'int16_t', 'int32_t', 'int64_t',
-          'float', 'double']
-
-}}
-
-{{for on_dtype in dtypes}}
-
-
-def asof_join_forward_{{on_dtype}}(
-        ndarray[{{on_dtype}}] left_values,
-        ndarray[{{on_dtype}}] right_values,
-        bint allow_exact_matches=1,
-        tolerance=None):
+def asof_join_forward(ndarray[asof_t] left_values,
+                      ndarray[asof_t] right_values,
+                      bint allow_exact_matches=1,
+                      tolerance=None):
 
     cdef:
         Py_ssize_t left_pos, right_pos, left_size, right_size
         ndarray[int64_t] left_indexer, right_indexer
         bint has_tolerance = 0
-        {{on_dtype}} tolerance_ = 0
-        {{on_dtype}} diff = 0
+        asof_t tolerance_ = 0
+        asof_t diff = 0
 
     # if we are using tolerance, set our objects
     if tolerance is not None:
@@ -360,16 +347,27 @@ def asof_join_forward_{{on_dtype}}(
     return left_indexer, right_indexer
 
 
-def asof_join_nearest_{{on_dtype}}(
-        ndarray[{{on_dtype}}] left_values,
-        ndarray[{{on_dtype}}] right_values,
-        bint allow_exact_matches=1,
-        tolerance=None):
+asof_join_forward_uint8_t = asof_join_forward["uint8_t"]
+asof_join_forward_uint16_t = asof_join_forward["uint16_t"]
+asof_join_forward_uint32_t = asof_join_forward["uint32_t"]
+asof_join_forward_uint64_t = asof_join_forward["uint64_t"]
+asof_join_forward_int8_t = asof_join_forward["int8_t"]
+asof_join_forward_int16_t = asof_join_forward["int16_t"]
+asof_join_forward_int32_t = asof_join_forward["int32_t"]
+asof_join_forward_int64_t = asof_join_forward["int64_t"]
+asof_join_forward_float = asof_join_forward["float"]
+asof_join_forward_double = asof_join_forward["double"]
+
+
+def asof_join_nearest(ndarray[asof_t] left_values,
+                      ndarray[asof_t] right_values,
+                      bint allow_exact_matches=1,
+                      tolerance=None):
 
     cdef:
         Py_ssize_t left_size, right_size, i
         ndarray[int64_t] left_indexer, right_indexer, bli, bri, fli, fri
-        {{on_dtype}} bdiff, fdiff
+        asof_t bdiff, fdiff
 
     left_size = len(left_values)
     right_size = len(right_values)
@@ -378,10 +376,10 @@ def asof_join_nearest_{{on_dtype}}(
     right_indexer = np.empty(left_size, dtype=np.int64)
 
     # search both forward and backward
-    bli, bri = asof_join_backward_{{on_dtype}}(left_values, right_values,
-                                               allow_exact_matches, tolerance)
-    fli, fri = asof_join_forward_{{on_dtype}}(left_values, right_values,
-                                              allow_exact_matches, tolerance)
+    bli, bri = asof_join_backward(left_values, right_values,
+                                  allow_exact_matches, tolerance)
+    fli, fri = asof_join_forward(left_values, right_values,
+                                 allow_exact_matches, tolerance)
 
     for i in range(len(bri)):
         # choose timestamp from right with smaller difference
@@ -395,4 +393,14 @@ def asof_join_nearest_{{on_dtype}}(
 
     return left_indexer, right_indexer
 
-{{endfor}}
+
+asof_join_nearest_uint8_t = asof_join_nearest["uint8_t"]
+asof_join_nearest_uint16_t = asof_join_nearest["uint16_t"]
+asof_join_nearest_uint32_t = asof_join_nearest["uint32_t"]
+asof_join_nearest_uint64_t = asof_join_nearest["uint64_t"]
+asof_join_nearest_int8_t = asof_join_nearest["int8_t"]
+asof_join_nearest_int16_t = asof_join_nearest["int16_t"]
+asof_join_nearest_int32_t = asof_join_nearest["int32_t"]
+asof_join_nearest_int64_t = asof_join_nearest["int64_t"]
+asof_join_nearest_float = asof_join_nearest["float"]
+asof_join_nearest_double = asof_join_nearest["double"]

From a69438bcfaed0547acf0ac46054146a392aa0b95 Mon Sep 17 00:00:00 2001
From: Brock Mendel <jbrockmendel@gmail.com>
Date: Sun, 14 Oct 2018 11:03:48 -0700
Subject: [PATCH 09/12] nicer names

---
 pandas/_libs/sparse_op_helper.pxi.in | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/pandas/_libs/sparse_op_helper.pxi.in b/pandas/_libs/sparse_op_helper.pxi.in
index 9e60dbf495393..d02a985de1d61 100644
--- a/pandas/_libs/sparse_op_helper.pxi.in
+++ b/pandas/_libs/sparse_op_helper.pxi.in
@@ -13,7 +13,7 @@ ctypedef fused sparse_t:
     int64_t
 
 
-cdef inline float64_t __div(sparse_t a, sparse_t b):
+cdef inline float64_t __div__(sparse_t a, sparse_t b):
     if b == 0:
         if a > 0:
             return INF
@@ -25,11 +25,11 @@ cdef inline float64_t __div(sparse_t a, sparse_t b):
         return float(a) / b
 
 
-cdef inline float64_t __truediv(sparse_t a, sparse_t b):
-    return __div(a, b)
+cdef inline float64_t __truediv__(sparse_t a, sparse_t b):
+    return __div__(a, b)
 
 
-cdef inline sparse_t __mod(sparse_t a, sparse_t b):
+cdef inline sparse_t __mod__(sparse_t a, sparse_t b):
     if b == 0:
         if sparse_t is float64_t:
             return NaN
@@ -39,7 +39,7 @@ cdef inline sparse_t __mod(sparse_t a, sparse_t b):
         return a % b
 
 
-cdef inline sparse_t __floordiv(sparse_t a, sparse_t b):
+cdef inline sparse_t __floordiv__(sparse_t a, sparse_t b):
     if b == 0:
         if sparse_t is float64_t:
             # numpy >= 1.11 returns NaN
@@ -78,10 +78,10 @@ def get_op(tup):
     ops_dict = {'add': '{0} + {1}',
                 'sub': '{0} - {1}',
                 'mul': '{0} * {1}',
-                'div': '__div({0}, {1})',
-                'mod': '__mod({0}, {1})',
-                'truediv': '__truediv({0}, {1})',
-                'floordiv': '__floordiv({0}, {1})',
+                'div': '__div__({0}, {1})',
+                'mod': '__mod__({0}, {1})',
+                'truediv': '__truediv__({0}, {1})',
+                'floordiv': '__floordiv__({0}, {1})',
                 'pow': '{0} ** {1}',
                 'eq': '{0} == {1}',
                 'ne': '{0} != {1}',

From cdcde6c3c4dc7c1cbb3e76b38d4188b4091f0dd4 Mon Sep 17 00:00:00 2001
From: Brock Mendel <jbrockmendel@gmail.com>
Date: Sun, 14 Oct 2018 11:08:01 -0700
Subject: [PATCH 10/12] requested comments/cleanups

---
 pandas/_libs/algos_common_helper.pxi.in | 11 ++++++-----
 pandas/_libs/groupby_helper.pxi.in      |  2 ++
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/pandas/_libs/algos_common_helper.pxi.in b/pandas/_libs/algos_common_helper.pxi.in
index 92e8e9d61d251..3e9670b140361 100644
--- a/pandas/_libs/algos_common_helper.pxi.in
+++ b/pandas/_libs/algos_common_helper.pxi.in
@@ -27,11 +27,12 @@ dtypes = [('float64', 'float64_t', 'float64_t'),
 def get_dispatch(dtypes):
 
     for name, c_type, dest_type, in dtypes:
-        yield name, c_type, dest_type
+        dest_name = dest_type[:-2]  # i.e. strip "_t"
+        yield name, c_type, dest_type, dest_name
 
 }}
 
-{{for name, c_type, dest_type
+{{for name, c_type, dest_type, dest_name
       in get_dispatch(dtypes)}}
 
 
@@ -80,9 +81,9 @@ def diff_2d_{{name}}(ndarray[{{c_type}}, ndim=2] arr,
                     out[i, j] = arr[i, j] - arr[i, j - periods]
 
 
-def put2d_{{name}}_{{dest_type[:-2]}}(ndarray[{{c_type}}, ndim=2, cast=True] values,
-                                      ndarray[int64_t] indexer, Py_ssize_t loc,
-                                      ndarray[{{dest_type}}] out):
+def put2d_{{name}}_{{dest_name}}(ndarray[{{c_type}}, ndim=2, cast=True] values,
+                                 ndarray[int64_t] indexer, Py_ssize_t loc,
+                                 ndarray[{{dest_type}}] out):
     cdef:
         Py_ssize_t i, j, k
 
diff --git a/pandas/_libs/groupby_helper.pxi.in b/pandas/_libs/groupby_helper.pxi.in
index 859bb66249c3b..84c5ccfe9c65a 100644
--- a/pandas/_libs/groupby_helper.pxi.in
+++ b/pandas/_libs/groupby_helper.pxi.in
@@ -594,6 +594,7 @@ def group_rank_{{name}}(ndarray[float64_t, ndim=2] out,
 # group_min, group_max
 # ----------------------------------------------------------------------
 
+# TODO: consider implementing for more dtypes
 ctypedef fused groupby_t:
     float64_t
     float32_t
@@ -624,6 +625,7 @@ def group_max(ndarray[groupby_t, ndim=2] out,
 
     maxx = np.empty_like(out)
     if groupby_t is int64_t:
+        # Note: the fused-type check above is evaluated at compile-time
         maxx.fill(-_int64_max)
         nan_val = iNaT
     else:

From adbc67ce1a0e10d33a7d5a66d8601980410a8a4e Mon Sep 17 00:00:00 2001
From: Brock Mendel <jbrockmendel@gmail.com>
Date: Sun, 14 Oct 2018 11:50:03 -0700
Subject: [PATCH 11/12] Dummy commit to force CI


From dc76269dca591f60cb1e4fa39114f4611eb4acc5 Mon Sep 17 00:00:00 2001
From: Brock Mendel <jbrockmendel@gmail.com>
Date: Sun, 14 Oct 2018 13:33:29 -0700
Subject: [PATCH 12/12] wrap long line

---
 pandas/_libs/algos_rank_helper.pxi.in | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pandas/_libs/algos_rank_helper.pxi.in b/pandas/_libs/algos_rank_helper.pxi.in
index d7b08b0548810..bb4aec75ed567 100644
--- a/pandas/_libs/algos_rank_helper.pxi.in
+++ b/pandas/_libs/algos_rank_helper.pxi.in
@@ -174,7 +174,8 @@ def rank_1d_{{dtype}}(object in_arr, ties_method='average',
                         ranks[argsorted[j]] = i + 1
                 elif tiebreak == TIEBREAK_FIRST:
                     {{if dtype == 'object'}}
-                    raise ValueError('first not supported for non-numeric data')
+                    raise ValueError('first not supported for '
+                                     'non-numeric data')
                     {{else}}
                     for j in range(i - dups + 1, i + 1):
                         ranks[argsorted[j]] = j + 1