From c25cad9f8ebadc3e8c4c77155d48d90bd0e64e3a Mon Sep 17 00:00:00 2001
From: jbrockmendel <jbrockmendel@gmail.com>
Date: Tue, 7 Apr 2020 18:21:33 -0700
Subject: [PATCH 1/2] PERF: implement dtype-only dtype checks

---
 pandas/_libs/__init__.py     |  2 ++
 pandas/_libs/parsers.pyx     | 28 +++++++++++++++------------
 pandas/core/dtypes/common.py | 37 +++++++++++++++++++++++++++++++++++-
 3 files changed, 54 insertions(+), 13 deletions(-)

diff --git a/pandas/_libs/__init__.py b/pandas/_libs/__init__.py
index 141ca0645b906..f119e280f5867 100644
--- a/pandas/_libs/__init__.py
+++ b/pandas/_libs/__init__.py
@@ -6,9 +6,11 @@
     "Timedelta",
     "Timestamp",
     "iNaT",
+    "Interval",
 ]
 
 
+from pandas._libs.interval import Interval
 from pandas._libs.tslibs import (
     NaT,
     NaTType,
diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx
index e4aeb7ad69792..2a41039e25228 100644
--- a/pandas/_libs/parsers.pyx
+++ b/pandas/_libs/parsers.pyx
@@ -50,11 +50,15 @@ from pandas._libs.khash cimport (
     kh_get_str_starts_item, kh_destroy_str_starts, kh_resize_str_starts)
 
 from pandas.core.dtypes.common import (
-    is_categorical_dtype,
-    is_integer_dtype, is_float_dtype,
-    is_bool_dtype, is_object_dtype,
-    is_datetime64_dtype,
-    pandas_dtype, is_extension_array_dtype)
+    is_cat_dtype,
+    is_integer_dtype,
+    is_float_dtype,
+    is_bool_dtype,
+    is_object_dtype,
+    is_dt64_dtype,
+    pandas_dtype,
+    is_ea_dtype,
+)
 from pandas.core.arrays import Categorical
 from pandas.core.dtypes.concat import union_categoricals
 import pandas.io.common as icom
@@ -1064,7 +1068,7 @@ cdef class TextReader:
 
             # don't try to upcast EAs
             try_upcast = upcast_na and na_count > 0
-            if try_upcast and not is_extension_array_dtype(col_dtype):
+            if try_upcast and not is_ea_dtype(col_dtype):
                 col_res = _maybe_upcast(col_res)
 
             if col_res is None:
@@ -1140,7 +1144,7 @@ cdef class TextReader:
                              bint user_dtype,
                              kh_str_starts_t *na_hashset,
                              object na_flist):
-        if is_categorical_dtype(dtype):
+        if is_cat_dtype(dtype):
             # TODO: I suspect that _categorical_convert could be
             # optimized when dtype is an instance of CategoricalDtype
             codes, cats, na_count = _categorical_convert(
@@ -1153,7 +1157,7 @@ cdef class TextReader:
                 cats, codes, dtype, true_values=true_values)
             return cat, na_count
 
-        elif is_extension_array_dtype(dtype):
+        elif is_ea_dtype(dtype):
             result, na_count = self._string_convert(i, start, end, na_filter,
                                                     na_hashset)
             array_type = dtype.construct_array_type()
@@ -1223,7 +1227,7 @@ cdef class TextReader:
         elif is_object_dtype(dtype):
             return self._string_convert(i, start, end, na_filter,
                                         na_hashset)
-        elif is_datetime64_dtype(dtype):
+        elif is_dt64_dtype(dtype):
             raise TypeError(f"the dtype {dtype} is not supported "
                             f"for parsing, pass this column "
                             f"using parse_dates instead")
@@ -2035,19 +2039,19 @@ def _concatenate_chunks(list chunks):
         arrs = [chunk.pop(name) for chunk in chunks]
         # Check each arr for consistent types.
         dtypes = {a.dtype for a in arrs}
-        numpy_dtypes = {x for x in dtypes if not is_categorical_dtype(x)}
+        numpy_dtypes = {x for x in dtypes if not is_cat_dtype(x)}
         if len(numpy_dtypes) > 1:
             common_type = np.find_common_type(numpy_dtypes, [])
             if common_type == np.object:
                 warning_columns.append(str(name))
 
         dtype = dtypes.pop()
-        if is_categorical_dtype(dtype):
+        if is_cat_dtype(dtype):
             sort_categories = isinstance(dtype, str)
             result[name] = union_categoricals(arrs,
                                               sort_categories=sort_categories)
         else:
-            if is_extension_array_dtype(dtype):
+            if is_ea_dtype(dtype):
                 array_type = dtype.construct_array_type()
                 result[name] = array_type._concat_same_type(arrs)
             else:
diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py
index 16373bd697c1f..9b20292fea431 100644
--- a/pandas/core/dtypes/common.py
+++ b/pandas/core/dtypes/common.py
@@ -7,7 +7,7 @@
 
 import numpy as np
 
-from pandas._libs import algos
+from pandas._libs import Interval, Period, algos
 from pandas._libs.tslibs import conversion
 from pandas._typing import ArrayLike, DtypeObj
 
@@ -1516,6 +1516,41 @@ def is_extension_array_dtype(arr_or_dtype) -> bool:
     return isinstance(dtype, ExtensionDtype) or registry.find(dtype) is not None
 
 
+def is_ea_dtype(dtype) -> bool:
+    return isinstance(dtype, ExtensionDtype)
+
+
+def is_dt64_dtype(dtype) -> bool:
+    return isinstance(dtype, np.dtype) and dtype.kind == "M"
+
+
+def is_dt64tz_dtype(dtype) -> bool:
+    return isinstance(dtype, ExtensionDtype) and dtype.kind == "M"
+
+
+def is_dt64_any_dtype(dtype) -> bool:
+    return isinstance(dtype, (np.dtype, ExtensionDtype)) and dtype.kind == "M"
+
+
+def is_td64_dtype(dtype) -> bool:
+    return isinstance(dtype, np.dtype) and dtype.kind == "m"
+
+
+def is_period_dtype_obj(dtype) -> bool:
+    return isinstance(dtype, ExtensionDtype) and dtype.type is Period
+
+
+def is_interval_dtype_obj(dtype) -> bool:
+    return isinstance(dtype, ExtensionDtype) and dtype.type is Interval
+
+
+def is_cat_dtype(dtype) -> bool:
+    """
+    Check if we have a CategoricalDtype object.
+    """
+    return isinstance(dtype, ExtensionDtype) and dtype.name == "category"
+
+
 def is_complex_dtype(arr_or_dtype) -> bool:
     """
     Check whether the provided array or dtype is of a complex dtype.

From a56482befbc09fc50e5a82927c588a80b9f0de53 Mon Sep 17 00:00:00 2001
From: jbrockmendel <jbrockmendel@gmail.com>
Date: Thu, 16 Apr 2020 13:50:44 -0700
Subject: [PATCH 2/2] remove strict versions

---
 pandas/_libs/parsers.pyx     | 28 +++++++---------
 pandas/core/dtypes/common.py | 62 ++++++++++++++++--------------------
 2 files changed, 39 insertions(+), 51 deletions(-)

diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx
index 2a41039e25228..e4aeb7ad69792 100644
--- a/pandas/_libs/parsers.pyx
+++ b/pandas/_libs/parsers.pyx
@@ -50,15 +50,11 @@ from pandas._libs.khash cimport (
     kh_get_str_starts_item, kh_destroy_str_starts, kh_resize_str_starts)
 
 from pandas.core.dtypes.common import (
-    is_cat_dtype,
-    is_integer_dtype,
-    is_float_dtype,
-    is_bool_dtype,
-    is_object_dtype,
-    is_dt64_dtype,
-    pandas_dtype,
-    is_ea_dtype,
-)
+    is_categorical_dtype,
+    is_integer_dtype, is_float_dtype,
+    is_bool_dtype, is_object_dtype,
+    is_datetime64_dtype,
+    pandas_dtype, is_extension_array_dtype)
 from pandas.core.arrays import Categorical
 from pandas.core.dtypes.concat import union_categoricals
 import pandas.io.common as icom
@@ -1068,7 +1064,7 @@ cdef class TextReader:
 
             # don't try to upcast EAs
             try_upcast = upcast_na and na_count > 0
-            if try_upcast and not is_ea_dtype(col_dtype):
+            if try_upcast and not is_extension_array_dtype(col_dtype):
                 col_res = _maybe_upcast(col_res)
 
             if col_res is None:
@@ -1144,7 +1140,7 @@ cdef class TextReader:
                              bint user_dtype,
                              kh_str_starts_t *na_hashset,
                              object na_flist):
-        if is_cat_dtype(dtype):
+        if is_categorical_dtype(dtype):
             # TODO: I suspect that _categorical_convert could be
             # optimized when dtype is an instance of CategoricalDtype
             codes, cats, na_count = _categorical_convert(
@@ -1157,7 +1153,7 @@ cdef class TextReader:
                 cats, codes, dtype, true_values=true_values)
             return cat, na_count
 
-        elif is_ea_dtype(dtype):
+        elif is_extension_array_dtype(dtype):
             result, na_count = self._string_convert(i, start, end, na_filter,
                                                     na_hashset)
             array_type = dtype.construct_array_type()
@@ -1227,7 +1223,7 @@ cdef class TextReader:
         elif is_object_dtype(dtype):
             return self._string_convert(i, start, end, na_filter,
                                         na_hashset)
-        elif is_dt64_dtype(dtype):
+        elif is_datetime64_dtype(dtype):
             raise TypeError(f"the dtype {dtype} is not supported "
                             f"for parsing, pass this column "
                             f"using parse_dates instead")
@@ -2039,19 +2035,19 @@ def _concatenate_chunks(list chunks):
         arrs = [chunk.pop(name) for chunk in chunks]
         # Check each arr for consistent types.
         dtypes = {a.dtype for a in arrs}
-        numpy_dtypes = {x for x in dtypes if not is_cat_dtype(x)}
+        numpy_dtypes = {x for x in dtypes if not is_categorical_dtype(x)}
         if len(numpy_dtypes) > 1:
             common_type = np.find_common_type(numpy_dtypes, [])
             if common_type == np.object:
                 warning_columns.append(str(name))
 
         dtype = dtypes.pop()
-        if is_cat_dtype(dtype):
+        if is_categorical_dtype(dtype):
             sort_categories = isinstance(dtype, str)
             result[name] = union_categoricals(arrs,
                                               sort_categories=sort_categories)
         else:
-            if is_ea_dtype(dtype):
+            if is_extension_array_dtype(dtype):
                 array_type = dtype.construct_array_type()
                 result[name] = array_type._concat_same_type(arrs)
             else:
diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py
index a31837838144b..76aefd9d5ba8f 100644
--- a/pandas/core/dtypes/common.py
+++ b/pandas/core/dtypes/common.py
@@ -396,6 +396,9 @@ def is_datetime64_dtype(arr_or_dtype) -> bool:
     >>> is_datetime64_dtype([1, 2, 3])
     False
     """
+    if isinstance(arr_or_dtype, np.dtype):
+        # GH#33400 fastpath for dtype object
+        return arr_or_dtype.kind == "M"
     return _is_dtype_type(arr_or_dtype, classes(np.datetime64))
 
 
@@ -431,6 +434,10 @@ def is_datetime64tz_dtype(arr_or_dtype) -> bool:
     >>> is_datetime64tz_dtype(s)
     True
     """
+    if isinstance(arr_or_dtype, ExtensionDtype):
+        # GH#33400 fastpath for dtype object
+        return arr_or_dtype.kind == "M"
+
     if arr_or_dtype is None:
         return False
     return DatetimeTZDtype.is_dtype(arr_or_dtype)
@@ -463,6 +470,10 @@ def is_timedelta64_dtype(arr_or_dtype) -> bool:
     >>> is_timedelta64_dtype('0 days')
     False
     """
+    if isinstance(arr_or_dtype, np.dtype):
+        # GH#33400 fastpath for dtype object
+        return arr_or_dtype.kind == "m"
+
     return _is_dtype_type(arr_or_dtype, classes(np.timedelta64))
 
 
@@ -493,6 +504,10 @@ def is_period_dtype(arr_or_dtype) -> bool:
     >>> is_period_dtype(pd.PeriodIndex([], freq="A"))
     True
     """
+    if isinstance(arr_or_dtype, ExtensionDtype):
+        # GH#33400 fastpath for dtype object
+        return arr_or_dtype.type is Period
+
     # TODO: Consider making Period an instance of PeriodDtype
     if arr_or_dtype is None:
         return False
@@ -528,6 +543,10 @@ def is_interval_dtype(arr_or_dtype) -> bool:
     >>> is_interval_dtype(pd.IntervalIndex([interval]))
     True
     """
+    if isinstance(arr_or_dtype, ExtensionDtype):
+        # GH#33400 fastpath for dtype object
+        return arr_or_dtype.type is Interval
+
     # TODO: Consider making Interval an instance of IntervalDtype
     if arr_or_dtype is None:
         return False
@@ -561,6 +580,10 @@ def is_categorical_dtype(arr_or_dtype) -> bool:
     >>> is_categorical_dtype(pd.CategoricalIndex([1, 2, 3]))
     True
     """
+    if isinstance(arr_or_dtype, ExtensionDtype):
+        # GH#33400 fastpath for dtype object
+        return arr_or_dtype.name == "category"
+
     if arr_or_dtype is None:
         return False
     return CategoricalDtype.is_dtype(arr_or_dtype)
@@ -938,6 +961,10 @@ def is_datetime64_any_dtype(arr_or_dtype) -> bool:
     >>> is_datetime64_any_dtype(pd.DatetimeIndex([1, 2, 3], dtype="datetime64[ns]"))
     True
     """
+    if isinstance(arr_or_dtype, (np.dtype, ExtensionDtype)):
+        # GH#33400 fastpath for dtype object
+        return arr_or_dtype.kind == "M"
+
     if arr_or_dtype is None:
         return False
     return is_datetime64_dtype(arr_or_dtype) or is_datetime64tz_dtype(arr_or_dtype)
@@ -1505,41 +1532,6 @@ def is_extension_array_dtype(arr_or_dtype) -> bool:
     return isinstance(dtype, ExtensionDtype) or registry.find(dtype) is not None
 
 
-def is_ea_dtype(dtype) -> bool:
-    return isinstance(dtype, ExtensionDtype)
-
-
-def is_dt64_dtype(dtype) -> bool:
-    return isinstance(dtype, np.dtype) and dtype.kind == "M"
-
-
-def is_dt64tz_dtype(dtype) -> bool:
-    return isinstance(dtype, ExtensionDtype) and dtype.kind == "M"
-
-
-def is_dt64_any_dtype(dtype) -> bool:
-    return isinstance(dtype, (np.dtype, ExtensionDtype)) and dtype.kind == "M"
-
-
-def is_td64_dtype(dtype) -> bool:
-    return isinstance(dtype, np.dtype) and dtype.kind == "m"
-
-
-def is_period_dtype_obj(dtype) -> bool:
-    return isinstance(dtype, ExtensionDtype) and dtype.type is Period
-
-
-def is_interval_dtype_obj(dtype) -> bool:
-    return isinstance(dtype, ExtensionDtype) and dtype.type is Interval
-
-
-def is_cat_dtype(dtype) -> bool:
-    """
-    Check if we have a CategoricalDtype object.
-    """
-    return isinstance(dtype, ExtensionDtype) and dtype.name == "category"
-
-
 def is_complex_dtype(arr_or_dtype) -> bool:
     """
     Check whether the provided array or dtype is of a complex dtype.