pandas-dev · jreback · Apr 18, 2017 · Apr 18, 2017
diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
@@ -14,6 +14,7 @@
 from pandas.core.dtypes.common import (
     is_unsigned_integer_dtype, is_signed_integer_dtype,
     is_integer_dtype, is_complex_dtype,
+    is_object_dtype,
     is_categorical_dtype, is_sparse,
     is_period_dtype,
     is_numeric_dtype, is_float_dtype,
@@ -63,6 +64,35 @@ def _ensure_data(values, dtype=None):
 
     """
 
+    # we check some simple dtypes first
+    try:
+        if is_bool_dtype(values) or is_bool_dtype(dtype):
+            # we are actually coercing to uint64
+            # until our algos support uint8 directly (see TODO)
+            return np.asarray(values).astype('uint64'), 'bool', 'uint64'
+        elif is_signed_integer_dtype(values) or is_signed_integer_dtype(dtype):
+            return _ensure_int64(values), 'int64', 'int64'
+        elif (is_unsigned_integer_dtype(values) or
+              is_unsigned_integer_dtype(dtype)):
+            return _ensure_uint64(values), 'uint64', 'uint64'
+        elif is_float_dtype(values) or is_float_dtype(dtype):
+            return _ensure_float64(values), 'float64', 'float64'
+        elif is_object_dtype(values) and dtype is None:
+            return _ensure_object(np.asarray(values)), 'object', 'object'
+        elif is_complex_dtype(values) or is_complex_dtype(dtype):
+
+            # ignore the fact that we are casting to float
+            # which discards complex parts
+            with catch_warnings(record=True):
+                values = _ensure_float64(values)
+            return values, 'float64', 'float64'
+
+    except (TypeError, ValueError):
+        # if we are trying to coerce to a dtype
+        # and it is incompatible, this will fall through to here
+        return _ensure_object(values), 'object', 'object'
+
+    # datetimelike
     if (needs_i8_conversion(values) or
             is_period_dtype(dtype) or
             is_datetime64_any_dtype(dtype) or
@@ -94,43 +124,9 @@ def _ensure_data(values, dtype=None):
 
         return values, dtype, 'int64'
 
+    # we have failed, return object
     values = np.asarray(values)
-
-    try:
-        if is_bool_dtype(values) or is_bool_dtype(dtype):
-            # we are actually coercing to uint64
-            # until our algos suppport uint8 directly (see TODO)
-            values = values.astype('uint64')
-            dtype = 'bool'
-            ndtype = 'uint64'
-        elif is_signed_integer_dtype(values) or is_signed_integer_dtype(dtype):
-            values = _ensure_int64(values)
-            ndtype = dtype = 'int64'
-        elif (is_unsigned_integer_dtype(values) or
-              is_unsigned_integer_dtype(dtype)):
-            values = _ensure_uint64(values)
-            ndtype = dtype = 'uint64'
-        elif is_complex_dtype(values) or is_complex_dtype(dtype):
-
-            # ignore the fact that we are casting to float
-            # which discards complex parts
-            with catch_warnings(record=True):
-                values = _ensure_float64(values)
-            ndtype = dtype = 'float64'
-        elif is_float_dtype(values) or is_float_dtype(dtype):
-            values = _ensure_float64(values)
-            ndtype = dtype = 'float64'
-        else:
-            values = _ensure_object(values)
-            ndtype = dtype = 'object'
-
-    except (TypeError, ValueError):
-        # if we are trying to coerce to a dtype
-        # and it is incompat this will fall thru to here
-        values = _ensure_object(values)
-        ndtype = dtype = 'object'
-
-    return values, dtype, ndtype
+    return _ensure_object(values), 'object', 'object'
 
 
 def _reconstruct_data(values, dtype, original):
@@ -465,7 +461,7 @@ def safe_sort(values, labels=None, na_sentinel=-1, assume_unique=False):
     if not is_list_like(values):
         raise TypeError("Only list-like objects are allowed to be passed to"
                         "safe_sort as values")
-    values = np.array(values, copy=False)
+    values = np.asarray(values)
 
     def sort_mixed(values):
         # order ints before strings, safe in py3
@@ -547,6 +543,7 @@ def factorize(values, sort=False, order=None, na_sentinel=-1, size_hint=None):
     PeriodIndex
     """
 
+    values = _ensure_arraylike(values)
     original = values
     values, dtype, _ = _ensure_data(values)
     (hash_klass, vec_klass), values = _get_data_algo(values, _hashtables)

diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py
@@ -82,6 +82,8 @@ def _ensure_categorical(arr):
 
 
 def is_object_dtype(arr_or_dtype):
+    if arr_or_dtype is None:
+        return False
     tipo = _get_dtype_type(arr_or_dtype)
     return issubclass(tipo, np.object_)
 
@@ -120,6 +122,8 @@ def is_period(array):
 
 
 def is_datetime64_dtype(arr_or_dtype):
+    if arr_or_dtype is None:
+        return False
     try:
         tipo = _get_dtype_type(arr_or_dtype)
     except TypeError:
@@ -128,23 +132,33 @@ def is_datetime64_dtype(arr_or_dtype):
 
 
 def is_datetime64tz_dtype(arr_or_dtype):
+    if arr_or_dtype is None:
+        return False
     return DatetimeTZDtype.is_dtype(arr_or_dtype)
 
 
 def is_timedelta64_dtype(arr_or_dtype):
+    if arr_or_dtype is None:
+        return False
     tipo = _get_dtype_type(arr_or_dtype)
     return issubclass(tipo, np.timedelta64)
 
 
 def is_period_dtype(arr_or_dtype):
+    if arr_or_dtype is None:
+        return False
     return PeriodDtype.is_dtype(arr_or_dtype)
 
 
 def is_interval_dtype(arr_or_dtype):
+    if arr_or_dtype is None:
+        return False
     return IntervalDtype.is_dtype(arr_or_dtype)
 
 
 def is_categorical_dtype(arr_or_dtype):
+    if arr_or_dtype is None:
+        return False
     return CategoricalDtype.is_dtype(arr_or_dtype)
 
 
@@ -178,6 +192,8 @@ def is_string_dtype(arr_or_dtype):
 
     # TODO: gh-15585: consider making the checks stricter.
 
+    if arr_or_dtype is None:
+        return False
     try:
         dtype = _get_dtype(arr_or_dtype)
         return dtype.kind in ('O', 'S', 'U') and not is_period_dtype(dtype)
@@ -224,45 +240,61 @@ def is_dtype_equal(source, target):
 
 
 def is_any_int_dtype(arr_or_dtype):
+    if arr_or_dtype is None:
+        return False
     tipo = _get_dtype_type(arr_or_dtype)
     return issubclass(tipo, np.integer)
 
 
 def is_integer_dtype(arr_or_dtype):
+    if arr_or_dtype is None:
+        return False
     tipo = _get_dtype_type(arr_or_dtype)
     return (issubclass(tipo, np.integer) and
             not issubclass(tipo, (np.datetime64, np.timedelta64)))
 
 
 def is_signed_integer_dtype(arr_or_dtype):
+    if arr_or_dtype is None:
+        return False
     tipo = _get_dtype_type(arr_or_dtype)
     return (issubclass(tipo, np.signedinteger) and
             not issubclass(tipo, (np.datetime64, np.timedelta64)))
 
 
 def is_unsigned_integer_dtype(arr_or_dtype):
+    if arr_or_dtype is None:
+        return False
     tipo = _get_dtype_type(arr_or_dtype)
     return (issubclass(tipo, np.unsignedinteger) and
             not issubclass(tipo, (np.datetime64, np.timedelta64)))
 
 
 def is_int64_dtype(arr_or_dtype):
+    if arr_or_dtype is None:
+        return False
     tipo = _get_dtype_type(arr_or_dtype)
     return issubclass(tipo, np.int64)
 
 
 def is_int_or_datetime_dtype(arr_or_dtype):
+    if arr_or_dtype is None:
+        return False
     tipo = _get_dtype_type(arr_or_dtype)
     return (issubclass(tipo, np.integer) or
             issubclass(tipo, (np.datetime64, np.timedelta64)))
 
 
 def is_datetime64_any_dtype(arr_or_dtype):
+    if arr_or_dtype is None:
+        return False
     return (is_datetime64_dtype(arr_or_dtype) or
             is_datetime64tz_dtype(arr_or_dtype))
 
 
 def is_datetime64_ns_dtype(arr_or_dtype):
+    if arr_or_dtype is None:
+        return False
     try:
         tipo = _get_dtype(arr_or_dtype)
     except TypeError:
@@ -303,6 +335,8 @@ def is_timedelta64_ns_dtype(arr_or_dtype):
     False
     """
 
+    if arr_or_dtype is None:
+        return False
     try:
         tipo = _get_dtype(arr_or_dtype)
         return tipo == _TD_DTYPE
@@ -311,6 +345,8 @@ def is_timedelta64_ns_dtype(arr_or_dtype):
 
 
 def is_datetime_or_timedelta_dtype(arr_or_dtype):
+    if arr_or_dtype is None:
+        return False
     tipo = _get_dtype_type(arr_or_dtype)
     return issubclass(tipo, (np.datetime64, np.timedelta64))
 
@@ -398,12 +434,16 @@ def is_object(x):
 
 
 def needs_i8_conversion(arr_or_dtype):
+    if arr_or_dtype is None:
+        return False
     return (is_datetime_or_timedelta_dtype(arr_or_dtype) or
             is_datetime64tz_dtype(arr_or_dtype) or
             is_period_dtype(arr_or_dtype))
 
 
 def is_numeric_dtype(arr_or_dtype):
+    if arr_or_dtype is None:
+        return False
     tipo = _get_dtype_type(arr_or_dtype)
     return (issubclass(tipo, (np.number, np.bool_)) and
             not issubclass(tipo, (np.datetime64, np.timedelta64)))
@@ -438,6 +478,8 @@ def is_string_like_dtype(arr_or_dtype):
     False
     """
 
+    if arr_or_dtype is None:
+        return False
     try:
         dtype = _get_dtype(arr_or_dtype)
         return dtype.kind in ('S', 'U')
@@ -446,16 +488,22 @@ def is_string_like_dtype(arr_or_dtype):
 
 
 def is_float_dtype(arr_or_dtype):
+    if arr_or_dtype is None:
+        return False
     tipo = _get_dtype_type(arr_or_dtype)
     return issubclass(tipo, np.floating)
 
 
 def is_floating_dtype(arr_or_dtype):
+    if arr_or_dtype is None:
+        return False
     tipo = _get_dtype_type(arr_or_dtype)
     return isinstance(tipo, np.floating)
 
 
 def is_bool_dtype(arr_or_dtype):
+    if arr_or_dtype is None:
+        return False
     try:
         tipo = _get_dtype_type(arr_or_dtype)
     except ValueError:
@@ -479,6 +527,8 @@ def is_extension_type(value):
 
 
 def is_complex_dtype(arr_or_dtype):
+    if arr_or_dtype is None:
+        return False
     tipo = _get_dtype_type(arr_or_dtype)
     return issubclass(tipo, np.complexfloating)