pandas-dev
diff --git a/‎asv_bench/benchmarks/dtypes.py
Lines changed: 39 additions & 0 deletions b/‎asv_bench/benchmarks/dtypes.py
Lines changed: 39 additions & 0 deletions
diff --git a/‎asv_bench/benchmarks/pandas_vb_common.py
Lines changed: 10 additions & 0 deletions b/‎asv_bench/benchmarks/pandas_vb_common.py
Lines changed: 10 additions & 0 deletions
diff --git a/‎asv_bench/benchmarks/timeseries.py
Lines changed: 5 additions & 1 deletion b/‎asv_bench/benchmarks/timeseries.py
Lines changed: 5 additions & 1 deletion
diff --git a/‎doc/source/basics.rst
Lines changed: 0 additions & 21 deletions b/‎doc/source/basics.rst
Lines changed: 0 additions & 21 deletions
diff --git a/‎doc/source/contributing_docstring.rst
Lines changed: 10 additions & 8 deletions b/‎doc/source/contributing_docstring.rst
Lines changed: 10 additions & 8 deletions
diff --git a/‎doc/source/install.rst
Lines changed: 2 additions & 2 deletions b/‎doc/source/install.rst
Lines changed: 2 additions & 2 deletions
diff --git a/‎doc/source/text.rst
Lines changed: 7 additions & 6 deletions b/‎doc/source/text.rst
Lines changed: 7 additions & 6 deletions
diff --git a/‎doc/source/timeseries.rst
Lines changed: 8 additions & 4 deletions b/‎doc/source/timeseries.rst
Lines changed: 8 additions & 4 deletions
diff --git a/‎doc/source/whatsnew/v0.24.0.rst
Lines changed: 5 additions & 4 deletions b/‎doc/source/whatsnew/v0.24.0.rst
Lines changed: 5 additions & 4 deletions
diff --git a/‎pandas/_libs/algos_common_helper.pxi.in
Lines changed: 0 additions & 2 deletions b/‎pandas/_libs/algos_common_helper.pxi.in
Lines changed: 0 additions & 2 deletions
diff --git a/‎pandas/_libs/interval.pyx
Lines changed: 2 additions & 2 deletions b/‎pandas/_libs/interval.pyx
Lines changed: 2 additions & 2 deletions
diff --git a/‎pandas/_libs/lib.pyx
Lines changed: 9 additions & 3 deletions b/‎pandas/_libs/lib.pyx
Lines changed: 9 additions & 3 deletions
@@ -0,0 +1,39 @@
+from pandas.api.types import pandas_dtype
+
+import numpy as np
+from .pandas_vb_common import (
+    numeric_dtypes, datetime_dtypes, string_dtypes, extension_dtypes)
+
+
+_numpy_dtypes = [np.dtype(dtype)  # wrap every entry in np.dtype(...)
+                 for dtype in (numeric_dtypes +
+                               datetime_dtypes +
+                               string_dtypes)]
+_dtypes = _numpy_dtypes + extension_dtypes  # NumPy dtypes, then extension
+
+
+class Dtypes(object):  # benchmark: pandas_dtype() on dtypes and their names
+    params = (_dtypes +  # every dtype in _dtypes as-is ...
+              list(map(lambda dt: dt.name, _dtypes)))  # ... then each .name
+    param_names = ['dtype']
+
+    def time_pandas_dtype(self, dtype):  # presumably one params entry per run
+        pandas_dtype(dtype)
+
+
+class DtypesInvalid(object):  # benchmark: pandas_dtype() on non-dtype inputs
+    param_names = ['dtype']
+    params = ['scalar-string', 'scalar-int', 'list-string', 'array-string']
+    data_dict = {'scalar-string': 'foo',  # params label -> value passed in
+                 'scalar-int': 1,
+                 'list-string': ['foo'] * 1000,
+                 'array-string': np.array(['foo'] * 1000)}
+
+    def time_pandas_dtype_invalid(self, dtype):  # dtype is a data_dict key
+        try:
+            pandas_dtype(self.data_dict[dtype])
+        except TypeError:  # ignored so the call completes without raising
+            pass
+
+
+from .pandas_vb_common import setup  # noqa: F401
@@ -2,6 +2,7 @@
 from importlib import import_module
 
 import numpy as np
+import pandas as pd
 
 # Compatibility import for lib
 for imp in ['pandas._libs.lib', 'pandas.lib']:
@@ -14,6 +15,15 @@
 numeric_dtypes = [np.int64, np.int32, np.uint32, np.uint64, np.float32,
                   np.float64, np.int16, np.int8, np.uint16, np.uint8]
 datetime_dtypes = [np.datetime64, np.timedelta64]
+string_dtypes = [np.object]
+extension_dtypes = [pd.Int8Dtype, pd.Int16Dtype,
+                    pd.Int32Dtype, pd.Int64Dtype,
+                    pd.UInt8Dtype, pd.UInt16Dtype,
+                    pd.UInt32Dtype, pd.UInt64Dtype,
+                    pd.CategoricalDtype,
+                    pd.IntervalDtype,
+                    pd.DatetimeTZDtype('ns', 'UTC'),
+                    pd.PeriodDtype('D')]
 
 
 def setup(*args, **kwargs):
 
@@ -12,7 +12,7 @@
 
 class DatetimeIndex(object):
 
-    params = ['dst', 'repeated', 'tz_aware', 'tz_naive']
+    params = ['dst', 'repeated', 'tz_aware', 'tz_local', 'tz_naive']
     param_names = ['index_type']
 
     def setup(self, index_type):
@@ -26,6 +26,10 @@ def setup(self, index_type):
                                           periods=N,
                                           freq='s',
                                           tz='US/Eastern'),
+                   'tz_local': date_range(start='2000',
+                                          periods=N,
+                                          freq='s',
+                                          tz=dateutil.tz.tzlocal()),
                    'tz_naive': date_range(start='2000',
                                           periods=N,
                                           freq='s')}
 
@@ -99,27 +99,6 @@ are two possibly useful representations:
 
 Timezones may be preserved with ``dtype=object``
 
-.. ipython:: python
-
-   ser = pd.Series(pd.date_range('2000', periods=2, tz="CET"))
-   ser.to_numpy(dtype=object)
-
-Or thrown away with ``dtype='datetime64[ns]'``
-
-   ser.to_numpy(dtype="datetime64[ns]")
-
-:meth:`~Series.to_numpy` gives some control over the ``dtype`` of the
-resulting :class:`ndarray`. For example, consider datetimes with timezones.
-NumPy doesn't have a dtype to represent timezone-aware datetimes, so there
-are two possibly useful representations:
-
-1. An object-dtype :class:`ndarray` with :class:`Timestamp` objects, each
-   with the correct ``tz``
-2. A ``datetime64[ns]`` -dtype :class:`ndarray`, where the values have
-   been converted to UTC and the timezone discarded
-
-Timezones may be preserved with ``dtype=object``
-
 .. ipython:: python
 
    ser = pd.Series(pd.date_range('2000', periods=2, tz="CET"))
 
@@ -457,12 +457,14 @@ For example, with a single value:
         float
             Random number generated.
         """
-        return random.random()
+        return np.random.random()
 
 With more than one value:
 
 .. code-block:: python
 
+    import string
+
     def random_letters():
         """
         Generate and return a sequence of random letters.
@@ -477,8 +479,8 @@ With more than one value:
         letters : str
             String of random letters.
         """
-        length = random.randint(1, 10)
-        letters = ''.join(random.choice(string.ascii_lowercase)
+        length = np.random.randint(1, 10)
+        letters = ''.join(np.random.choice(list(string.ascii_lowercase))
                           for i in range(length))
         return length, letters
 
@@ -499,7 +501,7 @@ If the method yields its value:
             Random number generated.
         """
         while True:
-            yield random.random()
+            yield np.random.random()
 
 .. _docstring.see_also:
 
@@ -686,8 +688,8 @@ shown:
 
 .. code-block:: python
 
-    import numpy as np          # noqa: F401
-    import pandas as pd         # noqa: F401
+    import numpy as np
+    import pandas as pd
 
 Any other module used in the examples must be explicitly imported, one per line (as
 recommended in :pep:`8#imports`)
@@ -776,7 +778,7 @@ positional arguments ``head(3)``.
 
             Examples
             --------
-            >>> s = pd.Series('Antelope', 'Lion', 'Zebra', numpy.nan)
+            >>> s = pd.Series(['Antelope', 'Lion', 'Zebra', np.nan])
             >>> s.contains(pattern='a')
             0    False
             1    False
@@ -834,7 +836,7 @@ positional arguments ``head(3)``.
         --------
         >>> import numpy as np
         >>> import pandas as pd
-        >>> df = pd.DataFrame(numpy.random.randn(3, 3),
+        >>> df = pd.DataFrame(np.random.randn(3, 3),
         ...                   columns=('a', 'b', 'c'))
         >>> df.method(1)
         21
 
@@ -24,11 +24,11 @@ The Python core team plans to stop supporting Python 2.7 on January 1st, 2020.
 In line with `NumPy's plans`_, all pandas releases through December 31, 2018
 will support Python 2.
 
-The final release before **December 31, 2018** will be the last release to
+The 0.24.x feature release will be the last release to
 support Python 2. The released package will continue to be available on
 PyPI and through conda.
 
-Starting **January 1, 2019**, all releases will be Python 3 only.
+Starting **January 1, 2019**, all new feature releases (> 0.24) will be Python 3 only.
 
 If there are people interested in continued support for Python 2.7 past December
 31, 2018 (either backporting bug fixes or funding) please reach out to the
 
@@ -303,23 +303,24 @@ The same alignment can be used when ``others`` is a ``DataFrame``:
 Concatenating a Series and many objects into a Series
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
-All one-dimensional list-likes can be combined in a list-like container (including iterators, ``dict``-views, etc.):
+Several array-like items (specifically: ``Series``, ``Index``, and 1-dimensional variants of ``np.ndarray``)
+can be combined in a list-like container (including iterators, ``dict``-views, etc.).
 
 .. ipython:: python
 
     s
     u
-    s.str.cat([u.array,
-               u.index.astype(str).array], na_rep='-')
+    s.str.cat([u, u.to_numpy()], join='left')
 
-All elements must match in length to the calling ``Series`` (or ``Index``), except those having an index if ``join`` is not None:
+All elements without an index (e.g. ``np.ndarray``) within the passed list-like must match in length to the calling ``Series`` (or ``Index``),
+but ``Series`` and ``Index`` may have arbitrary length (as long as alignment is not disabled with ``join=None``):
 
 .. ipython:: python
 
     v
-    s.str.cat([u, v], join='outer', na_rep='-')
+    s.str.cat([v, u, u.to_numpy()], join='outer', na_rep='-')
 
-If using ``join='right'`` on a list of ``others`` that contains different indexes,
+If using ``join='right'`` on a list-like of ``others`` that contains different indexes,
 the union of these indexes will be used as the basis for the final concatenation:
 
 .. ipython:: python
 
@@ -2425,21 +2425,25 @@ a convert on an aware stamp.
 .. note::
 
    Using :meth:`Series.to_numpy` on a ``Series``, returns a NumPy array of the data.
-   These values are converted to UTC, as NumPy does not currently support timezones (even though it is *printing* in the local timezone!).
+   NumPy does not currently support timezones (even though it is *printing* in the local timezone!),
+   therefore an object array of Timestamps is returned for timezone aware data:
 
    .. ipython:: python
 
       s_naive.to_numpy()
       s_aware.to_numpy()
 
-   Further note that once converted to a NumPy array these would lose the tz tenor.
+   Converting to an object array of Timestamps preserves the timezone
+   information. For example, when converting back to a Series:
 
    .. ipython:: python
 
       pd.Series(s_aware.to_numpy())
 
-   However, these can be easily converted:
+   However, if you want an actual NumPy ``datetime64[ns]`` array (with the values
+   converted to UTC) instead of an array of objects, you can specify the
+   ``dtype`` argument:
 
    .. ipython:: python
 
-      pd.Series(s_aware.to_numpy()).dt.tz_localize('UTC').dt.tz_convert('US/Eastern')
+      s_aware.to_numpy(dtype='datetime64[ns]')
@@ -6,9 +6,7 @@ What's New in 0.24.0 (January XX, 2019)
 .. warning::
 
    The 0.24.x series of releases will be the last to support Python 2. Future feature
-   releases will support Python 3 only.
-
-   See :ref:`install.dropping-27` for more.
+   releases will support Python 3 only. See :ref:`install.dropping-27` for more.
 
 {{ header }}
 
@@ -432,7 +430,7 @@ Backwards incompatible API changes
 - The column order of the resultant :class:`DataFrame` from :meth:`MultiIndex.to_frame` is now guaranteed to match the :attr:`MultiIndex.names` order. (:issue:`22420`)
 - Incorrectly passing a :class:`DatetimeIndex` to :meth:`MultiIndex.from_tuples`, rather than a sequence of tuples, now raises a ``TypeError`` rather than a ``ValueError`` (:issue:`24024`)
 - :func:`pd.offsets.generate_range` argument ``time_rule`` has been removed; use ``offset`` instead (:issue:`24157`)
-- In 0.23.x, pandas would raise a ``ValueError`` on a merge of a numeric column (e.g. ``int`` dtyped column) and an ``object`` dtyped column (:issue:`9780`). We have re-enabled the ability to merge ``object`` and other dtypes (:issue:`21681`)
+- In 0.23.x, pandas would raise a ``ValueError`` on a merge of a numeric column (e.g. ``int`` dtyped column) and an ``object`` dtyped column (:issue:`9780`). We have re-enabled the ability to merge ``object`` and other dtypes; pandas will still raise on a merge between a numeric and an ``object`` dtyped column that is composed only of strings (:issue:`21681`)
 
 Percentage change on groupby
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -1222,6 +1220,7 @@ Deprecations
 - :func:`pandas.api.types.is_datetimetz` is deprecated in favor of `pandas.api.types.is_datetime64tz` (:issue:`23917`)
 - Creating a :class:`TimedeltaIndex`, :class:`DatetimeIndex`, or :class:`PeriodIndex` by passing range arguments `start`, `end`, and `periods` is deprecated in favor of :func:`timedelta_range`, :func:`date_range`, or :func:`period_range` (:issue:`23919`)
 - Passing a string alias like ``'datetime64[ns, UTC]'`` as the ``unit`` parameter to :class:`DatetimeTZDtype` is deprecated. Use :class:`DatetimeTZDtype.construct_from_string` instead (:issue:`23990`).
+- The default value of the ``skipna`` parameter of :func:`~pandas.api.types.infer_dtype` will switch to ``True`` in a future version of pandas (:issue:`17066`, :issue:`24050`)
 - In :meth:`Series.where` with Categorical data, providing an ``other`` that is not present in the categories is deprecated. Convert the categorical to a different dtype or add the ``other`` to the categories first (:issue:`24077`).
 - :meth:`Series.clip_lower`, :meth:`Series.clip_upper`, :meth:`DataFrame.clip_lower` and :meth:`DataFrame.clip_upper` are deprecated and will be removed in a future version. Use ``Series.clip(lower=threshold)``, ``Series.clip(upper=threshold)`` and the equivalent ``DataFrame`` methods (:issue:`24203`)
 
@@ -1381,6 +1380,7 @@ Performance Improvements
 - Improved performance of iterating over a :class:`Series`. Using :meth:`DataFrame.itertuples` now creates iterators
   without internally allocating lists of all elements (:issue:`20783`)
 - Improved performance of :class:`Period` constructor, additionally benefitting ``PeriodArray`` and ``PeriodIndex`` creation (:issue:`24084` and :issue:`24118`)
+- Improved performance of tz-aware :class:`DatetimeArray` binary operations (:issue:`24491`)
 
 .. _whatsnew_0240.docs:
 
@@ -1592,6 +1592,7 @@ Missing
 - Bug in :func:`Series.hasnans` that could be incorrectly cached and return incorrect answers if null elements are introduced after an initial call (:issue:`19700`)
 - :func:`Series.isin` now treats all NaN-floats as equal also for ``np.object``-dtype. This behavior is consistent with the behavior for float64 (:issue:`22119`)
 - :func:`unique` no longer mangles NaN-floats and the ``NaT``-object for ``np.object``-dtype, i.e. ``NaT`` is no longer coerced to a NaN-value and is treated as a different entity. (:issue:`22295`)
+- :class:`DataFrame` and :class:`Series` now properly handle numpy masked arrays with hardened masks. Previously, constructing a DataFrame or Series from a masked array with a hard mask would create a pandas object containing the underlying value, rather than the expected NaN. (:issue:`24574`)
 
 
 MultiIndex
 
@@ -109,8 +109,6 @@ def ensure_object(object arr):
             return arr
         else:
             return arr.astype(np.object_)
-    elif hasattr(arr, '_box_values_as_index'):
-        return arr._box_values_as_index()
     else:
         return np.array(arr, dtype=np.object_)
 
 
@@ -389,8 +389,8 @@ cdef class Interval(IntervalMixin):
 
         See Also
         --------
-        IntervalArray.overlaps : The corresponding method for IntervalArray
-        IntervalIndex.overlaps : The corresponding method for IntervalIndex
+        IntervalArray.overlaps : The corresponding method for IntervalArray.
+        IntervalIndex.overlaps : The corresponding method for IntervalIndex.
 
         Examples
         --------
 
@@ -4,6 +4,7 @@ from fractions import Fraction
 from numbers import Number
 
 import sys
+import warnings
 
 import cython
 from cython import Py_ssize_t
@@ -1079,7 +1080,7 @@ cdef _try_infer_map(v):
     return None
 
 
-def infer_dtype(value: object, skipna: bool=False) -> str:
+def infer_dtype(value: object, skipna: object=None) -> str:
     """
     Efficiently infer the type of a passed val, or list-like
     array of values. Return a string describing the type.
@@ -1088,8 +1089,7 @@ def infer_dtype(value: object, skipna: bool=False) -> str:
     ----------
     value : scalar, list, ndarray, or pandas type
     skipna : bool, default False
-        Ignore NaN values when inferring the type. The default of ``False``
-        will be deprecated in a later version of pandas.
+        Ignore NaN values when inferring the type.
 
         .. versionadded:: 0.21.0
 
@@ -1186,6 +1186,12 @@ def infer_dtype(value: object, skipna: bool=False) -> str:
         bint seen_pdnat = False
         bint seen_val = False
 
+    if skipna is None:
+        msg = ('A future version of pandas will default to `skipna=True`. To '
+               'silence this warning, pass `skipna=True|False` explicitly.')
+        warnings.warn(msg, FutureWarning, stacklevel=2)
+        skipna = False
+
     if util.is_array(value):
         values = value
     elif hasattr(value, 'dtype'):