Skip to content

Commit 3c1ffd0

Browse files
committed
Merge remote-tracking branch 'upstream/master' into dt-array-5
2 parents ea44792 + 19f715c commit 3c1ffd0

File tree

105 files changed

+2452
-2142
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

105 files changed

+2452
-2142
lines changed

asv_bench/benchmarks/dtypes.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
from pandas.api.types import pandas_dtype
2+
3+
import numpy as np
4+
from .pandas_vb_common import (
5+
numeric_dtypes, datetime_dtypes, string_dtypes, extension_dtypes)
6+
7+
8+
# NumPy dtype instances built from the numeric, datetime and string dtype
# lists imported from pandas_vb_common.
_numpy_dtypes = [np.dtype(dtype)
                 for dtype in (numeric_dtypes +
                               datetime_dtypes +
                               string_dtypes)]
# Full set of benchmark inputs: the NumPy dtypes followed by the extension
# dtypes imported from pandas_vb_common.
_dtypes = _numpy_dtypes + extension_dtypes
13+
14+
15+
class Dtypes(object):
    """Benchmark ``pandas_dtype`` over the module-level ``_dtypes`` list."""

    # Every entry of ``_dtypes`` is benchmarked twice: once as the object
    # itself and once via its ``name`` attribute.
    params = (_dtypes +
              list(map(lambda dt: dt.name, _dtypes)))
    param_names = ['dtype']

    def time_pandas_dtype(self, dtype):
        """Time one ``pandas_dtype`` call on the parametrized ``dtype``."""
        pandas_dtype(dtype)
22+
23+
24+
class DtypesInvalid(object):
    """Benchmark ``pandas_dtype`` on inputs that are not dtype objects.

    Each parameter is a key into ``data_dict``; the mapped value (a scalar,
    a list or an ndarray) is what actually gets passed to ``pandas_dtype``.
    """

    param_names = ['dtype']
    params = ['scalar-string', 'scalar-int', 'list-string', 'array-string']
    # Built once at class-definition time so that constructing the inputs is
    # not part of the timed call.
    data_dict = {'scalar-string': 'foo',
                 'scalar-int': 1,
                 'list-string': ['foo'] * 1000,
                 'array-string': np.array(['foo'] * 1000)}

    def time_pandas_dtype_invalid(self, dtype):
        """Time ``pandas_dtype`` on the input selected by ``dtype``.

        A ``TypeError`` raised by ``pandas_dtype`` is deliberately swallowed:
        the benchmark measures the call itself, including its failure path.
        """
        try:
            pandas_dtype(self.data_dict[dtype])
        except TypeError:
            pass
37+
38+
39+
from .pandas_vb_common import setup # noqa: F401

asv_bench/benchmarks/pandas_vb_common.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
from importlib import import_module
33

44
import numpy as np
5+
import pandas as pd
56

67
# Compatibility import for lib
78
for imp in ['pandas._libs.lib', 'pandas.lib']:
@@ -14,6 +15,15 @@
1415
numeric_dtypes = [np.int64, np.int32, np.uint32, np.uint64, np.float32,
1516
np.float64, np.int16, np.int8, np.uint16, np.uint8]
1617
datetime_dtypes = [np.datetime64, np.timedelta64]
18+
string_dtypes = [np.object]
19+
extension_dtypes = [pd.Int8Dtype, pd.Int16Dtype,
20+
pd.Int32Dtype, pd.Int64Dtype,
21+
pd.UInt8Dtype, pd.UInt16Dtype,
22+
pd.UInt32Dtype, pd.UInt64Dtype,
23+
pd.CategoricalDtype,
24+
pd.IntervalDtype,
25+
pd.DatetimeTZDtype('ns', 'UTC'),
26+
pd.PeriodDtype('D')]
1727

1828

1929
def setup(*args, **kwargs):

asv_bench/benchmarks/timeseries.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212

1313
class DatetimeIndex(object):
1414

15-
params = ['dst', 'repeated', 'tz_aware', 'tz_naive']
15+
params = ['dst', 'repeated', 'tz_aware', 'tz_local', 'tz_naive']
1616
param_names = ['index_type']
1717

1818
def setup(self, index_type):
@@ -26,6 +26,10 @@ def setup(self, index_type):
2626
periods=N,
2727
freq='s',
2828
tz='US/Eastern'),
29+
'tz_local': date_range(start='2000',
30+
periods=N,
31+
freq='s',
32+
tz=dateutil.tz.tzlocal()),
2933
'tz_naive': date_range(start='2000',
3034
periods=N,
3135
freq='s')}

doc/source/basics.rst

Lines changed: 0 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -99,27 +99,6 @@ are two possibly useful representations:
9999

100100
Timezones may be preserved with ``dtype=object``
101101

102-
.. ipython:: python
103-
104-
ser = pd.Series(pd.date_range('2000', periods=2, tz="CET"))
105-
ser.to_numpy(dtype=object)
106-
107-
Or thrown away with ``dtype='datetime64[ns]'``
108-
109-
ser.to_numpy(dtype="datetime64[ns]")
110-
111-
:meth:`~Series.to_numpy` gives some control over the ``dtype`` of the
112-
resulting :class:`ndarray`. For example, consider datetimes with timezones.
113-
NumPy doesn't have a dtype to represent timezone-aware datetimes, so there
114-
are two possibly useful representations:
115-
116-
1. An object-dtype :class:`ndarray` with :class:`Timestamp` objects, each
117-
with the correct ``tz``
118-
2. A ``datetime64[ns]`` -dtype :class:`ndarray`, where the values have
119-
been converted to UTC and the timezone discarded
120-
121-
Timezones may be preserved with ``dtype=object``
122-
123102
.. ipython:: python
124103
125104
ser = pd.Series(pd.date_range('2000', periods=2, tz="CET"))

doc/source/contributing_docstring.rst

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -457,12 +457,14 @@ For example, with a single value:
457457
float
458458
Random number generated.
459459
"""
460-
return random.random()
460+
return np.random.random()
461461
462462
With more than one value:
463463

464464
.. code-block:: python
465465
466+
import string
467+
466468
def random_letters():
467469
"""
468470
Generate and return a sequence of random letters.
@@ -477,8 +479,8 @@ With more than one value:
477479
letters : str
478480
String of random letters.
479481
"""
480-
length = random.randint(1, 10)
481-
letters = ''.join(random.choice(string.ascii_lowercase)
482+
length = np.random.randint(1, 10)
483+
letters = ''.join(np.random.choice(string.ascii_lowercase)
482484
for i in range(length))
483485
return length, letters
484486
@@ -499,7 +501,7 @@ If the method yields its value:
499501
Random number generated.
500502
"""
501503
while True:
502-
yield random.random()
504+
yield np.random.random()
503505
504506
.. _docstring.see_also:
505507

@@ -686,8 +688,8 @@ shown:
686688

687689
.. code-block:: python
688690
689-
import numpy as np # noqa: F401
690-
import pandas as pd # noqa: F401
691+
import numpy as np
692+
import pandas as pd
691693
692694
Any other module used in the examples must be explicitly imported, one per line (as
693695
recommended in :pep:`8#imports`)
@@ -776,7 +778,7 @@ positional arguments ``head(3)``.
776778
777779
Examples
778780
--------
779-
>>> s = pd.Series('Antelope', 'Lion', 'Zebra', numpy.nan)
781+
>>> s = pd.Series('Antelope', 'Lion', 'Zebra', np.nan)
780782
>>> s.contains(pattern='a')
781783
0 False
782784
1 False
@@ -834,7 +836,7 @@ positional arguments ``head(3)``.
834836
--------
835837
>>> import numpy as np
836838
>>> import pandas as pd
837-
>>> df = pd.DataFrame(numpy.random.randn(3, 3),
839+
>>> df = pd.DataFrame(np.random.randn(3, 3),
838840
... columns=('a', 'b', 'c'))
839841
>>> df.method(1)
840842
21

doc/source/install.rst

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,11 +24,11 @@ The Python core team plans to stop supporting Python 2.7 on January 1st, 2020.
2424
In line with `NumPy's plans`_, all pandas releases through December 31, 2018
2525
will support Python 2.
2626

27-
The final release before **December 31, 2018** will be the last release to
27+
The 0.24.x feature release will be the last release to
2828
support Python 2. The released package will continue to be available on
2929
PyPI and through conda.
3030

31-
Starting **January 1, 2019**, all releases will be Python 3 only.
31+
Starting **January 1, 2019**, all new feature releases (> 0.24) will be Python 3 only.
3232

3333
If there are people interested in continued support for Python 2.7 past December
3434
31, 2018 (either backporting bug fixes or funding) please reach out to the

doc/source/text.rst

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -303,23 +303,24 @@ The same alignment can be used when ``others`` is a ``DataFrame``:
303303
Concatenating a Series and many objects into a Series
304304
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
305305

306-
All one-dimensional list-likes can be combined in a list-like container (including iterators, ``dict``-views, etc.):
306+
Several array-like items (specifically: ``Series``, ``Index``, and 1-dimensional variants of ``np.ndarray``)
307+
can be combined in a list-like container (including iterators, ``dict``-views, etc.).
307308

308309
.. ipython:: python
309310
310311
s
311312
u
312-
s.str.cat([u.array,
313-
u.index.astype(str).array], na_rep='-')
313+
s.str.cat([u, u.to_numpy()], join='left')
314314
315-
All elements must match in length to the calling ``Series`` (or ``Index``), except those having an index if ``join`` is not None:
315+
All elements without an index (e.g. ``np.ndarray``) within the passed list-like must match in length to the calling ``Series`` (or ``Index``),
316+
but ``Series`` and ``Index`` may have arbitrary length (as long as alignment is not disabled with ``join=None``):
316317

317318
.. ipython:: python
318319
319320
v
320-
s.str.cat([u, v], join='outer', na_rep='-')
321+
s.str.cat([v, u, u.to_numpy()], join='outer', na_rep='-')
321322
322-
If using ``join='right'`` on a list of ``others`` that contains different indexes,
323+
If using ``join='right'`` on a list-like of ``others`` that contains different indexes,
323324
the union of these indexes will be used as the basis for the final concatenation:
324325

325326
.. ipython:: python

doc/source/timeseries.rst

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2425,21 +2425,25 @@ a convert on an aware stamp.
24252425
.. note::
24262426

24272427
Using :meth:`Series.to_numpy` on a ``Series`` returns a NumPy array of the data.
2428-
These values are converted to UTC, as NumPy does not currently support timezones (even though it is *printing* in the local timezone!).
2428+
NumPy does not currently support timezones (even though it is *printing* in the local timezone!),
2429+
therefore an object array of Timestamps is returned for timezone aware data:
24292430

24302431
.. ipython:: python
24312432
24322433
s_naive.to_numpy()
24332434
s_aware.to_numpy()
24342435
2435-
Further note that once converted to a NumPy array these would lose the tz tenor.
2436+
Converting to an object array of Timestamps preserves the timezone
2437+
information. For example, when converting back to a Series:
24362438

24372439
.. ipython:: python
24382440
24392441
pd.Series(s_aware.to_numpy())
24402442
2441-
However, these can be easily converted:
2443+
However, if you want an actual NumPy ``datetime64[ns]`` array (with the values
2444+
converted to UTC) instead of an array of objects, you can specify the
2445+
``dtype`` argument:
24422446

24432447
.. ipython:: python
24442448
2445-
pd.Series(s_aware.to_numpy()).dt.tz_localize('UTC').dt.tz_convert('US/Eastern')
2449+
s_aware.to_numpy(dtype='datetime64[ns]')

doc/source/whatsnew/v0.24.0.rst

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,7 @@ What's New in 0.24.0 (January XX, 2019)
66
.. warning::
77

88
The 0.24.x series of releases will be the last to support Python 2. Future feature
9-
releases will support Python 3 only.
10-
11-
See :ref:`install.dropping-27` for more.
9+
releases will support Python 3 only. See :ref:`install.dropping-27` for more.
1210

1311
{{ header }}
1412

@@ -432,7 +430,7 @@ Backwards incompatible API changes
432430
- The column order of the resultant :class:`DataFrame` from :meth:`MultiIndex.to_frame` is now guaranteed to match the :attr:`MultiIndex.names` order. (:issue:`22420`)
433431
- Incorrectly passing a :class:`DatetimeIndex` to :meth:`MultiIndex.from_tuples`, rather than a sequence of tuples, now raises a ``TypeError`` rather than a ``ValueError`` (:issue:`24024`)
434432
- :func:`pd.offsets.generate_range` argument ``time_rule`` has been removed; use ``offset`` instead (:issue:`24157`)
435-
- In 0.23.x, pandas would raise a ``ValueError`` on a merge of a numeric column (e.g. ``int`` dtyped column) and an ``object`` dtyped column (:issue:`9780`). We have re-enabled the ability to merge ``object`` and other dtypes (:issue:`21681`)
433+
- In 0.23.x, pandas would raise a ``ValueError`` on a merge of a numeric column (e.g. ``int`` dtyped column) and an ``object`` dtyped column (:issue:`9780`). We have re-enabled the ability to merge ``object`` and other dtypes; pandas will still raise on a merge between a numeric and an ``object`` dtyped column that is composed only of strings (:issue:`21681`)
436434

437435
Percentage change on groupby
438436
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -1222,6 +1220,7 @@ Deprecations
12221220
- :func:`pandas.api.types.is_datetimetz` is deprecated in favor of :func:`pandas.api.types.is_datetime64tz_dtype` (:issue:`23917`)
12231221
- Creating a :class:`TimedeltaIndex`, :class:`DatetimeIndex`, or :class:`PeriodIndex` by passing range arguments `start`, `end`, and `periods` is deprecated in favor of :func:`timedelta_range`, :func:`date_range`, or :func:`period_range` (:issue:`23919`)
12241222
- Passing a string alias like ``'datetime64[ns, UTC]'`` as the ``unit`` parameter to :class:`DatetimeTZDtype` is deprecated. Use :class:`DatetimeTZDtype.construct_from_string` instead (:issue:`23990`).
1223+
- The ``skipna`` parameter of :meth:`~pandas.api.types.infer_dtype` will switch to ``True`` by default in a future version of pandas (:issue:`17066`, :issue:`24050`)
12251224
- In :meth:`Series.where` with Categorical data, providing an ``other`` that is not present in the categories is deprecated. Convert the categorical to a different dtype or add the ``other`` to the categories first (:issue:`24077`).
12261225
- :meth:`Series.clip_lower`, :meth:`Series.clip_upper`, :meth:`DataFrame.clip_lower` and :meth:`DataFrame.clip_upper` are deprecated and will be removed in a future version. Use ``Series.clip(lower=threshold)``, ``Series.clip(upper=threshold)`` and the equivalent ``DataFrame`` methods (:issue:`24203`)
12271226

@@ -1381,6 +1380,7 @@ Performance Improvements
13811380
- Improved performance of iterating over a :class:`Series`. Using :meth:`DataFrame.itertuples` now creates iterators
13821381
without internally allocating lists of all elements (:issue:`20783`)
13831382
- Improved performance of :class:`Period` constructor, additionally benefitting ``PeriodArray`` and ``PeriodIndex`` creation (:issue:`24084` and :issue:`24118`)
1383+
- Improved performance of tz-aware :class:`DatetimeArray` binary operations (:issue:`24491`)
13841384

13851385
.. _whatsnew_0240.docs:
13861386

@@ -1592,6 +1592,7 @@ Missing
15921592
- Bug in :func:`Series.hasnans` that could be incorrectly cached and return incorrect answers if null elements are introduced after an initial call (:issue:`19700`)
15931593
- :func:`Series.isin` now treats all NaN-floats as equal also for ``np.object``-dtype. This behavior is consistent with the behavior for float64 (:issue:`22119`)
15941594
- :func:`unique` no longer mangles NaN-floats and the ``NaT``-object for ``np.object``-dtype, i.e. ``NaT`` is no longer coerced to a NaN-value and is treated as a different entity. (:issue:`22295`)
1595+
- :class:`DataFrame` and :class:`Series` now properly handle numpy masked arrays with hardened masks. Previously, constructing a DataFrame or Series from a masked array with a hard mask would create a pandas object containing the underlying value, rather than the expected NaN. (:issue:`24574`)
15951596

15961597

15971598
MultiIndex

pandas/_libs/algos_common_helper.pxi.in

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -109,8 +109,6 @@ def ensure_object(object arr):
109109
return arr
110110
else:
111111
return arr.astype(np.object_)
112-
elif hasattr(arr, '_box_values_as_index'):
113-
return arr._box_values_as_index()
114112
else:
115113
return np.array(arr, dtype=np.object_)
116114

pandas/_libs/interval.pyx

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -389,8 +389,8 @@ cdef class Interval(IntervalMixin):
389389
390390
See Also
391391
--------
392-
IntervalArray.overlaps : The corresponding method for IntervalArray
393-
IntervalIndex.overlaps : The corresponding method for IntervalIndex
392+
IntervalArray.overlaps : The corresponding method for IntervalArray.
393+
IntervalIndex.overlaps : The corresponding method for IntervalIndex.
394394
395395
Examples
396396
--------

pandas/_libs/lib.pyx

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ from fractions import Fraction
44
from numbers import Number
55

66
import sys
7+
import warnings
78

89
import cython
910
from cython import Py_ssize_t
@@ -1079,7 +1080,7 @@ cdef _try_infer_map(v):
10791080
return None
10801081

10811082

1082-
def infer_dtype(value: object, skipna: bool=False) -> str:
1083+
def infer_dtype(value: object, skipna: object=None) -> str:
10831084
"""
10841085
Efficiently infer the type of a passed val, or list-like
10851086
array of values. Return a string describing the type.
@@ -1088,8 +1089,7 @@ def infer_dtype(value: object, skipna: bool=False) -> str:
10881089
----------
10891090
value : scalar, list, ndarray, or pandas type
10901091
skipna : bool, default False
1091-
Ignore NaN values when inferring the type. The default of ``False``
1092-
will be deprecated in a later version of pandas.
1092+
Ignore NaN values when inferring the type.
10931093

10941094
.. versionadded:: 0.21.0
10951095

@@ -1186,6 +1186,12 @@ def infer_dtype(value: object, skipna: bool=False) -> str:
11861186
bint seen_pdnat = False
11871187
bint seen_val = False
11881188

1189+
if skipna is None:
1190+
msg = ('A future version of pandas will default to `skipna=True`. To '
1191+
'silence this warning, pass `skipna=True|False` explicitly.')
1192+
warnings.warn(msg, FutureWarning, stacklevel=2)
1193+
skipna = False
1194+
11891195
if util.is_array(value):
11901196
values = value
11911197
elif hasattr(value, 'dtype'):

0 commit comments

Comments
 (0)