Skip to content

Commit 7c3c349

Browse files
committed
Merge remote-tracking branch 'upstream/master' into grp-desc-perf
2 parents 02eb336 + 601d71f commit 7c3c349

File tree

100 files changed

+7506
-6777
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

100 files changed

+7506
-6777
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,7 @@
8989

9090

9191

92-
## What is it
92+
## What is it?
9393

9494
**pandas** is a Python package providing fast, flexible, and expressive data
9595
structures designed to make working with "relational" or "labeled" data both

asv_bench/benchmarks/series_methods.py

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,64 @@ def time_isin(self, dtypes):
3838
self.s.isin(self.values)
3939

4040

41+
class IsInFloat64(object):
    """Benchmarks for ``Series.isin`` on a tiny float64 Series.

    The Series being tested holds only two elements, while the arrays
    passed as ``values`` are large (10**6 or 10**7 elements).
    """

    def setup(self):
        """Build the two-element Series and the three lookup arrays."""
        self.small = Series([1, 2], dtype=np.float64)
        # 10**6 distinct values
        self.many_different_values = np.arange(10**6, dtype=np.float64)
        # 10**7 copies of the single value 0.0
        self.few_different_values = np.zeros(10**7, dtype=np.float64)
        # 10**7 NaNs
        self.only_nans_values = np.full(10**7, np.nan, dtype=np.float64)

    def time_isin_many_different(self):
        # runtime is dominated by creation of the lookup-table
        self.small.isin(self.many_different_values)

    def time_isin_few_different(self):
        # runtime is dominated by creation of the lookup-table
        self.small.isin(self.few_different_values)

    def time_isin_nan_values(self):
        # runtime is dominated by creation of the lookup-table
        # Use the all-NaN array here; passing ``few_different_values``
        # would just repeat ``time_isin_few_different``.
        self.small.isin(self.only_nans_values)
60+
61+
62+
class IsInForObjects(object):
    """Benchmarks for ``Series.isin`` when both sides have object dtype.

    The builtin ``object`` and ``np.float64`` are used in place of the
    ``np.object`` / ``np.float`` aliases, which newer NumPy releases no
    longer provide; the resulting dtypes are the same.
    """

    def setup(self):
        """Build object-dtype Series and lookup arrays of various sizes."""
        self.s_nans = Series(np.full(10**4, np.nan)).astype(object)
        self.vals_nans = np.full(10**4, np.nan).astype(object)
        self.s_short = Series(np.arange(2)).astype(object)
        self.s_long = Series(np.arange(10**5)).astype(object)
        self.vals_short = np.arange(2).astype(object)
        self.vals_long = np.arange(10**5).astype(object)
        # because of nans floats are special:
        self.s_long_floats = Series(np.arange(10**5,
                                              dtype=np.float64)).astype(object)
        self.vals_long_floats = np.arange(10**5,
                                          dtype=np.float64).astype(object)

    def time_isin_nans(self):
        # if nan-objects are different objects,
        # this has the potential to trigger O(n^2) running time
        self.s_nans.isin(self.vals_nans)

    def time_isin_short_series_long_values(self):
        # running time dominated by the preprocessing
        self.s_short.isin(self.vals_long)

    def time_isin_long_series_short_values(self):
        # running time dominated by look-up
        self.s_long.isin(self.vals_short)

    def time_isin_long_series_long_values(self):
        # no dominating part
        self.s_long.isin(self.vals_long)

    def time_isin_long_series_long_values_floats(self):
        # no dominating part
        self.s_long_floats.isin(self.vals_long_floats)
97+
98+
4199
class NSort(object):
42100

43101
goal_time = 0.2

doc/source/api.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -545,6 +545,7 @@ These can be accessed like ``Series.dt.<property>``.
545545

546546
Series.dt.date
547547
Series.dt.time
548+
Series.dt.timetz
548549
Series.dt.year
549550
Series.dt.month
550551
Series.dt.day
@@ -1739,6 +1740,7 @@ Time/Date Components
17391740
DatetimeIndex.nanosecond
17401741
DatetimeIndex.date
17411742
DatetimeIndex.time
1743+
DatetimeIndex.timetz
17421744
DatetimeIndex.dayofyear
17431745
DatetimeIndex.weekofyear
17441746
DatetimeIndex.week

doc/source/install.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -225,7 +225,7 @@ Dependencies
225225

226226
* `setuptools <https://setuptools.readthedocs.io/en/latest/>`__: 24.2.0 or higher
227227
* `NumPy <http://www.numpy.org>`__: 1.9.0 or higher
228-
* `python-dateutil <//https://dateutil.readthedocs.io/en/stable/>`__: 2.5.0 or higher
228+
* `python-dateutil <https://dateutil.readthedocs.io/en/stable/>`__: 2.5.0 or higher
229229
* `pytz <http://pytz.sourceforge.net/>`__
230230

231231
.. _install.recommended_dependencies:

doc/source/timeseries.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -724,6 +724,7 @@ There are several time/date properties that one can access from ``Timestamp`` or
724724
nanosecond,"The nanoseconds of the datetime"
725725
date,"Returns datetime.date (does not contain timezone information)"
726726
time,"Returns datetime.time (does not contain timezone information)"
727+
timetz,"Returns datetime.time as local time with timezone information"
727728
dayofyear,"The ordinal day of year"
728729
weekofyear,"The week ordinal of the year"
729730
week,"The week ordinal of the year"

doc/source/whatsnew/v0.23.5.txt

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,9 @@ and bug fixes. We recommend that all users upgrade to this version.
2020
Fixed Regressions
2121
~~~~~~~~~~~~~~~~~
2222

23+
- Constructing a DataFrame with an index argument that wasn't already an
24+
instance of :class:`~pandas.core.Index` was broken in `4efb39f
25+
<https://github.com/pandas-dev/pandas/commit/4efb39f01f5880122fa38d91e12d217ef70fad9e>`_ (:issue:`22227`).
2326
-
2427
-
2528

@@ -30,10 +33,14 @@ Bug Fixes
3033

3134
**Groupby/Resample/Rolling**
3235

33-
-
36+
- Bug in :meth:`DataFrame.resample` when resampling ``NaT`` in ``TimedeltaIndex`` (:issue:`13223`).
3437
-
3538

3639
**Missing**
3740

3841
-
3942
-
43+
44+
**I/O**
45+
46+
- Bug in :func:`read_csv` that caused it to raise ``OverflowError`` when trying to use 'inf' as ``na_value`` with integer index column (:issue:`17128`)

doc/source/whatsnew/v0.24.0.txt

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -181,6 +181,7 @@ Other Enhancements
181181
The default compression for ``to_csv``, ``to_json``, and ``to_pickle`` methods has been updated to ``'infer'`` (:issue:`22004`).
182182
- :func:`to_timedelta` now supports ISO-formatted timedelta strings (:issue:`21877`)
183183
- :class:`Series` and :class:`DataFrame` now support :class:`Iterable` in constructor (:issue:`2193`)
184+
- :class:`DatetimeIndex` gained :attr:`DatetimeIndex.timetz` attribute. Returns local time with timezone information. (:issue:`21358`)
184185

185186
.. _whatsnew_0240.api_breaking:
186187

@@ -477,6 +478,7 @@ Deprecations
477478
- :meth:`MultiIndex.to_hierarchical` is deprecated and will be removed in a future version (:issue:`21613`)
478479
- :meth:`Series.ptp` is deprecated. Use ``numpy.ptp`` instead (:issue:`21614`)
479480
- :meth:`Series.compress` is deprecated. Use ``Series[condition]`` instead (:issue:`18262`)
481+
- The signature of :meth:`Series.to_csv` has been made consistent with that of :meth:`DataFrame.to_csv`: the name of the first argument is now 'path_or_buf', the order of subsequent arguments has changed, the 'header' argument now defaults to True. (:issue:`19715`)
480482
- :meth:`Categorical.from_codes` has deprecated providing float values for the ``codes`` argument. (:issue:`21767`)
481483
- :func:`pandas.read_table` is deprecated. Instead, use :func:`pandas.read_csv` passing ``sep='\t'`` if necessary (:issue:`21948`)
482484

@@ -618,13 +620,14 @@ Missing
618620

619621
- Bug in :func:`DataFrame.fillna` where a ``ValueError`` would raise when one column contained a ``datetime64[ns, tz]`` dtype (:issue:`15522`)
620622
- Bug in :func:`Series.hasnans` that could be incorrectly cached and return incorrect answers if null elements are introduced after an initial call (:issue:`19700`)
623+
- :func:`Series.isin` now treats all NaN values as equal also for ``np.object`` dtype. This behavior is consistent with the behavior for float64 (:issue:`22119`)
621624

622625
MultiIndex
623626
^^^^^^^^^^
624627

625-
- Removed compatibility for MultiIndex pickles prior to version 0.8.0; compatibility with MultiIndex pickles from version 0.13 forward is maintained (:issue:`21654`)
626-
-
627-
-
628+
- Removed compatibility for :class:`MultiIndex` pickles prior to version 0.8.0; compatibility with :class:`MultiIndex` pickles from version 0.13 forward is maintained (:issue:`21654`)
629+
- :meth:`MultiIndex.get_loc_level` (and as a consequence, ``.loc`` on a :class:`MultiIndex`-indexed object) will now raise a ``KeyError``, rather than returning an empty ``slice``, if asked for a label which is present in the ``levels`` but is unused (:issue:`22221`)
630+
- Fix ``TypeError`` in Python 3 when creating :class:`MultiIndex` in which some levels have mixed types, e.g. when some labels are tuples (:issue:`15457`)
628631

629632
I/O
630633
^^^
@@ -668,6 +671,7 @@ Reshaping
668671
- :func:`pandas.core.groupby.GroupBy.rank` now raises a ``ValueError`` when an invalid value is passed for argument ``na_option`` (:issue:`22124`)
669672
- Bug in :func:`get_dummies` with Unicode attributes in Python 2 (:issue:`22084`)
670673
- Bug in :meth:`DataFrame.replace` raises ``RecursionError`` when replacing empty lists (:issue:`22083`)
674+
- Bug in :meth:`Series.replace` and :meth:`DataFrame.replace` when dict is used as the ``to_replace`` value and one key in the dict is another key's value, the results were inconsistent between using integer key and using string key (:issue:`20656`)
671675
-
672676

673677
Build Changes

pandas/_libs/algos_common_helper.pxi.in

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -523,7 +523,7 @@ def put2d_{{name}}_{{dest_type}}(ndarray[{{c_type}}, ndim=2, cast=True] values,
523523
Py_ssize_t i, j, k
524524

525525
k = len(values)
526-
for j from 0 <= j < k:
526+
for j in range(k):
527527
i = indexer[j]
528528
out[i] = values[j, loc]
529529

pandas/_libs/algos_take_helper.pxi.in

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ def get_dispatch(dtypes):
4646
fv = fill_value
4747

4848
%(nogil_str)s
49-
%(tab)sfor i from 0 <= i < n:
49+
%(tab)sfor i in range(n):
5050
%(tab)s idx = indexer[i]
5151
%(tab)s if idx == -1:
5252
%(tab)s out[i] = fv
@@ -74,24 +74,24 @@ def get_dispatch(dtypes):
7474
values.strides[1] == sizeof(%(c_type_out)s) and
7575
sizeof(%(c_type_out)s) * n >= 256):
7676

77-
for i from 0 <= i < n:
77+
for i in range(n):
7878
idx = indexer[i]
7979
if idx == -1:
80-
for j from 0 <= j < k:
80+
for j in range(k):
8181
out[i, j] = fv
8282
else:
8383
v = &values[idx, 0]
8484
o = &out[i, 0]
8585
memmove(o, v, <size_t>(sizeof(%(c_type_out)s) * k))
8686
return
8787

88-
for i from 0 <= i < n:
88+
for i in range(n):
8989
idx = indexer[i]
9090
if idx == -1:
91-
for j from 0 <= j < k:
91+
for j in range(k):
9292
out[i, j] = fv
9393
else:
94-
for j from 0 <= j < k:
94+
for j in range(k):
9595
out[i, j] = %(preval)svalues[idx, j]%(postval)s
9696
"""
9797

@@ -108,8 +108,8 @@ def get_dispatch(dtypes):
108108

109109
fv = fill_value
110110

111-
for i from 0 <= i < n:
112-
for j from 0 <= j < k:
111+
for i in range(n):
112+
for j in range(k):
113113
idx = indexer[j]
114114
if idx == -1:
115115
out[i, j] = fv
@@ -246,13 +246,13 @@ def take_2d_multi_{{name}}_{{dest}}(ndarray[{{c_type_in}}, ndim=2] values,
246246
k = len(idx1)
247247

248248
fv = fill_value
249-
for i from 0 <= i < n:
249+
for i in range(n):
250250
idx = idx0[i]
251251
if idx == -1:
252-
for j from 0 <= j < k:
252+
for j in range(k):
253253
out[i, j] = fv
254254
else:
255-
for j from 0 <= j < k:
255+
for j in range(k):
256256
if idx1[j] == -1:
257257
out[i, j] = fv
258258
else:

pandas/_libs/hashtable_func_helper.pxi.in

Lines changed: 8 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -161,18 +161,18 @@ def duplicated_{{dtype}}({{scalar}}[:] values, object keep='first'):
161161
{{endif}}
162162
elif keep == 'first':
163163
{{if dtype == 'object'}}
164-
for i from 0 <= i < n:
164+
for i in range(n):
165165
kh_put_{{ttype}}(table, <PyObject*> values[i], &ret)
166166
out[i] = ret == 0
167167
{{else}}
168168
with nogil:
169-
for i from 0 <= i < n:
169+
for i in range(n):
170170
kh_put_{{ttype}}(table, values[i], &ret)
171171
out[i] = ret == 0
172172
{{endif}}
173173
else:
174174
{{if dtype == 'object'}}
175-
for i from 0 <= i < n:
175+
for i in range(n):
176176
value = values[i]
177177
k = kh_get_{{ttype}}(table, <PyObject*> value)
178178
if k != table.n_buckets:
@@ -185,7 +185,7 @@ def duplicated_{{dtype}}({{scalar}}[:] values, object keep='first'):
185185
out[i] = 0
186186
{{else}}
187187
with nogil:
188-
for i from 0 <= i < n:
188+
for i in range(n):
189189
value = values[i]
190190
k = kh_get_{{ttype}}(table, value)
191191
if k != table.n_buckets:
@@ -210,10 +210,10 @@ def duplicated_{{dtype}}({{scalar}}[:] values, object keep='first'):
210210
@cython.boundscheck(False)
211211
{{if dtype == 'object'}}
212212

213-
def ismember_{{dtype}}(ndarray[{{scalar}}] arr, ndarray[{{scalar}}] values, bint hasnans=0):
213+
def ismember_{{dtype}}(ndarray[{{scalar}}] arr, ndarray[{{scalar}}] values):
214214
{{else}}
215215

216-
def ismember_{{dtype}}({{scalar}}[:] arr, {{scalar}}[:] values, bint hasnans=0):
216+
def ismember_{{dtype}}({{scalar}}[:] arr, {{scalar}}[:] values):
217217
{{endif}}
218218

219219
"""
@@ -224,7 +224,6 @@ def ismember_{{dtype}}({{scalar}}[:] arr, {{scalar}}[:] values, bint hasnans=0):
224224
----------
225225
arr : {{dtype}} ndarray
226226
values : {{dtype}} ndarray
227-
hasnans : bint, optional
228227

229228
Returns
230229
-------
@@ -259,19 +258,13 @@ def ismember_{{dtype}}({{scalar}}[:] arr, {{scalar}}[:] values, bint hasnans=0):
259258
for i in range(n):
260259
val = arr[i]
261260
k = kh_get_{{ttype}}(table, <PyObject*> val)
262-
if k != table.n_buckets:
263-
result[i] = 1
264-
else:
265-
result[i] = hasnans and val != val
261+
result[i] = (k != table.n_buckets)
266262
{{else}}
267263
with nogil:
268264
for i in range(n):
269265
val = arr[i]
270266
k = kh_get_{{ttype}}(table, val)
271-
if k != table.n_buckets:
272-
result[i] = 1
273-
else:
274-
result[i] = hasnans and val != val
267+
result[i] = (k != table.n_buckets)
275268
{{endif}}
276269

277270
kh_destroy_{{ttype}}(table)

0 commit comments

Comments
 (0)