Skip to content

Commit 6b97e0b

Browse files
committed
Merge branch 'master' into value_counts_part1
2 parents 25b6c14 + 73ef54b commit 6b97e0b

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

64 files changed

+1308
-813
lines changed
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
"""
2+
ipython analogue:
3+
4+
tr = TimeResolution()
5+
mi = pd.MultiIndex.from_product(tr.params[:-1] + ([str(x) for x in tr.params[-1]],))
6+
df = pd.DataFrame(np.nan, index=mi, columns=["mean", "stdev"])
7+
8+
for unit in tr.params[0]:
9+
for size in tr.params[1]:
10+
for tz in tr.params[2]:
11+
tr.setup(unit, size, tz)
12+
key = (unit, size, str(tz))
13+
print(key)
14+
15+
val = %timeit -o tr.time_get_resolution(unit, size, tz)
16+
17+
df.loc[key] = (val.average, val.stdev)
18+
19+
"""
20+
from datetime import timedelta, timezone
21+
22+
from dateutil.tz import gettz, tzlocal
23+
import numpy as np
24+
import pytz
25+
26+
from pandas._libs.tslibs.resolution import get_resolution
27+
28+
29+
class TimeResolution:
    """
    asv benchmark timing ``get_resolution`` on int64 datetime data.

    The benchmark is parametrized over the datetime64 unit used to build
    the data, the number of elements, and the tzinfo object passed to
    ``get_resolution``.
    """

    param_names = ["unit", "size", "tz"]
    params = (
        ["D", "h", "m", "s", "us", "ns"],
        [1, 100, 10_000, 1_000_000],
        [
            None,
            timezone.utc,
            timezone(timedelta(minutes=60)),
            pytz.timezone("US/Pacific"),
            gettz("Asia/Tokyo"),
            tzlocal(),
        ],
    )

    def setup(self, unit, size, tz):
        """Build the int64 array that the timed method operates on."""
        # Small random integers are reinterpreted as datetime64 values in
        # the requested unit, cast to nanoseconds, and viewed back as int64.
        raw = np.random.randint(0, 10, size=size, dtype="i8")
        as_unit = raw.view(f"M8[{unit}]")
        self.i8data = as_unit.astype("M8[ns]").view("i8")

    def time_get_resolution(self, unit, size, tz):
        """Time ``get_resolution`` on the prepared data with ``tz``."""
        get_resolution(self.i8data, tz)

asv_bench/benchmarks/tslibs/timestamp.py

Lines changed: 21 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,29 @@
1-
import datetime
1+
from datetime import datetime, timedelta, timezone
22

3-
import dateutil
3+
from dateutil.tz import gettz, tzlocal, tzutc
44
import numpy as np
55
import pytz
66

77
from pandas import Timestamp
88

9+
# One case for each type of tzinfo object that has its own code path
10+
# in tzconversion code.
11+
_tzs = [
12+
None,
13+
pytz.timezone("Europe/Amsterdam"),
14+
gettz("US/Central"),
15+
pytz.UTC,
16+
tzutc(),
17+
timezone(timedelta(minutes=60)),
18+
tzlocal(),
19+
]
20+
921

1022
class TimestampConstruction:
1123
def setup(self):
1224
self.npdatetime64 = np.datetime64("2020-01-01 00:00:00")
13-
self.dttime_unaware = datetime.datetime(2020, 1, 1, 0, 0, 0)
14-
self.dttime_aware = datetime.datetime(2020, 1, 1, 0, 0, 0, 0, pytz.UTC)
25+
self.dttime_unaware = datetime(2020, 1, 1, 0, 0, 0)
26+
self.dttime_aware = datetime(2020, 1, 1, 0, 0, 0, 0, pytz.UTC)
1527
self.ts = Timestamp("2020-01-01 00:00:00")
1628

1729
def time_parse_iso8601_no_tz(self):
@@ -49,7 +61,6 @@ def time_from_pd_timestamp(self):
4961

5062

5163
class TimestampProperties:
52-
_tzs = [None, pytz.timezone("Europe/Amsterdam"), pytz.UTC, dateutil.tz.tzutc()]
5364
_freqs = [None, "B"]
5465
params = [_tzs, _freqs]
5566
param_names = ["tz", "freq"]
@@ -63,9 +74,6 @@ def time_tz(self, tz, freq):
6374
def time_dayofweek(self, tz, freq):
6475
self.ts.dayofweek
6576

66-
def time_weekday_name(self, tz, freq):
67-
self.ts.day_name
68-
6977
def time_dayofyear(self, tz, freq):
7078
self.ts.dayofyear
7179

@@ -108,9 +116,12 @@ def time_microsecond(self, tz, freq):
108116
def time_month_name(self, tz, freq):
109117
self.ts.month_name()
110118

119+
def time_weekday_name(self, tz, freq):
120+
self.ts.day_name()
121+
111122

112123
class TimestampOps:
113-
params = [None, "US/Eastern", pytz.UTC, dateutil.tz.tzutc()]
124+
params = _tzs
114125
param_names = ["tz"]
115126

116127
def setup(self, tz):
@@ -148,7 +159,7 @@ def time_ceil(self, tz):
148159

149160
class TimestampAcrossDst:
150161
def setup(self):
151-
dt = datetime.datetime(2016, 3, 27, 1)
162+
dt = datetime(2016, 3, 27, 1)
152163
self.tzinfo = pytz.timezone("CET").localize(dt, is_dst=False).tzinfo
153164
self.ts2 = Timestamp(dt)
154165

asv_bench/benchmarks/tslibs/tslib.py

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
"""
2+
ipython analogue:
3+
4+
tr = TimeIntsToPydatetime()
5+
mi = pd.MultiIndex.from_product(
6+
tr.params[:-1] + ([str(x) for x in tr.params[-1]],)
7+
)
8+
df = pd.DataFrame(np.nan, index=mi, columns=["mean", "stdev"])
9+
for box in tr.params[0]:
10+
for size in tr.params[1]:
11+
for tz in tr.params[2]:
12+
tr.setup(box, size, tz)
13+
key = (box, size, str(tz))
14+
print(key)
15+
val = %timeit -o tr.time_ints_to_pydatetime(box, size, tz)
16+
df.loc[key] = (val.average, val.stdev)
17+
"""
18+
from datetime import timedelta, timezone
19+
20+
from dateutil.tz import gettz, tzlocal
21+
import numpy as np
22+
import pytz
23+
24+
from pandas._libs.tslib import ints_to_pydatetime
25+
26+
_tzs = [
27+
None,
28+
timezone.utc,
29+
timezone(timedelta(minutes=60)),
30+
pytz.timezone("US/Pacific"),
31+
gettz("Asia/Tokyo"),
32+
tzlocal(),
33+
]
34+
_sizes = [0, 1, 100, 10 ** 4, 10 ** 6]
35+
36+
37+
class TimeIntsToPydatetime:
    """
    asv benchmark timing ``ints_to_pydatetime``.

    The benchmark is parametrized over the ``box`` keyword, the number of
    elements, and the tzinfo object passed to ``ints_to_pydatetime``.
    """

    # TODO: fold? freq?
    param_names = ["box", "size", "tz"]
    params = (
        ["time", "date", "datetime", "timestamp"],
        _sizes,
        _tzs,
    )

    def setup(self, box, size, tz):
        """Build the int64 array that the timed method operates on."""
        self.i8data = np.random.randint(0, 10, size=size, dtype="i8")

    def time_ints_to_pydatetime(self, box, size, tz):
        """Time ``ints_to_pydatetime`` on the prepared data."""
        # ints_to_pydatetime does not allow non-None tz with date;
        # this will mean doing some duplicate benchmarks
        effective_tz = None if box == "date" else tz
        ints_to_pydatetime(self.i8data, effective_tz, box=box)

doc/source/user_guide/computation.rst

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -597,6 +597,18 @@ You can view other examples of ``BaseIndexer`` subclasses `here <https://github.
597597

598598
.. versionadded:: 1.1
599599

600+
One subclass of note within those examples is the ``VariableOffsetWindowIndexer`` that allows
601+
rolling operations over a non-fixed offset like a ``BusinessDay``.
602+
603+
.. ipython:: python
604+
605+
from pandas.api.indexers import VariableOffsetWindowIndexer
606+
df = pd.DataFrame(range(10), index=pd.date_range('2020', periods=10))
607+
offset = pd.offsets.BDay(1)
608+
indexer = VariableOffsetWindowIndexer(index=df.index, offset=offset)
609+
df
610+
df.rolling(indexer).sum()
611+
600612
For some problems knowledge of the future is available for analysis. For example, this occurs when
601613
each data point is a full time series read from an experiment, and the task is to extract underlying
602614
conditions. In these cases it can be useful to perform forward-looking rolling window computations.

doc/source/whatsnew/v1.1.0.rst

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -328,6 +328,7 @@ Other enhancements
328328
- :meth:`DataFrame.to_html` and :meth:`DataFrame.to_string`'s ``col_space`` parameter now accepts a list or dict to change only some specific columns' width (:issue:`28917`).
329329
- :meth:`DataFrame.to_excel` can now also write OpenOffice spreadsheet (.ods) files (:issue:`27222`)
330330
- :meth:`~Series.explode` now accepts ``ignore_index`` to reset the index, similarly to :meth:`pd.concat` or :meth:`DataFrame.sort_values` (:issue:`34932`).
331+
- :meth:`read_csv` now accepts string values like "0", "0.0", "1", "1.0" as convertible to the nullable boolean dtype (:issue:`34859`)
331332

332333
.. ---------------------------------------------------------------------------
333334
@@ -673,6 +674,7 @@ Other API changes
673674
- ``loc`` lookups with an object-dtype :class:`Index` and an integer key will now raise ``KeyError`` instead of ``TypeError`` when key is missing (:issue:`31905`)
674675
- Using a :func:`pandas.api.indexers.BaseIndexer` with ``count``, ``min``, ``max``, ``median``, ``skew``, ``cov``, ``corr`` will now return correct results for any monotonic :func:`pandas.api.indexers.BaseIndexer` descendant (:issue:`32865`)
675676
- Added a :func:`pandas.api.indexers.FixedForwardWindowIndexer` class to support forward-looking windows during ``rolling`` operations.
677+
- Added a :func:`pandas.api.indexers.VariableOffsetWindowIndexer` class to support ``rolling`` operations with non-fixed offsets (:issue:`34994`)
676678
- Added :class:`pandas.errors.InvalidIndexError` (:issue:`34570`).
677679
- :meth:`DataFrame.swaplevels` now raises a ``TypeError`` if the axis is not a :class:`MultiIndex`.
678680
Previously an ``AttributeError`` was raised (:issue:`31126`)
@@ -692,9 +694,9 @@ Other API changes
692694
- :func:`pandas.api.dtypes.is_string_dtype` no longer incorrectly identifies categorical series as string.
693695
- :func:`read_excel` no longer takes ``**kwds`` arguments. This means that passing in keyword ``chunksize`` now raises a ``TypeError``
694696
(previously raised a ``NotImplementedError``), while passing in keyword ``encoding`` now raises a ``TypeError`` (:issue:`34464`)
695-
- :func: `merge` now checks ``suffixes`` parameter type to be ``tuple`` and raises ``TypeError``, whereas before a ``list`` or ``set`` were accepted and that the ``set`` could produce unexpected results (:issue:`33740`)
696697
- :class:`Period` no longer accepts tuples for the ``freq`` argument (:issue:`34658`)
697698
- :meth:`Series.interpolate` and :meth:`DataFrame.interpolate` now raises ValueError if ``limit_direction`` is 'forward' or 'both' and ``method`` is 'backfill' or 'bfill' or ``limit_direction`` is 'backward' or 'both' and ``method`` is 'pad' or 'ffill' (:issue:`34746`)
699+
- The :class:`DataFrame` constructor no longer accepts a list of ``DataFrame`` objects. Because of changes to NumPy, ``DataFrame`` objects are now consistently treated as 2D objects, so a list of ``DataFrames`` is considered 3D, and no longer acceptable for the ``DataFrame`` constructor (:issue:`32289`).
698700

699701

700702
Increased minimum versions for dependencies
@@ -785,6 +787,7 @@ Deprecations
785787
- :meth:`DataFrame.to_dict` has deprecated accepting short names for ``orient`` in future versions (:issue:`32515`)
786788
- :meth:`Categorical.to_dense` is deprecated and will be removed in a future version, use ``np.asarray(cat)`` instead (:issue:`32639`)
787789
- The ``fastpath`` keyword in the ``SingleBlockManager`` constructor is deprecated and will be removed in a future version (:issue:`33092`)
790+
- Providing ``suffixes`` as a ``set`` in :func:`pandas.merge` is deprecated. Provide a tuple instead (:issue:`33740`, :issue:`34741`).
788791
- :meth:`Index.is_mixed` is deprecated and will be removed in a future version, check ``index.inferred_type`` directly instead (:issue:`32922`)
789792

790793
- Passing any arguments but the first one to :func:`read_html` as
@@ -816,6 +819,7 @@ Deprecations
816819
- :meth:`util.testing.assert_almost_equal` now accepts both relative and absolute
817820
precision through the ``rtol``, and ``atol`` parameters, thus deprecating the
818821
``check_less_precise`` parameter. (:issue:`13357`).
822+
- :func:`DataFrame.melt` accepting a value_name that already exists is deprecated, and will be removed in a future version (:issue:`34731`)
819823

820824
.. ---------------------------------------------------------------------------
821825
@@ -1041,6 +1045,9 @@ I/O
10411045
- Bug in :meth:`read_excel` for ODS files removes 0.0 values (:issue:`27222`)
10421046
- Bug in :meth:`ujson.encode` was raising an ``OverflowError`` with numbers larger than ``sys.maxsize`` (:issue:`34395`)
10431047
- Bug in :meth:`HDFStore.append_to_multiple` was raising a ``ValueError`` when the min_itemsize parameter is set (:issue:`11238`)
1048+
- :meth:`~HDFStore.create_table` now raises an error when the ``column`` argument is not specified in ``data_columns`` on input (:issue:`28156`)
1049+
- :meth:`read_json` can now read a line-delimited JSON file from a file URL when ``lines`` and ``chunksize`` are set.
1050+
- Bug in :meth:`DataFrame.to_sql` when reading DataFrames with ``-np.inf`` entries with MySQL now has a more explicit ``ValueError`` (:issue:`34431`)
10441051

10451052
Plotting
10461053
^^^^^^^^
@@ -1120,6 +1127,7 @@ ExtensionArray
11201127
^^^^^^^^^^^^^^
11211128

11221129
- Fixed bug where :meth:`Series.value_counts` would raise on empty input of ``Int64`` dtype (:issue:`33317`)
1130+
- Fixed bug in :func:`concat` when concatenating DataFrames with non-overlapping columns resulting in object-dtype columns rather than preserving the extension dtype (:issue:`27692`, :issue:`33027`)
11231131
- Fixed bug where :meth:`StringArray.isna` would return ``False`` for NA values when ``pandas.options.mode.use_inf_as_na`` was set to ``True`` (:issue:`33655`)
11241132
- Fixed bug in :class:`Series` construction with EA dtype and index but no data or scalar data fails (:issue:`26469`)
11251133
- Fixed bug that caused :meth:`Series.__repr__()` to crash for extension types whose elements are multidimensional arrays (:issue:`33770`).

pandas/_libs/lib.pyx

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ from pandas._libs.tslibs.nattype cimport (
7373
)
7474
from pandas._libs.tslibs.conversion cimport convert_to_tsobject
7575
from pandas._libs.tslibs.timedeltas cimport convert_to_timedelta64
76-
from pandas._libs.tslibs.timezones cimport get_timezone, tz_compare
76+
from pandas._libs.tslibs.timezones cimport tz_compare
7777
from pandas._libs.tslibs.period cimport is_period_object
7878
from pandas._libs.tslibs.offsets cimport is_offset_object
7979

@@ -1789,7 +1789,7 @@ def is_datetime_with_singletz_array(values: ndarray) -> bool:
17891789
for i in range(n):
17901790
base_val = values[i]
17911791
if base_val is not NaT:
1792-
base_tz = get_timezone(getattr(base_val, 'tzinfo', None))
1792+
base_tz = getattr(base_val, 'tzinfo', None)
17931793
break
17941794

17951795
for j in range(i, n):

0 commit comments

Comments
 (0)