Skip to content

Commit e2dfa71

Browse files
committed
Merge branch 'main' of https://github.com/pandas-dev/pandas into factorize_na_v2
2 parents aba2551 + f478e30 commit e2dfa71

File tree

16 files changed

+139
-26
lines changed

16 files changed

+139
-26
lines changed

doc/source/whatsnew/v1.5.0.rst

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -481,7 +481,7 @@ Timedelta
481481

482482
Time Zones
483483
^^^^^^^^^^
484-
-
484+
- Bug in :class:`Timestamp` constructor raising when passed a ``ZoneInfo`` tzinfo object (:issue:`46425`)
485485
-
486486

487487
Numeric
@@ -569,6 +569,8 @@ I/O
569569
- Bug in Parquet roundtrip for Interval dtype with ``datetime64[ns]`` subtype (:issue:`45881`)
570570
- Bug in :func:`read_excel` when reading a ``.ods`` file with newlines between xml elements (:issue:`45598`)
571571
- Bug in :func:`read_parquet` when ``engine="fastparquet"`` where the file was not closed on error (:issue:`46555`)
572+
- :meth:`DataFrame.to_html` now excludes the ``border`` attribute from ``<table>`` elements when the ``border`` keyword is set to ``False``.
573+
-
572574

573575
Period
574576
^^^^^^
@@ -599,7 +601,7 @@ Groupby/resample/rolling
599601
- Bug in :meth:`GroupBy.cummax` with ``int64`` dtype with leading value being the smallest possible int64 (:issue:`46382`)
600602
- Bug in :meth:`GroupBy.max` with empty groups and ``uint64`` dtype incorrectly raising ``RuntimeError`` (:issue:`46408`)
601603
- Bug in :meth:`.GroupBy.apply` failing when ``func`` was a string and args or kwargs were supplied (:issue:`46479`)
602-
-
604+
- Bug in :meth:`.Rolling.var` segfaulting when calculating weighted variance with a window size larger than the data size (:issue:`46760`)
603605

604606
Reshaping
605607
^^^^^^^^^

pandas/_libs/tslibs/conversion.pyx

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ from pandas._libs.tslibs.timezones cimport (
5656
is_fixed_offset,
5757
is_tzlocal,
5858
is_utc,
59+
is_zoneinfo,
5960
maybe_get_tz,
6061
tz_compare,
6162
utc_pytz as UTC,
@@ -532,7 +533,7 @@ cdef _TSObject _create_tsobject_tz_using_offset(npy_datetimestruct dts,
532533
# see PEP 495 https://www.python.org/dev/peps/pep-0495/#the-fold-attribute
533534
if is_utc(tz):
534535
pass
535-
elif is_tzlocal(tz):
536+
elif is_tzlocal(tz) or is_zoneinfo(tz):
536537
localize_tzinfo_api(obj.value, tz, &obj.fold)
537538
else:
538539
trans, deltas, typ = get_dst_info(tz)

pandas/_libs/tslibs/timezones.pxd

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ cdef tzinfo utc_pytz
99

1010
cpdef bint is_utc(tzinfo tz)
1111
cdef bint is_tzlocal(tzinfo tz)
12+
cdef bint is_zoneinfo(tzinfo tz)
1213

1314
cdef bint treat_tz_as_pytz(tzinfo tz)
1415

pandas/_libs/tslibs/timezones.pyx

Lines changed: 39 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,14 @@ from datetime import (
33
timezone,
44
)
55

6+
try:
7+
# py39+
8+
import zoneinfo
9+
from zoneinfo import ZoneInfo
10+
except ImportError:
11+
zoneinfo = None
12+
ZoneInfo = None
13+
614
from cpython.datetime cimport (
715
datetime,
816
timedelta,
@@ -42,18 +50,43 @@ cdef tzinfo utc_stdlib = timezone.utc
4250
cdef tzinfo utc_pytz = UTC
4351
cdef tzinfo utc_dateutil_str = dateutil_gettz("UTC") # NB: *not* the same as tzutc()
4452

53+
cdef tzinfo utc_zoneinfo = None
54+
4555

4656
# ----------------------------------------------------------------------
4757

58+
cdef inline bint is_utc_zoneinfo(tzinfo tz):
59+
# Workaround for cases with missing tzdata
60+
# https://github.com/pandas-dev/pandas/pull/46425#discussion_r830633025
61+
if tz is None or zoneinfo is None:
62+
return False
63+
64+
global utc_zoneinfo
65+
if utc_zoneinfo is None:
66+
try:
67+
utc_zoneinfo = ZoneInfo("UTC")
68+
except zoneinfo.ZoneInfoNotFoundError:
69+
return False
70+
71+
return tz is utc_zoneinfo
72+
73+
4874
cpdef inline bint is_utc(tzinfo tz):
4975
return (
5076
tz is utc_pytz
5177
or tz is utc_stdlib
5278
or isinstance(tz, _dateutil_tzutc)
5379
or tz is utc_dateutil_str
80+
or is_utc_zoneinfo(tz)
5481
)
5582

5683

84+
cdef inline bint is_zoneinfo(tzinfo tz):
85+
if ZoneInfo is None:
86+
return False
87+
return isinstance(tz, ZoneInfo)
88+
89+
5790
cdef inline bint is_tzlocal(tzinfo tz):
5891
return isinstance(tz, _dateutil_tzlocal)
5992

@@ -210,6 +243,8 @@ cdef inline bint is_fixed_offset(tzinfo tz):
210243
return 1
211244
else:
212245
return 0
246+
elif is_zoneinfo(tz):
247+
return 0
213248
# This also implicitly accepts datetime.timezone objects which are
214249
# considered fixed
215250
return 1
@@ -264,6 +299,8 @@ cdef object get_dst_info(tzinfo tz):
264299
# e.g. pytz.FixedOffset, matplotlib.dates._UTC,
265300
# psycopg2.tz.FixedOffsetTimezone
266301
num = int(get_utcoffset(tz, None).total_seconds()) * 1_000_000_000
302+
# If we have e.g. ZoneInfo here, the get_utcoffset call will return None,
303+
# so the total_seconds() call will raise AttributeError.
267304
return (np.array([NPY_NAT + 1], dtype=np.int64),
268305
np.array([num], dtype=np.int64),
269306
"unknown")
@@ -291,13 +328,13 @@ cdef object get_dst_info(tzinfo tz):
291328
# deltas
292329
deltas = np.array([v.offset for v in (
293330
tz._ttinfo_before,) + tz._trans_idx], dtype='i8')
294-
deltas *= 1000000000
331+
deltas *= 1_000_000_000
295332
typ = 'dateutil'
296333

297334
elif is_fixed_offset(tz):
298335
trans = np.array([NPY_NAT + 1], dtype=np.int64)
299336
deltas = np.array([tz._ttinfo_std.offset],
300-
dtype='i8') * 1000000000
337+
dtype='i8') * 1_000_000_000
301338
typ = 'fixed'
302339
else:
303340
# 2018-07-12 this is not reached in the tests, and this case

pandas/_libs/tslibs/tzconversion.pyx

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ from pandas._libs.tslibs.timezones cimport (
4242
is_fixed_offset,
4343
is_tzlocal,
4444
is_utc,
45+
is_zoneinfo,
4546
utc_pytz,
4647
)
4748

@@ -60,7 +61,7 @@ cdef int64_t tz_localize_to_utc_single(
6061
elif is_utc(tz) or tz is None:
6162
return val
6263

63-
elif is_tzlocal(tz):
64+
elif is_tzlocal(tz) or is_zoneinfo(tz):
6465
return val - _tz_localize_using_tzinfo_api(val, tz, to_utc=True)
6566

6667
elif is_fixed_offset(tz):
@@ -135,7 +136,7 @@ timedelta-like}
135136

136137
result = np.empty(n, dtype=np.int64)
137138

138-
if is_tzlocal(tz):
139+
if is_tzlocal(tz) or is_zoneinfo(tz):
139140
for i in range(n):
140141
v = vals[i]
141142
if v == NPY_NAT:
@@ -484,8 +485,8 @@ cdef int64_t tz_convert_from_utc_single(
484485

485486
if is_utc(tz):
486487
return utc_val
487-
elif is_tzlocal(tz):
488-
return utc_val + _tz_localize_using_tzinfo_api(utc_val, tz, to_utc=False)
488+
elif is_tzlocal(tz) or is_zoneinfo(tz):
489+
return utc_val + _tz_localize_using_tzinfo_api(utc_val, tz, to_utc=False, fold=fold)
489490
else:
490491
trans, deltas, typ = get_dst_info(tz)
491492
tdata = <int64_t*>cnp.PyArray_DATA(trans)
@@ -569,7 +570,7 @@ cdef const int64_t[:] _tz_convert_from_utc(const int64_t[:] stamps, tzinfo tz):
569570

570571
if is_utc(tz) or tz is None:
571572
use_utc = True
572-
elif is_tzlocal(tz):
573+
elif is_tzlocal(tz) or is_zoneinfo(tz):
573574
use_tzlocal = True
574575
else:
575576
trans, deltas, typ = get_dst_info(tz)

pandas/_libs/tslibs/vectorized.pyx

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ from .timezones cimport (
4040
get_dst_info,
4141
is_tzlocal,
4242
is_utc,
43+
is_zoneinfo,
4344
)
4445
from .tzconversion cimport (
4546
bisect_right_i8,
@@ -117,7 +118,7 @@ def ints_to_pydatetime(
117118

118119
if is_utc(tz) or tz is None:
119120
use_utc = True
120-
elif is_tzlocal(tz):
121+
elif is_tzlocal(tz) or is_zoneinfo(tz):
121122
use_tzlocal = True
122123
else:
123124
trans, deltas, typ = get_dst_info(tz)
@@ -204,7 +205,7 @@ def get_resolution(const int64_t[:] stamps, tzinfo tz=None) -> Resolution:
204205

205206
if is_utc(tz) or tz is None:
206207
use_utc = True
207-
elif is_tzlocal(tz):
208+
elif is_tzlocal(tz) or is_zoneinfo(tz):
208209
use_tzlocal = True
209210
else:
210211
trans, deltas, typ = get_dst_info(tz)
@@ -272,7 +273,7 @@ cpdef ndarray[int64_t] normalize_i8_timestamps(const int64_t[:] stamps, tzinfo t
272273

273274
if is_utc(tz) or tz is None:
274275
use_utc = True
275-
elif is_tzlocal(tz):
276+
elif is_tzlocal(tz) or is_zoneinfo(tz):
276277
use_tzlocal = True
277278
else:
278279
trans, deltas, typ = get_dst_info(tz)
@@ -334,7 +335,7 @@ def is_date_array_normalized(const int64_t[:] stamps, tzinfo tz=None) -> bool:
334335

335336
if is_utc(tz) or tz is None:
336337
use_utc = True
337-
elif is_tzlocal(tz):
338+
elif is_tzlocal(tz) or is_zoneinfo(tz):
338339
use_tzlocal = True
339340
else:
340341
trans, deltas, typ = get_dst_info(tz)
@@ -385,7 +386,7 @@ def dt64arr_to_periodarr(const int64_t[:] stamps, int freq, tzinfo tz):
385386

386387
if is_utc(tz) or tz is None:
387388
use_utc = True
388-
elif is_tzlocal(tz):
389+
elif is_tzlocal(tz) or is_zoneinfo(tz):
389390
use_tzlocal = True
390391
else:
391392
trans, deltas, typ = get_dst_info(tz)

pandas/_libs/window/aggregations.pyx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1592,7 +1592,7 @@ def roll_weighted_var(const float64_t[:] values, const float64_t[:] weights,
15921592

15931593
with nogil:
15941594

1595-
for i in range(win_n):
1595+
for i in range(min(win_n, n)):
15961596
add_weighted_var(values[i], weights[i], &t,
15971597
&sum_w, &mean, &nobs)
15981598

pandas/conftest.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,11 @@
7575
del pa
7676
has_pyarrow = True
7777

78+
zoneinfo = None
79+
if pd.compat.PY39:
80+
# Import "zoneinfo" could not be resolved (reportMissingImports)
81+
import zoneinfo # type: ignore[no-redef]
82+
7883
# Until https://github.com/numpy/numpy/issues/19078 is sorted out, just suppress
7984
suppress_npdev_promotion_warning = pytest.mark.filterwarnings(
8085
"ignore:Promotion of numbers and bools:FutureWarning"
@@ -1166,6 +1171,8 @@ def iris(datapath):
11661171
timezone(timedelta(hours=1)),
11671172
timezone(timedelta(hours=-1), name="foo"),
11681173
]
1174+
if zoneinfo is not None:
1175+
TIMEZONES.extend([zoneinfo.ZoneInfo("US/Pacific"), zoneinfo.ZoneInfo("UTC")])
11691176
TIMEZONE_IDS = [repr(i) for i in TIMEZONES]
11701177

11711178

@@ -1191,7 +1198,12 @@ def tz_aware_fixture(request):
11911198
tz_aware_fixture2 = tz_aware_fixture
11921199

11931200

1194-
@pytest.fixture(params=["utc", "dateutil/UTC", utc, tzutc(), timezone.utc])
1201+
_UTCS = ["utc", "dateutil/UTC", utc, tzutc(), timezone.utc]
1202+
if zoneinfo is not None:
1203+
_UTCS.append(zoneinfo.ZoneInfo("UTC"))
1204+
1205+
1206+
@pytest.fixture(params=_UTCS)
11951207
def utc_fixture(request):
11961208
"""
11971209
Fixture to provide variants of UTC timezone strings and tzinfo objects.

pandas/core/frame.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2892,7 +2892,7 @@ def to_html(
28922892
classes: str | list | tuple | None = None,
28932893
escape: bool = True,
28942894
notebook: bool = False,
2895-
border: int | None = None,
2895+
border: int | bool | None = None,
28962896
table_id: str | None = None,
28972897
render_links: bool = False,
28982898
encoding: str | None = None,

pandas/io/formats/format.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1058,7 +1058,7 @@ def to_html(
10581058
encoding: str | None = None,
10591059
classes: str | list | tuple | None = None,
10601060
notebook: bool = False,
1061-
border: int | None = None,
1061+
border: int | bool | None = None,
10621062
table_id: str | None = None,
10631063
render_links: bool = False,
10641064
) -> str | None:

pandas/io/formats/html.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ def __init__(
4444
self,
4545
formatter: DataFrameFormatter,
4646
classes: str | list[str] | tuple[str, ...] | None = None,
47-
border: int | None = None,
47+
border: int | bool | None = None,
4848
table_id: str | None = None,
4949
render_links: bool = False,
5050
) -> None:
@@ -57,8 +57,11 @@ def __init__(
5757
self.bold_rows = self.fmt.bold_rows
5858
self.escape = self.fmt.escape
5959
self.show_dimensions = self.fmt.show_dimensions
60-
if border is None:
60+
if border is None or border is True:
6161
border = cast(int, get_option("display.html.border"))
62+
elif not border:
63+
border = None
64+
6265
self.border = border
6366
self.table_id = table_id
6467
self.render_links = render_links
@@ -237,8 +240,13 @@ def _write_table(self, indent: int = 0) -> None:
237240
else:
238241
id_section = f' id="{self.table_id}"'
239242

243+
if self.border is None:
244+
border_attr = ""
245+
else:
246+
border_attr = f' border="{self.border}"'
247+
240248
self.write(
241-
f'<table border="{self.border}" class="{" ".join(_classes)}"{id_section}>',
249+
f'<table{border_attr} class="{" ".join(_classes)}"{id_section}>',
242250
indent,
243251
)
244252

pandas/tests/arrays/categorical/test_constructors.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -759,3 +759,14 @@ def test_constructor_datetime64_non_nano(self):
759759

760760
cat = Categorical(values, categories=categories)
761761
assert (cat == values).all()
762+
763+
def test_constructor_preserves_freq(self):
764+
# GH33830 freq retention in categorical
765+
dti = date_range("2016-01-01", periods=5)
766+
767+
expected = dti.freq
768+
769+
cat = Categorical(dti)
770+
result = cat.categories.freq
771+
772+
assert expected == result

pandas/tests/indexes/datetimes/test_constructors.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
OutOfBoundsDatetime,
1616
conversion,
1717
)
18+
from pandas.compat import PY39
1819

1920
import pandas as pd
2021
from pandas import (
@@ -31,6 +32,9 @@
3132
period_array,
3233
)
3334

35+
if PY39:
36+
import zoneinfo
37+
3438

3539
class TestDatetimeIndex:
3640
@pytest.mark.parametrize(
@@ -1128,7 +1132,12 @@ def test_timestamp_constructor_retain_fold(tz, fold):
11281132
assert result == expected
11291133

11301134

1131-
@pytest.mark.parametrize("tz", ["dateutil/Europe/London"])
1135+
_tzs = ["dateutil/Europe/London"]
1136+
if PY39:
1137+
_tzs = ["dateutil/Europe/London", zoneinfo.ZoneInfo("Europe/London")]
1138+
1139+
1140+
@pytest.mark.parametrize("tz", _tzs)
11321141
@pytest.mark.parametrize(
11331142
"ts_input,fold_out",
11341143
[
@@ -1148,6 +1157,7 @@ def test_timestamp_constructor_infer_fold_from_value(tz, ts_input, fold_out):
11481157
result = ts.fold
11491158
expected = fold_out
11501159
assert result == expected
1160+
# TODO: belongs in Timestamp tests?
11511161

11521162

11531163
@pytest.mark.parametrize("tz", ["dateutil/Europe/London"])

0 commit comments

Comments
 (0)