Skip to content

Commit 481ed71

Browse files
authored
Merge branch 'main' into bug4638144642
2 parents eb2000f + d1543ae commit 481ed71

25 files changed

+353
-144
lines changed

ci/code_checks.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -78,8 +78,8 @@ fi
7878
### DOCSTRINGS ###
7979
if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
8080

81-
MSG='Validate docstrings (GL01, GL02, GL03, GL04, GL05, GL06, GL07, GL09, GL10, SS01, SS02, SS03, SS04, SS05, PR03, PR04, PR05, PR06, PR08, PR09, PR10, EX04, RT01, RT04, RT05, SA02, SA03)' ; echo $MSG
82-
$BASE_DIR/scripts/validate_docstrings.py --format=actions --errors=GL01,GL02,GL03,GL04,GL05,GL06,GL07,GL09,GL10,SS02,SS03,SS04,SS05,PR03,PR04,PR05,PR06,PR08,PR09,PR10,EX04,RT01,RT04,RT05,SA02,SA03
81+
MSG='Validate docstrings (EX04, GL01, GL02, GL03, GL04, GL05, GL06, GL07, GL09, GL10, PR03, PR04, PR05, PR06, PR08, PR09, PR10, RT01, RT04, RT05, SA02, SA03, SS01, SS02, SS03, SS04, SS05)' ; echo $MSG
82+
$BASE_DIR/scripts/validate_docstrings.py --format=actions --errors=EX04,GL01,GL02,GL03,GL04,GL05,GL06,GL07,GL09,GL10,PR03,PR04,PR05,PR06,PR08,PR09,PR10,RT01,RT04,RT05,SA02,SA03,SS01,SS02,SS03,SS04,SS05
8383
RET=$(($RET + $?)) ; echo $MSG "DONE"
8484

8585
fi

doc/source/getting_started/tutorials.rst

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,16 @@ Excel charts with pandas, vincent and xlsxwriter
7575

7676
* `Using Pandas and XlsxWriter to create Excel charts <https://pandas-xlsxwriter-charts.readthedocs.io/>`_
7777

78+
Joyful pandas
79+
-------------
80+
81+
A tutorial written in Chinese by Yuanhao Geng. It covers the basic operations
82+
for NumPy and pandas, 4 main data manipulation methods (including indexing, groupby, reshaping
83+
and concatenation) and 4 main data types (including missing data, string data, categorical
84+
data and time series data). At the end of each chapter, corresponding exercises are posted.
85+
All the datasets and related materials can be found in the GitHub repository
86+
`datawhalechina/joyful-pandas <https://github.com/datawhalechina/joyful-pandas>`_.
87+
7888
Video tutorials
7989
---------------
8090

doc/source/user_guide/duplicates.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -172,7 +172,7 @@ going forward, to ensure that your data pipeline doesn't introduce duplicates.
172172
>>> deduplicated = raw.groupby(level=0).first() # remove duplicates
173173
>>> deduplicated.flags.allows_duplicate_labels = False # disallow going forward
174174
175-
Setting ``allows_duplicate_labels=True`` on a ``Series`` or ``DataFrame`` with duplicate
175+
Setting ``allows_duplicate_labels=False`` on a ``Series`` or ``DataFrame`` with duplicate
176176
labels or performing an operation that introduces duplicate labels on a ``Series`` or
177177
``DataFrame`` that disallows duplicates will raise an
178178
:class:`errors.DuplicateLabelError`.

doc/source/whatsnew/v1.4.2.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ Bug fixes
3131
~~~~~~~~~
3232
- Fix some cases for subclasses that define their ``_constructor`` properties as general callables (:issue:`46018`)
3333
- Fixed "longtable" formatting in :meth:`.Styler.to_latex` when ``column_format`` is given in extended format (:issue:`46037`)
34-
-
34+
- Fixed incorrect rendering in :meth:`.Styler.format` with ``hyperlinks="html"`` when the url contains a colon or other special characters (:issue:`46389`)
3535

3636
.. ---------------------------------------------------------------------------
3737

doc/source/whatsnew/v1.5.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -527,6 +527,7 @@ Groupby/resample/rolling
527527
- Bug in :meth:`GroupBy.cummin` and :meth:`GroupBy.cummax` with nullable dtypes incorrectly altering the original data in place (:issue:`46220`)
528528
- Bug in :meth:`GroupBy.cummax` with ``int64`` dtype with leading value being the smallest possible int64 (:issue:`46382`)
529529
- Bug in :meth:`GroupBy.max` with empty groups and ``uint64`` dtype incorrectly raising ``RuntimeError`` (:issue:`46408`)
530+
- Bug in :meth:`.GroupBy.apply` where it would fail when ``func`` was a string and args or kwargs were supplied (:issue:`46479`)
530531
-
531532

532533
Reshaping

environment.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ dependencies:
1818
- cython>=0.29.24
1919

2020
# code checks
21-
- black=21.5b2
21+
- black=22.1.0
2222
- cpplint
2323
- flake8=4.0.1
2424
- flake8-bugbear=21.3.2 # used by flake8, find likely bugs
@@ -86,7 +86,7 @@ dependencies:
8686
- bottleneck>=1.3.1
8787
- ipykernel
8888
- ipython>=7.11.1
89-
- jinja2 # pandas.Styler
89+
- jinja2<=3.0.3 # pandas.Styler
9090
- matplotlib>=3.3.2 # pandas.plotting, Series.plot, DataFrame.plot
9191
- numexpr>=2.7.1
9292
- scipy>=1.4.1

pandas/_libs/tslibs/conversion.pxd

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,5 +31,3 @@ cdef int64_t get_datetime64_nanos(object val) except? -1
3131
cpdef datetime localize_pydatetime(datetime dt, tzinfo tz)
3232
cdef int64_t cast_from_unit(object ts, str unit) except? -1
3333
cpdef (int64_t, int) precision_from_unit(str unit)
34-
35-
cdef int64_t normalize_i8_stamp(int64_t local_val) nogil

pandas/_libs/tslibs/conversion.pyx

Lines changed: 7 additions & 103 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ from pandas._libs.tslibs.np_datetime cimport (
3131
NPY_DATETIMEUNIT,
3232
NPY_FR_ns,
3333
_string_to_dts,
34+
astype_overflowsafe,
3435
check_dts_bounds,
3536
dt64_to_dtstruct,
3637
dtstruct_to_dt64,
@@ -71,6 +72,7 @@ from pandas._libs.tslibs.nattype cimport (
7172
)
7273
from pandas._libs.tslibs.tzconversion cimport (
7374
bisect_right_i8,
75+
infer_datetuil_fold,
7476
localize_tzinfo_api,
7577
tz_localize_to_utc_single,
7678
)
@@ -215,54 +217,20 @@ def ensure_datetime64ns(arr: ndarray, copy: bool = True):
215217
-------
216218
ndarray with dtype datetime64[ns]
217219
"""
218-
cdef:
219-
Py_ssize_t i, n = arr.size
220-
const int64_t[:] ivalues
221-
int64_t[:] iresult
222-
NPY_DATETIMEUNIT unit
223-
npy_datetimestruct dts
224-
225-
shape = (<object>arr).shape
226-
227220
if (<object>arr).dtype.byteorder == ">":
228221
# GH#29684 we incorrectly get OutOfBoundsDatetime if we don't swap
229222
dtype = arr.dtype
230223
arr = arr.astype(dtype.newbyteorder("<"))
231224

232225
if arr.size == 0:
226+
# Fastpath; doesn't matter but we have old tests for result.base
227+
# being arr.
233228
result = arr.view(DT64NS_DTYPE)
234229
if copy:
235230
result = result.copy()
236231
return result
237232

238-
if arr.dtype.kind != "M":
239-
raise TypeError("ensure_datetime64ns arr must have datetime64 dtype")
240-
unit = get_unit_from_dtype(arr.dtype)
241-
if unit == NPY_DATETIMEUNIT.NPY_FR_GENERIC:
242-
# without raising explicitly here, we end up with a SystemError
243-
# built-in function ensure_datetime64ns returned a result with an error
244-
raise ValueError("datetime64/timedelta64 must have a unit specified")
245-
246-
if unit == NPY_FR_ns:
247-
# Check this before allocating result for perf, might save some memory
248-
if copy:
249-
return arr.copy()
250-
return arr
251-
252-
ivalues = arr.view(np.int64).ravel("K")
253-
254-
result = np.empty_like(arr, dtype=DT64NS_DTYPE)
255-
iresult = result.ravel("K").view(np.int64)
256-
257-
for i in range(n):
258-
if ivalues[i] != NPY_NAT:
259-
pandas_datetime_to_datetimestruct(ivalues[i], unit, &dts)
260-
iresult[i] = dtstruct_to_dt64(&dts)
261-
check_dts_bounds(&dts)
262-
else:
263-
iresult[i] = NPY_NAT
264-
265-
return result
233+
return astype_overflowsafe(arr, DT64NS_DTYPE, copy=copy)
266234

267235

268236
def ensure_timedelta64ns(arr: ndarray, copy: bool = True):
@@ -563,7 +531,7 @@ cdef _TSObject _create_tsobject_tz_using_offset(npy_datetimestruct dts,
563531
if typ == 'dateutil':
564532
tdata = <int64_t*>cnp.PyArray_DATA(trans)
565533
pos = bisect_right_i8(tdata, obj.value, trans.shape[0]) - 1
566-
obj.fold = _infer_tsobject_fold(obj, trans, deltas, pos)
534+
obj.fold = infer_datetuil_fold(obj.value, trans, deltas, pos)
567535

568536
# Keep the converter same as PyDateTime's
569537
dt = datetime(obj.dts.year, obj.dts.month, obj.dts.day,
@@ -747,7 +715,7 @@ cdef inline void _localize_tso(_TSObject obj, tzinfo tz):
747715
local_val = obj.value + deltas[pos]
748716

749717
# dateutil supports fold, so we infer fold from value
750-
obj.fold = _infer_tsobject_fold(obj, trans, deltas, pos)
718+
obj.fold = infer_datetuil_fold(obj.value, trans, deltas, pos)
751719
else:
752720
# All other cases have len(deltas) == 1. As of 2018-07-17
753721
# (and 2022-03-07), all test cases that get here have
@@ -759,49 +727,6 @@ cdef inline void _localize_tso(_TSObject obj, tzinfo tz):
759727
obj.tzinfo = tz
760728

761729

762-
cdef inline bint _infer_tsobject_fold(
763-
_TSObject obj,
764-
const int64_t[:] trans,
765-
const int64_t[:] deltas,
766-
intp_t pos,
767-
):
768-
"""
769-
Infer _TSObject fold property from value by assuming 0 and then setting
770-
to 1 if necessary.
771-
772-
Parameters
773-
----------
774-
obj : _TSObject
775-
trans : ndarray[int64_t]
776-
ndarray of offset transition points in nanoseconds since epoch.
777-
deltas : int64_t[:]
778-
array of offsets corresponding to transition points in trans.
779-
pos : intp_t
780-
Position of the last transition point before taking fold into account.
781-
782-
Returns
783-
-------
784-
bint
785-
Due to daylight saving time, one wall clock time can occur twice
786-
when shifting from summer to winter time; fold describes whether the
787-
datetime-like corresponds to the first (0) or the second time (1)
788-
the wall clock hits the ambiguous time
789-
790-
References
791-
----------
792-
.. [1] "PEP 495 - Local Time Disambiguation"
793-
https://www.python.org/dev/peps/pep-0495/#the-fold-attribute
794-
"""
795-
cdef:
796-
bint fold = 0
797-
798-
if pos > 0:
799-
fold_delta = deltas[pos - 1] - deltas[pos]
800-
if obj.value - fold_delta < trans[pos]:
801-
fold = 1
802-
803-
return fold
804-
805730
cdef inline datetime _localize_pydatetime(datetime dt, tzinfo tz):
806731
"""
807732
Take a datetime/Timestamp in UTC and localizes to timezone tz.
@@ -835,24 +760,3 @@ cpdef inline datetime localize_pydatetime(datetime dt, tzinfo tz):
835760
elif isinstance(dt, ABCTimestamp):
836761
return dt.tz_localize(tz)
837762
return _localize_pydatetime(dt, tz)
838-
839-
840-
# ----------------------------------------------------------------------
841-
# Normalization
842-
843-
@cython.cdivision(False)
844-
cdef inline int64_t normalize_i8_stamp(int64_t local_val) nogil:
845-
"""
846-
Round the localized nanosecond timestamp down to the previous midnight.
847-
848-
Parameters
849-
----------
850-
local_val : int64_t
851-
852-
Returns
853-
-------
854-
int64_t
855-
"""
856-
cdef:
857-
int64_t day_nanos = 24 * 3600 * 1_000_000_000
858-
return local_val - (local_val % day_nanos)

pandas/_libs/tslibs/np_datetime.pxd

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,8 @@ cdef extern from "numpy/ndarraytypes.h":
5252
NPY_FR_as
5353
NPY_FR_GENERIC
5454

55+
int64_t NPY_DATETIME_NAT # elsewhere we call this NPY_NAT
56+
5557
cdef extern from "src/datetime/np_datetime.h":
5658
ctypedef struct pandas_timedeltastruct:
5759
int64_t days
@@ -67,7 +69,7 @@ cdef extern from "src/datetime/np_datetime.h":
6769

6870
cdef bint cmp_scalar(int64_t lhs, int64_t rhs, int op) except -1
6971

70-
cdef check_dts_bounds(npy_datetimestruct *dts)
72+
cdef check_dts_bounds(npy_datetimestruct *dts, NPY_DATETIMEUNIT unit=?)
7173

7274
cdef int64_t dtstruct_to_dt64(npy_datetimestruct* dts) nogil
7375
cdef void dt64_to_dtstruct(int64_t dt64, npy_datetimestruct* out) nogil
@@ -86,3 +88,9 @@ cdef int _string_to_dts(str val, npy_datetimestruct* dts,
8688
bint want_exc) except? -1
8789

8890
cdef NPY_DATETIMEUNIT get_unit_from_dtype(cnp.dtype dtype)
91+
92+
cpdef cnp.ndarray astype_overflowsafe(
93+
cnp.ndarray values, # ndarray[datetime64[anyunit]]
94+
cnp.dtype dtype, # datetime64[anyunit] dtype
95+
bint copy=*,
96+
)

pandas/_libs/tslibs/np_datetime.pyi

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,3 +4,6 @@ class OutOfBoundsDatetime(ValueError): ...
44

55
# only exposed for testing
66
def py_get_unit_from_dtype(dtype: np.dtype): ...
7+
def astype_overflowsafe(
8+
arr: np.ndarray, dtype: np.dtype, copy: bool = ...
9+
) -> np.ndarray: ...

0 commit comments

Comments
 (0)