
Commit 1336afb

Merge remote-tracking branch 'upstream/master' into numba
2 parents: 8bfeeb2 + f904213

54 files changed: +977 −1109 lines changed

.pre-commit-config.yaml

Lines changed: 1 addition & 1 deletion
@@ -138,7 +138,7 @@ repos:
        entry: python scripts/check_for_inconsistent_pandas_namespace.py
        language: python
        types: [python]
-       files: ^pandas/tests/
+       files: ^pandas/tests/frame/
    -   id: FrameOrSeriesUnion
        name: Check for use of Union[Series, DataFrame] instead of FrameOrSeriesUnion alias
        entry: Union\[.*(Series,.*DataFrame|DataFrame,.*Series).*\]

doc/source/whatsnew/v0.20.0.rst

Lines changed: 1 addition & 1 deletion
@@ -374,7 +374,7 @@ For example, after running the following, ``styled.xlsx`` renders as below:
      df.iloc[0, 2] = np.nan
      df
      styled = (df.style
-                 .applymap(lambda val: 'color: %s' % 'red' if val < 0 else 'black')
+                 .applymap(lambda val: 'color:red;' if val < 0 else 'color:black;')
                  .highlight_max())
      styled.to_excel('styled.xlsx', engine='openpyxl')
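
A minimal sketch of the corrected documentation snippet: the lambdas now return complete "property:value;" CSS strings before the export via openpyxl. This assumes numpy, pandas, and openpyxl are installed; the file name styled.xlsx comes from the doc example itself.

    import numpy as np
    import pandas as pd

    df = pd.DataFrame(np.random.randn(3, 3))
    df.iloc[0, 2] = np.nan

    # Each lambda returns a full "property:value;" CSS declaration.
    styled = (df.style
                .applymap(lambda val: 'color:red;' if val < 0 else 'color:black;')
                .highlight_max())
    styled.to_excel('styled.xlsx', engine='openpyxl')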

doc/source/whatsnew/v1.3.0.rst

Lines changed: 3 additions & 1 deletion
@@ -65,7 +65,8 @@ Other enhancements
  - :meth:`DataFrame.plot.scatter` can now accept a categorical column as the argument to ``c`` (:issue:`12380`, :issue:`31357`)
  - :meth:`.Styler.set_tooltips` allows on hover tooltips to be added to styled HTML dataframes (:issue:`35643`, :issue:`21266`, :issue:`39317`)
  - :meth:`.Styler.set_tooltips_class` and :meth:`.Styler.set_table_styles` amended to optionally allow certain css-string input arguments (:issue:`39564`)
- - :meth:`.Styler.apply` now more consistently accepts ndarray function returns, i.e. in all cases for ``axis`` is ``0, 1 or None``. (:issue:`39359`)
+ - :meth:`.Styler.apply` now more consistently accepts ndarray function returns, i.e. in all cases for ``axis`` is ``0, 1 or None`` (:issue:`39359`)
+ - :meth:`.Styler.apply` and :meth:`.Styler.applymap` now raise errors if wrong format CSS is passed on render (:issue:`39660`)
  - :meth:`Series.loc.__getitem__` and :meth:`Series.loc.__setitem__` with :class:`MultiIndex` now raising helpful error message when indexer has too many dimensions (:issue:`35349`)
  - :meth:`pandas.read_stata` and :class:`StataReader` support reading data from compressed files.
  - Add support for parsing ``ISO 8601``-like timestamps with negative signs to :meth:`pandas.Timedelta` (:issue:`37172`)

@@ -324,6 +325,7 @@ Numeric

  Conversion
  ^^^^^^^^^^
+ - Bug in :meth:`Series.to_dict` with ``orient='records'`` now returns python native types (:issue:`25969`)
  -
  -
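
A quick sketch of the behaviour described by the new Conversion entry. The printed types are the intended outcome per GH 25969, not a verified run:

    import pandas as pd

    df = pd.DataFrame({"a": [1, 2], "b": [0.1, 0.2], "c": [True, False]})
    records = df.to_dict(orient="records")

    # With this change the record values are Python natives rather than numpy scalars.
    print(type(records[0]["a"]), type(records[0]["b"]), type(records[0]["c"]))
    # expected per GH 25969: <class 'int'> <class 'float'> <class 'bool'>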

pandas/core/arrays/datetimelike.py

Lines changed: 3 additions & 1 deletion
@@ -60,6 +60,7 @@
      Substitution,
      cache_readonly,
  )
+ from pandas.util._exceptions import find_stack_level

  from pandas.core.dtypes.common import (
      is_categorical_dtype,

@@ -397,12 +398,13 @@ def astype(self, dtype, copy=True):
          elif is_integer_dtype(dtype):
              # we deliberately ignore int32 vs. int64 here.
              # See https://github.com/pandas-dev/pandas/issues/24381 for more.
+             level = find_stack_level()
              warnings.warn(
                  f"casting {self.dtype} values to int64 with .astype(...) is "
                  "deprecated and will raise in a future version. "
                  "Use .view(...) instead.",
                  FutureWarning,
-                 stacklevel=3,
+                 stacklevel=level,
              )

              values = self.asi8
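
For context on the change above: stacklevel controls which frame a warning is attributed to, so a hardcoded value such as stacklevel=3 assumes a fixed call depth, while find_stack_level computes it at runtime. A generic sketch of the mechanism using the standard warnings module (not pandas' helper itself):

    import warnings

    def deprecated_helper():
        # stacklevel=2 points the warning at the caller of this function,
        # the same effect pandas wants regardless of how deep the call chain is.
        warnings.warn(
            "this helper is deprecated",
            FutureWarning,
            stacklevel=2,
        )

    deprecated_helper()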

pandas/core/arrays/datetimes.py

Lines changed: 4 additions & 0 deletions
@@ -616,6 +616,10 @@ def astype(self, dtype, copy=True):
          elif is_datetime64_ns_dtype(dtype):
              return astype_dt64_to_dt64tz(self, dtype, copy, via_utc=False)

+         elif self.tz is None and is_datetime64_dtype(dtype) and dtype != self.dtype:
+             # unit conversion e.g. datetime64[s]
+             return self._data.astype(dtype)
+
          elif is_period_dtype(dtype):
              return self.to_period(freq=dtype.freq)
          return dtl.DatetimeLikeArrayMixin.astype(self, dtype, copy)
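
The new branch hands a tz-naive unit change to the underlying ndarray, which is the plain NumPy conversion sketched below (NumPy only, independent of pandas):

    import numpy as np

    arr = np.array(["2021-01-01T00:00:00.123456789"], dtype="datetime64[ns]")

    # numpy's astype performs the unit conversion, e.g. nanoseconds -> seconds
    print(arr.astype("datetime64[s]"))  # ['2021-01-01T00:00:00']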

pandas/core/dtypes/cast.py

Lines changed: 24 additions & 0 deletions
@@ -71,6 +71,7 @@
      is_datetime64_dtype,
      is_datetime64_ns_dtype,
      is_datetime64tz_dtype,
+     is_datetime_or_timedelta_dtype,
      is_dtype_equal,
      is_extension_array_dtype,
      is_float,

@@ -170,6 +171,29 @@ def maybe_box_datetimelike(value: Scalar, dtype: Optional[Dtype] = None) -> Scalar:
      return value


+ def maybe_box_native(value: Scalar) -> Scalar:
+     """
+     If passed a scalar cast the scalar to a python native type.
+
+     Parameters
+     ----------
+     value : scalar or Series
+
+     Returns
+     -------
+     scalar or Series
+     """
+     if is_datetime_or_timedelta_dtype(value):
+         value = maybe_box_datetimelike(value)
+     elif is_float(value):
+         value = float(value)
+     elif is_integer(value):
+         value = int(value)
+     elif is_bool(value):
+         value = bool(value)
+     return value
+
+
  def maybe_unbox_datetimelike(value: Scalar, dtype: DtypeObj) -> Scalar:
      """
      Convert a Timedelta or Timestamp to timedelta64 or datetime64 for setting
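
A minimal sketch of what the new helper does for the scalar kinds handled in its branches; the behaviour is inferred from the code shown in the diff above, and the import path only exists once this commit is applied:

    import numpy as np
    from pandas.core.dtypes.cast import maybe_box_native  # added in the diff above

    # numpy scalars come back as Python natives; other scalars pass through unchanged
    print(maybe_box_native(np.int64(1)))      # 1     (int)
    print(maybe_box_native(np.float64(1.5)))  # 1.5   (float)
    print(maybe_box_native(np.bool_(True)))   # True  (bool)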

pandas/core/frame.py

Lines changed: 4 additions & 5 deletions
@@ -95,7 +95,7 @@
      find_common_type,
      infer_dtype_from_scalar,
      invalidate_string_dtypes,
-     maybe_box_datetimelike,
+     maybe_box_native,
      maybe_convert_platform,
      maybe_downcast_to_dtype,
      maybe_infer_to_datetimelike,

@@ -1655,15 +1655,15 @@ def to_dict(self, orient: str = "dict", into=dict):
                      (
                          "data",
                          [
-                             list(map(maybe_box_datetimelike, t))
+                             list(map(maybe_box_native, t))
                              for t in self.itertuples(index=False, name=None)
                          ],
                      ),
                  )
              )

          elif orient == "series":
-             return into_c((k, maybe_box_datetimelike(v)) for k, v in self.items())
+             return into_c((k, v) for k, v in self.items())

          elif orient == "records":
              columns = self.columns.tolist()

@@ -1672,8 +1672,7 @@ def to_dict(self, orient: str = "dict", into=dict):
                  for row in self.itertuples(index=False, name=None)
              )
              return [
-                 into_c((k, maybe_box_datetimelike(v)) for k, v in row.items())
-                 for row in rows
+                 into_c((k, maybe_box_native(v)) for k, v in row.items()) for row in rows
              ]

          elif orient == "index":
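
To illustrate the two call sites touched above (a sketch; the types shown are the intended outcome of the change): with orient="records" scalars now go through maybe_box_native, while orient="series" simply returns the column Series without per-value boxing.

    import pandas as pd

    df = pd.DataFrame({"a": [1, 2]})

    recs = df.to_dict(orient="records")
    print(type(recs[0]["a"]))   # intended: <class 'int'>, boxed by maybe_box_native

    ser = df.to_dict(orient="series")
    print(type(ser["a"]))       # <class 'pandas.core.series.Series'>, returned as-is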

pandas/core/indexes/extension.py

Lines changed: 4 additions & 0 deletions
@@ -309,6 +309,10 @@ def astype(self, dtype, copy=True):
                  return self
              return self.copy()

+         if isinstance(dtype, np.dtype) and dtype.kind == "M" and dtype != "M8[ns]":
+             # For now Datetime supports this by unwrapping ndarray, but DTI doesn't
+             raise TypeError(f"Cannot cast {type(self._data).__name__} to dtype")
+
          new_values = self._data.astype(dtype, copy=copy)

          # pass copy=False because any copying will be done in the
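
A hypothetical usage sketch of the asymmetry the new comment describes, assuming DatetimeIndex.astype routes through this method while the backing array takes the new unwrapping branch from pandas/core/arrays/datetimes.py:

    import pandas as pd

    dti = pd.date_range("2021-01-01", periods=3)

    # The backing DatetimeArray unwraps to an ndarray for the unit change...
    print(dti._data.astype("datetime64[s]").dtype)   # datetime64[s]

    # ...but the Index refuses non-nanosecond datetime64 targets per the new check.
    try:
        dti.astype("datetime64[s]")
    except TypeError as exc:
        print(exc)   # "Cannot cast DatetimeArray to dtype"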

pandas/core/indexing.py

Lines changed: 0 additions & 54 deletions
@@ -2403,57 +2403,3 @@ def need_slice(obj) -> bool:
          or obj.stop is not None
          or (obj.step is not None and obj.step != 1)
      )
-
-
- def non_reducing_slice(slice_):
-     """
-     Ensure that a slice doesn't reduce to a Series or Scalar.
-
-     Any user-passed `subset` should have this called on it
-     to make sure we're always working with DataFrames.
-     """
-     # default to column slice, like DataFrame
-     # ['A', 'B'] -> IndexSlices[:, ['A', 'B']]
-     kinds = (ABCSeries, np.ndarray, Index, list, str)
-     if isinstance(slice_, kinds):
-         slice_ = IndexSlice[:, slice_]
-
-     def pred(part) -> bool:
-         """
-         Returns
-         -------
-         bool
-             True if slice does *not* reduce,
-             False if `part` is a tuple.
-         """
-         # true when slice does *not* reduce, False when part is a tuple,
-         # i.e. MultiIndex slice
-         if isinstance(part, tuple):
-             # GH#39421 check for sub-slice:
-             return any((isinstance(s, slice) or is_list_like(s)) for s in part)
-         else:
-             return isinstance(part, slice) or is_list_like(part)
-
-     if not is_list_like(slice_):
-         if not isinstance(slice_, slice):
-             # a 1-d slice, like df.loc[1]
-             slice_ = [[slice_]]
-         else:
-             # slice(a, b, c)
-             slice_ = [slice_]  # to tuplize later
-     else:
-         slice_ = [part if pred(part) else [part] for part in slice_]
-     return tuple(slice_)
-
-
- def maybe_numeric_slice(df, slice_, include_bool: bool = False):
-     """
-     Want nice defaults for background_gradient that don't break
-     with non-numeric data. But if slice_ is passed go with that.
-     """
-     if slice_ is None:
-         dtypes = [np.number]
-         if include_bool:
-             dtypes.append(bool)
-         slice_ = IndexSlice[:, df.select_dtypes(include=dtypes).columns]
-     return slice_

pandas/core/internals/__init__.py

Lines changed: 0 additions & 2 deletions
@@ -11,7 +11,6 @@
      ObjectBlock,
      TimeDeltaBlock,
      make_block,
-     safe_reshape,
  )
  from pandas.core.internals.concat import concatenate_block_managers
  from pandas.core.internals.managers import (

@@ -31,7 +30,6 @@
      "FloatBlock",
      "ObjectBlock",
      "TimeDeltaBlock",
-     "safe_reshape",
      "make_block",
      "DataManager",
      "ArrayManager",

pandas/core/internals/blocks.py

Lines changed: 38 additions & 50 deletions
@@ -308,7 +308,7 @@ def make_block(self, values, placement=None) -> Block:
          if placement is None:
              placement = self.mgr_locs
          if self.is_extension:
-             values = _block_shape(values, ndim=self.ndim)
+             values = ensure_block_shape(values, ndim=self.ndim)

          return make_block(values, placement=placement, ndim=self.ndim)

@@ -533,7 +533,7 @@ def make_a_block(nv, ref_loc):
              else:
                  # Put back the dimension that was taken from it and make
                  # a block out of the result.
-                 nv = _block_shape(nv, ndim=self.ndim)
+                 nv = ensure_block_shape(nv, ndim=self.ndim)
                  block = self.make_block(values=nv, placement=ref_loc)
              return block

@@ -673,6 +673,18 @@ def astype(self, dtype, copy: bool = False, errors: str = "raise"):

      def _astype(self, dtype: DtypeObj, copy: bool) -> ArrayLike:
          values = self.values
+         if values.dtype.kind in ["m", "M"]:
+             values = self.array_values()
+
+         if (
+             values.dtype.kind in ["m", "M"]
+             and dtype.kind in ["i", "u"]
+             and isinstance(dtype, np.dtype)
+             and dtype.itemsize != 8
+         ):
+             # TODO(2.0) remove special case once deprecation on DTA/TDA is enforced
+             msg = rf"cannot astype a datetimelike from [{values.dtype}] to [{dtype}]"
+             raise TypeError(msg)

          if is_datetime64tz_dtype(dtype) and is_datetime64_dtype(values.dtype):
              return astype_dt64_to_dt64tz(values, dtype, copy, via_utc=True)

@@ -1569,7 +1581,9 @@ def putmask(self, mask, new) -> List[Block]:
          if isinstance(new, (np.ndarray, ExtensionArray)) and len(new) == len(mask):
              new = new[mask]

-         mask = safe_reshape(mask, new_values.shape)
+         if mask.ndim == new_values.ndim + 1:
+             # TODO(EA2D): unnecessary with 2D EAs
+             mask = mask.reshape(new_values.shape)

          new_values[mask] = new
          return [self.make_block(values=new_values)]

@@ -2048,6 +2062,21 @@ def shift(self, periods: int, axis: int = 0, fill_value: Any = None) -> List[Block]:
          new_values = values.shift(periods, fill_value=fill_value, axis=axis)
          return [self.make_block_same_class(new_values)]

+     def fillna(
+         self, value, limit=None, inplace: bool = False, downcast=None
+     ) -> List[Block]:
+
+         if not self._can_hold_element(value) and self.dtype.kind != "m":
+             # We support filling a DatetimeTZ with a `value` whose timezone
+             # is different by coercing to object.
+             # TODO: don't special-case td64
+             return self.astype(object).fillna(value, limit, inplace, downcast)
+
+         values = self.array_values()
+         values = values if inplace else values.copy()
+         new_values = values.fillna(value=value, limit=limit)
+         return [self.make_block_same_class(values=new_values)]
+

  class DatetimeLikeBlockMixin(NDArrayBackedExtensionBlock):
      """Mixin class for DatetimeBlock, DatetimeTZBlock, and TimedeltaBlock."""

@@ -2134,6 +2163,7 @@ class DatetimeTZBlock(ExtensionBlock, DatetimeBlock):
      fill_value = NaT
      where = DatetimeBlock.where
      putmask = DatetimeLikeBlockMixin.putmask
+     fillna = DatetimeLikeBlockMixin.fillna

      array_values = ExtensionBlock.array_values

@@ -2172,19 +2202,6 @@ def external_values(self):
          # Avoid FutureWarning in .astype in casting from dt64tz to dt64
          return self.values._data

-     def fillna(
-         self, value, limit=None, inplace: bool = False, downcast=None
-     ) -> List[Block]:
-         # We support filling a DatetimeTZ with a `value` whose timezone
-         # is different by coercing to object.
-         if self._can_hold_element(value):
-             return super().fillna(value, limit, inplace, downcast)
-
-         # different timezones, or a non-tz
-         return self.astype(object).fillna(
-             value, limit=limit, inplace=inplace, downcast=downcast
-         )
-

  class TimeDeltaBlock(DatetimeLikeBlockMixin):
      __slots__ = ()

@@ -2194,14 +2211,6 @@ class TimeDeltaBlock(DatetimeLikeBlockMixin):
      fill_value = np.timedelta64("NaT", "ns")
      _dtype = fill_value.dtype

-     def fillna(
-         self, value, limit=None, inplace: bool = False, downcast=None
-     ) -> List[Block]:
-         values = self.array_values()
-         values = values if inplace else values.copy()
-         new_values = values.fillna(value=value, limit=limit)
-         return [self.make_block_same_class(values=new_values)]
-

  class ObjectBlock(Block):
      __slots__ = ()

@@ -2431,36 +2440,15 @@ def extend_blocks(result, blocks=None) -> List[Block]:
      return blocks


- def _block_shape(values: ArrayLike, ndim: int = 1) -> ArrayLike:
-     """ guarantee the shape of the values to be at least 1 d """
+ def ensure_block_shape(values: ArrayLike, ndim: int = 1) -> ArrayLike:
+     """
+     Reshape if possible to have values.ndim == ndim.
+     """
      if values.ndim < ndim:
-         shape = values.shape
          if not is_extension_array_dtype(values.dtype):
              # TODO(EA2D): https://github.com/pandas-dev/pandas/issues/23023
              # block.shape is incorrect for "2D" ExtensionArrays
              # We can't, and don't need to, reshape.

-             # error: "ExtensionArray" has no attribute "reshape"
-             values = values.reshape(tuple((1,) + shape))  # type: ignore[attr-defined]
+             values = np.asarray(values).reshape(1, -1)
      return values
-
-
- def safe_reshape(arr: ArrayLike, new_shape: Shape) -> ArrayLike:
-     """
-     Reshape `arr` to have shape `new_shape`, unless it is an ExtensionArray,
-     in which case it will be returned unchanged (see gh-13012).
-
-     Parameters
-     ----------
-     arr : np.ndarray or ExtensionArray
-     new_shape : Tuple[int]
-
-     Returns
-     -------
-     np.ndarray or ExtensionArray
-     """
-     if not is_extension_array_dtype(arr.dtype):
-         # Note: this will include TimedeltaArray and tz-naive DatetimeArray
-         # TODO(EA2D): special case will be unnecessary with 2D EAs
-         arr = np.asarray(arr).reshape(new_shape)
-     return arr
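
For reference, the core of the renamed helper for non-extension dtypes is the one-line NumPy reshape sketched here (standalone sketch; ensure_block_shape itself is internal to pandas):

    import numpy as np

    values = np.array([1, 2, 3])                  # ndim == 1
    reshaped = np.asarray(values).reshape(1, -1)  # guarantee a 2-D "block" shape
    print(reshaped.shape)                         # (1, 3)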
