Skip to content

Commit d980ebe

Browse files
committed
Merge remote-tracking branch 'upstream/main' into nonattype
2 parents 5abe369 + bb9a985 commit d980ebe

File tree

5 files changed

+36
-57
lines changed

5 files changed

+36
-57
lines changed

doc/source/getting_started/intro_tutorials/03_subset_data.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -242,7 +242,7 @@ I want to work with passenger data for which the age is known.
242242
age_no_na.head()
243243
244244
The :meth:`~Series.notna` conditional function returns a ``True`` for each row where the
245-
values are not an ``Null`` value. As such, this can be combined with the
245+
values are not a ``Null`` value. As such, this can be combined with the
246246
selection brackets ``[]`` to filter the data table.
247247

248248
.. raw:: html

pandas/_libs/groupby.pyx

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1238,6 +1238,7 @@ def group_nth(
12381238
if nobs[i, j] < min_count:
12391239
if uses_mask:
12401240
result_mask[i, j] = True
1241+
out[i, j] = 0
12411242
elif iu_64_floating_obj_t is int64_t:
12421243
# TODO: only if datetimelike?
12431244
out[i, j] = NPY_NAT

pandas/core/groupby/ops.py

Lines changed: 11 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -367,6 +367,7 @@ def _reconstruct_ea_result(
367367
"""
368368
Construct an ExtensionArray result from an ndarray result.
369369
"""
370+
dtype: BaseMaskedDtype | StringDtype
370371

371372
if isinstance(values.dtype, StringDtype):
372373
dtype = values.dtype
@@ -375,19 +376,17 @@ def _reconstruct_ea_result(
375376

376377
elif isinstance(values.dtype, BaseMaskedDtype):
377378
new_dtype = self._get_result_dtype(values.dtype.numpy_dtype)
379+
dtype = BaseMaskedDtype.from_numpy_dtype(new_dtype)
378380
# error: Incompatible types in assignment (expression has type
379-
# "BaseMaskedDtype", variable has type "StringDtype")
380-
dtype = BaseMaskedDtype.from_numpy_dtype( # type: ignore[assignment]
381-
new_dtype
382-
)
383-
cls = dtype.construct_array_type()
381+
# "Type[BaseMaskedArray]", variable has type "Type[BaseStringArray]")
382+
cls = dtype.construct_array_type() # type: ignore[assignment]
384383
return cls._from_sequence(res_values, dtype=dtype)
385384

386-
elif needs_i8_conversion(values.dtype):
387-
assert res_values.dtype.kind != "f" # just to be on the safe side
388-
i8values = res_values.view("i8")
389-
# error: Too many arguments for "ExtensionArray"
390-
return type(values)(i8values, dtype=values.dtype) # type: ignore[call-arg]
385+
elif isinstance(values, (DatetimeArray, TimedeltaArray, PeriodArray)):
386+
# In to_cython_values we took a view as M8[ns]
387+
assert res_values.dtype == "M8[ns]"
388+
res_values = res_values.view(values._ndarray.dtype)
389+
return values._from_backing_data(res_values)
391390

392391
raise NotImplementedError
393392

@@ -425,12 +424,8 @@ def _masked_ea_wrap_cython_operation(
425424
**kwargs,
426425
)
427426

428-
new_dtype = self._get_result_dtype(orig_values.dtype.numpy_dtype)
429-
dtype = BaseMaskedDtype.from_numpy_dtype(new_dtype)
430-
# TODO: avoid cast as res_values *should* already have the right
431-
# dtype; last attempt ran into trouble on 32bit linux build
432-
res_values = res_values.astype(dtype.type, copy=False)
433-
427+
# res_values should already have the correct dtype, we just need to
428+
# wrap in a MaskedArray
434429
return orig_values._maybe_mask_result(res_values, result_mask)
435430

436431
@final

pandas/core/internals/concat.py

Lines changed: 16 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -212,6 +212,8 @@ def concatenate_managers(
212212
for placement, join_units in concat_plan:
213213
unit = join_units[0]
214214
blk = unit.block
215+
# Assertion disabled for performance
216+
# assert len(join_units) == len(mgrs_indexers)
215217

216218
if len(join_units) == 1:
217219
values = blk.values
@@ -329,27 +331,20 @@ def _get_mgr_concatenation_plan(mgr: BlockManager):
329331
plan : list of (BlockPlacement, JoinUnit) tuples
330332
331333
"""
332-
# Calculate post-reindex shape , save for item axis which will be separate
333-
# for each block anyway.
334-
mgr_shape_list = list(mgr.shape)
335-
mgr_shape = tuple(mgr_shape_list)
336334

337335
if mgr.is_single_block:
338336
blk = mgr.blocks[0]
339-
return [(blk.mgr_locs, JoinUnit(blk, mgr_shape))]
337+
return [(blk.mgr_locs, JoinUnit(blk))]
340338

341339
blknos = mgr.blknos
342340
blklocs = mgr.blklocs
343341

344342
plan = []
345343
for blkno, placements in libinternals.get_blkno_placements(blknos, group=False):
346344

347-
assert placements.is_slice_like
348-
assert blkno != -1
349-
350-
shape_list = list(mgr_shape)
351-
shape_list[0] = len(placements)
352-
shape = tuple(shape_list)
345+
# Assertions disabled for performance; these should always hold
346+
# assert placements.is_slice_like
347+
# assert blkno != -1
353348

354349
blk = mgr.blocks[blkno]
355350
ax0_blk_indexer = blklocs[placements.indexer]
@@ -379,19 +374,16 @@ def _get_mgr_concatenation_plan(mgr: BlockManager):
379374

380375
# Assertions disabled for performance
381376
# assert blk._mgr_locs.as_slice == placements.as_slice
382-
# assert blk.shape[0] == shape[0]
383-
unit = JoinUnit(blk, shape)
377+
unit = JoinUnit(blk)
384378

385379
plan.append((placements, unit))
386380

387381
return plan
388382

389383

390384
class JoinUnit:
391-
def __init__(self, block: Block, shape: Shape):
392-
# Passing shape explicitly is required for cases when block is None.
385+
def __init__(self, block: Block):
393386
self.block = block
394-
self.shape = shape
395387

396388
def __repr__(self) -> str:
397389
return f"{type(self).__name__}({repr(self.block)})"
@@ -404,22 +396,11 @@ def is_na(self) -> bool:
404396
return False
405397

406398
def get_reindexed_values(self, empty_dtype: DtypeObj) -> ArrayLike:
407-
values: ArrayLike
408-
409399
if self.is_na:
410-
return make_na_array(empty_dtype, self.shape)
400+
return make_na_array(empty_dtype, self.block.shape)
411401

412402
else:
413-
414-
if not self.block._can_consolidate:
415-
# preserve these for validation in concat_compat
416-
return self.block.values
417-
418-
# No dtype upcasting is done here, it will be performed during
419-
# concatenation itself.
420-
values = self.block.values
421-
422-
return values
403+
return self.block.values
423404

424405

425406
def make_na_array(dtype: DtypeObj, shape: Shape) -> ArrayLike:
@@ -558,6 +539,9 @@ def _is_uniform_join_units(join_units: list[JoinUnit]) -> bool:
558539
first = join_units[0].block
559540
if first.dtype.kind == "V":
560541
return False
542+
elif len(join_units) == 1:
543+
# only use this path when there is something to concatenate
544+
return False
561545
return (
562546
# exclude cases where a) ju.block is None or b) we have e.g. Int64+int64
563547
all(type(ju.block) is type(first) for ju in join_units)
@@ -570,13 +554,8 @@ def _is_uniform_join_units(join_units: list[JoinUnit]) -> bool:
570554
or ju.block.dtype.kind in ["b", "i", "u"]
571555
for ju in join_units
572556
)
573-
and
574-
# no blocks that would get missing values (can lead to type upcasts)
575-
# unless we're an extension dtype.
576-
all(not ju.is_na or ju.block.is_extension for ju in join_units)
577-
and
578-
# only use this path when there is something to concatenate
579-
len(join_units) > 1
557+
# this also precludes any blocks with dtype.kind == "V", since
558+
# we excluded that case for `first` above.
580559
)
581560

582561

@@ -598,10 +577,7 @@ def _trim_join_unit(join_unit: JoinUnit, length: int) -> JoinUnit:
598577
extra_block = join_unit.block.getitem_block(slice(length, None))
599578
join_unit.block = join_unit.block.getitem_block(slice(length))
600579

601-
extra_shape = (join_unit.shape[0] - length,) + join_unit.shape[1:]
602-
join_unit.shape = (length,) + join_unit.shape[1:]
603-
604-
return JoinUnit(block=extra_block, shape=extra_shape)
580+
return JoinUnit(block=extra_block)
605581

606582

607583
def _combine_concat_plans(plans):

pandas/tests/frame/methods/test_dtypes.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,3 +131,10 @@ def test_dtypes_timedeltas(self):
131131
index=list("ABCD"),
132132
)
133133
tm.assert_series_equal(result, expected)
134+
135+
def test_frame_apply_np_array_return_type(self):
136+
# GH 35517
137+
df = DataFrame([["foo"]])
138+
result = df.apply(lambda col: np.array("bar"))
139+
expected = Series(["bar"])
140+
tm.assert_series_equal(result, expected)

0 commit comments

Comments
 (0)