Skip to content

Commit 7f73a89

Browse files
committed
down to about 100 failing tests
1 parent 87eafb4 commit 7f73a89

File tree

15 files changed

+169
-79
lines changed

15 files changed

+169
-79
lines changed

pandas/core/apply.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1002,6 +1002,7 @@ def series_generator(self):
10021002
# We create one Series object, and will swap out the data inside
10031003
# of it. Kids: don't do this at home.
10041004
ser = self.obj._ixs(0, axis=0)
1005+
index = ser.index
10051006
mgr = ser._mgr
10061007

10071008
if is_extension_array_dtype(ser.dtype):
@@ -1013,9 +1014,10 @@ def series_generator(self):
10131014

10141015
else:
10151016
for (arr, name) in zip(values, self.index):
1016-
# GH#35462 re-pin mgr in case setitem changed it
1017+
# GH#35462 re-pin mgr and index in case setitem changed them
10171018
ser._mgr = mgr
10181019
mgr.set_values(arr)
1020+
ser._index = index
10191021
object.__setattr__(ser, "_name", name)
10201022
yield ser
10211023

pandas/core/frame.py

Lines changed: 36 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -4272,6 +4272,7 @@ def _ensure_valid_index(self, value) -> None:
42724272
index_copy.name = self.index.name
42734273

42744274
self._mgr = self._mgr.reindex_axis(index_copy, axis=1, fill_value=np.nan)
4275+
self._index = index_copy
42754276

42764277
def _box_col_values(self, values: SingleDataManager, loc: int) -> Series:
42774278
"""
@@ -4501,6 +4502,8 @@ def query(self, expr: str, inplace: bool = False, **kwargs) -> DataFrame | None:
45014502

45024503
if inplace:
45034504
self._update_inplace(result)
4505+
self._index = result._index
4506+
self._columns = result._columns
45044507
return None
45054508
else:
45064509
return result
@@ -4757,8 +4760,7 @@ def dtype_predicate(dtype: DtypeObj, dtypes_set) -> bool:
47574760
and not is_bool_dtype(dtype)
47584761
)
47594762

4760-
def predicate(arr: ArrayLike) -> bool:
4761-
dtype = arr.dtype
4763+
def predicate(dtype: DtypeObj) -> bool:
47624764
if include:
47634765
if not dtype_predicate(dtype, include):
47644766
return False
@@ -4769,10 +4771,16 @@ def predicate(arr: ArrayLike) -> bool:
47694771

47704772
return True
47714773

4772-
mgr = self._mgr._get_data_subset(predicate).copy(deep=None)
4774+
def arr_predicate(arr: ArrayLike) -> bool:
4775+
dtype = arr.dtype
4776+
return predicate(dtype)
4777+
4778+
mgr, taker = self._mgr._get_data_subset(arr_predicate).copy(deep=None)
47734779
# FIXME: get axes without mgr.axes
4774-
assert mgr.axes[1] is self.index # WTF why does passing columns/index cause segfault?
4775-
return type(self)(mgr, columns=mgr.axes[0], index=mgr.axes[1]).__finalize__(self)
4780+
# FIXME: return taker from _get_data_subset, this is really slow
4781+
#taker = self.dtypes.apply(predicate).values.nonzero()[0]
4782+
columns = self.columns.take(taker)
4783+
return type(self)(mgr, columns=columns, index=self.index).__finalize__(self)
47764784

47774785
def insert(
47784786
self,
@@ -4841,6 +4849,7 @@ def insert(
48414849

48424850
value = self._sanitize_column(value)
48434851
self._mgr.insert(loc, column, value)
4852+
self._columns = self.columns.insert(loc, column)
48444853

48454854
def assign(self, **kwargs) -> DataFrame:
48464855
r"""
@@ -6605,6 +6614,8 @@ def dropna(
66056614
if not inplace:
66066615
return result
66076616
self._update_inplace(result)
6617+
self._columns = result._columns
6618+
self._index = result._index
66086619
return None
66096620

66106621
@deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "subset"])
@@ -6703,6 +6714,8 @@ def drop_duplicates(
67036714

67046715
if inplace:
67056716
self._update_inplace(result)
6717+
self._index = result._index
6718+
self._columns = result._columns
67066719
return None
67076720
else:
67086721
return result
@@ -9268,7 +9281,7 @@ def diff(self, periods: int = 1, axis: Axis = 0) -> DataFrame:
92689281
axis = 0
92699282

92709283
new_data = self._mgr.diff(n=periods, axis=axis)
9271-
return self._constructor(new_data).__finalize__(self, "diff")
9284+
return self._constructor(new_data, index=self.index, columns=self.columns).__finalize__(self, "diff")
92729285

92739286
# ----------------------------------------------------------------------
92749287
# Function application
@@ -10879,8 +10892,9 @@ def _reduce(
1087910892
# cols = self.columns[~dt64_cols]
1088010893
# self = self[cols]
1088110894
predicate = lambda x: not is_datetime64_any_dtype(x.dtype)
10882-
mgr = self._mgr._get_data_subset(predicate)
10883-
self = type(self)(mgr)
10895+
mgr, taker = self._mgr._get_data_subset(predicate)
10896+
columns = self.columns[taker]
10897+
self = type(self)(mgr, index=self.index, columns=columns)
1088410898

1088510899
# TODO: Make other agg func handle axis=None properly GH#21597
1088610900
axis = self._get_axis_number(axis)
@@ -10928,11 +10942,20 @@ def _get_data() -> DataFrame:
1092810942

1092910943
# After possibly _get_data and transposing, we are now in the
1093010944
# simple case where we can use BlockManager.reduce
10931-
res, indexer = df._mgr.reduce(blk_func, ignore_failures=ignore_failures)
10945+
res, _ = df._mgr.reduce(blk_func, ignore_failures=ignore_failures)
1093210946
index = Index([None], dtype=object)
1093310947
assert index.equals(res.axes[1])
10934-
columns = self.columns.take(indexer)
10935-
assert columns.equals(res.axes[0])
10948+
if ignore_failures:
10949+
if len(res.items) == len(df.columns):
10950+
# i.e. nothing was dropped
10951+
columns = df.columns
10952+
else:
10953+
# FIXME: get axes without mgr.axes; THIS IS WRONG TOO
10954+
columns = res.axes[0]
10955+
else:
10956+
columns = df.columns
10957+
assert columns.equals(res.axes[0])
10958+
1093610959
out = df._constructor(res, index=index, columns=columns).iloc[0]
1093710960
if out_dtype is not None:
1093810961
out = out.astype(out_dtype)
@@ -11736,8 +11759,9 @@ def _to_dict_of_blocks(self, copy: bool = True):
1173611759
# convert to BlockManager if needed -> this way support ArrayManager as well
1173711760
mgr = mgr_to_mgr(mgr, "block")
1173811761
mgr = cast(BlockManager, mgr)
11762+
# FIXME: get axes without mgr.axes
1173911763
return {
11740-
k: self._constructor(v).__finalize__(self)
11764+
k: self._constructor(v, index=self.index, columns=v.axes[0]).__finalize__(self)
1174111765
for k, v, in mgr.to_dict(copy=copy).items()
1174211766
}
1174311767

0 commit comments

Comments
 (0)