@@ -4272,6 +4272,7 @@ def _ensure_valid_index(self, value) -> None:
4272
4272
index_copy .name = self .index .name
4273
4273
4274
4274
self ._mgr = self ._mgr .reindex_axis (index_copy , axis = 1 , fill_value = np .nan )
4275
+ self ._index = index_copy
4275
4276
4276
4277
def _box_col_values (self , values : SingleDataManager , loc : int ) -> Series :
4277
4278
"""
@@ -4501,6 +4502,8 @@ def query(self, expr: str, inplace: bool = False, **kwargs) -> DataFrame | None:
4501
4502
4502
4503
if inplace :
4503
4504
self ._update_inplace (result )
4505
+ self ._index = result ._index
4506
+ self ._columns = result ._columns
4504
4507
return None
4505
4508
else :
4506
4509
return result
@@ -4757,8 +4760,7 @@ def dtype_predicate(dtype: DtypeObj, dtypes_set) -> bool:
4757
4760
and not is_bool_dtype (dtype )
4758
4761
)
4759
4762
4760
- def predicate (arr : ArrayLike ) -> bool :
4761
- dtype = arr .dtype
4763
+ def predicate (dtype : DtypeObj ) -> bool :
4762
4764
if include :
4763
4765
if not dtype_predicate (dtype , include ):
4764
4766
return False
@@ -4769,10 +4771,16 @@ def predicate(arr: ArrayLike) -> bool:
4769
4771
4770
4772
return True
4771
4773
4772
- mgr = self ._mgr ._get_data_subset (predicate ).copy (deep = None )
4774
+ def arr_predicate (arr : ArrayLike ) -> bool :
4775
+ dtype = arr .dtype
4776
+ return predicate (dtype )
4777
+
4778
+ mgr , taker = self ._mgr ._get_data_subset (arr_predicate ).copy (deep = None )
4773
4779
# FIXME: get axes without mgr.axes
4774
- assert mgr .axes [1 ] is self .index # WTF why does passing columns/index cause segfault?
4775
- return type (self )(mgr , columns = mgr .axes [0 ], index = mgr .axes [1 ]).__finalize__ (self )
4780
+ # FIXME: return taker from _get_data_subset, this is really slow
4781
+ #taker = self.dtypes.apply(predicate).values.nonzero()[0]
4782
+ columns = self .columns .take (taker )
4783
+ return type (self )(mgr , columns = columns , index = self .index ).__finalize__ (self )
4776
4784
4777
4785
def insert (
4778
4786
self ,
@@ -4841,6 +4849,7 @@ def insert(
4841
4849
4842
4850
value = self ._sanitize_column (value )
4843
4851
self ._mgr .insert (loc , column , value )
4852
+ self ._columns = self .columns .insert (loc , column )
4844
4853
4845
4854
def assign (self , ** kwargs ) -> DataFrame :
4846
4855
r"""
@@ -6605,6 +6614,8 @@ def dropna(
6605
6614
if not inplace :
6606
6615
return result
6607
6616
self ._update_inplace (result )
6617
+ self ._columns = result ._columns
6618
+ self ._index = result ._index
6608
6619
return None
6609
6620
6610
6621
@deprecate_nonkeyword_arguments (version = None , allowed_args = ["self" , "subset" ])
@@ -6703,6 +6714,8 @@ def drop_duplicates(
6703
6714
6704
6715
if inplace :
6705
6716
self ._update_inplace (result )
6717
+ self ._index = result ._index
6718
+ self ._columns = result ._columns
6706
6719
return None
6707
6720
else :
6708
6721
return result
@@ -9268,7 +9281,7 @@ def diff(self, periods: int = 1, axis: Axis = 0) -> DataFrame:
9268
9281
axis = 0
9269
9282
9270
9283
new_data = self ._mgr .diff (n = periods , axis = axis )
9271
- return self ._constructor (new_data ).__finalize__ (self , "diff" )
9284
+ return self ._constructor (new_data , index = self . index , columns = self . columns ).__finalize__ (self , "diff" )
9272
9285
9273
9286
# ----------------------------------------------------------------------
9274
9287
# Function application
@@ -10879,8 +10892,9 @@ def _reduce(
10879
10892
# cols = self.columns[~dt64_cols]
10880
10893
# self = self[cols]
10881
10894
predicate = lambda x : not is_datetime64_any_dtype (x .dtype )
10882
- mgr = self ._mgr ._get_data_subset (predicate )
10883
- self = type (self )(mgr )
10895
+ mgr , taker = self ._mgr ._get_data_subset (predicate )
10896
+ columns = self .columns [taker ]
10897
+ self = type (self )(mgr , index = self .index , columns = columns )
10884
10898
10885
10899
# TODO: Make other agg func handle axis=None properly GH#21597
10886
10900
axis = self ._get_axis_number (axis )
@@ -10928,11 +10942,20 @@ def _get_data() -> DataFrame:
10928
10942
10929
10943
# After possibly _get_data and transposing, we are now in the
10930
10944
# simple case where we can use BlockManager.reduce
10931
- res , indexer = df ._mgr .reduce (blk_func , ignore_failures = ignore_failures )
10945
+ res , _ = df ._mgr .reduce (blk_func , ignore_failures = ignore_failures )
10932
10946
index = Index ([None ], dtype = object )
10933
10947
assert index .equals (res .axes [1 ])
10934
- columns = self .columns .take (indexer )
10935
- assert columns .equals (res .axes [0 ])
10948
+ if ignore_failures :
10949
+ if len (res .items ) == len (df .columns ):
10950
+ # i.e. nothing was dropped
10951
+ columns = df .columns
10952
+ else :
10953
+ # FIXME: get axes without mgr.axes; THIS IS WRONG TOO
10954
+ columns = res .axes [0 ]
10955
+ else :
10956
+ columns = df .columns
10957
+ assert columns .equals (res .axes [0 ])
10958
+
10936
10959
out = df ._constructor (res , index = index , columns = columns ).iloc [0 ]
10937
10960
if out_dtype is not None :
10938
10961
out = out .astype (out_dtype )
@@ -11736,8 +11759,9 @@ def _to_dict_of_blocks(self, copy: bool = True):
11736
11759
# convert to BlockManager if needed -> this way support ArrayManager as well
11737
11760
mgr = mgr_to_mgr (mgr , "block" )
11738
11761
mgr = cast (BlockManager , mgr )
11762
+ # FIXME: get axes without mgr.axes
11739
11763
return {
11740
- k : self ._constructor (v ).__finalize__ (self )
11764
+ k : self ._constructor (v , index = self . index , columns = v . axes [ 0 ] ).__finalize__ (self )
11741
11765
for k , v , in mgr .to_dict (copy = copy ).items ()
11742
11766
}
11743
11767
0 commit comments