61
61
)
62
62
from pandas .core .series import Series
63
63
from pandas .core .sorting import (
64
+ compress_group_index ,
65
+ decons_obs_group_ids ,
66
+ get_group_index ,
64
67
get_group_index_sorter ,
65
68
get_indexer_dict ,
66
69
)
@@ -758,51 +761,42 @@ def ids(self) -> np.ndarray:
758
761
759
762
@cache_readonly
760
763
def result_index_and_ids (self ) -> tuple [Index , np .ndarray ]:
761
- from pandas .core .sorting import (
762
- compress_group_index ,
763
- decons_obs_group_ids ,
764
- get_group_index ,
765
- )
766
-
767
- codes_and_uniques = [ping ._codes_and_uniques for ping in self .groupings ]
768
-
769
- codes = [e [0 ] for e in codes_and_uniques ]
770
- levels = [Index ._with_infer (e [1 ]) for e in codes_and_uniques ]
771
- for k , (ping , level ) in enumerate (zip (self .groupings , levels )):
772
- if ping ._passed_categorical :
773
- # TODO: Modify in Grouping.groups instead?
774
- levels [k ] = level .set_categories (ping ._orig_cats )
775
764
names = self .names
776
-
765
+ codes = [ping .codes for ping in self .groupings ]
766
+ levels = [Index ._with_infer (ping .uniques ) for ping in self .groupings ]
777
767
obs = [
778
768
ping ._observed or not ping ._passed_categorical for ping in self .groupings
779
769
]
770
+ # When passed a categorical grouping, keep all categories
771
+ for k , (ping , level ) in enumerate (zip (self .groupings , levels )):
772
+ if ping ._passed_categorical :
773
+ levels [k ] = level .set_categories (ping ._orig_cats )
780
774
781
775
if len (self .groupings ) == 1 :
782
776
result_index = levels [0 ]
783
777
result_index .name = names [0 ]
784
- ids = codes [0 ]
778
+ ids = codes [0 ]. astype ( "intp" , copy = False )
785
779
return result_index , ids
786
- elif any (obs ):
787
- ob_codes = [e for e , o in zip (codes , obs ) if o ]
788
- ob_levels = [e for e , o in zip (levels , obs ) if o ]
789
- ob_names = [e for e , o in zip (names , obs ) if o ]
780
+
781
+ if any (obs ):
782
+ ob_codes = [code for code , ob in zip (codes , obs ) if ob ]
783
+ ob_levels = [level for level , ob in zip (levels , obs ) if ob ]
784
+ ob_names = [name for name , ob in zip (names , obs ) if ob ]
790
785
791
786
shape = tuple (len (level ) for level in ob_levels )
792
787
group_index = get_group_index (ob_codes , shape , sort = True , xnull = True )
793
788
ob_ids , obs_group_ids = compress_group_index (group_index , sort = self ._sort )
794
789
ob_ids = ensure_platform_int (ob_ids )
795
- ids , obs_ids = ob_ids , obs_group_ids
796
790
ob_index_codes = decons_obs_group_ids (
797
- ids , obs_ids , shape , ob_codes , xnull = True
791
+ ob_ids , obs_group_ids , shape , ob_codes , xnull = True
798
792
)
799
-
800
793
ob_index = MultiIndex (
801
794
levels = ob_levels ,
802
795
codes = ob_index_codes ,
803
796
names = ob_names ,
804
797
verify_integrity = False ,
805
798
)
799
+
806
800
if not all (obs ):
807
801
unob_codes = [e for e , o in zip (codes , obs ) if not o ]
808
802
unob_levels = [e for e , o in zip (levels , obs ) if not o ]
@@ -811,7 +805,6 @@ def result_index_and_ids(self) -> tuple[Index, np.ndarray]:
811
805
shape = tuple (len (level ) for level in unob_levels )
812
806
unob_ids = get_group_index (unob_codes , shape , sort = True , xnull = True )
813
807
unob_ids = ensure_platform_int (unob_ids )
814
-
815
808
unob_index = MultiIndex .from_product (unob_levels , names = unob_names )
816
809
817
810
if all (obs ):
@@ -821,32 +814,35 @@ def result_index_and_ids(self) -> tuple[Index, np.ndarray]:
821
814
result_index = unob_index
822
815
ids = unob_ids
823
816
else :
824
- ob_indices = [ k for k , e in enumerate ( obs ) if e ]
817
+ # Combine unobserved and observed parts of result_index
825
818
unob_indices = [k for k , e in enumerate (obs ) if not e ]
826
- _ , index , inverse = np .unique (
827
- unob_indices + ob_indices , return_index = True , return_inverse = True
828
- )
819
+ ob_indices = [k for k , e in enumerate (obs ) if e ]
829
820
result_index_codes = np .concatenate (
830
821
[
831
822
np .tile (unob_index .codes , len (ob_index )),
832
823
np .repeat (ob_index .codes , len (unob_index ), axis = 1 ),
833
824
],
834
825
axis = 0 ,
835
826
)
827
+ _ , index = np .unique (unob_indices + ob_indices , return_index = True )
836
828
result_index = MultiIndex (
837
- levels = [ levels [ k ] for k in inverse ] ,
829
+ levels = list ( unob_index . levels ) + list ( ob_index . levels ) ,
838
830
codes = result_index_codes ,
839
- names = [ names [ k ] for k in inverse ] ,
831
+ names = list ( unob_index . names ) + list ( ob_index . names ) ,
840
832
).reorder_levels (index )
841
-
842
833
ids = len (unob_index ) * ob_ids + unob_ids
843
- sorter = result_index .argsort ()
844
- result_index = result_index .take (sorter )
845
- _ , inverse = np .unique (sorter , return_index = True )
846
- ids = inverse .take (ids )
847
834
848
- if len (levels ) == 1 :
849
- result_index = result_index .get_level_values (0 )
835
+ if self ._sort :
836
+ sorter = result_index .argsort ()
837
+ result_index = result_index .take (sorter )
838
+ _ , inverse = np .unique (sorter , return_index = True )
839
+ ids = inverse .take (ids )
840
+ else :
841
+ ids , uniques = compress_group_index (ids , sort = False )
842
+ taker = np .concatenate (
843
+ [uniques , np .delete (np .arange (len (result_index )), uniques )]
844
+ )
845
+ result_index = result_index .take (taker )
850
846
851
847
return result_index , ids
852
848
0 commit comments