@@ -1771,139 +1771,6 @@ def to_numpy(
1771
1771
1772
1772
return result
1773
1773
1774
- def _to_dict_helper (self , orient , into_c , into ):
1775
- """Helper function to do main work to convert frame into dict based on
1776
- `orient` and `into`
1777
-
1778
- As part of GH46470 also takes care in when to use maybe_box_native as this
1779
- function can perform badly and is not necessary for non object cols
1780
- """
1781
- object_dtype_cols = {
1782
- col for col , dtype in self .dtypes .items () if is_object_dtype (dtype )
1783
- }
1784
- are_all_object_dtype_cols = len (object_dtype_cols ) == len (self .dtypes )
1785
- if orient == "dict" :
1786
- return into_c ((k , v .to_dict (into )) for k , v in self .items ())
1787
- elif orient == "list" :
1788
- return into_c (
1789
- (
1790
- k ,
1791
- list (map (maybe_box_native , v .tolist ()))
1792
- if k in object_dtype_cols
1793
- else v .tolist (),
1794
- )
1795
- for k , v in self .items ()
1796
- )
1797
- elif orient == "split" :
1798
- if are_all_object_dtype_cols :
1799
- data = [
1800
- list (map (maybe_box_native , t ))
1801
- for t in self .itertuples (index = False , name = None )
1802
- ]
1803
- elif object_dtype_cols :
1804
- is_object_dtype_by_index = [
1805
- col in object_dtype_cols for col in self .columns
1806
- ]
1807
- data = [
1808
- [
1809
- maybe_box_native (v ) if is_object_dtype_by_index [i ] else v
1810
- for i , v in enumerate (t )
1811
- ]
1812
- for t in self .itertuples (index = False , name = None )
1813
- ]
1814
- else :
1815
- data = [list (t ) for t in self .itertuples (index = False , name = None )]
1816
- return into_c (
1817
- (
1818
- ("index" , self .index .tolist ()),
1819
- ("columns" , self .columns .tolist ()),
1820
- ("data" , data ),
1821
- )
1822
- )
1823
- elif orient == "series" :
1824
- return into_c ((k , v ) for k , v in self .items ())
1825
- elif orient == "records" :
1826
- columns = self .columns .tolist ()
1827
- if object_dtype_cols :
1828
- is_object_dtype_by_index = [col in object_dtype_cols for col in columns ]
1829
- return [
1830
- into_c (
1831
- zip (
1832
- columns ,
1833
- [
1834
- maybe_box_native (v )
1835
- if is_object_dtype_by_index [i ]
1836
- else v
1837
- for i , v in enumerate (t )
1838
- ],
1839
- )
1840
- )
1841
- for t in self .itertuples (index = False , name = None )
1842
- ]
1843
- else :
1844
- return [
1845
- into_c (zip (columns , t ))
1846
- for t in self .itertuples (index = False , name = None )
1847
- ]
1848
- elif orient == "index" :
1849
- if not self .index .is_unique :
1850
- raise ValueError ("DataFrame index must be unique for orient='index'." )
1851
- columns = self .columns .tolist ()
1852
- if object_dtype_cols :
1853
- is_object_dtype_by_index = [
1854
- col in object_dtype_cols for col in self .columns
1855
- ]
1856
- return into_c (
1857
- (
1858
- t [0 ],
1859
- {
1860
- columns [i ]: maybe_box_native (v )
1861
- if is_object_dtype_by_index [i ]
1862
- else v
1863
- for i , v in enumerate (t [1 :])
1864
- },
1865
- )
1866
- for t in self .itertuples (name = None )
1867
- )
1868
- else :
1869
- return into_c (
1870
- (
1871
- t [0 ],
1872
- {columns [i ]: v for i , v in enumerate (t [1 :])},
1873
- )
1874
- for t in self .itertuples (name = None )
1875
- )
1876
- elif orient == "tight" :
1877
- if are_all_object_dtype_cols :
1878
- data = [
1879
- list (map (maybe_box_native , t ))
1880
- for t in self .itertuples (index = False , name = None )
1881
- ]
1882
- elif object_dtype_cols :
1883
- is_object_dtype_by_index = [
1884
- col in object_dtype_cols for col in self .columns
1885
- ]
1886
- data = [
1887
- [
1888
- maybe_box_native (v ) if is_object_dtype_by_index [i ] else v
1889
- for i , v in enumerate (t )
1890
- ]
1891
- for t in self .itertuples (index = False , name = None )
1892
- ]
1893
- else :
1894
- data = [list (t ) for t in self .itertuples (index = False , name = None )]
1895
- return into_c (
1896
- (
1897
- ("index" , self .index .tolist ()),
1898
- ("columns" , self .columns .tolist ()),
1899
- ("data" , data ),
1900
- ("index_names" , list (self .index .names )),
1901
- ("column_names" , list (self .columns .names )),
1902
- )
1903
- )
1904
- else :
1905
- raise ValueError (f"orient '{ orient } ' not understood" )
1906
-
1907
1774
def to_dict (self , orient : str = "dict" , into = dict ):
1908
1775
"""
1909
1776
Convert the DataFrame to a dictionary.
@@ -2042,67 +1909,129 @@ def to_dict(self, orient: str = "dict", into=dict):
2042
1909
elif orient .startswith ("i" ):
2043
1910
orient = "index"
2044
1911
1912
+ object_dtype_cols = {
1913
+ col for col , dtype in self .dtypes .items () if is_object_dtype (dtype )
1914
+ }
1915
+ are_all_object_dtype_cols = len (object_dtype_cols ) == len (self .dtypes )
2045
1916
if orient == "dict" :
2046
1917
return into_c ((k , v .to_dict (into )) for k , v in self .items ())
2047
-
2048
1918
elif orient == "list" :
2049
- return into_c (
2050
- (k , list (map (maybe_box_native , v .tolist ()))) for k , v in self .items ()
2051
- )
2052
-
2053
- elif orient == "split" :
2054
1919
return into_c (
2055
1920
(
2056
- ("index" , self .index .tolist ()),
2057
- ("columns" , self .columns .tolist ()),
2058
- (
2059
- "data" ,
2060
- [
2061
- list (map (maybe_box_native , t ))
2062
- for t in self .itertuples (index = False , name = None )
2063
- ],
2064
- ),
1921
+ k ,
1922
+ list (map (maybe_box_native , v .tolist ()))
1923
+ if k in object_dtype_cols
1924
+ else v .tolist (),
2065
1925
)
1926
+ for k , v in self .items ()
2066
1927
)
2067
-
2068
- elif orient == "tight" :
1928
+ elif orient == "split" :
1929
+ if are_all_object_dtype_cols :
1930
+ data = [
1931
+ list (map (maybe_box_native , t ))
1932
+ for t in self .itertuples (index = False , name = None )
1933
+ ]
1934
+ elif object_dtype_cols :
1935
+ is_object_dtype_by_index = [
1936
+ col in object_dtype_cols for col in self .columns
1937
+ ]
1938
+ data = [
1939
+ [
1940
+ maybe_box_native (v ) if is_object_dtype_by_index [i ] else v
1941
+ for i , v in enumerate (t )
1942
+ ]
1943
+ for t in self .itertuples (index = False , name = None )
1944
+ ]
1945
+ else :
1946
+ data = [list (t ) for t in self .itertuples (index = False , name = None )]
2069
1947
return into_c (
2070
1948
(
2071
1949
("index" , self .index .tolist ()),
2072
1950
("columns" , self .columns .tolist ()),
2073
- (
2074
- "data" ,
2075
- [
2076
- list (map (maybe_box_native , t ))
2077
- for t in self .itertuples (index = False , name = None )
2078
- ],
2079
- ),
2080
- ("index_names" , list (self .index .names )),
2081
- ("column_names" , list (self .columns .names )),
1951
+ ("data" , data ),
2082
1952
)
2083
1953
)
2084
-
2085
1954
elif orient == "series" :
2086
1955
return into_c ((k , v ) for k , v in self .items ())
2087
-
2088
1956
elif orient == "records" :
2089
1957
columns = self .columns .tolist ()
2090
- rows = (
2091
- dict (zip (columns , row ))
2092
- for row in self .itertuples (index = False , name = None )
2093
- )
2094
- return [
2095
- into_c ((k , maybe_box_native (v )) for k , v in row .items ()) for row in rows
2096
- ]
2097
-
1958
+ if object_dtype_cols :
1959
+ is_object_dtype_by_index = [col in object_dtype_cols for col in columns ]
1960
+ return [
1961
+ into_c (
1962
+ zip (
1963
+ columns ,
1964
+ [
1965
+ maybe_box_native (v )
1966
+ if is_object_dtype_by_index [i ]
1967
+ else v
1968
+ for i , v in enumerate (t )
1969
+ ],
1970
+ )
1971
+ )
1972
+ for t in self .itertuples (index = False , name = None )
1973
+ ]
1974
+ else :
1975
+ return [
1976
+ into_c (zip (columns , t ))
1977
+ for t in self .itertuples (index = False , name = None )
1978
+ ]
2098
1979
elif orient == "index" :
2099
1980
if not self .index .is_unique :
2100
1981
raise ValueError ("DataFrame index must be unique for orient='index'." )
1982
+ columns = self .columns .tolist ()
1983
+ if object_dtype_cols :
1984
+ is_object_dtype_by_index = [
1985
+ col in object_dtype_cols for col in self .columns
1986
+ ]
1987
+ return into_c (
1988
+ (
1989
+ t [0 ],
1990
+ {
1991
+ columns [i ]: maybe_box_native (v )
1992
+ if is_object_dtype_by_index [i ]
1993
+ else v
1994
+ for i , v in enumerate (t [1 :])
1995
+ },
1996
+ )
1997
+ for t in self .itertuples (name = None )
1998
+ )
1999
+ else :
2000
+ return into_c (
2001
+ (
2002
+ t [0 ],
2003
+ {columns [i ]: v for i , v in enumerate (t [1 :])},
2004
+ )
2005
+ for t in self .itertuples (name = None )
2006
+ )
2007
+ elif orient == "tight" :
2008
+ if are_all_object_dtype_cols :
2009
+ data = [
2010
+ list (map (maybe_box_native , t ))
2011
+ for t in self .itertuples (index = False , name = None )
2012
+ ]
2013
+ elif object_dtype_cols :
2014
+ is_object_dtype_by_index = [
2015
+ col in object_dtype_cols for col in self .columns
2016
+ ]
2017
+ data = [
2018
+ [
2019
+ maybe_box_native (v ) if is_object_dtype_by_index [i ] else v
2020
+ for i , v in enumerate (t )
2021
+ ]
2022
+ for t in self .itertuples (index = False , name = None )
2023
+ ]
2024
+ else :
2025
+ data = [list (t ) for t in self .itertuples (index = False , name = None )]
2101
2026
return into_c (
2102
- (t [0 ], dict (zip (self .columns , map (maybe_box_native , t [1 :]))))
2103
- for t in self .itertuples (name = None )
2027
+ (
2028
+ ("index" , self .index .tolist ()),
2029
+ ("columns" , self .columns .tolist ()),
2030
+ ("data" , data ),
2031
+ ("index_names" , list (self .index .names )),
2032
+ ("column_names" , list (self .columns .names )),
2033
+ )
2104
2034
)
2105
-
2106
2035
else :
2107
2036
raise ValueError (f"orient '{ orient } ' not understood" )
2108
2037
0 commit comments