Skip to content

Commit d9f9786

Browse files
author
Roger Thomas
committed
Update
1 parent 8d93fec commit d9f9786

File tree

1 file changed

+101
-172
lines changed

1 file changed

+101
-172
lines changed

pandas/core/frame.py

Lines changed: 101 additions & 172 deletions
Original file line number | Diff line number | Diff line change
@@ -1771,139 +1771,6 @@ def to_numpy(
17711771

17721772
return result
17731773

1774-
def _to_dict_helper(self, orient, into_c, into):
1775-
"""Helper function to do main work to convert frame into dict based on
1776-
`orient` and `into`
1777-
1778-
As part of GH46470 also takes care in when to use maybe_box_native as this
1779-
function can perform badly and is not necessary for non object cols
1780-
"""
1781-
object_dtype_cols = {
1782-
col for col, dtype in self.dtypes.items() if is_object_dtype(dtype)
1783-
}
1784-
are_all_object_dtype_cols = len(object_dtype_cols) == len(self.dtypes)
1785-
if orient == "dict":
1786-
return into_c((k, v.to_dict(into)) for k, v in self.items())
1787-
elif orient == "list":
1788-
return into_c(
1789-
(
1790-
k,
1791-
list(map(maybe_box_native, v.tolist()))
1792-
if k in object_dtype_cols
1793-
else v.tolist(),
1794-
)
1795-
for k, v in self.items()
1796-
)
1797-
elif orient == "split":
1798-
if are_all_object_dtype_cols:
1799-
data = [
1800-
list(map(maybe_box_native, t))
1801-
for t in self.itertuples(index=False, name=None)
1802-
]
1803-
elif object_dtype_cols:
1804-
is_object_dtype_by_index = [
1805-
col in object_dtype_cols for col in self.columns
1806-
]
1807-
data = [
1808-
[
1809-
maybe_box_native(v) if is_object_dtype_by_index[i] else v
1810-
for i, v in enumerate(t)
1811-
]
1812-
for t in self.itertuples(index=False, name=None)
1813-
]
1814-
else:
1815-
data = [list(t) for t in self.itertuples(index=False, name=None)]
1816-
return into_c(
1817-
(
1818-
("index", self.index.tolist()),
1819-
("columns", self.columns.tolist()),
1820-
("data", data),
1821-
)
1822-
)
1823-
elif orient == "series":
1824-
return into_c((k, v) for k, v in self.items())
1825-
elif orient == "records":
1826-
columns = self.columns.tolist()
1827-
if object_dtype_cols:
1828-
is_object_dtype_by_index = [col in object_dtype_cols for col in columns]
1829-
return [
1830-
into_c(
1831-
zip(
1832-
columns,
1833-
[
1834-
maybe_box_native(v)
1835-
if is_object_dtype_by_index[i]
1836-
else v
1837-
for i, v in enumerate(t)
1838-
],
1839-
)
1840-
)
1841-
for t in self.itertuples(index=False, name=None)
1842-
]
1843-
else:
1844-
return [
1845-
into_c(zip(columns, t))
1846-
for t in self.itertuples(index=False, name=None)
1847-
]
1848-
elif orient == "index":
1849-
if not self.index.is_unique:
1850-
raise ValueError("DataFrame index must be unique for orient='index'.")
1851-
columns = self.columns.tolist()
1852-
if object_dtype_cols:
1853-
is_object_dtype_by_index = [
1854-
col in object_dtype_cols for col in self.columns
1855-
]
1856-
return into_c(
1857-
(
1858-
t[0],
1859-
{
1860-
columns[i]: maybe_box_native(v)
1861-
if is_object_dtype_by_index[i]
1862-
else v
1863-
for i, v in enumerate(t[1:])
1864-
},
1865-
)
1866-
for t in self.itertuples(name=None)
1867-
)
1868-
else:
1869-
return into_c(
1870-
(
1871-
t[0],
1872-
{columns[i]: v for i, v in enumerate(t[1:])},
1873-
)
1874-
for t in self.itertuples(name=None)
1875-
)
1876-
elif orient == "tight":
1877-
if are_all_object_dtype_cols:
1878-
data = [
1879-
list(map(maybe_box_native, t))
1880-
for t in self.itertuples(index=False, name=None)
1881-
]
1882-
elif object_dtype_cols:
1883-
is_object_dtype_by_index = [
1884-
col in object_dtype_cols for col in self.columns
1885-
]
1886-
data = [
1887-
[
1888-
maybe_box_native(v) if is_object_dtype_by_index[i] else v
1889-
for i, v in enumerate(t)
1890-
]
1891-
for t in self.itertuples(index=False, name=None)
1892-
]
1893-
else:
1894-
data = [list(t) for t in self.itertuples(index=False, name=None)]
1895-
return into_c(
1896-
(
1897-
("index", self.index.tolist()),
1898-
("columns", self.columns.tolist()),
1899-
("data", data),
1900-
("index_names", list(self.index.names)),
1901-
("column_names", list(self.columns.names)),
1902-
)
1903-
)
1904-
else:
1905-
raise ValueError(f"orient '{orient}' not understood")
1906-
19071774
def to_dict(self, orient: str = "dict", into=dict):
19081775
"""
19091776
Convert the DataFrame to a dictionary.
@@ -2042,67 +1909,129 @@ def to_dict(self, orient: str = "dict", into=dict):
20421909
elif orient.startswith("i"):
20431910
orient = "index"
20441911

1912+
object_dtype_cols = {
1913+
col for col, dtype in self.dtypes.items() if is_object_dtype(dtype)
1914+
}
1915+
are_all_object_dtype_cols = len(object_dtype_cols) == len(self.dtypes)
20451916
if orient == "dict":
20461917
return into_c((k, v.to_dict(into)) for k, v in self.items())
2047-
20481918
elif orient == "list":
2049-
return into_c(
2050-
(k, list(map(maybe_box_native, v.tolist()))) for k, v in self.items()
2051-
)
2052-
2053-
elif orient == "split":
20541919
return into_c(
20551920
(
2056-
("index", self.index.tolist()),
2057-
("columns", self.columns.tolist()),
2058-
(
2059-
"data",
2060-
[
2061-
list(map(maybe_box_native, t))
2062-
for t in self.itertuples(index=False, name=None)
2063-
],
2064-
),
1921+
k,
1922+
list(map(maybe_box_native, v.tolist()))
1923+
if k in object_dtype_cols
1924+
else v.tolist(),
20651925
)
1926+
for k, v in self.items()
20661927
)
2067-
2068-
elif orient == "tight":
1928+
elif orient == "split":
1929+
if are_all_object_dtype_cols:
1930+
data = [
1931+
list(map(maybe_box_native, t))
1932+
for t in self.itertuples(index=False, name=None)
1933+
]
1934+
elif object_dtype_cols:
1935+
is_object_dtype_by_index = [
1936+
col in object_dtype_cols for col in self.columns
1937+
]
1938+
data = [
1939+
[
1940+
maybe_box_native(v) if is_object_dtype_by_index[i] else v
1941+
for i, v in enumerate(t)
1942+
]
1943+
for t in self.itertuples(index=False, name=None)
1944+
]
1945+
else:
1946+
data = [list(t) for t in self.itertuples(index=False, name=None)]
20691947
return into_c(
20701948
(
20711949
("index", self.index.tolist()),
20721950
("columns", self.columns.tolist()),
2073-
(
2074-
"data",
2075-
[
2076-
list(map(maybe_box_native, t))
2077-
for t in self.itertuples(index=False, name=None)
2078-
],
2079-
),
2080-
("index_names", list(self.index.names)),
2081-
("column_names", list(self.columns.names)),
1951+
("data", data),
20821952
)
20831953
)
2084-
20851954
elif orient == "series":
20861955
return into_c((k, v) for k, v in self.items())
2087-
20881956
elif orient == "records":
20891957
columns = self.columns.tolist()
2090-
rows = (
2091-
dict(zip(columns, row))
2092-
for row in self.itertuples(index=False, name=None)
2093-
)
2094-
return [
2095-
into_c((k, maybe_box_native(v)) for k, v in row.items()) for row in rows
2096-
]
2097-
1958+
if object_dtype_cols:
1959+
is_object_dtype_by_index = [col in object_dtype_cols for col in columns]
1960+
return [
1961+
into_c(
1962+
zip(
1963+
columns,
1964+
[
1965+
maybe_box_native(v)
1966+
if is_object_dtype_by_index[i]
1967+
else v
1968+
for i, v in enumerate(t)
1969+
],
1970+
)
1971+
)
1972+
for t in self.itertuples(index=False, name=None)
1973+
]
1974+
else:
1975+
return [
1976+
into_c(zip(columns, t))
1977+
for t in self.itertuples(index=False, name=None)
1978+
]
20981979
elif orient == "index":
20991980
if not self.index.is_unique:
21001981
raise ValueError("DataFrame index must be unique for orient='index'.")
1982+
columns = self.columns.tolist()
1983+
if object_dtype_cols:
1984+
is_object_dtype_by_index = [
1985+
col in object_dtype_cols for col in self.columns
1986+
]
1987+
return into_c(
1988+
(
1989+
t[0],
1990+
{
1991+
columns[i]: maybe_box_native(v)
1992+
if is_object_dtype_by_index[i]
1993+
else v
1994+
for i, v in enumerate(t[1:])
1995+
},
1996+
)
1997+
for t in self.itertuples(name=None)
1998+
)
1999+
else:
2000+
return into_c(
2001+
(
2002+
t[0],
2003+
{columns[i]: v for i, v in enumerate(t[1:])},
2004+
)
2005+
for t in self.itertuples(name=None)
2006+
)
2007+
elif orient == "tight":
2008+
if are_all_object_dtype_cols:
2009+
data = [
2010+
list(map(maybe_box_native, t))
2011+
for t in self.itertuples(index=False, name=None)
2012+
]
2013+
elif object_dtype_cols:
2014+
is_object_dtype_by_index = [
2015+
col in object_dtype_cols for col in self.columns
2016+
]
2017+
data = [
2018+
[
2019+
maybe_box_native(v) if is_object_dtype_by_index[i] else v
2020+
for i, v in enumerate(t)
2021+
]
2022+
for t in self.itertuples(index=False, name=None)
2023+
]
2024+
else:
2025+
data = [list(t) for t in self.itertuples(index=False, name=None)]
21012026
return into_c(
2102-
(t[0], dict(zip(self.columns, map(maybe_box_native, t[1:]))))
2103-
for t in self.itertuples(name=None)
2027+
(
2028+
("index", self.index.tolist()),
2029+
("columns", self.columns.tolist()),
2030+
("data", data),
2031+
("index_names", list(self.index.names)),
2032+
("column_names", list(self.columns.names)),
2033+
)
21042034
)
2105-
21062035
else:
21072036
raise ValueError(f"orient '{orient}' not understood")
21082037

0 commit comments

Comments
 (0)