Skip to content

Commit bed9772

Browse files
committed
Add orient=tight format for dictionaries
1 parent 492e3e9 commit bed9772

File tree

3 files changed

+111
-6
lines changed

3 files changed

+111
-6
lines changed

doc/source/whatsnew/v1.1.0.rst

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -271,6 +271,23 @@ change, as ``fsspec`` will still bring in the same packages as before.
271271

272272
.. _fsspec docs: https://filesystem-spec.readthedocs.io/en/latest/
273273

274+
.. _whatsnew_110.dict_tight:
275+
276+
DataFrame.from_dict and DataFrame.to_dict have new ``'tight'`` option
277+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
278+
279+
A new ``'tight'`` dictionary format that preserves :class:`MultiIndex` entries and names
280+
is now available, and can be used with the standard ``json`` library to produce a tight
281+
representation of :class:`DataFrame` objects (:issue:`4889`).
282+
283+
.. ipython:: python
284+
285+
df = pd.DataFrame.from_records([[1, 3], [2, 4]],
286+
index=pd.MultiIndex.from_tuples([("a", "b"), ("a", "c")], names=["n1", "n2"]),
287+
columns=pd.MultiIndex.from_tuples([("x", 1), ("y", 2)], names=["z1", "z2"]))
288+
df
289+
df.to_dict(orient='tight')
290+
274291
.. _whatsnew_110.enhancements.other:
275292

276293
Other enhancements

pandas/core/frame.py

Lines changed: 67 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1236,15 +1236,21 @@ def from_dict(cls, data, orient="columns", dtype=None, columns=None) -> "DataFra
12361236
----------
12371237
data : dict
12381238
Of the form {field : array-like} or {field : dict}.
1239-
orient : {'columns', 'index'}, default 'columns'
1239+
orient : {'columns', 'index', 'tight'}, default 'columns'
12401240
The "orientation" of the data. If the keys of the passed dict
12411241
should be the columns of the resulting DataFrame, pass 'columns'
12421242
(default). Otherwise if the keys should be rows, pass 'index'.
1243+
If 'tight', assume a dict with keys ['index', 'columns', 'data',
1244+
'index_names', 'column_names'].
1245+
1246+
.. versionadded:: 1.1.0
1247+
'tight' as an allowed value for the ``orient`` argument
1248+
12431249
dtype : dtype, default None
12441250
Data type to force, otherwise infer.
12451251
columns : list, default None
12461252
Column labels to use when ``orient='index'``. Raises a ValueError
1247-
if used with ``orient='columns'``.
1253+
if used with ``orient='columns'`` or ``orient='tight'``.
12481254
12491255
.. versionadded:: 0.23.0
12501256
@@ -1257,6 +1263,7 @@ def from_dict(cls, data, orient="columns", dtype=None, columns=None) -> "DataFra
12571263
DataFrame.from_records : DataFrame from structured ndarray, sequence
12581264
of tuples or dicts, or DataFrame.
12591265
DataFrame : DataFrame object creation using constructor.
1266+
DataFrame.to_dict : Convert the DataFrame to a dictionary.
12601267
12611268
Examples
12621269
--------
@@ -1279,6 +1286,20 @@ def from_dict(cls, data, orient="columns", dtype=None, columns=None) -> "DataFra
12791286
row_1 3 2 1 0
12801287
row_2 a b c d
12811288
1289+
Specify ``orient='tight'`` to create the DataFrame using a 'tight'
1290+
format:
1291+
>>> data = {'index': [('a', 'b'), ('a', 'c')],
1292+
...         'columns': [('x', 1), ('y', 2)],
1293+
...         'data': [[1, 3], [2, 4]],
1294+
...         'index_names': ['n1', 'n2'],
1295+
...         'column_names': ['z1', 'z2']}
1296+
>>> pd.DataFrame.from_dict(data, orient='tight')
1297+
z1 x y
1298+
z2 1 2
1299+
n1 n2
1300+
a b 1 3
1301+
c 2 4
1302+
12821303
When using the 'index' orientation, the column names can be
12831304
specified manually:
12841305
@@ -1297,13 +1318,27 @@ def from_dict(cls, data, orient="columns", dtype=None, columns=None) -> "DataFra
12971318
data = _from_nested_dict(data)
12981319
else:
12991320
data, index = list(data.values()), list(data.keys())
1300-
elif orient == "columns":
1321+
elif orient == "columns" or orient == "tight":
13011322
if columns is not None:
1302-
raise ValueError("cannot use columns parameter with orient='columns'")
1323+
raise ValueError(f"cannot use columns parameter with orient='{orient}'")
13031324
else: # pragma: no cover
13041325
raise ValueError("only recognize index or columns for orient")
13051326

1306-
return cls(data, index=index, columns=columns, dtype=dtype)
1327+
if orient != "tight":
1328+
return cls(data, index=index, columns=columns, dtype=dtype)
1329+
else:
1330+
realdata = data["data"]
1331+
1332+
def create_index(indexlist, namelist):
1333+
if len(namelist) > 1:
1334+
index = MultiIndex.from_tuples(indexlist, names=namelist)
1335+
else:
1336+
index = Index(indexlist, name=namelist[0])
1337+
return index
1338+
1339+
index = create_index(data["index"], data["index_names"])
1340+
columns = create_index(data["columns"], data["column_names"])
1341+
return cls(realdata, index=index, columns=columns, dtype=dtype)
13071342

13081343
def to_numpy(
13091344
self, dtype=None, copy: bool = False, na_value=lib.no_default
@@ -1388,13 +1423,19 @@ def to_dict(self, orient="dict", into=dict):
13881423
- 'series' : dict like {column -> Series(values)}
13891424
- 'split' : dict like
13901425
{'index' -> [index], 'columns' -> [columns], 'data' -> [values]}
1426+
- 'tight' : dict like
1427+
{'index' -> [index], 'columns' -> [columns], 'data' -> [values],
1428+
'index_names' -> [index.names], 'column_names' -> [column.names]}
13911429
- 'records' : list like
13921430
[{column -> value}, ... , {column -> value}]
13931431
- 'index' : dict like {index -> {column -> value}}
13941432
13951433
Abbreviations are allowed. `s` indicates `series` and `sp`
13961434
indicates `split`.
13971435
1436+
.. versionadded:: 1.1.0
1437+
'tight' as an allowed value for the ``orient`` argument
1438+
13981439
into : class, default dict
13991440
The collections.abc.Mapping subclass used for all Mappings
14001441
in the return value. Can be the actual class or an empty
@@ -1444,6 +1485,10 @@ def to_dict(self, orient="dict", into=dict):
14441485
>>> df.to_dict('index')
14451486
{'row1': {'col1': 1, 'col2': 0.5}, 'row2': {'col1': 2, 'col2': 0.75}}
14461487
1488+
>>> df.to_dict('tight')
1489+
{'index': ['row1', 'row2'], 'columns': ['col1', 'col2'],
1490+
'data': [[1, 0.5], [2, 0.75]], 'index_names': [None], 'column_names': [None]}
1491+
14471492
You can also specify the mapping type.
14481493
14491494
>>> from collections import OrderedDict, defaultdict
@@ -1519,6 +1564,23 @@ def to_dict(self, orient="dict", into=dict):
15191564
)
15201565
)
15211566

1567+
elif orient == "tight":
1568+
return into_c(
1569+
(
1570+
("index", self.index.tolist()),
1571+
("columns", self.columns.tolist()),
1572+
(
1573+
"data",
1574+
[
1575+
list(map(com.maybe_box_datetimelike, t))
1576+
for t in self.itertuples(index=False, name=None)
1577+
],
1578+
),
1579+
("index_names", list(self.index.names)),
1580+
("column_names", list(self.columns.names)),
1581+
)
1582+
)
1583+
15221584
elif orient == "series":
15231585
return into_c((k, com.maybe_box_datetimelike(v)) for k, v in self.items())
15241586

pandas/tests/frame/methods/test_to_dict.py

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
import pytest
66
import pytz
77

8-
from pandas import DataFrame, Series, Timestamp
8+
from pandas import DataFrame, Index, MultiIndex, Series, Timestamp
99
import pandas._testing as tm
1010

1111

@@ -271,3 +271,29 @@ def test_to_dict_orient_dtype(self):
271271
"c": type(df_dict["c"]),
272272
}
273273
assert result == expected
274+
275+
@pytest.mark.parametrize(
276+
"index",
277+
[
278+
None,
279+
Index(["aa", "bb"]),
280+
Index(["aa", "bb"], name="cc"),
281+
MultiIndex.from_tuples([("a", "b"), ("a", "c")]),
282+
MultiIndex.from_tuples([("a", "b"), ("a", "c")], names=["n1", "n2"]),
283+
],
284+
)
285+
@pytest.mark.parametrize(
286+
"columns",
287+
[
288+
["x", "y"],
289+
Index(["x", "y"]),
290+
Index(["x", "y"], name="z"),
291+
MultiIndex.from_tuples([("x", 1), ("y", 2)]),
292+
MultiIndex.from_tuples([("x", 1), ("y", 2)], names=["z1", "z2"]),
293+
],
294+
)
295+
def test_to_dict_orient_tight(self, index, columns):
296+
df = DataFrame.from_records([[1, 3], [2, 4]], columns=columns, index=index,)
297+
roundtrip = DataFrame.from_dict(df.to_dict(orient="tight"), orient="tight")
298+
299+
tm.assert_frame_equal(df, roundtrip)

0 commit comments

Comments
 (0)