Add orient=tight format for dictionaries

Dr-Irv · Dr-Irv · commit bed97724ea4b · 2020-07-15T13:28:00.000-04:00
diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
@@ -271,6 +271,23 @@ change, as ``fsspec`` will still bring in the same packages as before.
 
 .. _fsspec docs: https://filesystem-spec.readthedocs.io/en/latest/
 
+.. _whatsnew_110.dict_tight:
+
+DataFrame.from_dict and DataFrame.to_dict have new ``'tight'`` option
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+A new ``'tight'`` dictionary format that preserves :class:`MultiIndex` entries and names
+is now available, and can be used with the standard ``json`` library to produce a tight
+representation of :class:`DataFrame` objects (:issue:`4889`).
+
+.. ipython:: python
+
+    df = pd.DataFrame.from_records([[1, 3], [2, 4]],
+            index=pd.MultiIndex.from_tuples([("a", "b"), ("a", "c")], names=["n1", "n2"]),
+            columns=pd.MultiIndex.from_tuples([("x", 1), ("y", 2)], names=["z1", "z2"]))
+    df
+    df.to_dict(orient='tight')
+
 .. _whatsnew_110.enhancements.other:
 
 Other enhancements
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -1236,15 +1236,21 @@ def from_dict(cls, data, orient="columns", dtype=None, columns=None) -> "DataFra
         ----------
         data : dict
             Of the form {field : array-like} or {field : dict}.
-        orient : {'columns', 'index'}, default 'columns'
+        orient : {'columns', 'index', 'tight'}, default 'columns'
             The "orientation" of the data. If the keys of the passed dict
             should be the columns of the resulting DataFrame, pass 'columns'
             (default). Otherwise if the keys should be rows, pass 'index'.
+            If 'tight', assume a dict with keys ['index', 'columns', 'data',
+            'index_names', 'column_names'].
+
+            .. versionadded:: 1.1.0
+               'tight' as an allowed value for the ``orient`` argument
+
         dtype : dtype, default None
             Data type to force, otherwise infer.
         columns : list, default None
             Column labels to use when ``orient='index'``. Raises a ValueError
-            if used with ``orient='columns'``.
+            if used with ``orient='columns'`` or ``orient='tight'``.
 
             .. versionadded:: 0.23.0
 
@@ -1257,6 +1263,7 @@ def from_dict(cls, data, orient="columns", dtype=None, columns=None) -> "DataFra
         DataFrame.from_records : DataFrame from structured ndarray, sequence
             of tuples or dicts, or DataFrame.
         DataFrame : DataFrame object creation using constructor.
+        DataFrame.to_dict : Convert the DataFrame to a dictionary.
 
         Examples
         --------
@@ -1279,6 +1286,20 @@ def from_dict(cls, data, orient="columns", dtype=None, columns=None) -> "DataFra
         row_1  3  2  1  0
         row_2  a  b  c  d
 
+        Specify ``orient='tight'`` to create the DataFrame using a 'tight' format:
+
+        >>> data = {'index': [('a', 'b'), ('a', 'c')],
+        ...         'columns': [('x', 1), ('y', 2)],
+        ...         'data': [[1, 3], [2, 4]],
+        ...         'index_names': ['n1', 'n2'],
+        ...         'column_names': ['z1', 'z2']}
+        >>> pd.DataFrame.from_dict(data, orient='tight')
+        z1     x  y
+        z2     1  2
+        n1 n2
+        a  b   1  3
+           c   2  4
+
         When using the 'index' orientation, the column names can be
         specified manually:
 
@@ -1297,13 +1318,27 @@ def from_dict(cls, data, orient="columns", dtype=None, columns=None) -> "DataFra
                     data = _from_nested_dict(data)
                 else:
                     data, index = list(data.values()), list(data.keys())
-        elif orient == "columns":
+        elif orient == "columns" or orient == "tight":
             if columns is not None:
-                raise ValueError("cannot use columns parameter with orient='columns'")
+                raise ValueError(f"cannot use columns parameter with orient='{orient}'")
         else:  # pragma: no cover
             raise ValueError("only recognize index or columns for orient")
 
-        return cls(data, index=index, columns=columns, dtype=dtype)
+        if orient != "tight":
+            return cls(data, index=index, columns=columns, dtype=dtype)
+        else:
+            realdata = data["data"]
+
+            def create_index(indexlist, namelist):
+                if len(namelist) > 1:
+                    index = MultiIndex.from_tuples(indexlist, names=namelist)
+                else:
+                    index = Index(indexlist, name=namelist[0])
+                return index
+
+            index = create_index(data["index"], data["index_names"])
+            columns = create_index(data["columns"], data["column_names"])
+            return cls(realdata, index=index, columns=columns, dtype=dtype)
 
     def to_numpy(
         self, dtype=None, copy: bool = False, na_value=lib.no_default
@@ -1388,13 +1423,19 @@ def to_dict(self, orient="dict", into=dict):
             - 'series' : dict like {column -> Series(values)}
             - 'split' : dict like
               {'index' -> [index], 'columns' -> [columns], 'data' -> [values]}
+            - 'tight' : dict like
+              {'index' -> [index], 'columns' -> [columns], 'data' -> [values],
+               'index_names' -> [index.names], 'column_names' -> [column.names]}
             - 'records' : list like
               [{column -> value}, ... , {column -> value}]
             - 'index' : dict like {index -> {column -> value}}
 
             Abbreviations are allowed. `s` indicates `series` and `sp`
             indicates `split`.
 
+            .. versionadded:: 1.1.0
+               'tight' as an allowed value for the ``orient`` argument
+
         into : class, default dict
             The collections.abc.Mapping subclass used for all Mappings
             in the return value.  Can be the actual class or an empty
@@ -1444,6 +1485,10 @@ def to_dict(self, orient="dict", into=dict):
         >>> df.to_dict('index')
         {'row1': {'col1': 1, 'col2': 0.5}, 'row2': {'col1': 2, 'col2': 0.75}}
 
+        >>> df.to_dict('tight')
+        {'index': ['row1', 'row2'], 'columns': ['col1', 'col2'],
+         'data': [[1, 0.5], [2, 0.75]], 'index_names': [None], 'column_names': [None]}
+
         You can also specify the mapping type.
 
         >>> from collections import OrderedDict, defaultdict
@@ -1519,6 +1564,23 @@ def to_dict(self, orient="dict", into=dict):
                 )
             )
 
+        elif orient == "tight":
+            return into_c(
+                (
+                    ("index", self.index.tolist()),
+                    ("columns", self.columns.tolist()),
+                    (
+                        "data",
+                        [
+                            list(map(com.maybe_box_datetimelike, t))
+                            for t in self.itertuples(index=False, name=None)
+                        ],
+                    ),
+                    ("index_names", list(self.index.names)),
+                    ("column_names", list(self.columns.names)),
+                )
+            )
+
         elif orient == "series":
             return into_c((k, com.maybe_box_datetimelike(v)) for k, v in self.items())
 
diff --git a/pandas/tests/frame/methods/test_to_dict.py b/pandas/tests/frame/methods/test_to_dict.py
@@ -5,7 +5,7 @@
 import pytest
 import pytz
 
-from pandas import DataFrame, Series, Timestamp
+from pandas import DataFrame, Index, MultiIndex, Series, Timestamp
 import pandas._testing as tm
 
 
@@ -271,3 +271,29 @@ def test_to_dict_orient_dtype(self):
                 "c": type(df_dict["c"]),
             }
             assert result == expected
+
+    @pytest.mark.parametrize(
+        "index",
+        [
+            None,
+            Index(["aa", "bb"]),
+            Index(["aa", "bb"], name="cc"),
+            MultiIndex.from_tuples([("a", "b"), ("a", "c")]),
+            MultiIndex.from_tuples([("a", "b"), ("a", "c")], names=["n1", "n2"]),
+        ],
+    )
+    @pytest.mark.parametrize(
+        "columns",
+        [
+            ["x", "y"],
+            Index(["x", "y"]),
+            Index(["x", "y"], name="z"),
+            MultiIndex.from_tuples([("x", 1), ("y", 2)]),
+            MultiIndex.from_tuples([("x", 1), ("y", 2)], names=["z1", "z2"]),
+        ],
+    )
+    def test_to_dict_orient_tight(self, index, columns):
+        df = DataFrame.from_records([[1, 3], [2, 4]], columns=columns, index=index,)
+        roundtrip = DataFrame.from_dict(df.to_dict(orient="tight"), orient="tight")
+
+        tm.assert_frame_equal(df, roundtrip)