initialization from dicts for py>=3.6 maintains insertion order

tp · tp · commit 4e572534541f · 2018-02-24T19:05:09.000Z
diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt
@@ -3,7 +3,7 @@
 v0.23.0
 -------
 
-This is a major release from 0.21.1 and includes a number of API changes,
+This is a major release from 0.22.0 and includes a number of API changes,
 deprecations, new features, enhancements, and performance improvements along
 with a large number of bug fixes. We recommend that all users upgrade to this
 version.
@@ -240,7 +240,7 @@ The :func:`DataFrame.assign` now accepts dependent keyword arguments for python
   using ``.assign()`` to update an existing column. Previously, callables
   referring to other variables being updated would get the "old" values
 
-  Previous Behaviour:
+  Previous behaviour:
 
   .. code-block:: ipython
 
@@ -253,7 +253,7 @@ The :func:`DataFrame.assign` now accepts dependent keyword arguments for python
       1  3 -2
       2  4 -3
 
-  New Behaviour:
+  New behaviour:
 
   .. ipython:: python
 
@@ -320,6 +320,57 @@ If installed, we now require:
 | openpyxl        | 2.4.0           |          |
 +-----------------+-----------------+----------+
 
+.. _whatsnew_0230.api_breaking.dict_insertion_order:
+
+Creating DataFrames and Series from dicts preserves dict insertion order for Python 3.6+
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Until Python 3.6, dicts in Python had no formally defined ordering. In Python
+3.6 and later, dicts preserve insertion order (an implementation detail of
+CPython 3.6 that became a language guarantee in Python 3.7;
+see also `PEP 468 <https://www.python.org/dev/peps/pep-0468/>`_).
+From version 0.23, pandas uses insertion order when creating ``Series`` or
+``DataFrame`` objects from dicts (:issue:`19018`).
+
+Previous behaviour (and current behaviour if on Python < 3.6):
+
+.. code-block:: ipython
+
+   In [1]: pd.Series({'Income': 2000,
+      ...:            'Expenses': -1500,
+      ...:            'Taxes': -200,
+      ...:            'Net result': 300})
+   Expenses     -1500
+   Income        2000
+   Net result     300
+   Taxes         -200
+   dtype: int64
+
+Note the series above is ordered alphabetically by the index values.
+
+New behaviour (for Python >= 3.6):
+
+.. ipython:: python
+
+    pd.Series({'Income': 2000,
+               'Expenses': -1500,
+               'Taxes': -200,
+               'Net result': 300})
+
+Notice that the series is now ordered by insertion order. This new behaviour is
+used for all relevant pandas types (``Series``, ``DataFrame``, ``SparseSeries``
+and ``SparseDataFrame``).
+
+If you wish to retain the old behaviour while using Python >= 3.6, you can use
+``sort_index``:
+
+.. ipython:: python
+
+    pd.Series({'Income': 2000,
+               'Expenses': -1500,
+               'Taxes': -200,
+               'Net result': 300}).sort_index()
+
 .. _whatsnew_0230.api_breaking.deprecate_panel:
 
 Deprecate Panel
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -460,7 +460,7 @@ def _init_dict(self, data, index, columns, dtype=None):
 
         else:
             keys = list(data.keys())
-            if not isinstance(data, OrderedDict):
+            if not PY36 and not isinstance(data, OrderedDict):
                 keys = com._try_sort(keys)
             columns = data_names = Index(keys)
             arrays = [data[k] for k in keys]
diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -54,7 +54,7 @@
 from pandas import compat
 from pandas.io.formats.terminal import get_terminal_size
 from pandas.compat import (
-    zip, u, OrderedDict, StringIO, range, get_range_parameters)
+    zip, u, OrderedDict, StringIO, range, get_range_parameters, PY36)
 from pandas.compat.numpy import function as nv
 
 import pandas.core.ops as ops
@@ -286,7 +286,7 @@ def _init_dict(self, data, index=None, dtype=None):
         # Now we just make sure the order is respected, if any
         if index is not None:
             s = s.reindex(index, copy=False)
-        elif not isinstance(data, OrderedDict):
+        elif not PY36 and not isinstance(data, OrderedDict):
             try:
                 s = s.sort_index()
             except TypeError:
diff --git a/pandas/core/sparse/frame.py b/pandas/core/sparse/frame.py
@@ -6,7 +6,7 @@
 # pylint: disable=E1101,E1103,W0231,E0202
 
 import warnings
-from pandas.compat import lmap
+from pandas.compat import lmap, OrderedDict, PY36
 from pandas import compat
 import numpy as np
 
@@ -138,7 +138,10 @@ def _init_dict(self, data, index, columns, dtype=None):
             columns = _ensure_index(columns)
             data = {k: v for k, v in compat.iteritems(data) if k in columns}
         else:
-            columns = Index(com._try_sort(list(data.keys())))
+            keys = list(data.keys())
+            if not PY36 and not isinstance(data, OrderedDict):
+                keys = com._try_sort(keys)
+            columns = Index(keys)
 
         if index is None:
             index = extract_index(list(data.values()))
diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py
@@ -15,7 +15,7 @@
 
 from pandas.core.dtypes.common import is_integer_dtype
 from pandas.compat import (lmap, long, zip, range, lrange, lzip,
-                           OrderedDict, is_platform_little_endian)
+                           OrderedDict, is_platform_little_endian, PY36)
 from pandas import compat
 from pandas import (DataFrame, Index, Series, isna,
                     MultiIndex, Timedelta, Timestamp,
@@ -290,6 +290,18 @@ def test_constructor_dict(self):
         with tm.assert_raises_regex(ValueError, msg):
             DataFrame({'a': 0.7}, columns=['b'])
 
+    def test_constructor_dict_order(self):
+        # GH19018
+        # initialization ordering: by insertion order if Python >= 3.6, else
+        # sorted by key
+        d = {'b': self.ts2, 'a': self.ts1}
+        frame = DataFrame(data=d)
+        if PY36:
+            expected = DataFrame(data=d, columns=list('ba'))
+        else:
+            expected = DataFrame(data=d, columns=list('ab'))
+        tm.assert_frame_equal(frame, expected)
+
     def test_constructor_multi_index(self):
         # GH 4078
         # construction error with mi and all-nan frame
diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py
@@ -22,7 +22,7 @@
 from pandas._libs import lib
 from pandas._libs.tslib import iNaT
 
-from pandas.compat import lrange, range, zip, long
+from pandas.compat import lrange, range, zip, long, PY36
 from pandas.util.testing import assert_series_equal
 import pandas.util.testing as tm
 
@@ -783,6 +783,18 @@ def test_constructor_dict(self):
         expected.iloc[1] = 1
         assert_series_equal(result, expected)
 
+    def test_constructor_dict_order(self):
+        # GH19018
+        # initialization ordering: by insertion order if Python >= 3.6, else
+        # sorted by key
+        d = {'b': 1, 'a': 0, 'c': 2}
+        result = Series(d)
+        if PY36:
+            expected = Series([1, 0, 2], index=list('bac'))
+        else:
+            expected = Series([0, 1, 2], index=list('abc'))
+        tm.assert_series_equal(result, expected)
+
     @pytest.mark.parametrize("value", [2, np.nan, None, float('nan')])
     def test_constructor_dict_nan_key(self, value):
         # GH 18480
diff --git a/pandas/tests/sparse/frame/test_frame.py b/pandas/tests/sparse/frame/test_frame.py
@@ -139,6 +139,18 @@ def test_constructor(self):
 
         repr(self.frame)
 
+    def test_constructor_dict_order(self):
+        # GH19018
+        # initialization ordering: by insertion order if Python >= 3.6, else
+        # sorted by key
+        d = {'b': [2, 3], 'a': [0, 1]}
+        frame = SparseDataFrame(data=d)
+        if compat.PY36:
+            expected = SparseDataFrame(data=d, columns=list('ba'))
+        else:
+            expected = SparseDataFrame(data=d, columns=list('ab'))
+        tm.assert_sp_frame_equal(frame, expected)
+
     def test_constructor_ndarray(self):
         # no index or columns
         sp = SparseDataFrame(self.frame.values)
diff --git a/pandas/tests/sparse/series/test_series.py b/pandas/tests/sparse/series/test_series.py
@@ -14,7 +14,7 @@
 from pandas.tseries.offsets import BDay
 import pandas.util.testing as tm
 import pandas.util._test_decorators as td
-from pandas.compat import range
+from pandas.compat import range, PY36
 from pandas.core.reshape.util import cartesian_product
 
 import pandas.core.sparse.frame as spf
@@ -114,6 +114,18 @@ def test_constructor_dict_input(self):
         result = SparseSeries(constructor_dict)
         tm.assert_sp_series_equal(result, expected)
 
+    def test_constructor_dict_order(self):
+        # GH19018
+        # initialization ordering: by insertion order if Python >= 3.6, else
+        # sorted by key
+        d = {'b': 1, 'a': 0, 'c': 2}
+        result = SparseSeries(d)
+        if PY36:
+            expected = SparseSeries([1, 0, 2], index=list('bac'))
+        else:
+            expected = SparseSeries([0, 1, 2], index=list('abc'))
+        tm.assert_sp_series_equal(result, expected)
+
     def test_constructor_dtype(self):
         arr = SparseSeries([np.nan, 1, 2, np.nan])
         assert arr.dtype == np.float64