Skip to content

Commit 4e57253

Browse files
author
tp
committed
initialization from dicts for py>=3.6 maintains insertion order
1 parent feedf66 commit 4e57253

File tree

8 files changed

+113
-11
lines changed

8 files changed

+113
-11
lines changed

doc/source/whatsnew/v0.23.0.txt

Lines changed: 54 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
v0.23.0
44
-------
55

6-
This is a major release from 0.21.1 and includes a number of API changes,
6+
This is a major release from 0.22.0 and includes a number of API changes,
77
deprecations, new features, enhancements, and performance improvements along
88
with a large number of bug fixes. We recommend that all users upgrade to this
99
version.
@@ -240,7 +240,7 @@ The :func:`DataFrame.assign` now accepts dependent keyword arguments for python
240240
using ``.assign()`` to update an existing column. Previously, callables
241241
referring to other variables being updated would get the "old" values
242242

243-
Previous Behaviour:
243+
Previous behaviour:
244244

245245
.. code-block:: ipython
246246

@@ -253,7 +253,7 @@ The :func:`DataFrame.assign` now accepts dependent keyword arguments for python
253253
1 3 -2
254254
2 4 -3
255255

256-
New Behaviour:
256+
New behaviour:
257257

258258
.. ipython:: python
259259

@@ -320,6 +320,57 @@ If installed, we now require:
320320
| openpyxl | 2.4.0 | |
321321
+-----------------+-----------------+----------+
322322

323+
.. _whatsnew_0230.api_breaking.dict_insertion_order:
324+
325+
Creating dataframes and series from dicts preserves dict insertion order for Python 3.6+
326+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
327+
328+
Until Python 3.6, dicts in Python had no formally defined ordering. Python
329+
versions 3.6 and later have changed the ordering definition of dicts, so dicts
330+
in these newer versions are ordered by insertion order
331+
(see also `PEP 468 <https://www.python.org/dev/peps/pep-0468/>`_).
332+
From version 0.23, pandas uses insertion order when creating series or
333+
data frames from dicts (:issue:`19018`).
334+
335+
Previous behaviour (and current behaviour if on Python < 3.6):
336+
337+
.. code-block:: ipython
338+
339+
In [1]: pd.Series({'Income': 2000,
340+
... 'Expenses': -1500,
341+
... 'Taxes': -200,
342+
... 'Net result': 300})
343+
Expenses -1500
344+
Income 2000
345+
Net result 300
346+
Taxes -200
347+
dtype: int64
348+
349+
Note that the series above is ordered alphabetically by the index values.
350+
351+
New behaviour (for Python >= 3.6):
352+
353+
.. ipython:: python
354+
355+
pd.Series({'Income': 2000,
356+
'Expenses': -1500,
357+
'Taxes': -200,
358+
'Net result': 300})
359+
360+
Notice that the series is now ordered by insertion order. This new behaviour is
361+
used for all relevant pandas types (``Series``, ``DataFrame``, ``SparseSeries``
362+
and ``SparseDataFrame``).
363+
364+
If you wish to retain the old behaviour while using Python >= 3.6, you can use
365+
``sort_index``:
366+
367+
.. ipython:: python
368+
369+
pd.Series({'Income': 2000,
370+
'Expenses': -1500,
371+
'Taxes': -200,
372+
'Net result': 300}).sort_index()
373+
323374
.. _whatsnew_0230.api_breaking.deprecate_panel:
324375

325376
Deprecate Panel

pandas/core/frame.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -460,7 +460,7 @@ def _init_dict(self, data, index, columns, dtype=None):
460460

461461
else:
462462
keys = list(data.keys())
463-
if not isinstance(data, OrderedDict):
463+
if not PY36 and not isinstance(data, OrderedDict):
464464
keys = com._try_sort(keys)
465465
columns = data_names = Index(keys)
466466
arrays = [data[k] for k in keys]

pandas/core/series.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@
5454
from pandas import compat
5555
from pandas.io.formats.terminal import get_terminal_size
5656
from pandas.compat import (
57-
zip, u, OrderedDict, StringIO, range, get_range_parameters)
57+
zip, u, OrderedDict, StringIO, range, get_range_parameters, PY36)
5858
from pandas.compat.numpy import function as nv
5959

6060
import pandas.core.ops as ops
@@ -286,7 +286,7 @@ def _init_dict(self, data, index=None, dtype=None):
286286
# Now we just make sure the order is respected, if any
287287
if index is not None:
288288
s = s.reindex(index, copy=False)
289-
elif not isinstance(data, OrderedDict):
289+
elif not PY36 and not isinstance(data, OrderedDict):
290290
try:
291291
s = s.sort_index()
292292
except TypeError:

pandas/core/sparse/frame.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
# pylint: disable=E1101,E1103,W0231,E0202
77

88
import warnings
9-
from pandas.compat import lmap
9+
from pandas.compat import lmap, OrderedDict, PY36
1010
from pandas import compat
1111
import numpy as np
1212

@@ -138,7 +138,10 @@ def _init_dict(self, data, index, columns, dtype=None):
138138
columns = _ensure_index(columns)
139139
data = {k: v for k, v in compat.iteritems(data) if k in columns}
140140
else:
141-
columns = Index(com._try_sort(list(data.keys())))
141+
keys = list(data.keys())
142+
if not PY36 and not isinstance(data, OrderedDict):
143+
keys = com._try_sort(keys)
144+
columns = Index(keys)
142145

143146
if index is None:
144147
index = extract_index(list(data.values()))

pandas/tests/frame/test_constructors.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515

1616
from pandas.core.dtypes.common import is_integer_dtype
1717
from pandas.compat import (lmap, long, zip, range, lrange, lzip,
18-
OrderedDict, is_platform_little_endian)
18+
OrderedDict, is_platform_little_endian, PY36)
1919
from pandas import compat
2020
from pandas import (DataFrame, Index, Series, isna,
2121
MultiIndex, Timedelta, Timestamp,
@@ -290,6 +290,18 @@ def test_constructor_dict(self):
290290
with tm.assert_raises_regex(ValueError, msg):
291291
DataFrame({'a': 0.7}, columns=['b'])
292292

293+
def test_constructor_dict_order(self):
294+
# GH19018
295+
# initialization ordering: by insertion order if python>= 3.6, else
296+
# order by key
297+
d = {'b': self.ts2, 'a': self.ts1}
298+
frame = DataFrame(data=d)
299+
if compat.PY36:
300+
expected = DataFrame(data=d, columns=list('ba'))
301+
else:
302+
expected = DataFrame(data=d, columns=list('ab'))
303+
tm.assert_frame_equal(frame, expected)
304+
293305
def test_constructor_multi_index(self):
294306
# GH 4078
295307
# construction error with mi and all-nan frame

pandas/tests/series/test_constructors.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
from pandas._libs import lib
2323
from pandas._libs.tslib import iNaT
2424

25-
from pandas.compat import lrange, range, zip, long
25+
from pandas.compat import lrange, range, zip, long, PY36
2626
from pandas.util.testing import assert_series_equal
2727
import pandas.util.testing as tm
2828

@@ -783,6 +783,18 @@ def test_constructor_dict(self):
783783
expected.iloc[1] = 1
784784
assert_series_equal(result, expected)
785785

786+
def test_constructor_dict_order(self):
787+
# GH19018
788+
# initialization ordering: by insertion order if python>= 3.6, else
789+
# order by key
790+
d = {'b': 1, 'a': 0, 'c': 2}
791+
result = Series(d)
792+
if PY36:
793+
expected = Series([1, 0, 2], index=list('bac'))
794+
else:
795+
expected = Series([0, 1, 2], index=list('abc'))
796+
tm.assert_series_equal(result, expected)
797+
786798
@pytest.mark.parametrize("value", [2, np.nan, None, float('nan')])
787799
def test_constructor_dict_nan_key(self, value):
788800
# GH 18480

pandas/tests/sparse/frame/test_frame.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,18 @@ def test_constructor(self):
139139

140140
repr(self.frame)
141141

142+
def test_constructor_dict_order(self):
143+
# GH19018
144+
# initialization ordering: by insertion order if python>= 3.6, else
145+
# order by key
146+
d = {'b': [2, 3], 'a': [0, 1]}
147+
frame = SparseDataFrame(data=d)
148+
if compat.PY36:
149+
expected = SparseDataFrame(data=d, columns=list('ba'))
150+
else:
151+
expected = SparseDataFrame(data=d, columns=list('ab'))
152+
tm.assert_sp_frame_equal(frame, expected)
153+
142154
def test_constructor_ndarray(self):
143155
# no index or columns
144156
sp = SparseDataFrame(self.frame.values)

pandas/tests/sparse/series/test_series.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
from pandas.tseries.offsets import BDay
1515
import pandas.util.testing as tm
1616
import pandas.util._test_decorators as td
17-
from pandas.compat import range
17+
from pandas.compat import range, PY36
1818
from pandas.core.reshape.util import cartesian_product
1919

2020
import pandas.core.sparse.frame as spf
@@ -114,6 +114,18 @@ def test_constructor_dict_input(self):
114114
result = SparseSeries(constructor_dict)
115115
tm.assert_sp_series_equal(result, expected)
116116

117+
def test_constructor_dict_order(self):
118+
# GH19018
119+
# initialization ordering: by insertion order if python>= 3.6, else
120+
# order by key
121+
d = {'b': 1, 'a': 0, 'c': 2}
122+
result = SparseSeries(d)
123+
if PY36:
124+
expected = SparseSeries([1, 0, 2], index=list('bac'))
125+
else:
126+
expected = SparseSeries([0, 1, 2], index=list('abc'))
127+
tm.assert_sp_series_equal(result, expected)
128+
117129
def test_constructor_dtype(self):
118130
arr = SparseSeries([np.nan, 1, 2, np.nan])
119131
assert arr.dtype == np.float64

0 commit comments

Comments
 (0)