Skip to content

TST/REF: implement tests.frame.constructors #38869

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Dec 31, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Empty file.
186 changes: 186 additions & 0 deletions pandas/tests/frame/constructors/test_from_dict.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,186 @@
from collections import OrderedDict

import numpy as np
import pytest

from pandas import DataFrame, Index, MultiIndex, Series
import pandas._testing as tm
from pandas.core.construction import create_series_with_explicit_dtype


class TestFromDict:
# Note: these tests are specific to the from_dict method, not for
# passing dictionaries to DataFrame.__init__

def test_from_dict_scalars_requires_index(self):
msg = "If using all scalar values, you must pass an index"
with pytest.raises(ValueError, match=msg):
DataFrame.from_dict(OrderedDict([("b", 8), ("a", 5), ("a", 6)]))

def test_constructor_list_of_odicts(self):
data = [
OrderedDict([["a", 1.5], ["b", 3], ["c", 4], ["d", 6]]),
OrderedDict([["a", 1.5], ["b", 3], ["d", 6]]),
OrderedDict([["a", 1.5], ["d", 6]]),
OrderedDict(),
OrderedDict([["a", 1.5], ["b", 3], ["c", 4]]),
OrderedDict([["b", 3], ["c", 4], ["d", 6]]),
]

result = DataFrame(data)
expected = DataFrame.from_dict(
dict(zip(range(len(data)), data)), orient="index"
)
tm.assert_frame_equal(result, expected.reindex(result.index))

def test_constructor_single_row(self):
data = [OrderedDict([["a", 1.5], ["b", 3], ["c", 4], ["d", 6]])]

result = DataFrame(data)
expected = DataFrame.from_dict(dict(zip([0], data)), orient="index").reindex(
result.index
)
tm.assert_frame_equal(result, expected)

def test_constructor_list_of_series(self):
data = [
OrderedDict([["a", 1.5], ["b", 3.0], ["c", 4.0]]),
OrderedDict([["a", 1.5], ["b", 3.0], ["c", 6.0]]),
]
sdict = OrderedDict(zip(["x", "y"], data))
idx = Index(["a", "b", "c"])

# all named
data2 = [
Series([1.5, 3, 4], idx, dtype="O", name="x"),
Series([1.5, 3, 6], idx, name="y"),
]
result = DataFrame(data2)
expected = DataFrame.from_dict(sdict, orient="index")
tm.assert_frame_equal(result, expected)

# some unnamed
data2 = [
Series([1.5, 3, 4], idx, dtype="O", name="x"),
Series([1.5, 3, 6], idx),
]
result = DataFrame(data2)

sdict = OrderedDict(zip(["x", "Unnamed 0"], data))
expected = DataFrame.from_dict(sdict, orient="index")
tm.assert_frame_equal(result, expected)

# none named
data = [
OrderedDict([["a", 1.5], ["b", 3], ["c", 4], ["d", 6]]),
OrderedDict([["a", 1.5], ["b", 3], ["d", 6]]),
OrderedDict([["a", 1.5], ["d", 6]]),
OrderedDict(),
OrderedDict([["a", 1.5], ["b", 3], ["c", 4]]),
OrderedDict([["b", 3], ["c", 4], ["d", 6]]),
]
data = [
create_series_with_explicit_dtype(d, dtype_if_empty=object) for d in data
]

result = DataFrame(data)
sdict = OrderedDict(zip(range(len(data)), data))
expected = DataFrame.from_dict(sdict, orient="index")
tm.assert_frame_equal(result, expected.reindex(result.index))

result2 = DataFrame(data, index=np.arange(6))
tm.assert_frame_equal(result, result2)

result = DataFrame([Series(dtype=object)])
expected = DataFrame(index=[0])
tm.assert_frame_equal(result, expected)

data = [
OrderedDict([["a", 1.5], ["b", 3.0], ["c", 4.0]]),
OrderedDict([["a", 1.5], ["b", 3.0], ["c", 6.0]]),
]
sdict = OrderedDict(zip(range(len(data)), data))

idx = Index(["a", "b", "c"])
data2 = [Series([1.5, 3, 4], idx, dtype="O"), Series([1.5, 3, 6], idx)]
result = DataFrame(data2)
expected = DataFrame.from_dict(sdict, orient="index")
tm.assert_frame_equal(result, expected)

def test_constructor_orient(self, float_string_frame):
data_dict = float_string_frame.T._series
recons = DataFrame.from_dict(data_dict, orient="index")
expected = float_string_frame.reindex(index=recons.index)
tm.assert_frame_equal(recons, expected)

# dict of sequence
a = {"hi": [32, 3, 3], "there": [3, 5, 3]}
rs = DataFrame.from_dict(a, orient="index")
xp = DataFrame.from_dict(a).T.reindex(list(a.keys()))
tm.assert_frame_equal(rs, xp)

def test_constructor_from_ordered_dict(self):
# GH#8425
a = OrderedDict(
[
("one", OrderedDict([("col_a", "foo1"), ("col_b", "bar1")])),
("two", OrderedDict([("col_a", "foo2"), ("col_b", "bar2")])),
("three", OrderedDict([("col_a", "foo3"), ("col_b", "bar3")])),
]
)
expected = DataFrame.from_dict(a, orient="columns").T
result = DataFrame.from_dict(a, orient="index")
tm.assert_frame_equal(result, expected)

def test_from_dict_columns_parameter(self):
# GH#18529
# Test new columns parameter for from_dict that was added to make
# from_items(..., orient='index', columns=[...]) easier to replicate
result = DataFrame.from_dict(
OrderedDict([("A", [1, 2]), ("B", [4, 5])]),
orient="index",
columns=["one", "two"],
)
expected = DataFrame([[1, 2], [4, 5]], index=["A", "B"], columns=["one", "two"])
tm.assert_frame_equal(result, expected)

msg = "cannot use columns parameter with orient='columns'"
with pytest.raises(ValueError, match=msg):
DataFrame.from_dict(
{"A": [1, 2], "B": [4, 5]},
orient="columns",
columns=["one", "two"],
)
with pytest.raises(ValueError, match=msg):
DataFrame.from_dict({"A": [1, 2], "B": [4, 5]}, columns=["one", "two"])

@pytest.mark.parametrize(
"data_dict, keys, orient",
[
({}, [], "index"),
([{("a",): 1}, {("a",): 2}], [("a",)], "columns"),
([OrderedDict([(("a",), 1), (("b",), 2)])], [("a",), ("b",)], "columns"),
([{("a", "b"): 1}], [("a", "b")], "columns"),
],
)
def test_constructor_from_dict_tuples(self, data_dict, keys, orient):
# GH#16769
df = DataFrame.from_dict(data_dict, orient)

result = df.columns
expected = Index(keys, dtype="object", tupleize_cols=False)

tm.assert_index_equal(result, expected)

def test_frame_dict_constructor_empty_series(self):
s1 = Series(
[1, 2, 3, 4], index=MultiIndex.from_tuples([(1, 2), (1, 3), (2, 2), (2, 4)])
)
s2 = Series(
[1, 2, 3, 4], index=MultiIndex.from_tuples([(1, 2), (1, 3), (3, 2), (3, 4)])
)
s3 = Series(dtype=object)

# it works!
DataFrame({"foo": s1, "bar": s2, "baz": s3})
DataFrame.from_dict({"foo": s1, "baz": s3, "bar": s2})
Loading