Skip to content

Commit 734b811

Browse files
committed
Fix protocol tests
Signed-off-by: Vasily Litvinov <vasilij.n.litvinov@intel.com>
1 parent 0ebd699 commit 734b811

File tree

1 file changed: 82 additions and 133 deletions.

pandas/tests/api/test_protocol.py

Lines changed: 82 additions & 133 deletions
Original file line numberDiff line numberDiff line change
@@ -1,140 +1,89 @@
11
import pytest
2-
import numpy as np
2+
import math
33

44
@pytest.mark.parametrize("test_data",
55
[
6-
({'a': [np.array([1, 2, 3]), np.array([4, 5, 6])],
7-
'b': [np.array([1.5, 2.0, 3.2]), np.array([4.1, 5.7, 6.9])]},
8-
np.object_, None),
9-
({'a': [1.5, 2.5, 3.5], 'b': [9.2, 10.5, 11.8]}, np.float64, None),
10-
({'A': [1, 2, 3, 4], 'B': [1, 2, 3, 4]}, np.int64, np.float64)
6+
{'a': ["foo", "bar"],
7+
'b': ["baz", "qux"]},
8+
{'a': [1.5, 2.5, 3.5], 'b': [9.2, 10.5, 11.8]},
9+
{'A': [1, 2, 3, 4], 'B': [1, 2, 3, 4]}
1110
],
12-
ids=["array_data", "float_data", "int_data"])
13-
def test_only_one_data(test_data, create_df_from_dict):
14-
data, dtype, new_dtype = test_data
15-
columns = list(data.keys())
16-
df = create_df_from_dict(data)
17-
df2 = df.__dataframe__()
18-
new_dtype = dtype if new_dtype is None else new_dtype
19-
assert df.columns.values.tolist() == columns
20-
val = len(df[columns[0]])-1
21-
column_size = df.size
22-
for column in columns:
23-
assert df[column].tolist() == df[column].tolist()
24-
assert df[column].dtype.type is dtype
25-
assert df2.get_column_by_name(column).null_count == 0
26-
assert df2.get_column_by_name(column).size == column_size
27-
assert df2.get_column_by_name(column).offset == 0
28-
assert not df2["x"].is_masked
29-
n = np.random.randint(0, val)
30-
(df[column])[n] = None
31-
assert df[column].dtype.type is new_dtype
32-
assert df2.get_column_by_name(column).null_count == 1
33-
34-
35-
def test_float_int(create_df_from_dict):
36-
df = create_df_from_dict({'a': [1, 2, 3], 'b': [3, 4, 5],
37-
'c': [1.5, 2.5, 3.5], 'd': [9, 10, 11]})
38-
df2 = df.__dataframe__()
39-
columns = ['a', 'b', 'c', 'd']
40-
assert df.columns.values.tolist() == columns
41-
for column in columns:
42-
assert df[column].tolist() == df[column].tolist()
43-
if column is 'c':
44-
assert df[column].dtype.type is np.float64
45-
else:
46-
assert df[column].dtype.type is np.int64
47-
48-
assert df2.get_column_by_name(column).null_count == 0
49-
assert df2.get_column_by_name(column).size == 3
50-
assert df2.get_column_by_name(column).offset == 0
51-
52-
n = np.random.randint(0, 2)
53-
(df[column])[n] = None
54-
assert df[column].dtype.type is np.float64
55-
assert df2.get_column_by_name(column).null_count == 1
56-
57-
58-
def test_mixed_intfloatbool(create_df_from_dict):
59-
df = create_df_from_dict({"x": np.array([True, True, False]),
60-
"y": np.array([1, 2, 0]),
61-
"z": np.array([9.2, 10.5, 11.8])})
62-
df2 = df.__dataframe__()
63-
columns = ['x', 'y', 'z']
64-
assert df.columns.values.tolist() == columns
65-
for column in columns:
66-
assert df[column].tolist() == df[column].tolist()
67-
assert df2.get_column_by_name(column).null_count == 0
68-
assert df2.get_column_by_name(column).size == 3
69-
assert df2.get_column_by_name(column).offset == 0
11+
ids=["str_data", "float_data", "int_data"])
12+
def test_only_one_dtype(test_data, df_from_dict):
13+
columns = list(test_data.keys())
14+
df = df_from_dict(test_data)
15+
dfX = df.__dataframe__()
7016

71-
assert df["x"].dtype.type is np.bool_
72-
assert df["y"].dtype.type is np.int32
73-
assert df["z"].dtype.type is np.float64
74-
75-
assert df2.get_column_by_name("x")._allow_copy == True
76-
77-
for column in columns:
78-
n = np.random.randint(0, 2)
79-
(df[column])[n] = None
80-
if column is "x":
81-
assert df[column].dtype.type is np.object_
82-
else:
83-
assert df[column].dtype.type is np.float64
84-
assert df2.get_column_by_name(column).null_count == 1
85-
86-
87-
def test_string_dtype(create_df_from_dict):
88-
df = create_df_from_dict({"A": ["a", "b", "cdef", "", "g"]})
89-
df2 = df.__dataframe__()
90-
columns = ['A']
91-
assert df.columns.values.tolist() == columns
17+
column_size = len(test_data[columns[0]])
9218
for column in columns:
93-
assert df[column].tolist() == df[column].tolist()
94-
assert df[column].dtype.type is np.object_
95-
assert df2.get_column_by_name(column).null_count == 0
96-
97-
98-
def test_categorical(create_df_from_dict):
99-
df = create_df_from_dict({"year": [2012, 2013, 2015, 2019], "weekday": [0, 1, 4, 6]})
100-
df = df.categorize("year", min_value=2012, max_value=2019)
101-
df = df.categorize("weekday", labels=["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"])
102-
# Some detailed testing for correctness of dtype and null handling:
103-
col = df.__dataframe__().get_column_by_name("year")
104-
assert col.describe_categorical == (False, True, {0: 2012, 1: 2013, 2: 2014, 3: 2015, 4: 2016, 5: 2017, 6: 2018, 7: 2019})
105-
assert col.describe_null == (0, None)
106-
col2 = df.__dataframe__().get_column_by_name("weekday")
107-
assert col2.describe_categorical == (False, True, {0: "Mon", 1: "Tue", 2: "Wed", 3: "Thu", 4: "Fri", 5: "Sat", 6: "Sun"})
108-
assert col2.describe_null == (0, None)
109-
110-
111-
def test_dataframe(create_df_from_dict):
112-
df = create_df_from_dict({"x": [True, True, False], "y": [1, 2, 0], "z": [9.2, 10.5, 11.8]})
113-
df2 = df.__dataframe__()
114-
assert df2._allow_copy == True
115-
assert df2.num_columns() == 3
116-
assert df2.num_rows() == 3
117-
assert df2.num_chunks() == 1
118-
assert df2.column_names() == ["x", "y", "z"]
119-
assert df2.select_columns((0, 2))._df[:, 0].tolist() == df2.select_columns_by_name(("x", "z"))._df[:, 0].tolist()
120-
assert df2.select_columns((0, 2))._df[:, 1].tolist() == df2.select_columns_by_name(("x", "z"))._df[:, 1].tolist()
121-
122-
123-
def test_chunks(create_df_from_dict):
124-
df = create_df_from_dict({"x": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]})
125-
df2 = df.__dataframe__()
126-
chunk_iter = iter(df2.get_chunks(3))
127-
chunk = next(chunk_iter)
128-
assert chunk.num_rows() == 4
129-
chunk = next(chunk_iter)
130-
assert chunk.num_rows() == 4
131-
chunk = next(chunk_iter)
132-
assert chunk.num_rows() == 2
133-
with pytest.raises(StopIteration):
134-
chunk = next(chunk_iter)
135-
136-
137-
def test_get_chunks(create_df_from_dict):
138-
df = create_df_from_dict({"x": [1]})
139-
df2 = df.__dataframe__()
140-
assert df2.get_chunks() == 1
19+
assert dfX.get_column_by_name(column).null_count == 0
20+
assert dfX.get_column_by_name(column).size == column_size
21+
assert dfX.get_column_by_name(column).offset == 0
22+
23+
24+
def test_float_int(df_from_dict):
    """Check per-column metadata of a frame that mixes several dtypes.

    Despite the name, the frame built here holds integer, float, boolean
    and string columns.  Every column of the interchange object must
    report zero nulls, three elements, a zero offset and the expected
    leading ``dtype`` entry; the float column must also report 64 as the
    second ``dtype`` entry.
    """
    source = {
        'a': [1, 2, 3],
        'b': [3, 4, 5],
        'c': [1.5, 2.5, 3.5],
        'd': [9, 10, 11],
        'e': [True, False, True],
        'f': ["a", "", "c"],
    }
    dfX = df_from_dict(source).__dataframe__()

    # Expected value of ``dtype[0]`` for each column.
    # NOTE(review): these look like the interchange protocol's DtypeKind
    # codes (0 = int, 2 = float, 20 = bool, 21 = string) -- confirm
    # against the protocol specification.
    expected_kinds = {'a': 0, 'b': 0, 'c': 2, 'd': 0, 'e': 20, 'f': 21}

    for name, expected_kind in expected_kinds.items():
        colX = dfX.get_column_by_name(name)
        assert colX.null_count == 0
        assert colX.size == 3
        assert colX.offset == 0
        assert colX.dtype[0] == expected_kind

    # Presumably the bit width of the float column -- verify against the spec.
    float_col = dfX.get_column_by_name("c")
    assert float_col.dtype[1] == 64
42+
43+
def test_na_float(df_from_dict):
    """A float column holding a single NaN must report ``null_count == 1``."""
    values = [1.0, math.nan, 2.0]
    dfX = df_from_dict({'a': values}).__dataframe__()
    assert dfX.get_column_by_name('a').null_count == 1
49+
def test_noncategorical(df_from_dict):
    """Reading ``describe_categorical`` on an integer column raises TypeError."""
    dfX = df_from_dict({'a': [1, 2, 3]}).__dataframe__()
    int_col = dfX.get_column_by_name('a')
    with pytest.raises(TypeError):
        # The bare attribute access is the operation under test; its value
        # is intentionally discarded.
        int_col.describe_categorical
56+
def test_categorical(df_from_dict):
    """Check the leading flags returned by ``describe_categorical``.

    The column is built through the fixture with ``is_categorical=True``.
    The description must unpack into exactly three items, and the first
    two must be real ``bool`` objects; the third item is not inspected.
    """
    weekdays = ["Mon", "Tue", "Mon", "Wed", "Mon", "Thu", "Fri", "Sat", "Sun"]
    df = df_from_dict({"weekday": weekdays}, is_categorical=True)

    description = df.__dataframe__().get_column_by_name("weekday").describe_categorical
    is_ordered, is_dictionary, _ = description

    assert isinstance(is_ordered, bool)
    assert isinstance(is_dictionary, bool)
64+
65+
def test_dataframe(df_from_dict):
    """Check frame-level metadata and the two column-selection entry points.

    A three-column, three-row frame must report matching counts, a single
    chunk and its column names in order.  Selecting columns 0 and 2 by
    position must yield the same column names as selecting "x" and "z" by
    name.
    """
    data = {
        "x": [True, True, False],
        "y": [1, 2, 0],
        "z": [9.2, 10.5, 11.8],
    }
    dfX = df_from_dict(data).__dataframe__()

    assert dfX.num_columns() == 3
    assert dfX.num_rows() == 3
    assert dfX.num_chunks() == 1
    assert dfX.column_names() == ["x", "y", "z"]

    names_by_position = dfX.select_columns((0, 2)).column_names()
    names_by_label = dfX.select_columns_by_name(("x", "z")).column_names()
    assert names_by_position == names_by_label
75+
@pytest.mark.parametrize(
    ("size", "n_chunks"),
    [(10, 3), (12, 3), (12, 5)],
)
def test_chunks(size, n_chunks, df_from_dict):
    """Splitting into ``n_chunks`` yields that many chunks covering all rows.

    Only the number of chunks and the total row count are checked; the
    size of each individual chunk is not constrained.
    """
    dfX = df_from_dict({"x": list(range(size))}).__dataframe__()
    chunks = list(dfX.get_chunks(n_chunks))
    assert len(chunks) == n_chunks

    total_rows = 0
    for chunk in chunks:
        total_rows += chunk.num_rows()
    assert total_rows == size
85+
86+
def test_get_chunks(df_from_dict):
    """Calling ``get_chunks()`` with no argument on a one-row frame yields one chunk."""
    dfX = df_from_dict({"x": [1]}).__dataframe__()
    chunks = list(dfX.get_chunks())
    assert len(chunks) == 1

0 commit comments

Comments
 (0)