2
2
import math
3
3
import ctypes
4
4
5
- @pytest .mark .parametrize ("test_data" ,
6
- [
7
- {"a" : ["foo" , "bar" ],
8
- "b" : ["baz" , "qux" ]},
9
- {"a" : [1.5 , 2.5 , 3.5 ], "b" : [9.2 , 10.5 , 11.8 ]},
10
- {"A" : [1 , 2 , 3 , 4 ], "B" : [1 , 2 , 3 , 4 ]}
11
- ],
12
- ids = ["str_data" , "float_data" , "int_data" ])
5
+
6
+ @pytest .mark .parametrize (
7
+ "test_data" ,
8
+ [
9
+ {"a" : ["foo" , "bar" ], "b" : ["baz" , "qux" ]},
10
+ {"a" : [1.5 , 2.5 , 3.5 ], "b" : [9.2 , 10.5 , 11.8 ]},
11
+ {"A" : [1 , 2 , 3 , 4 ], "B" : [1 , 2 , 3 , 4 ]},
12
+ ],
13
+ ids = ["str_data" , "float_data" , "int_data" ],
14
+ )
13
15
def test_only_one_dtype (test_data , df_from_dict ):
14
16
columns = list (test_data .keys ())
15
17
df = df_from_dict (test_data )
@@ -23,10 +25,16 @@ def test_only_one_dtype(test_data, df_from_dict):
23
25
24
26
25
27
def test_float_int (df_from_dict ):
26
- df = df_from_dict ({"a" : [1 , 2 , 3 ], "b" : [3 , 4 , 5 ],
27
- "c" : [1.5 , 2.5 , 3.5 ], "d" : [9 , 10 , 11 ],
28
- "e" : [True , False , True ],
29
- "f" : ["a" , "" , "c" ]})
28
+ df = df_from_dict (
29
+ {
30
+ "a" : [1 , 2 , 3 ],
31
+ "b" : [3 , 4 , 5 ],
32
+ "c" : [1.5 , 2.5 , 3.5 ],
33
+ "d" : [9 , 10 , 11 ],
34
+ "e" : [True , False , True ],
35
+ "f" : ["a" , "" , "c" ],
36
+ }
37
+ )
30
38
dfX = df .__dataframe__ ()
31
39
columns = {"a" : 0 , "b" : 0 , "c" : 2 , "d" : 0 , "e" : 20 , "f" : 21 }
32
40
@@ -47,15 +55,20 @@ def test_na_float(df_from_dict):
47
55
colX = dfX .get_column_by_name ("a" )
48
56
assert colX .null_count == 1
49
57
58
+
50
59
def test_noncategorical(df_from_dict):
    """Check that a non-categorical column rejects ``describe_categorical``.

    Builds a single integer column ``"a"`` through ``df_from_dict``
    (presumably a pytest fixture defined outside this view), fetches it via
    the ``__dataframe__`` interchange object, and expects ``TypeError`` when
    ``describe_categorical`` is accessed.
    """
    df = df_from_dict({"a": [1, 2, 3]})
    dfX = df.__dataframe__()
    colX = dfX.get_column_by_name("a")
    with pytest.raises(TypeError):
        # The bare attribute access is intentional: the access itself is
        # what is expected to raise, so nothing is done with the result.
        colX.describe_categorical
56
65
66
+
57
67
def test_categorical (df_from_dict ):
58
- df = df_from_dict ({"weekday" : ["Mon" , "Tue" , "Mon" , "Wed" , "Mon" , "Thu" , "Fri" , "Sat" , "Sun" ]}, is_categorical = True )
68
+ df = df_from_dict (
69
+ {"weekday" : ["Mon" , "Tue" , "Mon" , "Wed" , "Mon" , "Thu" , "Fri" , "Sat" , "Sun" ]},
70
+ is_categorical = True ,
71
+ )
59
72
60
73
colX = df .__dataframe__ ().get_column_by_name ("weekday" )
61
74
is_ordered , is_dictionary , _ = colX .describe_categorical
@@ -64,14 +77,20 @@ def test_categorical(df_from_dict):
64
77
65
78
66
79
def test_dataframe(df_from_dict):
    """Check the basic dataframe-level accessors of the interchange object.

    Builds a three-column, three-row frame through ``df_from_dict``
    (presumably a pytest fixture defined outside this view) and asserts the
    reported column count, row count, chunk count and column names. It also
    asserts that selecting columns by position ``(0, 2)`` and by name
    ``("x", "z")`` yields the same column names.
    """
    df = df_from_dict(
        {"x": [True, True, False], "y": [1, 2, 0], "z": [9.2, 10.5, 11.8]}
    )
    dfX = df.__dataframe__()

    assert dfX.num_columns() == 3
    assert dfX.num_rows() == 3
    assert dfX.num_chunks() == 1
    assert dfX.column_names() == ["x", "y", "z"]
    # Positional and by-name selection must agree on the resulting names.
    assert (
        dfX.select_columns((0, 2)).column_names()
        == dfX.select_columns_by_name(("x", "z")).column_names()
    )
93
+
75
94
76
95
@pytest .mark .parametrize (["size" , "n_chunks" ], [(10 , 3 ), (12 , 3 ), (12 , 5 )])
77
96
def test_df_get_chunks (size , n_chunks , df_from_dict ):
@@ -81,6 +100,7 @@ def test_df_get_chunks(size, n_chunks, df_from_dict):
81
100
assert len (chunks ) == n_chunks
82
101
assert sum (chunk .num_rows () for chunk in chunks ) == size
83
102
103
+
84
104
@pytest .mark .parametrize (["size" , "n_chunks" ], [(10 , 3 ), (12 , 3 ), (12 , 5 )])
85
105
def test_column_get_chunks (size , n_chunks , df_from_dict ):
86
106
df = df_from_dict ({"x" : list (range (size ))})
@@ -89,6 +109,7 @@ def test_column_get_chunks(size, n_chunks, df_from_dict):
89
109
assert len (chunks ) == n_chunks
90
110
assert sum (chunk .size for chunk in chunks ) == size
91
111
112
+
92
113
def test_get_columns (df_from_dict ):
93
114
df = df_from_dict ({"a" : [0 , 1 ], "b" : [2.5 , 3.5 ]})
94
115
dfX = df .__dataframe__ ()
@@ -98,6 +119,7 @@ def test_get_columns(df_from_dict):
98
119
assert dfX .get_column (0 ).dtype [0 ] == 0
99
120
assert dfX .get_column (1 ).dtype [0 ] == 2
100
121
122
+
101
123
def test_buffer (df_from_dict ):
102
124
arr = [0 , 1 , - 1 ]
103
125
df = df_from_dict ({"a" : arr })
@@ -113,12 +135,14 @@ def test_buffer(df_from_dict):
113
135
114
136
assert dataDtype [0 ] == 0
115
137
116
- if device == 1 : # CPU-only as we're going to directly read memory here
138
+ if device == 1 : # CPU-only as we're going to directly read memory here
117
139
bitwidth = dataDtype [1 ]
118
- ctype = {8 : ctypes .c_int8 ,
119
- 16 : ctypes .c_int16 ,
120
- 32 : ctypes .c_int32 ,
121
- 64 : ctypes .c_int64 }[bitwidth ]
140
+ ctype = {
141
+ 8 : ctypes .c_int8 ,
142
+ 16 : ctypes .c_int16 ,
143
+ 32 : ctypes .c_int32 ,
144
+ 64 : ctypes .c_int64 ,
145
+ }[bitwidth ]
122
146
123
147
for idx , truth in enumerate (arr ):
124
148
val = ctype .from_address (dataBuf .ptr + idx * (bitwidth // 8 )).value
0 commit comments