Skip to content

Commit ab2455f

Browse files
authored
test: fix json tests fail locally with disable allow_large_results (#1523)
* test: fix json tests fail locally with disable allow_large_results
* fix test_read_gbq_w_json_in_array
* move test_read_gbq_w_json* to test_session.py
1 parent e9fe815 commit ab2455f

File tree

5 files changed

+238
-179
lines changed

5 files changed

+238
-179
lines changed

tests/system/small/bigquery/test_json.py

Lines changed: 50 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -22,13 +22,6 @@
2222
import bigframes.pandas as bpd
2323

2424

25-
@pytest.fixture(scope="module", autouse=True)
26-
def use_large_query_path():
27-
# b/401630655
28-
with bpd.option_context("bigquery.allow_large_results", True):
29-
yield
30-
31-
3225
@pytest.mark.parametrize(
3326
("json_path", "expected_json"),
3427
[
@@ -39,12 +32,14 @@ def use_large_query_path():
3932
def test_json_set_at_json_path(json_path, expected_json):
4033
original_json = ['{"a": {"b": {"c": "tester", "d": []}}}']
4134
s = bpd.Series(original_json, dtype=dtypes.JSON_DTYPE)
42-
actual = bbq.json_set(s, json_path_value_pairs=[(json_path, 10)])
4335

36+
actual = bbq.json_set(s, json_path_value_pairs=[(json_path, 10)])
4437
expected = bpd.Series(expected_json, dtype=dtypes.JSON_DTYPE)
38+
39+
# TODO(b/401630655): JSON is not compatible with allow_large_results=False
4540
pd.testing.assert_series_equal(
46-
actual.to_pandas(),
47-
expected.to_pandas(),
41+
actual.to_pandas(allow_large_results=True),
42+
expected.to_pandas(allow_large_results=True),
4843
)
4944

5045

@@ -63,11 +58,12 @@ def test_json_set_at_json_value_type(json_value, expected_json):
6358
original_json = ['{"a": {"b": "dev"}}', '{"a": {"b": [1, 2]}}']
6459
s = bpd.Series(original_json, dtype=dtypes.JSON_DTYPE)
6560
actual = bbq.json_set(s, json_path_value_pairs=[("$.a.b", json_value)])
66-
6761
expected = bpd.Series(expected_json, dtype=dtypes.JSON_DTYPE)
62+
63+
# TODO(b/401630655): JSON is not compatible with allow_large_results=False
6864
pd.testing.assert_series_equal(
69-
actual.to_pandas(),
70-
expected.to_pandas(),
65+
actual.to_pandas(allow_large_results=True),
66+
expected.to_pandas(allow_large_results=True),
7167
)
7268

7369

@@ -80,18 +76,14 @@ def test_json_set_w_more_pairs():
8076

8177
expected_json = ['{"a": 3, "b": 2}', '{"a": 4, "b": 2}', '{"a": 5, "b": 2, "c": 1}']
8278
expected = bpd.Series(expected_json, dtype=dtypes.JSON_DTYPE)
79+
80+
# TODO(b/401630655): JSON is not compatible with allow_large_results=False
8381
pd.testing.assert_series_equal(
84-
actual.to_pandas(),
85-
expected.to_pandas(),
82+
actual.to_pandas(allow_large_results=True),
83+
expected.to_pandas(allow_large_results=True),
8684
)
8785

8886

89-
def test_json_set_w_invalid_json_path_value_pairs():
90-
s = bpd.Series(['{"a": 10}'], dtype=dtypes.JSON_DTYPE)
91-
with pytest.raises(ValueError):
92-
bbq.json_set(s, json_path_value_pairs=[("$.a", 1, 100)]) # type: ignore
93-
94-
9587
def test_json_set_w_invalid_value_type():
9688
s = bpd.Series(['{"a": 10}'], dtype=dtypes.JSON_DTYPE)
9789
with pytest.raises(TypeError):
@@ -119,11 +111,13 @@ def test_json_extract_from_json():
119111
['{"a": {"b": [1, 2]}}', '{"a": {"c": 1}}', '{"a": {"b": 0}}'],
120112
dtype=dtypes.JSON_DTYPE,
121113
)
122-
actual = bbq.json_extract(s, "$.a.b").to_pandas()
123-
expected = bpd.Series(["[1, 2]", None, "0"], dtype=dtypes.JSON_DTYPE).to_pandas()
114+
actual = bbq.json_extract(s, "$.a.b")
115+
expected = bpd.Series(["[1, 2]", None, "0"], dtype=dtypes.JSON_DTYPE)
116+
117+
# TODO(b/401630655): JSON is not compatible with allow_large_results=False
124118
pd.testing.assert_series_equal(
125-
actual,
126-
expected,
119+
actual.to_pandas(allow_large_results=True),
120+
expected.to_pandas(allow_large_results=True),
127121
)
128122

129123

@@ -134,9 +128,11 @@ def test_json_extract_from_string():
134128
)
135129
actual = bbq.json_extract(s, "$.a.b")
136130
expected = bpd.Series(["[1,2]", None, "0"], dtype=pd.StringDtype(storage="pyarrow"))
131+
132+
# TODO(b/401630655): JSON is not compatible with allow_large_results=False
137133
pd.testing.assert_series_equal(
138-
actual.to_pandas(),
139-
expected.to_pandas(),
134+
actual.to_pandas(allow_large_results=True),
135+
expected.to_pandas(allow_large_results=True),
140136
)
141137

142138

@@ -169,9 +165,10 @@ def test_json_extract_array_from_json():
169165
expected.index.name = None
170166
expected.name = None
171167

168+
# TODO(b/401630655): JSON is not compatible with allow_large_results=False
172169
pd.testing.assert_series_equal(
173-
actual.to_pandas(),
174-
expected.to_pandas(),
170+
actual.to_pandas(allow_large_results=True),
171+
expected.to_pandas(allow_large_results=True),
175172
)
176173

177174

@@ -185,9 +182,11 @@ def test_json_extract_array_from_json_strings():
185182
[['"ab"', '"2"', '"3 xy"'], [], ['"4"', '"5"'], None],
186183
dtype=pd.ArrowDtype(pa.list_(pa.string())),
187184
)
185+
186+
# TODO(b/401630655): JSON is not compatible with allow_large_results=False
188187
pd.testing.assert_series_equal(
189-
actual.to_pandas(),
190-
expected.to_pandas(),
188+
actual.to_pandas(allow_large_results=True),
189+
expected.to_pandas(allow_large_results=True),
191190
)
192191

193192

@@ -201,9 +200,11 @@ def test_json_extract_array_from_json_array_strings():
201200
[["1", "2", "3"], [], ["4", "5"]],
202201
dtype=pd.ArrowDtype(pa.list_(pa.string())),
203202
)
203+
204+
# TODO(b/401630655): JSON is not compatible with allow_large_results=False
204205
pd.testing.assert_series_equal(
205-
actual.to_pandas(),
206-
expected.to_pandas(),
206+
actual.to_pandas(allow_large_results=True),
207+
expected.to_pandas(allow_large_results=True),
207208
)
208209

209210

@@ -217,37 +218,45 @@ def test_json_extract_string_array_from_json_strings():
217218
s = bpd.Series(['{"a": ["ab", "2", "3 xy"]}', '{"a": []}', '{"a": ["4","5"]}'])
218219
actual = bbq.json_extract_string_array(s, "$.a")
219220
expected = bpd.Series([["ab", "2", "3 xy"], [], ["4", "5"]])
221+
222+
# TODO(b/401630655): JSON is not compatible with allow_large_results=False
220223
pd.testing.assert_series_equal(
221-
actual.to_pandas(),
222-
expected.to_pandas(),
224+
actual.to_pandas(allow_large_results=True),
225+
expected.to_pandas(allow_large_results=True),
223226
)
224227

225228

226229
def test_json_extract_string_array_from_array_strings():
227230
s = bpd.Series(["[1, 2, 3]", "[]", "[4,5]"])
228231
actual = bbq.json_extract_string_array(s)
229232
expected = bpd.Series([["1", "2", "3"], [], ["4", "5"]])
233+
234+
# TODO(b/401630655): JSON is not compatible with allow_large_results=False
230235
pd.testing.assert_series_equal(
231-
actual.to_pandas(),
232-
expected.to_pandas(),
236+
actual.to_pandas(allow_large_results=True),
237+
expected.to_pandas(allow_large_results=True),
233238
)
234239

235240

236241
def test_json_extract_string_array_as_float_array_from_array_strings():
237242
s = bpd.Series(["[1, 2.5, 3]", "[]", "[4,5]"])
238243
actual = bbq.json_extract_string_array(s, value_dtype=dtypes.FLOAT_DTYPE)
239244
expected = bpd.Series([[1, 2.5, 3], [], [4, 5]])
245+
246+
# TODO(b/401630655): JSON is not compatible with allow_large_results=False
240247
pd.testing.assert_series_equal(
241-
actual.to_pandas(),
242-
expected.to_pandas(),
248+
actual.to_pandas(allow_large_results=True),
249+
expected.to_pandas(allow_large_results=True),
243250
)
244251

245252

246253
def test_json_extract_string_array_w_invalid_series_type():
254+
s = bpd.Series([1, 2])
247255
with pytest.raises(TypeError):
248-
bbq.json_extract_string_array(bpd.Series([1, 2]))
256+
bbq.json_extract_string_array(s)
249257

250258

251259
def test_parse_json_w_invalid_series_type():
260+
s = bpd.Series([1, 2])
252261
with pytest.raises(TypeError):
253-
bbq.parse_json(bpd.Series([1, 2]))
262+
bbq.parse_json(s)

tests/system/small/test_dataframe_io.py

Lines changed: 0 additions & 138 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414

1515
from typing import Tuple
1616

17-
import db_dtypes # type:ignore
1817
import google.api_core.exceptions
1918
import pandas as pd
2019
import pandas.testing
@@ -281,143 +280,6 @@ def test_to_arrow_override_global_option(scalars_df_index):
281280
assert scalars_df_index._query_job.destination.table_id == table_id
282281

283282

284-
def test_load_json_w_json_string_items(session):
285-
sql = """
286-
SELECT 0 AS id, JSON_OBJECT('boolean', True) AS json_col,
287-
UNION ALL
288-
SELECT 1, JSON_OBJECT('int', 100),
289-
UNION ALL
290-
SELECT 2, JSON_OBJECT('float', 0.98),
291-
UNION ALL
292-
SELECT 3, JSON_OBJECT('string', 'hello world'),
293-
UNION ALL
294-
SELECT 4, JSON_OBJECT('array', [8, 9, 10]),
295-
UNION ALL
296-
SELECT 5, JSON_OBJECT('null', null),
297-
UNION ALL
298-
SELECT 6, JSON_OBJECT('b', 2, 'a', 1),
299-
UNION ALL
300-
SELECT
301-
7,
302-
JSON_OBJECT(
303-
'dict',
304-
JSON_OBJECT(
305-
'int', 1,
306-
'array', [JSON_OBJECT('foo', 1), JSON_OBJECT('bar', 'hello')]
307-
)
308-
),
309-
"""
310-
df = session.read_gbq(sql, index_col="id")
311-
312-
assert df.dtypes["json_col"] == pd.ArrowDtype(db_dtypes.JSONArrowType())
313-
314-
assert df["json_col"][0] == '{"boolean":true}'
315-
assert df["json_col"][1] == '{"int":100}'
316-
assert df["json_col"][2] == '{"float":0.98}'
317-
assert df["json_col"][3] == '{"string":"hello world"}'
318-
assert df["json_col"][4] == '{"array":[8,9,10]}'
319-
assert df["json_col"][5] == '{"null":null}'
320-
321-
# Verifies JSON strings preserve array order, regardless of dictionary key order.
322-
assert df["json_col"][6] == '{"a":1,"b":2}'
323-
assert df["json_col"][7] == '{"dict":{"array":[{"foo":1},{"bar":"hello"}],"int":1}}'
324-
325-
326-
def test_load_json_to_pandas_has_correct_result(session):
327-
df = session.read_gbq("SELECT JSON_OBJECT('foo', 10, 'bar', TRUE) AS json_col")
328-
assert df.dtypes["json_col"] == pd.ArrowDtype(db_dtypes.JSONArrowType())
329-
result = df.to_pandas()
330-
331-
# These JSON strings are compatible with BigQuery's JSON storage,
332-
pd_df = pd.DataFrame(
333-
{"json_col": ['{"bar":true,"foo":10}']},
334-
dtype=pd.ArrowDtype(db_dtypes.JSONArrowType()),
335-
)
336-
pd_df.index = pd_df.index.astype("Int64")
337-
pd.testing.assert_series_equal(result.dtypes, pd_df.dtypes)
338-
pd.testing.assert_series_equal(result["json_col"], pd_df["json_col"])
339-
340-
341-
def test_load_json_in_struct(session):
342-
"""Avoid regressions for internal issue 381148539."""
343-
sql = """
344-
SELECT 0 AS id, STRUCT(JSON_OBJECT('boolean', True) AS data, 1 AS number) AS struct_col
345-
UNION ALL
346-
SELECT 1, STRUCT(JSON_OBJECT('int', 100), 2),
347-
UNION ALL
348-
SELECT 2, STRUCT(JSON_OBJECT('float', 0.98), 3),
349-
UNION ALL
350-
SELECT 3, STRUCT(JSON_OBJECT('string', 'hello world'), 4),
351-
UNION ALL
352-
SELECT 4, STRUCT(JSON_OBJECT('array', [8, 9, 10]), 5),
353-
UNION ALL
354-
SELECT 5, STRUCT(JSON_OBJECT('null', null), 6),
355-
UNION ALL
356-
SELECT
357-
6,
358-
STRUCT(JSON_OBJECT(
359-
'dict',
360-
JSON_OBJECT(
361-
'int', 1,
362-
'array', [JSON_OBJECT('foo', 1), JSON_OBJECT('bar', 'hello')]
363-
)
364-
), 7),
365-
"""
366-
df = session.read_gbq(sql, index_col="id")
367-
368-
assert isinstance(df.dtypes["struct_col"], pd.ArrowDtype)
369-
assert isinstance(df.dtypes["struct_col"].pyarrow_dtype, pa.StructType)
370-
371-
data = df["struct_col"].struct.field("data")
372-
assert data.dtype == pd.ArrowDtype(db_dtypes.JSONArrowType())
373-
374-
assert data[0] == '{"boolean":true}'
375-
assert data[1] == '{"int":100}'
376-
assert data[2] == '{"float":0.98}'
377-
assert data[3] == '{"string":"hello world"}'
378-
assert data[4] == '{"array":[8,9,10]}'
379-
assert data[5] == '{"null":null}'
380-
assert data[6] == '{"dict":{"array":[{"foo":1},{"bar":"hello"}],"int":1}}'
381-
382-
383-
def test_load_json_in_array(session):
384-
sql = """
385-
SELECT
386-
0 AS id,
387-
[
388-
JSON_OBJECT('boolean', True),
389-
JSON_OBJECT('int', 100),
390-
JSON_OBJECT('float', 0.98),
391-
JSON_OBJECT('string', 'hello world'),
392-
JSON_OBJECT('array', [8, 9, 10]),
393-
JSON_OBJECT('null', null),
394-
JSON_OBJECT(
395-
'dict',
396-
JSON_OBJECT(
397-
'int', 1,
398-
'array', [JSON_OBJECT('bar', 'hello'), JSON_OBJECT('foo', 1)]
399-
)
400-
)
401-
] AS array_col,
402-
"""
403-
df = session.read_gbq(sql, index_col="id")
404-
405-
assert isinstance(df.dtypes["array_col"], pd.ArrowDtype)
406-
assert isinstance(df.dtypes["array_col"].pyarrow_dtype, pa.ListType)
407-
408-
data = df["array_col"].list
409-
assert data.len()[0] == 7
410-
assert data[0].dtype == pd.ArrowDtype(db_dtypes.JSONArrowType())
411-
412-
assert data[0][0] == '{"boolean":true}'
413-
assert data[1][0] == '{"int":100}'
414-
assert data[2][0] == '{"float":0.98}'
415-
assert data[3][0] == '{"string":"hello world"}'
416-
assert data[4][0] == '{"array":[8,9,10]}'
417-
assert data[5][0] == '{"null":null}'
418-
assert data[6][0] == '{"dict":{"array":[{"bar":"hello"},{"foo":1}],"int":1}}'
419-
420-
421283
def test_to_pandas_batches_w_correct_dtypes(scalars_df_default_index):
422284
"""Verify to_pandas_batches() APIs returns the expected dtypes."""
423285
expected = scalars_df_default_index.dtypes

0 commit comments

Comments
 (0)