googleapis · chelsea-lin · Mar 21, 2025 · Mar 21, 2025 · Mar 21, 2025 · Mar 21, 2025
@@ -22,13 +22,6 @@
 import bigframes.pandas as bpd
 
 
-@pytest.fixture(scope="module", autouse=True)
-def use_large_query_path():
-    # b/401630655
-    with bpd.option_context("bigquery.allow_large_results", True):
-        yield
-
-
 @pytest.mark.parametrize(
     ("json_path", "expected_json"),
     [
@@ -39,12 +32,14 @@ def use_large_query_path():
 def test_json_set_at_json_path(json_path, expected_json):
     original_json = ['{"a": {"b": {"c": "tester", "d": []}}}']
     s = bpd.Series(original_json, dtype=dtypes.JSON_DTYPE)
-    actual = bbq.json_set(s, json_path_value_pairs=[(json_path, 10)])
 
+    actual = bbq.json_set(s, json_path_value_pairs=[(json_path, 10)])
     expected = bpd.Series(expected_json, dtype=dtypes.JSON_DTYPE)
+
+    # TODO(b/401630655): JSON is not compatible with allow_large_results=False
     pd.testing.assert_series_equal(
-        actual.to_pandas(),
-        expected.to_pandas(),
+        actual.to_pandas(allow_large_results=True),
+        expected.to_pandas(allow_large_results=True),
     )
 
 
@@ -63,11 +58,12 @@ def test_json_set_at_json_value_type(json_value, expected_json):
     original_json = ['{"a": {"b": "dev"}}', '{"a": {"b": [1, 2]}}']
     s = bpd.Series(original_json, dtype=dtypes.JSON_DTYPE)
     actual = bbq.json_set(s, json_path_value_pairs=[("$.a.b", json_value)])
-
     expected = bpd.Series(expected_json, dtype=dtypes.JSON_DTYPE)
+
+    # TODO(b/401630655): JSON is not compatible with allow_large_results=False
     pd.testing.assert_series_equal(
-        actual.to_pandas(),
-        expected.to_pandas(),
+        actual.to_pandas(allow_large_results=True),
+        expected.to_pandas(allow_large_results=True),
     )
 
 
@@ -80,18 +76,14 @@ def test_json_set_w_more_pairs():
 
     expected_json = ['{"a": 3, "b": 2}', '{"a": 4, "b": 2}', '{"a": 5, "b": 2, "c": 1}']
     expected = bpd.Series(expected_json, dtype=dtypes.JSON_DTYPE)
+
+    # TODO(b/401630655): JSON is not compatible with allow_large_results=False
     pd.testing.assert_series_equal(
-        actual.to_pandas(),
-        expected.to_pandas(),
+        actual.to_pandas(allow_large_results=True),
+        expected.to_pandas(allow_large_results=True),
     )
 
 
-def test_json_set_w_invalid_json_path_value_pairs():
-    s = bpd.Series(['{"a": 10}'], dtype=dtypes.JSON_DTYPE)
-    with pytest.raises(ValueError):
-        bbq.json_set(s, json_path_value_pairs=[("$.a", 1, 100)])  # type: ignore
-
-
 def test_json_set_w_invalid_value_type():
     s = bpd.Series(['{"a": 10}'], dtype=dtypes.JSON_DTYPE)
     with pytest.raises(TypeError):
@@ -119,11 +111,13 @@ def test_json_extract_from_json():
         ['{"a": {"b": [1, 2]}}', '{"a": {"c": 1}}', '{"a": {"b": 0}}'],
         dtype=dtypes.JSON_DTYPE,
     )
-    actual = bbq.json_extract(s, "$.a.b").to_pandas()
-    expected = bpd.Series(["[1, 2]", None, "0"], dtype=dtypes.JSON_DTYPE).to_pandas()
+    actual = bbq.json_extract(s, "$.a.b")
+    expected = bpd.Series(["[1, 2]", None, "0"], dtype=dtypes.JSON_DTYPE)
+
+    # TODO(b/401630655): JSON is not compatible with allow_large_results=False
     pd.testing.assert_series_equal(
-        actual,
-        expected,
+        actual.to_pandas(allow_large_results=True),
+        expected.to_pandas(allow_large_results=True),
     )
 
 
@@ -134,9 +128,11 @@ def test_json_extract_from_string():
     )
     actual = bbq.json_extract(s, "$.a.b")
     expected = bpd.Series(["[1,2]", None, "0"], dtype=pd.StringDtype(storage="pyarrow"))
+
+    # TODO(b/401630655): JSON is not compatible with allow_large_results=False
     pd.testing.assert_series_equal(
-        actual.to_pandas(),
-        expected.to_pandas(),
+        actual.to_pandas(allow_large_results=True),
+        expected.to_pandas(allow_large_results=True),
     )
 
 
@@ -169,9 +165,10 @@ def test_json_extract_array_from_json():
     expected.index.name = None
     expected.name = None
 
+    # TODO(b/401630655): JSON is not compatible with allow_large_results=False
     pd.testing.assert_series_equal(
-        actual.to_pandas(),
-        expected.to_pandas(),
+        actual.to_pandas(allow_large_results=True),
+        expected.to_pandas(allow_large_results=True),
     )
 
 
@@ -185,9 +182,11 @@ def test_json_extract_array_from_json_strings():
         [['"ab"', '"2"', '"3 xy"'], [], ['"4"', '"5"'], None],
         dtype=pd.ArrowDtype(pa.list_(pa.string())),
     )
+
+    # TODO(b/401630655): JSON is not compatible with allow_large_results=False
     pd.testing.assert_series_equal(
-        actual.to_pandas(),
-        expected.to_pandas(),
+        actual.to_pandas(allow_large_results=True),
+        expected.to_pandas(allow_large_results=True),
     )
 
 
@@ -201,9 +200,11 @@ def test_json_extract_array_from_json_array_strings():
         [["1", "2", "3"], [], ["4", "5"]],
         dtype=pd.ArrowDtype(pa.list_(pa.string())),
     )
+
+    # TODO(b/401630655): JSON is not compatible with allow_large_results=False
     pd.testing.assert_series_equal(
-        actual.to_pandas(),
-        expected.to_pandas(),
+        actual.to_pandas(allow_large_results=True),
+        expected.to_pandas(allow_large_results=True),
     )
 
 
@@ -217,37 +218,45 @@ def test_json_extract_string_array_from_json_strings():
     s = bpd.Series(['{"a": ["ab", "2", "3 xy"]}', '{"a": []}', '{"a": ["4","5"]}'])
     actual = bbq.json_extract_string_array(s, "$.a")
     expected = bpd.Series([["ab", "2", "3 xy"], [], ["4", "5"]])
+
+    # TODO(b/401630655): JSON is not compatible with allow_large_results=False
     pd.testing.assert_series_equal(
-        actual.to_pandas(),
-        expected.to_pandas(),
+        actual.to_pandas(allow_large_results=True),
+        expected.to_pandas(allow_large_results=True),
     )
 
 
 def test_json_extract_string_array_from_array_strings():
     s = bpd.Series(["[1, 2, 3]", "[]", "[4,5]"])
     actual = bbq.json_extract_string_array(s)
     expected = bpd.Series([["1", "2", "3"], [], ["4", "5"]])
+
+    # TODO(b/401630655): JSON is not compatible with allow_large_results=False
     pd.testing.assert_series_equal(
-        actual.to_pandas(),
-        expected.to_pandas(),
+        actual.to_pandas(allow_large_results=True),
+        expected.to_pandas(allow_large_results=True),
     )
 
 
 def test_json_extract_string_array_as_float_array_from_array_strings():
     s = bpd.Series(["[1, 2.5, 3]", "[]", "[4,5]"])
     actual = bbq.json_extract_string_array(s, value_dtype=dtypes.FLOAT_DTYPE)
     expected = bpd.Series([[1, 2.5, 3], [], [4, 5]])
+
+    # TODO(b/401630655): JSON is not compatible with allow_large_results=False
     pd.testing.assert_series_equal(
-        actual.to_pandas(),
-        expected.to_pandas(),
+        actual.to_pandas(allow_large_results=True),
+        expected.to_pandas(allow_large_results=True),
     )
 
 
 def test_json_extract_string_array_w_invalid_series_type():
+    s = bpd.Series([1, 2])
     with pytest.raises(TypeError):
-        bbq.json_extract_string_array(bpd.Series([1, 2]))
+        bbq.json_extract_string_array(s)
 
 
 def test_parse_json_w_invalid_series_type():
+    s = bpd.Series([1, 2])
     with pytest.raises(TypeError):
-        bbq.parse_json(bpd.Series([1, 2]))
+        bbq.parse_json(s)
@@ -14,7 +14,6 @@
 
 from typing import Tuple
 
-import db_dtypes  # type:ignore
 import google.api_core.exceptions
 import pandas as pd
 import pandas.testing
@@ -281,143 +280,6 @@ def test_to_arrow_override_global_option(scalars_df_index):
         assert scalars_df_index._query_job.destination.table_id == table_id
 
 
-def test_load_json_w_json_string_items(session):
-    sql = """
-        SELECT 0 AS id, JSON_OBJECT('boolean', True) AS json_col,
-        UNION ALL
-        SELECT 1, JSON_OBJECT('int', 100),
-        UNION ALL
-        SELECT 2, JSON_OBJECT('float', 0.98),
-        UNION ALL
-        SELECT 3, JSON_OBJECT('string', 'hello world'),
-        UNION ALL
-        SELECT 4, JSON_OBJECT('array', [8, 9, 10]),
-        UNION ALL
-        SELECT 5, JSON_OBJECT('null', null),
-        UNION ALL
-        SELECT 6, JSON_OBJECT('b', 2, 'a', 1),
-        UNION ALL
-        SELECT
-            7,
-            JSON_OBJECT(
-                'dict',
-                JSON_OBJECT(
-                    'int', 1,
-                    'array', [JSON_OBJECT('foo', 1), JSON_OBJECT('bar', 'hello')]
-                )
-            ),
-    """
-    df = session.read_gbq(sql, index_col="id")
-
-    assert df.dtypes["json_col"] == pd.ArrowDtype(db_dtypes.JSONArrowType())
-
-    assert df["json_col"][0] == '{"boolean":true}'
-    assert df["json_col"][1] == '{"int":100}'
-    assert df["json_col"][2] == '{"float":0.98}'
-    assert df["json_col"][3] == '{"string":"hello world"}'
-    assert df["json_col"][4] == '{"array":[8,9,10]}'
-    assert df["json_col"][5] == '{"null":null}'
-
-    # Verifies JSON strings preserve array order, regardless of dictionary key order.
-    assert df["json_col"][6] == '{"a":1,"b":2}'
-    assert df["json_col"][7] == '{"dict":{"array":[{"foo":1},{"bar":"hello"}],"int":1}}'
-
-
-def test_load_json_to_pandas_has_correct_result(session):
-    df = session.read_gbq("SELECT JSON_OBJECT('foo', 10, 'bar', TRUE) AS json_col")
-    assert df.dtypes["json_col"] == pd.ArrowDtype(db_dtypes.JSONArrowType())
-    result = df.to_pandas()
-
-    # These JSON strings are compatible with BigQuery's JSON storage,
-    pd_df = pd.DataFrame(
-        {"json_col": ['{"bar":true,"foo":10}']},
-        dtype=pd.ArrowDtype(db_dtypes.JSONArrowType()),
-    )
-    pd_df.index = pd_df.index.astype("Int64")
-    pd.testing.assert_series_equal(result.dtypes, pd_df.dtypes)
-    pd.testing.assert_series_equal(result["json_col"], pd_df["json_col"])
-
-
-def test_load_json_in_struct(session):
-    """Avoid regressions for internal issue 381148539."""
-    sql = """
-        SELECT 0 AS id, STRUCT(JSON_OBJECT('boolean', True) AS data, 1 AS number) AS struct_col
-        UNION ALL
-        SELECT 1, STRUCT(JSON_OBJECT('int', 100), 2),
-        UNION ALL
-        SELECT 2, STRUCT(JSON_OBJECT('float', 0.98), 3),
-        UNION ALL
-        SELECT 3, STRUCT(JSON_OBJECT('string', 'hello world'), 4),
-        UNION ALL
-        SELECT 4, STRUCT(JSON_OBJECT('array', [8, 9, 10]), 5),
-        UNION ALL
-        SELECT 5, STRUCT(JSON_OBJECT('null', null), 6),
-        UNION ALL
-        SELECT
-            6,
-            STRUCT(JSON_OBJECT(
-                'dict',
-                JSON_OBJECT(
-                    'int', 1,
-                    'array', [JSON_OBJECT('foo', 1), JSON_OBJECT('bar', 'hello')]
-                )
-            ), 7),
-    """
-    df = session.read_gbq(sql, index_col="id")
-
-    assert isinstance(df.dtypes["struct_col"], pd.ArrowDtype)
-    assert isinstance(df.dtypes["struct_col"].pyarrow_dtype, pa.StructType)
-
-    data = df["struct_col"].struct.field("data")
-    assert data.dtype == pd.ArrowDtype(db_dtypes.JSONArrowType())
-
-    assert data[0] == '{"boolean":true}'
-    assert data[1] == '{"int":100}'
-    assert data[2] == '{"float":0.98}'
-    assert data[3] == '{"string":"hello world"}'
-    assert data[4] == '{"array":[8,9,10]}'
-    assert data[5] == '{"null":null}'
-    assert data[6] == '{"dict":{"array":[{"foo":1},{"bar":"hello"}],"int":1}}'
-
-
-def test_load_json_in_array(session):
-    sql = """
-        SELECT
-            0 AS id,
-            [
-                JSON_OBJECT('boolean', True),
-                JSON_OBJECT('int', 100),
-                JSON_OBJECT('float', 0.98),
-                JSON_OBJECT('string', 'hello world'),
-                JSON_OBJECT('array', [8, 9, 10]),
-                JSON_OBJECT('null', null),
-                JSON_OBJECT(
-                    'dict',
-                    JSON_OBJECT(
-                        'int', 1,
-                        'array', [JSON_OBJECT('bar', 'hello'), JSON_OBJECT('foo', 1)]
-                    )
-                )
-            ] AS array_col,
-    """
-    df = session.read_gbq(sql, index_col="id")
-
-    assert isinstance(df.dtypes["array_col"], pd.ArrowDtype)
-    assert isinstance(df.dtypes["array_col"].pyarrow_dtype, pa.ListType)
-
-    data = df["array_col"].list
-    assert data.len()[0] == 7
-    assert data[0].dtype == pd.ArrowDtype(db_dtypes.JSONArrowType())
-
-    assert data[0][0] == '{"boolean":true}'
-    assert data[1][0] == '{"int":100}'
-    assert data[2][0] == '{"float":0.98}'
-    assert data[3][0] == '{"string":"hello world"}'
-    assert data[4][0] == '{"array":[8,9,10]}'
-    assert data[5][0] == '{"null":null}'
-    assert data[6][0] == '{"dict":{"array":[{"bar":"hello"},{"foo":1}],"int":1}}'
-
-
 def test_to_pandas_batches_w_correct_dtypes(scalars_df_default_index):
     """Verify to_pandas_batches() APIs returns the expected dtypes."""
     expected = scalars_df_default_index.dtypes