|
26 | 26 | import google.cloud.bigquery as bigquery
|
27 | 27 | import numpy as np
|
28 | 28 | import pandas as pd
|
| 29 | +import pandas.arrays as arrays |
| 30 | +import pyarrow as pa |
29 | 31 | import pytest
|
30 | 32 |
|
31 | 33 | import bigframes
|
@@ -829,6 +831,68 @@ def test_read_pandas_json_index(session, write_engine):
|
829 | 831 | pd.testing.assert_index_equal(actual_result, expected_index)
|
830 | 832 |
|
831 | 833 |
|
@pytest.mark.parametrize(
    ("write_engine"),
    [
        pytest.param("default"),
        pytest.param("bigquery_load"),
    ],
)
def test_read_pandas_w_nested_json(session, write_engine):
    """Reading a Series whose dtype nests JSON (list<struct<json>>) must raise.

    BigFrames does not yet support JSON nested inside structs/lists, so
    ``read_pandas(...).to_pandas()`` is expected to fail with
    ``NotImplementedError`` regardless of the write engine.
    """
    # NOTE: dict keys must match the struct field name ("name") declared below;
    # otherwise PyArrow silently fills the field with nulls and the JSON
    # payloads in this fixture never make it into the array.
    data = [
        [{"name": "1"}],
        [{"name": None}],
        [{"name": '["1","3","5"]'}],
        [{"name": '{"a":1,"b":["x","y"],"c":{"x":[],"z":false}}'}],
    ]
    # PyArrow currently lacks support for creating structs or lists containing
    # extension types, so build with plain strings and re-type via ArrowDtype.
    # See issue: https://github.com/apache/arrow/issues/45262
    pa_array = pa.array(data, type=pa.list_(pa.struct([("name", pa.string())])))
    pd_s = pd.Series(
        arrays.ArrowExtensionArray(pa_array),  # type: ignore
        dtype=pd.ArrowDtype(
            pa.list_(pa.struct([("name", bigframes.dtypes.JSON_ARROW_TYPE)]))
        ),
    )
    with pytest.raises(NotImplementedError, match="Nested JSON types, found in column"):
        # Until b/401630655 is resolved, json not compatible with allow_large_results=False
        session.read_pandas(pd_s, write_engine=write_engine).to_pandas(
            allow_large_results=True
        )
| 862 | + |
| 863 | + |
@pytest.mark.parametrize(
    ("write_engine"),
    [
        pytest.param("default"),
        pytest.param("bigquery_load"),
    ],
)
def test_read_pandas_w_nested_json_index(session, write_engine):
    """Reading an Index whose dtype nests JSON (list<struct<json>>) must raise.

    Mirrors ``test_read_pandas_w_nested_json`` but places the nested-JSON
    dtype on the index instead of a column; the error message differs
    accordingly ("found in the index").
    """
    # NOTE: dict keys must match the struct field name ("name") declared below;
    # otherwise PyArrow silently fills the field with nulls and the JSON
    # payloads in this fixture never make it into the array.
    data = [
        [{"name": "1"}],
        [{"name": None}],
        [{"name": '["1","3","5"]'}],
        [{"name": '{"a":1,"b":["x","y"],"c":{"x":[],"z":false}}'}],
    ]
    # PyArrow currently lacks support for creating structs or lists containing
    # extension types, so build with plain strings and re-type via ArrowDtype.
    # See issue: https://github.com/apache/arrow/issues/45262
    pa_array = pa.array(data, type=pa.list_(pa.struct([("name", pa.string())])))
    pd_idx: pd.Index = pd.Index(
        arrays.ArrowExtensionArray(pa_array),  # type: ignore
        dtype=pd.ArrowDtype(
            pa.list_(pa.struct([("name", bigframes.dtypes.JSON_ARROW_TYPE)]))
        ),
    )
    with pytest.raises(
        NotImplementedError, match="Nested JSON types, found in the index"
    ):
        # Until b/401630655 is resolved, json not compatible with allow_large_results=False
        session.read_pandas(pd_idx, write_engine=write_engine).to_pandas(
            allow_large_results=True
        )
| 894 | + |
| 895 | + |
832 | 896 | @utils.skip_legacy_pandas
|
833 | 897 | @pytest.mark.parametrize(
|
834 | 898 | ("write_engine",),
|
|
0 commit comments