BUG: Handle zero-chunked pyarrow.ChunkedArray in StringArray

xhochy · xhochy · commit 3dab96d87f3e · 2021-04-20T10:06:17.000+02:00
diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst
@@ -700,7 +700,7 @@ Conversion
 Strings
 ^^^^^^^
 
--
+- Bug in the conversion from ``pyarrow.ChunkedArray`` to :class:`~arrays.StringArray` when the original had zero chunks (:issue:`41040`)
 -
 
 Interval
diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py
@@ -118,7 +118,10 @@ def __from_arrow__(
             str_arr = StringArray._from_sequence(np.array(arr))
             results.append(str_arr)
 
-        return StringArray._concat_same_type(results)
+        if len(results) > 0:
+            return StringArray._concat_same_type(results)
+        else:
+            return StringArray(np.array([], dtype="object"))
 
 
 class StringArray(PandasArray):
diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py
@@ -476,6 +476,22 @@ def test_arrow_roundtrip(dtype, dtype_object):
     assert result.loc[2, "a"] is pd.NA
 
 
+@td.skip_if_no("pyarrow", min_version="0.15.1.dev")
+def test_arrow_load_from_zero_chunks(dtype, dtype_object):
+    # GH-41040
+    import pyarrow as pa
+
+    data = pd.array([], dtype=dtype)
+    df = pd.DataFrame({"a": data})
+    table = pa.table(df)
+    assert table.field("a").type == "string"
+    # Instantiate the same table with no chunks at all
+    table = pa.table([pa.chunked_array([], type=pa.string())], schema=table.schema)
+    result = table.to_pandas()
+    assert isinstance(result["a"].dtype, dtype_object)
+    tm.assert_frame_equal(result, df)
+
+
 def test_value_counts_na(dtype):
     arr = pd.array(["a", "b", "a", pd.NA], dtype=dtype)
     result = arr.value_counts(dropna=False)

Original file line number	Diff line number	Diff line change
`@@ -700,7 +700,7 @@ Conversion`
`700`	`700`	`Strings`
`701`	`701`	`^^^^^^^`
`702`	`702`
`703`		`--`
	`703`	+- Bug in the conversion from ``pyarrow.ChunkedArray`` to :class:`~arrays.StringArray` when the original had zero chunks (:issue:`41040`)
`704`	`704`	`-`
`705`	`705`
`706`	`706`	`Interval`