pandas-dev · jreback · May 4, 2021 · May 1, 2021 · jreback · May 4, 2021
diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py
@@ -41,6 +41,7 @@
 from pandas.core.arraylike import OpsMixin
 from pandas.core.arrays.base import ExtensionArray
 from pandas.core.arrays.boolean import BooleanDtype
+from pandas.core.arrays.integer import Int64Dtype
 from pandas.core.indexers import (
     check_array_indexer,
     validate_indices,
@@ -858,6 +859,14 @@ def _str_isupper(self):
         else:
             return super()._str_isupper()
 
+    def _str_len(self):
+        # utf8_length was added in pyarrow 4.0.0; older versions fall back to super()
+        if hasattr(pc, "utf8_length"):
+            result = pc.utf8_length(self._data)
+            return Int64Dtype().__from_arrow__(result)
+        else:
+            return super()._str_len()
+
     def _str_lower(self):
         return type(self)(pc.utf8_lower(self._data))
 

diff --git a/pandas/tests/strings/test_strings.py b/pandas/tests/strings/test_strings.py
@@ -14,7 +14,6 @@
     MultiIndex,
     Series,
     isna,
-    notna,
 )
 import pandas._testing as tm
 
@@ -402,14 +401,19 @@ def test_join():
     tm.assert_almost_equal(rs, xp)
 
 
-def test_len():
-    values = Series(["foo", "fooo", "fooooo", np.nan, "fooooooo"])
+def test_len(any_string_dtype):
+    values = Series(  # includes a trailing newline and a non-ASCII character
+        ["foo", "fooo", "fooooo", np.nan, "fooooooo", "foo\n", "あ"],
+        dtype=any_string_dtype,
+    )
 
     result = values.str.len()
-    exp = values.map(lambda x: len(x) if notna(x) else np.nan)
-    tm.assert_series_equal(result, exp)
+    expected_dtype = "float64" if any_string_dtype == "object" else "Int64"
+    expected = Series([3, 4, 6, np.nan, 8, 4, 1], dtype=expected_dtype)
+    tm.assert_series_equal(result, expected)
 
-    # mixed
+
+def test_len_mixed():
     mixed = Series(
         [
             "a_b",