From 84e8ba41482eafd4154f970c7212c8ee1812450a Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Fri, 2 Feb 2024 13:49:35 +0000 Subject: [PATCH 1/3] BUG: ensure_string_array might modify read-only array inplace --- pandas/_libs/lib.pyx | 3 +++ pandas/tests/copy_view/test_astype.py | 10 ++++++++++ 2 files changed, 13 insertions(+) diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 5eb20960f0e3d..50feda8fb188e 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -770,6 +770,9 @@ cpdef ndarray[object] ensure_string_array( result = result.copy() elif not copy and result is arr: already_copied = False + elif not copy and not result.flags.writeable: + # Weird edge case where result is a view + already_copied = False if issubclass(arr.dtype.type, np.str_): # short-circuit, all elements are str diff --git a/pandas/tests/copy_view/test_astype.py b/pandas/tests/copy_view/test_astype.py index 3c1a157dd2c6a..18470a29fadc2 100644 --- a/pandas/tests/copy_view/test_astype.py +++ b/pandas/tests/copy_view/test_astype.py @@ -139,6 +139,16 @@ def test_astype_string_copy_on_pickle_roundrip(): tm.assert_series_equal(base, base_copy) +def test_astype_string_read_only_on_pickle_roundrip(): + # https://github.com/pandas-dev/pandas/issues/54654 + # ensure_string_array may alter read-only array inplace + base = Series(np.array([(1, 2), None, 1], dtype="object")) + base_copy = pickle.loads(pickle.dumps(base)) + base_copy._values.flags.writeable = False + base_copy.astype("string[pyarrow]", copy=False) + tm.assert_series_equal(base, base_copy) + + def test_astype_dict_dtypes(using_copy_on_write): df = DataFrame( {"a": [1, 2, 3], "b": [4, 5, 6], "c": Series([1.5, 1.5, 1.5], dtype="float64")} From c387e60dab99ae10542345755eb7d1fe5368fe49 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Fri, 2 Feb 2024 13:50:56 +0000 Subject: [PATCH 2/3] BUG: ensure_string_array might modify read-only array inplace --- doc/source/whatsnew/v3.0.0.rst | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index f9117253b61c1..3ff2d9bee8a5c 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -159,7 +159,7 @@ Numeric Conversion ^^^^^^^^^^ -- +- Bug in :meth:`Series.astype` where a read-only array might be modified inplace when casting to a string dtype (:issue:`57212`) - Strings From 8d5769f52a091fd1940e05d4e401c0d1153eb5c8 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Fri, 2 Feb 2024 14:19:31 +0000 Subject: [PATCH 3/3] Fix pyarrow installed error --- pandas/tests/copy_view/test_astype.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/tests/copy_view/test_astype.py b/pandas/tests/copy_view/test_astype.py index 18470a29fadc2..f280e2143fee0 100644 --- a/pandas/tests/copy_view/test_astype.py +++ b/pandas/tests/copy_view/test_astype.py @@ -4,6 +4,7 @@ import pytest from pandas.compat.pyarrow import pa_version_under12p0 +import pandas.util._test_decorators as td import pandas as pd from pandas import ( @@ -139,6 +140,7 @@ def test_astype_string_copy_on_pickle_roundrip(): tm.assert_series_equal(base, base_copy) +@td.skip_if_no("pyarrow") def test_astype_string_read_only_on_pickle_roundrip(): # https://github.com/pandas-dev/pandas/issues/54654 # ensure_string_array may alter read-only array inplace