From f934564ea7ebf21766aaf689fa9711740be430cd Mon Sep 17 00:00:00 2001 From: Isaac Virshup Date: Mon, 29 Nov 2021 11:31:59 +0100 Subject: [PATCH 1/9] Fix setitem with 1d matrix --- pandas/core/construction.py | 3 +++ pandas/tests/frame/indexing/test_setitem.py | 13 +++++++++++++ 2 files changed, 16 insertions(+) diff --git a/pandas/core/construction.py b/pandas/core/construction.py index e3b41f2c7b8c2..f303f59efc48a 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -499,6 +499,9 @@ def sanitize_array( if isinstance(data, ma.MaskedArray): data = sanitize_masked_array(data) + if isinstance(data, np.matrix): + data = data.A + if isinstance(dtype, PandasDtype): # Avoid ending up with a PandasArray dtype = dtype.numpy_dtype diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index 597216f55e444..aae401b9b73b8 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -680,6 +680,19 @@ def test_boolean_mask_nullable_int64(self): ) tm.assert_frame_equal(result, expected) + def test_2d_array(self): + from scipy import sparse + + expected = DataFrame( + {"np-array": np.ones(10), "scipy-sparse": np.ones(10)}, index=np.arange(10) + ) + + df = DataFrame(index=np.arange(10)) + df["np-array"] = np.ones((10, 1)) + df["scipy-sparse"] = sparse.eye(10).sum(axis=1) + + tm.assert_frame_equal(df, expected) + class TestSetitemTZAwareValues: @pytest.fixture From 732d8d30c3a14291fbaebadb227a29aece46244d Mon Sep 17 00:00:00 2001 From: Isaac Virshup Date: Mon, 29 Nov 2021 12:17:38 +0100 Subject: [PATCH 2/9] Remove test scipy dependency --- pandas/tests/frame/indexing/test_setitem.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index aae401b9b73b8..eb2fd53ce4f5a 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -681,15 +681,15 @@ def test_boolean_mask_nullable_int64(self): tm.assert_frame_equal(result, expected) def test_2d_array(self): - from scipy import sparse - + # for use-case df["x"] = sparse.random(10, 10).mean(axis=1) expected = DataFrame( - {"np-array": np.ones(10), "scipy-sparse": np.ones(10)}, index=np.arange(10) + {"np-array": np.ones(10), "np-matrix": np.ones(10)}, index=np.arange(10) ) + a = np.ones((10, 1)) df = DataFrame(index=np.arange(10)) - df["np-array"] = np.ones((10, 1)) - df["scipy-sparse"] = sparse.eye(10).sum(axis=1) + df["np-array"] = a + df["np-matrix"] = np.matrix(a) tm.assert_frame_equal(df, expected) From ccfc283540d53f0c18bc8229674b3b5c70811299 Mon Sep 17 00:00:00 2001 From: Isaac Virshup Date: Mon, 29 Nov 2021 13:39:16 +0100 Subject: [PATCH 3/9] Catch warning --- pandas/tests/frame/indexing/test_setitem.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index eb2fd53ce4f5a..ab99a69dc6b02 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -689,7 +689,8 @@ def test_2d_array(self): a = np.ones((10, 1)) df = DataFrame(index=np.arange(10)) df["np-array"] = a - df["np-matrix"] = np.matrix(a) + with tm.assert_produces_warning(PendingDeprecationWarning): + df["np-matrix"] = np.matrix(a) tm.assert_frame_equal(df, expected) From c9278559e955dacdd9cf38d189f7976bff80ba2a Mon Sep 17 00:00:00 2001 From: Isaac Virshup Date: Mon, 29 Nov 2021 16:12:57 +0100 Subject: [PATCH 4/9] Change condition to elif --- pandas/core/construction.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/core/construction.py b/pandas/core/construction.py index f303f59efc48a..5e5e07119e113 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -498,8 +498,7 @@ def sanitize_array( """ if isinstance(data, ma.MaskedArray): data = sanitize_masked_array(data) - - if isinstance(data, np.matrix): + elif isinstance(data, np.matrix): data = data.A if isinstance(dtype, PandasDtype): From fc9048d9c6942ad19d65101a460b29c95da55504 Mon Sep 17 00:00:00 2001 From: Isaac Virshup Date: Wed, 1 Dec 2021 18:41:11 +0100 Subject: [PATCH 5/9] rename test + move check --- pandas/core/construction.py | 4 ++-- pandas/tests/frame/indexing/test_setitem.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/construction.py b/pandas/core/construction.py index 5e5e07119e113..9851cd699928f 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -498,8 +498,6 @@ def sanitize_array( """ if isinstance(data, ma.MaskedArray): data = sanitize_masked_array(data) - elif isinstance(data, np.matrix): - data = data.A if isinstance(dtype, PandasDtype): # Avoid ending up with a PandasArray @@ -525,6 +523,8 @@ def sanitize_array( # GH#846 if isinstance(data, np.ndarray): + if isinstance(data, np.matrix): + data = data.A if dtype is not None and is_float_dtype(data.dtype) and is_integer_dtype(dtype): # possibility of nan -> garbage diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index ab99a69dc6b02..55e44400ec1f3 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -680,7 +680,7 @@ def test_boolean_mask_nullable_int64(self): ) tm.assert_frame_equal(result, expected) - def test_2d_array(self): + def test_setitem_npmatrix_2d(self): # for use-case df["x"] = sparse.random(10, 10).mean(axis=1) expected = DataFrame( {"np-array": np.ones(10), "np-matrix": np.ones(10)}, index=np.arange(10) From 762e97d3c3b646c0569605861ad19c6b635cdfa1 Mon Sep 17 00:00:00 2001 From: Isaac Virshup Date: Mon, 6 Dec 2021 14:04:09 +0100 Subject: [PATCH 6/9] Add issue # --- pandas/tests/frame/indexing/test_setitem.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index 55e44400ec1f3..3842dd2d6f7c7 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -681,6 +681,7 @@ def test_boolean_mask_nullable_int64(self): tm.assert_frame_equal(result, expected) def test_setitem_npmatrix_2d(self): + # GH#42376 # for use-case df["x"] = sparse.random(10, 10).mean(axis=1) expected = DataFrame( {"np-array": np.ones(10), "np-matrix": np.ones(10)}, index=np.arange(10) From fcdec60b49bf8a0fee9b95242b12a5c7c655e740 Mon Sep 17 00:00:00 2001 From: Isaac Virshup Date: Mon, 6 Dec 2021 14:06:33 +0100 Subject: [PATCH 7/9] Comment with reason for warning --- pandas/tests/frame/indexing/test_setitem.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index 3842dd2d6f7c7..393cfbb12e47e 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -690,6 +690,8 @@ def test_setitem_npmatrix_2d(self): a = np.ones((10, 1)) df = DataFrame(index=np.arange(10)) df["np-array"] = a + + # Instantiation of `np.matrix` gives PendingDeprecationWarning with tm.assert_produces_warning(PendingDeprecationWarning): df["np-matrix"] = np.matrix(a) From 34fa37ae818b956ebaa02e700d5b45554c5e0505 Mon Sep 17 00:00:00 2001 From: Isaac Virshup Date: Mon, 6 Dec 2021 14:16:18 +0100 Subject: [PATCH 8/9] Added what's new note --- doc/source/whatsnew/v1.3.5.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.3.5.rst b/doc/source/whatsnew/v1.3.5.rst index dabd9a650f45b..e27d79d059f42 100644 --- a/doc/source/whatsnew/v1.3.5.rst +++ b/doc/source/whatsnew/v1.3.5.rst @@ -27,7 +27,7 @@ Fixed regressions Bug fixes ~~~~~~~~~ -- +- Fixed bug where a single column ``np.matrix`` was no longer coerced to a 1d ``np.ndarray`` when added to a :class:`DataFrame` (:issue:`42376`) - .. --------------------------------------------------------------------------- From 9e7f3c802f6224e11c827ef7aafcaaf44e47b03c Mon Sep 17 00:00:00 2001 From: Isaac Virshup Date: Mon, 6 Dec 2021 19:43:44 +0100 Subject: [PATCH 9/9] Skip test for array manager --- pandas/tests/frame/indexing/test_setitem.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index 393cfbb12e47e..137f8b2f0cc14 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -680,6 +680,8 @@ def test_boolean_mask_nullable_int64(self): ) tm.assert_frame_equal(result, expected) + # TODO(ArrayManager) set column with 2d column array, see #44788 + @td.skip_array_manager_not_yet_implemented def test_setitem_npmatrix_2d(self): # GH#42376 # for use-case df["x"] = sparse.random(10, 10).mean(axis=1)