From e70e7de3809c11e02fd231b8b4310c6d8fb84e59 Mon Sep 17 00:00:00 2001 From: Taylor Packard <3.t.packard@gmail.com> Date: Wed, 22 Nov 2023 00:24:27 -0500 Subject: [PATCH 1/6] changed if len(columns) == 0 and dtype is not None to return np.empty((0,0), dtype=dtype) --- pandas/core/internals/construction.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index 57dd310f6b12c..1523c0ce6fade 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -393,8 +393,15 @@ def ndarray_to_mgr( nb = new_block_2d(values, placement=bp, refs=refs) block_values = [nb] +<<<<<<< HEAD if len(columns) == 0: # TODO: check len(values) == 0? +======= + if len(columns) == 0 and dtype is not None: + block_values = np.empty((0, 0), dtype=dtype) + + elif len(columns) == 0 and dtype is None: +>>>>>>> 39c7c45d6f... changed if len(columns) == 0 and dtype is not None to return np.empty((0,0), dtype=dtype) block_values = [] return create_block_manager_from_blocks( From 63373878f7f1c9baac2125b9ee534f9fc724290f Mon Sep 17 00:00:00 2001 From: Taylor Packard <3.t.packard@gmail.com> Date: Wed, 22 Nov 2023 01:52:31 -0500 Subject: [PATCH 2/6] added test for preserving dtype when empty np.array used to make dataframe --- pandas/tests/frame/test_constructors.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index bf17b61b0e3f3..dfbd8ec8b6a65 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -2758,6 +2758,13 @@ def test_frame_string_inference_block_dim(self): df = DataFrame(np.array([["hello", "goodbye"], ["hello", "Hello"]])) assert df._mgr.blocks[0].ndim == 2 + def test_frame_from_empty_array_perserving_dtype(self): + # GH#55649 + # start of simple + np_list = [np.int8, np.int16, np.uint16, np.uint64] + for n in np_list: + assert
(np.array([], dtype=n).reshape((0, 0))).dtype == n + class TestDataFrameConstructorIndexInference: def test_frame_from_dict_of_series_overlapping_monthly_period_indexes(self): From d8183a0432449079aa02ea28db9887657b4d108c Mon Sep 17 00:00:00 2001 From: Taylor Packard <3.t.packard@gmail.com> Date: Wed, 22 Nov 2023 03:14:15 -0500 Subject: [PATCH 3/6] test --- pandas/core/internals/construction.py | 5 ----- pandas/core/internals/managers.py | 6 +++++- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index 1523c0ce6fade..4a2f839ea4bc4 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -393,15 +393,10 @@ def ndarray_to_mgr( nb = new_block_2d(values, placement=bp, refs=refs) block_values = [nb] -<<<<<<< HEAD - if len(columns) == 0: - # TODO: check len(values) == 0? -======= if len(columns) == 0 and dtype is not None: block_values = np.empty((0, 0), dtype=dtype) elif len(columns) == 0 and dtype is None: ->>>>>>> 39c7c45d6f... 
changed if len(columns) == 0 and dtype is not None to return np.empty((0,0), dtype=dtype) block_values = [] return create_block_manager_from_blocks( diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 843441e4865c7..3a157c413c06d 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1683,10 +1683,14 @@ def as_array( passed_nan = lib.is_float(na_value) and isna(na_value) # TODO(CoW) handle case where resulting array is a view - if len(self.blocks) == 0: + if len(self.blocks) == 0 and dtype is None: arr = np.empty(self.shape, dtype=float) return arr.transpose() + if len(self.blocks) == 0 and dtype is not None: + arr = np.empty(self.shape, dtype=dtype) + return arr.transpose() + if self.is_single_block: blk = self.blocks[0] From bb4d0fca5c6d89154a158602e6200f73f1c02ec1 Mon Sep 17 00:00:00 2001 From: Taylor Packard <3.t.packard@gmail.com> Date: Wed, 29 Nov 2023 23:23:48 -0500 Subject: [PATCH 4/6] removed comments from tests --- pandas/tests/frame/test_constructors.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 985f5f3302c24..8b938a111202d 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -2761,7 +2761,6 @@ def test_frame_string_inference_block_dim(self): def test_frame_from_empty_array_perserving_dtype(self): # GH#55649 - # start of simple np_list = [np.int8, np.int16, np.uint16, np.uint64] for n in np_list: assert (np.array([], dtype=n).reshape((0, 0))).dtype == n From 49efdfccc359cc752d94a6b43a989ff2ab4edb41 Mon Sep 17 00:00:00 2001 From: Taylor Packard <3.t.packard@gmail.com> Date: Wed, 29 Nov 2023 23:48:15 -0500 Subject: [PATCH 5/6] entry in the latest whatsnew/v.2.2.0.rst --- doc/source/whatsnew/v2.2.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst index 
8cb4b3f24d435..ceab41ba82e5e 100644 --- a/doc/source/whatsnew/v2.2.0.rst +++ b/doc/source/whatsnew/v2.2.0.rst @@ -598,6 +598,7 @@ Other - Bug in :func:`infer_freq` and :meth:`DatetimeIndex.inferred_freq` with weekly frequencies and non-nanosecond resolutions (:issue:`55609`) - Bug in :meth:`DataFrame.apply` where passing ``raw=True`` ignored ``args`` passed to the applied function (:issue:`55009`) - Bug in :meth:`Dataframe.from_dict` which would always sort the rows of the created :class:`DataFrame`. (:issue:`55683`) +- Bug in the :class:`DataFrame` constructor where the provided ``dtype`` was lost when constructing an empty :class:`DataFrame` (:issue:`55649`) - Bug in rendering ``inf`` values inside a a :class:`DataFrame` with the ``use_inf_as_na`` option enabled (:issue:`55483`) - Bug in rendering a :class:`Series` with a :class:`MultiIndex` when one of the index level's names is 0 not having that name displayed (:issue:`55415`) - Bug in the error message when assigning an empty dataframe to a column (:issue:`55956`) From 0336c9766958a7829f81a709b2cd6cd48587579f Mon Sep 17 00:00:00 2001 From: Taylor Packard <3.t.packard@gmail.com> Date: Fri, 1 Dec 2023 17:56:08 -0500 Subject: [PATCH 6/6] fix to preserve original dtype --- pandas/core/internals/construction.py | 10 +++++++++- pandas/core/internals/managers.py | 2 +- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index 4a2f839ea4bc4..c5a7844af29c2 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -394,7 +394,15 @@ def ndarray_to_mgr( block_values = [nb] if len(columns) == 0 and dtype is not None: - block_values = np.empty((0, 0), dtype=dtype) + obj_columns = list(values) + block_values = [ + new_block( + dtype.construct_array_type()._from_sequence(data, dtype=dtype), + BlockPlacement(slice(i, i + 1)), + ndim=2, + ) + for i, data in enumerate(obj_columns) + ] elif len(columns)
== 0 and dtype is None: block_values = [] diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 402ff1ec59c5a..7061653235c95 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1687,7 +1687,7 @@ def as_array( arr = np.empty(self.shape, dtype=float) return arr.transpose() - if len(self.blocks) == 0 and dtype is not None: + elif len(self.blocks) == 0 and dtype is not None: arr = np.empty(self.shape, dtype=dtype) return arr.transpose()