From 6dd80f4e8b4cb24383244c0b275846ebe1c17de2 Mon Sep 17 00:00:00 2001 From: debnathshoham Date: Fri, 13 Aug 2021 00:40:25 +0530 Subject: [PATCH 1/3] BUG: Different initialization methods lead to different dtypes (DataFrame) --- pandas/core/construction.py | 3 ++- pandas/tests/dtypes/test_dtypes.py | 7 +++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/pandas/core/construction.py b/pandas/core/construction.py index f84aaa907f3fc..b7c179d36d886 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -709,7 +709,8 @@ def _try_cast( else: # i.e. list - varr = np.array(arr, copy=False) + # GH #42971 making dtype=object + varr = np.array(arr, copy=False, dtype=object) # filter out cases that we _dont_ want to go through # maybe_infer_to_datetimelike if varr.dtype != object or varr.size == 0: diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py index abb29ce66fd34..c1b52b58f95e8 100644 --- a/pandas/tests/dtypes/test_dtypes.py +++ b/pandas/tests/dtypes/test_dtypes.py @@ -1095,3 +1095,10 @@ def test_period_dtype_compare_to_string(): dtype = PeriodDtype(freq="M") assert (dtype == "period[M]") is True assert (dtype != "period[M]") is False + + +def test_constructor_dtype(): + # GH#42971 + expected = pd.DataFrame(columns=["a"]) + result = pd.DataFrame({"a": []}) + tm.assert_frame_equal(result, expected) From 3cdcb0b841e4a309f4d5edb824eb24b96bbe899e Mon Sep 17 00:00:00 2001 From: debnathshoham Date: Fri, 13 Aug 2021 19:51:50 +0530 Subject: [PATCH 2/3] BUG: separate df dtypes from different initialization --- pandas/tests/dtypes/test_dtypes.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py index c1b52b58f95e8..414f81fdd5a2d 100644 --- a/pandas/tests/dtypes/test_dtypes.py +++ b/pandas/tests/dtypes/test_dtypes.py @@ -1097,8 +1097,8 @@ def test_period_dtype_compare_to_string(): assert (dtype != "period[M]") is False 
-def test_constructor_dtype(): +def test_dataframe_constructor_dtype(): # GH#42971 expected = pd.DataFrame(columns=["a"]) result = pd.DataFrame({"a": []}) - tm.assert_frame_equal(result, expected) + tm.assert_series_equal(result.dtypes, expected.dtypes) From 62115835e434fa8762d83b577e5c2c730bf9ec94 Mon Sep 17 00:00:00 2001 From: debnathshoham Date: Sun, 15 Aug 2021 15:46:26 +0530 Subject: [PATCH 3/3] check if dict value empty --- pandas/core/construction.py | 3 +-- pandas/core/internals/construction.py | 11 +++++++++++ 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/pandas/core/construction.py b/pandas/core/construction.py index b7c179d36d886..f84aaa907f3fc 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -709,8 +709,7 @@ def _try_cast( else: # i.e. list - # GH #42971 making dtype=object - varr = np.array(arr, copy=False, dtype=object) + varr = np.array(arr, copy=False) # filter out cases that we _dont_ want to go through # maybe_infer_to_datetimelike if varr.dtype != object or varr.size == 0: diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index 7f3d246a6fda6..1a85dbe70a5e3 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -456,6 +456,17 @@ def dict_to_mgr( ] # TODO: can we get rid of the dt64tz special case above? + if dtype is None and index is None: + data_len = 0 + for each in data.values(): + if isinstance(each, (int, str)): + data_len += 1 + elif each is None: + continue + else: + data_len += len(each) + if data_len == 0: + dtype = object return arrays_to_mgr( arrays, data_names, index, columns, dtype=dtype, typ=typ, consolidate=copy )