From be2c9778dba88cf8ddc887b161dd886cc7aa5e5d Mon Sep 17 00:00:00 2001
From: Richard Shadrach <rhshadrach@gmail.com>
Date: Wed, 8 May 2024 18:07:35 -0400
Subject: [PATCH 1/4] BUG: DataFrame constructor defaulting to float dtype on
 empty input

---
 pandas/core/construction.py                   |  4 ++--
 pandas/core/frame.py                          | 14 ++++++-------
 pandas/core/groupby/generic.py                |  2 +-
 pandas/core/internals/managers.py             |  2 +-
 .../tests/arrays/categorical/test_missing.py  |  4 ++--
 pandas/tests/frame/methods/test_quantile.py   |  7 +++++--
 pandas/tests/frame/methods/test_reindex.py    |  2 +-
 pandas/tests/frame/test_reductions.py         |  2 +-
 pandas/tests/frame/test_stack_unstack.py      | 15 +++++++++-----
 pandas/tests/groupby/methods/test_quantile.py |  2 +-
 pandas/tests/groupby/test_apply.py            |  4 +---
 pandas/tests/groupby/test_groupby.py          |  4 ++--
 pandas/tests/groupby/test_grouping.py         | 10 +++-------
 pandas/tests/indexing/test_partial.py         |  6 +++++-
 pandas/tests/resample/test_datetime_index.py  |  2 +-
 pandas/tests/reshape/concat/test_concat.py    |  2 +-
 pandas/tests/reshape/concat/test_empty.py     |  2 +-
 pandas/tests/reshape/test_melt.py             | 20 ++++++++++++++++---
 pandas/tests/series/test_constructors.py      |  2 +-
 pandas/tests/window/test_groupby.py           |  6 +++---
 pandas/tests/window/test_timeseries_window.py |  2 +-
 21 files changed, 66 insertions(+), 48 deletions(-)

diff --git a/pandas/core/construction.py b/pandas/core/construction.py
index 2718e9819cdf8..2a21492bde2f4 100644
--- a/pandas/core/construction.py
+++ b/pandas/core/construction.py
@@ -652,8 +652,8 @@ def sanitize_array(
         data = list(data)
 
         if len(data) == 0 and dtype is None:
-            # We default to float64, matching numpy
-            subarr = np.array([], dtype=np.float64)
+            # We default to object, diverging from NumPy
+            subarr = np.array([], dtype=np.object_)
 
         elif dtype is not None:
             subarr = _try_cast(data, dtype, copy)
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index a4decab6e8a2b..c51c7f8a5e0fe 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -13059,16 +13059,14 @@ def quantile(
                 interpolation=interpolation,
                 method=method,
             )
-            if method == "single":
-                res = res_df.iloc[0]
-            else:
-                # cannot directly iloc over sparse arrays
-                res = res_df.T.iloc[:, 0]
+            res = res_df.iloc[0]
             if axis == 1 and len(self) == 0:
                 # GH#41544 try to get an appropriate dtype
-                dtype = find_common_type(list(self.dtypes))
-                if needs_i8_conversion(dtype):
-                    return res.astype(dtype)
+                dtype = "float64"
+                cdtype = find_common_type(list(self.dtypes))
+                if needs_i8_conversion(cdtype):
+                    dtype = cdtype
+                return res.astype(dtype)
             return res
 
         q = Index(q, dtype=np.float64)
diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index 0a048d11d0b4d..6703b3dcfcbbd 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -578,7 +578,7 @@ def _transform_general(
             concatenated = concat(results, ignore_index=True)
             result = self._set_result_index_ordered(concatenated)
         else:
-            result = self.obj._constructor(dtype=np.float64)
+            result = self.obj._constructor(dtype=self.obj.dtype)
 
         result.name = self.obj.name
         return result
diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py
index 7c1bcbec1d3f2..0cc9d549d7566 100644
--- a/pandas/core/internals/managers.py
+++ b/pandas/core/internals/managers.py
@@ -1778,7 +1778,7 @@ def as_array(
         passed_nan = lib.is_float(na_value) and isna(na_value)
 
         if len(self.blocks) == 0:
-            arr = np.empty(self.shape, dtype=float)
+            arr = np.empty(self.shape, dtype=object)
             return arr.transpose()
 
         if self.is_single_block:
diff --git a/pandas/tests/arrays/categorical/test_missing.py b/pandas/tests/arrays/categorical/test_missing.py
index 9d4b78ce9944e..4765cbd8d3097 100644
--- a/pandas/tests/arrays/categorical/test_missing.py
+++ b/pandas/tests/arrays/categorical/test_missing.py
@@ -122,9 +122,9 @@ def test_compare_categorical_with_missing(self, a1, a2, categories):
         "na_value, dtype",
         [
             (pd.NaT, "datetime64[ns]"),
-            (None, "float64"),
+            (None, "object"),
             (np.nan, "float64"),
-            (pd.NA, "float64"),
+            (pd.NA, "object"),
         ],
     )
     def test_categorical_only_missing_values_no_cast(self, na_value, dtype):
diff --git a/pandas/tests/frame/methods/test_quantile.py b/pandas/tests/frame/methods/test_quantile.py
index 32ae4c0ff2f50..842d2c3a416d5 100644
--- a/pandas/tests/frame/methods/test_quantile.py
+++ b/pandas/tests/frame/methods/test_quantile.py
@@ -81,7 +81,7 @@ def test_quantile(self, datetime_frame, interp_method, request):
     def test_empty(self, interp_method):
         interpolation, method = interp_method
         q = DataFrame({"x": [], "y": []}).quantile(
-            0.1, axis=0, numeric_only=True, interpolation=interpolation, method=method
+            0.1, axis=0, interpolation=interpolation, method=method
         )
         assert np.isnan(q["x"]) and np.isnan(q["y"])
 
@@ -319,8 +319,11 @@ def test_quantile_multi_empty(self, interp_method):
         result = DataFrame({"x": [], "y": []}).quantile(
             [0.1, 0.9], axis=0, interpolation=interpolation, method=method
         )
+        dtype = "float64" if method == "single" else "object"
         expected = DataFrame(
-            {"x": [np.nan, np.nan], "y": [np.nan, np.nan]}, index=[0.1, 0.9]
+            {"x": [np.nan, np.nan], "y": [np.nan, np.nan]},
+            index=[0.1, 0.9],
+            dtype=dtype,
         )
         tm.assert_frame_equal(result, expected)
 
diff --git a/pandas/tests/frame/methods/test_reindex.py b/pandas/tests/frame/methods/test_reindex.py
index 45109991c4553..3452e2796d16c 100644
--- a/pandas/tests/frame/methods/test_reindex.py
+++ b/pandas/tests/frame/methods/test_reindex.py
@@ -77,7 +77,7 @@ def test_setitem_reset_index_dtypes(self):
         df1["d"] = []
         result = df1.reset_index()
         expected = DataFrame(columns=["a", "b", "c", "d"], index=range(0)).astype(
-            {"a": "datetime64[ns]", "b": np.int64, "c": np.float64, "d": np.float64}
+            {"a": "datetime64[ns]", "b": np.int64, "c": np.float64, "d": np.object_}
         )
         tm.assert_frame_equal(result, expected)
 
diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py
index 5118561f67338..330c937729b55 100644
--- a/pandas/tests/frame/test_reductions.py
+++ b/pandas/tests/frame/test_reductions.py
@@ -1627,7 +1627,7 @@ def test_min_max_dt64_api_consistency_empty_df(self):
         # check DataFrame/Series api consistency when calling min/max on an empty
         # DataFrame/Series.
         df = DataFrame({"x": []})
-        expected_float_series = Series([], dtype=float)
+        expected_float_series = Series([], dtype=object)
         # check axis 0
         assert np.isnan(df.min(axis=0).x) == np.isnan(expected_float_series.min())
         assert np.isnan(df.max(axis=0).x) == np.isnan(expected_float_series.max())
diff --git a/pandas/tests/frame/test_stack_unstack.py b/pandas/tests/frame/test_stack_unstack.py
index 03db284d892e3..c45d46607982a 100644
--- a/pandas/tests/frame/test_stack_unstack.py
+++ b/pandas/tests/frame/test_stack_unstack.py
@@ -1418,11 +1418,12 @@ def test_stack_timezone_aware_values(future_stack):
 def test_stack_empty_frame(dropna, future_stack):
     # GH 36113
     levels = [np.array([], dtype=np.int64), np.array([], dtype=np.int64)]
-    expected = Series(dtype=np.float64, index=MultiIndex(levels=levels, codes=[[], []]))
+    expected = Series(dtype=np.object_, index=MultiIndex(levels=levels, codes=[[], []]))
     if future_stack and dropna is not lib.no_default:
         with pytest.raises(ValueError, match="dropna must be unspecified"):
             DataFrame(dtype=np.float64).stack(dropna=dropna, future_stack=future_stack)
     else:
+        # dtype=np.float64 is lost since there are no columns
         result = DataFrame(dtype=np.float64).stack(
             dropna=dropna, future_stack=future_stack
         )
@@ -1612,7 +1613,9 @@ def test_unstack(self, multiindex_year_month_day_dataframe_random_data):
             (
                 [[1, 1, None, None, 30.0], [2, None, None, None, 30.0]],
                 ["ix1", "ix2", "col1", "col2", "col3"],
-                None,
+                # None is coerced to NaN when it appears alongside numeric data,
+                # so index level 1 holds np.nan rather than None.
+                np.nan,
                 [None, None, 30.0],
             ),
         ],
@@ -1624,10 +1627,12 @@ def test_unstack_partial(
         # https://github.com/pandas-dev/pandas/issues/19351
         # make sure DataFrame.unstack() works when its run on a subset of the DataFrame
         # and the Index levels contain values that are not present in the subset
-        result = DataFrame(result_rows, columns=result_columns).set_index(
-            ["ix1", "ix2"]
+        data = (
+            DataFrame(result_rows, columns=result_columns)
+            .set_index(["ix1", "ix2"])
+            .iloc[1:2]
         )
-        result = result.iloc[1:2].unstack("ix2")
+        result = data.unstack("ix2")
         expected = DataFrame(
             [expected_row],
             columns=MultiIndex.from_product(
diff --git a/pandas/tests/groupby/methods/test_quantile.py b/pandas/tests/groupby/methods/test_quantile.py
index af0deba138469..70816e7fd1da7 100644
--- a/pandas/tests/groupby/methods/test_quantile.py
+++ b/pandas/tests/groupby/methods/test_quantile.py
@@ -192,7 +192,7 @@ def test_quantile_missing_group_values_no_segfaults():
         ([1.0, np.nan, 2.0, 2.0], range(4), [1.0, 2.0], [0.0, 2.5]),
         (["a", "b", "b", np.nan], range(4), ["a", "b"], [0, 1.5]),
         ([0], [42], [0], [42.0]),
-        ([], [], np.array([], dtype="float64"), np.array([], dtype="float64")),
+        ([], np.array([], dtype="float64"), [], np.array([], dtype="float64")),
     ],
 )
 def test_quantile_missing_group_values_correct_results(
diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py
index e27c782c1bdcf..24ac7e8c4fa94 100644
--- a/pandas/tests/groupby/test_apply.py
+++ b/pandas/tests/groupby/test_apply.py
@@ -1479,9 +1479,7 @@ def test_empty_df(method, op):
     group = getattr(gb, "b")
 
     result = getattr(group, method)(op)
-    expected = Series(
-        [], name="b", dtype="float64", index=Index([], dtype="float64", name="a")
-    )
+    expected = Series([], name="b", index=Index([], name="a"))
 
     tm.assert_series_equal(result, expected)
 
diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index b99ef2a0e840d..a58e10d8005d1 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -1116,10 +1116,10 @@ def convert_force_pure(x):
 def test_groupby_dtype_inference_empty():
     # GH 6733
     df = DataFrame({"x": [], "range": np.arange(0, dtype="int64")})
-    assert df["x"].dtype == np.float64
+    assert df["x"].dtype == np.object_
 
     result = df.groupby("x").first()
-    exp_index = Index([], name="x", dtype=np.float64)
+    exp_index = Index([], name="x", dtype=np.object_)
     expected = DataFrame({"range": Series([], index=exp_index, dtype="int64")})
     tm.assert_frame_equal(result, expected, by_blocks=True)
 
diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py
index 39eadd32f300d..3a62bb48ba5b7 100644
--- a/pandas/tests/groupby/test_grouping.py
+++ b/pandas/tests/groupby/test_grouping.py
@@ -739,19 +739,15 @@ def test_list_grouper_with_nat(self):
         [
             (
                 "transform",
-                Series(name=2, dtype=np.float64),
+                Series(name=2),
             ),
             (
                 "agg",
-                Series(
-                    name=2, dtype=np.float64, index=Index([], dtype=np.float64, name=1)
-                ),
+                Series(name=2, index=Index([], name=1)),
             ),
             (
                 "apply",
-                Series(
-                    name=2, dtype=np.float64, index=Index([], dtype=np.float64, name=1)
-                ),
+                Series(name=2, index=Index([], name=1)),
             ),
         ],
     )
diff --git a/pandas/tests/indexing/test_partial.py b/pandas/tests/indexing/test_partial.py
index b0a041ed5b69c..71cfa850cc855 100644
--- a/pandas/tests/indexing/test_partial.py
+++ b/pandas/tests/indexing/test_partial.py
@@ -119,7 +119,6 @@ def test_partial_set_empty_frame3(self):
         expected = DataFrame(
             columns=Index(["foo"], dtype=object), index=Index([], dtype="int64")
         )
-        expected["foo"] = expected["foo"].astype("float64")
 
         df = DataFrame(index=Index([], dtype="int64"))
         df["foo"] = []
@@ -128,6 +127,11 @@ def test_partial_set_empty_frame3(self):
 
         df = DataFrame(index=Index([], dtype="int64"))
         df["foo"] = Series(np.arange(len(df)), dtype="float64")
+        expected = DataFrame(
+            columns=Index(["foo"], dtype=object),
+            index=Index([], dtype="int64"),
+            dtype="float64",
+        )
 
         tm.assert_frame_equal(df, expected)
 
diff --git a/pandas/tests/resample/test_datetime_index.py b/pandas/tests/resample/test_datetime_index.py
index 5ee9b65ba9ae7..f57da2a8c6d27 100644
--- a/pandas/tests/resample/test_datetime_index.py
+++ b/pandas/tests/resample/test_datetime_index.py
@@ -2009,7 +2009,7 @@ def test_resample_empty_series_with_tz():
     expected_idx = DatetimeIndex(
         [], freq="2MS", name="ts", dtype="datetime64[ns, Atlantic/Faroe]"
     )
-    expected = Series([], index=expected_idx, name="values", dtype="float64")
+    expected = Series([], index=expected_idx, name="values")
     tm.assert_series_equal(result, expected)
 
 
diff --git a/pandas/tests/reshape/concat/test_concat.py b/pandas/tests/reshape/concat/test_concat.py
index f86cc0c69d363..a51a128a3e7f0 100644
--- a/pandas/tests/reshape/concat/test_concat.py
+++ b/pandas/tests/reshape/concat/test_concat.py
@@ -572,7 +572,7 @@ def test_concat_empty_and_non_empty_frame_regression():
     # GH 18178 regression test
     df1 = DataFrame({"foo": [1]})
     df2 = DataFrame({"foo": []})
-    expected = DataFrame({"foo": [1.0]})
+    expected = DataFrame({"foo": [1]}, dtype="object")
     result = concat([df1, df2])
     tm.assert_frame_equal(result, expected)
 
diff --git a/pandas/tests/reshape/concat/test_empty.py b/pandas/tests/reshape/concat/test_empty.py
index 06d57c48df817..ea02ceb2fcb30 100644
--- a/pandas/tests/reshape/concat/test_empty.py
+++ b/pandas/tests/reshape/concat/test_empty.py
@@ -90,7 +90,7 @@ def test_concat_empty_series_timelike(self, tz, values):
         expected = DataFrame(
             {
                 0: Series([pd.NaT] * len(values), dtype="M8[ns]").dt.tz_localize(tz),
-                1: values,
+                1: Series(values, dtype=dtype),
             }
         )
         result = concat([first, second], axis=1)
diff --git a/pandas/tests/reshape/test_melt.py b/pandas/tests/reshape/test_melt.py
index f224a45ca3279..21107e2d8fb58 100644
--- a/pandas/tests/reshape/test_melt.py
+++ b/pandas/tests/reshape/test_melt.py
@@ -924,7 +924,14 @@ def test_invalid_separator(self):
             "A": [],
             "B": [],
         }
-        expected = DataFrame(exp_data).astype({"year": np.int64})
+        expected = DataFrame(exp_data).astype(
+            {
+                "A2010": np.float64,
+                "A2011": np.float64,
+                "B2010": np.float64,
+                "year": np.int64,
+            }
+        )
         expected = expected.set_index(["id", "year"])[
             ["X", "A2010", "A2011", "B2010", "A", "B"]
         ]
@@ -987,7 +994,14 @@ def test_invalid_suffixtype(self):
             "A": [],
             "B": [],
         }
-        expected = DataFrame(exp_data).astype({"year": np.int64})
+        expected = DataFrame(exp_data).astype(
+            {
+                "Aone": np.float64,
+                "Atwo": np.float64,
+                "Bone": np.float64,
+                "year": np.int64,
+            }
+        )
 
         expected = expected.set_index(["id", "year"])
         expected.index = expected.index.set_levels([0, 1], level=0)
@@ -1211,7 +1225,7 @@ def test_missing_stubname(self, dtype):
             name=("id", "num"),
         )
         expected = DataFrame(
-            {"a": [100, 200, 300, 400], "b": [np.nan] * 4},
+            {"a": [100, 200, 300, 400], "b": pd.Series([np.nan] * 4, dtype="object")},
             index=index,
         )
         new_level = expected.index.levels[0].astype(dtype)
diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py
index 3f9d5bbe806bb..03c7cf4ec3ee8 100644
--- a/pandas/tests/series/test_constructors.py
+++ b/pandas/tests/series/test_constructors.py
@@ -1416,7 +1416,7 @@ def test_constructor_dict_tuple_indexer(self):
         data = {(1, 1, None): -1.0}
         result = Series(data)
         expected = Series(
-            -1.0, index=MultiIndex(levels=[[1], [1], [np.nan]], codes=[[0], [0], [-1]])
+            -1.0, index=MultiIndex(levels=[[1], [1], []], codes=[[0], [0], [-1]])
         )
         tm.assert_series_equal(result, expected)
 
diff --git a/pandas/tests/window/test_groupby.py b/pandas/tests/window/test_groupby.py
index 120470b09a92b..983d4f524f209 100644
--- a/pandas/tests/window/test_groupby.py
+++ b/pandas/tests/window/test_groupby.py
@@ -549,7 +549,7 @@ def test_groupby_rolling_empty_frame(self):
         # GH-38057 from_tuples gives empty object dtype, we now get float/int levels
         # expected.index = MultiIndex.from_tuples([], names=["s1", None])
         expected.index = MultiIndex.from_product(
-            [Index([], dtype="float64"), Index([], dtype="int64")], names=["s1", None]
+            [Index([]), Index([], dtype="int64")], names=["s1", None]
         )
         tm.assert_frame_equal(result, expected)
 
@@ -559,8 +559,8 @@ def test_groupby_rolling_empty_frame(self):
         expected = expected.drop(columns=["s1", "s2"])
         expected.index = MultiIndex.from_product(
             [
-                Index([], dtype="float64"),
-                Index([], dtype="float64"),
+                Index([]),
+                Index([]),
                 Index([], dtype="int64"),
             ],
             names=["s1", "s2", None],
diff --git a/pandas/tests/window/test_timeseries_window.py b/pandas/tests/window/test_timeseries_window.py
index 820b0134cc577..82fb5fda9ff06 100644
--- a/pandas/tests/window/test_timeseries_window.py
+++ b/pandas/tests/window/test_timeseries_window.py
@@ -671,7 +671,7 @@ def test_rolling_on_empty(self):
         # GH-32385
         df = DataFrame({"column": []}, index=[])
         result = df.rolling("5s").min()
-        expected = DataFrame({"column": []}, index=[])
+        expected = DataFrame({"column": []}, index=[], dtype="float64")
         tm.assert_frame_equal(result, expected)
 
     def test_rolling_on_multi_index_level(self):

From 9d249032c8b60676c3c206786a4b99b1b124af01 Mon Sep 17 00:00:00 2001
From: Richard Shadrach <rhshadrach@gmail.com>
Date: Sun, 12 May 2024 09:27:31 -0400
Subject: [PATCH 2/4] cleanup

---
 pandas/tests/groupby/methods/test_quantile.py |  7 ++++++-
 pandas/tests/groupby/test_grouping.py         | 15 +++------------
 2 files changed, 9 insertions(+), 13 deletions(-)

diff --git a/pandas/tests/groupby/methods/test_quantile.py b/pandas/tests/groupby/methods/test_quantile.py
index 70816e7fd1da7..a7e5cccab6b55 100644
--- a/pandas/tests/groupby/methods/test_quantile.py
+++ b/pandas/tests/groupby/methods/test_quantile.py
@@ -192,7 +192,12 @@ def test_quantile_missing_group_values_no_segfaults():
         ([1.0, np.nan, 2.0, 2.0], range(4), [1.0, 2.0], [0.0, 2.5]),
         (["a", "b", "b", np.nan], range(4), ["a", "b"], [0, 1.5]),
         ([0], [42], [0], [42.0]),
-        ([], np.array([], dtype="float64"), [], np.array([], dtype="float64")),
+        (
+            np.array([], dtype="float64"),
+            np.array([], dtype="float64"),
+            np.array([], dtype="float64"),
+            np.array([], dtype="float64"),
+        ),
     ],
 )
 def test_quantile_missing_group_values_correct_results(
diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py
index 3a62bb48ba5b7..a6416aea98591 100644
--- a/pandas/tests/groupby/test_grouping.py
+++ b/pandas/tests/groupby/test_grouping.py
@@ -737,18 +737,9 @@ def test_list_grouper_with_nat(self):
     @pytest.mark.parametrize(
         "func,expected",
         [
-            (
-                "transform",
-                Series(name=2),
-            ),
-            (
-                "agg",
-                Series(name=2, index=Index([], name=1)),
-            ),
-            (
-                "apply",
-                Series(name=2, index=Index([], name=1)),
-            ),
+            ("transform", Series(name=2)),
+            ("agg", Series(name=2, index=Index([], name=1))),
+            ("apply", Series(name=2, index=Index([], name=1))),
         ],
     )
     def test_evaluate_with_empty_groups(self, func, expected):

From cf562f0d808d8e733f72624cb216cae390855292 Mon Sep 17 00:00:00 2001
From: Richard Shadrach <rhshadrach@gmail.com>
Date: Wed, 15 May 2024 13:16:37 -0400
Subject: [PATCH 3/4] WIP

---
 pandas/core/frame.py                        | 24 +++++++++++++--------
 pandas/tests/frame/methods/test_quantile.py |  9 ++++----
 2 files changed, 20 insertions(+), 13 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index c51c7f8a5e0fe..31f5f1c356fd8 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -13045,6 +13045,7 @@ def quantile(
         C        1 days 12:00:00
         Name: 0.5, dtype: object
         """
+        from pandas.core.dtypes.common import is_object_dtype
         validate_percentile(q)
         axis = self._get_axis_number(axis)
 
@@ -13064,7 +13065,7 @@ def quantile(
                 # GH#41544 try to get an appropriate dtype
                 dtype = "float64"
                 cdtype = find_common_type(list(self.dtypes))
-                if needs_i8_conversion(cdtype):
+                if needs_i8_conversion(cdtype) or is_object_dtype(cdtype):
                     dtype = cdtype
                 return res.astype(dtype)
             return res
@@ -13083,7 +13084,7 @@ def quantile(
             if axis == 1:
                 # GH#41544 try to get an appropriate dtype
                 cdtype = find_common_type(list(self.dtypes))
-                if needs_i8_conversion(cdtype):
+                if needs_i8_conversion(cdtype) or is_object_dtype(cdtype):
                     dtype = cdtype
 
             res = self._constructor([], index=q, columns=cols, dtype=dtype)
@@ -13094,6 +13095,18 @@ def quantile(
             raise ValueError(
                 f"Invalid method: {method}. Method must be in {valid_method}."
             )
+
+        # handle degenerate case
+        if len(data) == 0:
+            dtype = np.float64
+            if data.ndim == 2:
+                cdtype = find_common_type(list(self.dtypes))
+            else:
+                cdtype = self.dtype
+            if needs_i8_conversion(cdtype) or is_object_dtype(cdtype):
+                dtype = cdtype
+            return self._constructor([], index=q, columns=data.columns, dtype=dtype)
+
         if method == "single":
             res = data._mgr.quantile(qs=q, interpolation=interpolation)
         elif method == "table":
@@ -13103,13 +13116,6 @@ def quantile(
                     f"Invalid interpolation: {interpolation}. "
                     f"Interpolation must be in {valid_interpolation}"
                 )
-            # handle degenerate case
-            if len(data) == 0:
-                if data.ndim == 2:
-                    dtype = find_common_type(list(self.dtypes))
-                else:
-                    dtype = self.dtype
-                return self._constructor([], index=q, columns=data.columns, dtype=dtype)
 
             q_idx = np.quantile(np.arange(len(data)), q, method=interpolation)
 
diff --git a/pandas/tests/frame/methods/test_quantile.py b/pandas/tests/frame/methods/test_quantile.py
index 842d2c3a416d5..ff8758a9f9759 100644
--- a/pandas/tests/frame/methods/test_quantile.py
+++ b/pandas/tests/frame/methods/test_quantile.py
@@ -319,7 +319,8 @@ def test_quantile_multi_empty(self, interp_method):
         result = DataFrame({"x": [], "y": []}).quantile(
             [0.1, 0.9], axis=0, interpolation=interpolation, method=method
         )
-        dtype = "float64" if method == "single" else "object"
+        # dtype = "float64" if method == "single" else "object"
+        dtype = "object"
         expected = DataFrame(
             {"x": [np.nan, np.nan], "y": [np.nan, np.nan]},
             index=[0.1, 0.9],
@@ -692,7 +693,7 @@ def test_quantile_empty_no_rows_dt64(self, interp_method):
             0.5, numeric_only=False, interpolation=interpolation, method=method
         )
         exp = exp.astype(object)
-        if interpolation == "nearest":
+        if True or interpolation == "nearest":
             # GH#18463 TODO: would we prefer NaTs here?
             exp = exp.fillna(np.nan)
         tm.assert_series_equal(res, exp)
@@ -911,7 +912,7 @@ def test_empty_datelike(
     @pytest.mark.parametrize(
         "expected_data, expected_index, axis",
         [
-            [[np.nan, np.nan], range(2), 1],
+            [[pd.NaT, pd.NaT], range(2), 1],
             [[], [], 0],
         ],
     )
@@ -926,6 +927,6 @@ def test_datelike_numeric_only(self, expected_data, expected_index, axis):
         )
         result = df[["a", "c"]].quantile(0.5, axis=axis, numeric_only=True)
         expected = Series(
-            expected_data, name=0.5, index=Index(expected_index), dtype=np.float64
+            expected_data, name=0.5, index=Index(expected_index)
         )
         tm.assert_series_equal(result, expected)

From 088273bedca291f422cc49de57f6251e641bdbc0 Mon Sep 17 00:00:00 2001
From: Richard Shadrach <rhshadrach@gmail.com>
Date: Sun, 2 Jun 2024 11:33:35 -0400
Subject: [PATCH 4/4] Rework quantile

---
 pandas/core/frame.py                        | 25 ++++++++++++++-------
 pandas/tests/frame/methods/test_quantile.py | 14 +++++-------
 2 files changed, 22 insertions(+), 17 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 31f5f1c356fd8..1b3b04aae2a5e 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -83,6 +83,7 @@
     can_hold_element,
     construct_1d_arraylike_from_scalar,
     construct_2d_arraylike_from_scalar,
+    ensure_dtype_can_hold_na,
     find_common_type,
     infer_dtype_from_scalar,
     invalidate_string_dtypes,
@@ -13046,6 +13047,7 @@ def quantile(
         Name: 0.5, dtype: object
         """
         from pandas.core.dtypes.common import is_object_dtype
+
         validate_percentile(q)
         axis = self._get_axis_number(axis)
 
@@ -13075,6 +13077,10 @@ def quantile(
 
         if axis == 1:
             data = data.T
+            if data.shape[0] == 0:
+                # The transpose has no rows, so the original has no columns, meaning we
+                # have no dtype information. Since this is quantile, default to float64
+                data = data.astype("float64")
 
         if len(data.columns) == 0:
             # GH#23925 _get_numeric_data may have dropped all columns
@@ -13098,14 +13104,17 @@ def quantile(
 
         # handle degenerate case
         if len(data) == 0:
-            dtype = np.float64
-            if data.ndim == 2:
-                cdtype = find_common_type(list(self.dtypes))
-            else:
-                cdtype = self.dtype
-            if needs_i8_conversion(cdtype) or is_object_dtype(cdtype):
-                dtype = cdtype
-            return self._constructor([], index=q, columns=data.columns, dtype=dtype)
+            from pandas import array
+
+            result = self._constructor(
+                {
+                    idx: array(len(q) * [np.nan], dtype=ensure_dtype_can_hold_na(dtype))
+                    for idx, dtype in enumerate(data.dtypes)
+                },
+                index=q,
+            )
+            result.columns = data.columns
+            return result
 
         if method == "single":
             res = data._mgr.quantile(qs=q, interpolation=interpolation)
diff --git a/pandas/tests/frame/methods/test_quantile.py b/pandas/tests/frame/methods/test_quantile.py
index ff8758a9f9759..6347a770f06f0 100644
--- a/pandas/tests/frame/methods/test_quantile.py
+++ b/pandas/tests/frame/methods/test_quantile.py
@@ -319,12 +319,10 @@ def test_quantile_multi_empty(self, interp_method):
         result = DataFrame({"x": [], "y": []}).quantile(
             [0.1, 0.9], axis=0, interpolation=interpolation, method=method
         )
-        # dtype = "float64" if method == "single" else "object"
-        dtype = "object"
         expected = DataFrame(
             {"x": [np.nan, np.nan], "y": [np.nan, np.nan]},
             index=[0.1, 0.9],
-            dtype=dtype,
+            dtype="object",
         )
         tm.assert_frame_equal(result, expected)
 
@@ -692,10 +690,8 @@ def test_quantile_empty_no_rows_dt64(self, interp_method):
         res = df.quantile(
             0.5, numeric_only=False, interpolation=interpolation, method=method
         )
-        exp = exp.astype(object)
-        if True or interpolation == "nearest":
-            # GH#18463 TODO: would we prefer NaTs here?
-            exp = exp.fillna(np.nan)
+        # GH#18463: the object-dtype expected result holds NaT for missing values
+        exp = exp.astype(object).fillna(pd.NaT)
         tm.assert_series_equal(res, exp)
 
         # both dt64tz
@@ -912,7 +908,7 @@ def test_empty_datelike(
     @pytest.mark.parametrize(
         "expected_data, expected_index, axis",
         [
-            [[pd.NaT, pd.NaT], range(2), 1],
+            [[np.nan, np.nan], range(2), 1],
             [[], [], 0],
         ],
     )
@@ -927,6 +923,6 @@ def test_datelike_numeric_only(self, expected_data, expected_index, axis):
         )
         result = df[["a", "c"]].quantile(0.5, axis=axis, numeric_only=True)
         expected = Series(
-            expected_data, name=0.5, index=Index(expected_index)
+            expected_data, name=0.5, index=Index(expected_index), dtype=np.float64
         )
         tm.assert_series_equal(result, expected)