diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 7263329d2e53b..4e020d8b7bcdc 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -144,6 +144,7 @@ Other API changes ^^^^^^^^^^^^^^^^^ - 3rd party ``py.path`` objects are no longer explicitly supported in IO methods. Use :py:class:`pathlib.Path` objects instead (:issue:`57091`) - :attr:`MultiIndex.codes`, :attr:`MultiIndex.levels`, and :attr:`MultiIndex.names` now returns a ``tuple`` instead of a ``FrozenList`` (:issue:`53531`) +- :func:`concat` no longer silently removes ``None`` from the input (:issue:`57846`) - :func:`read_table`'s ``parse_dates`` argument defaults to ``None`` to improve consistency with :func:`read_csv` (:issue:`57476`) - Made ``dtype`` a required argument in :meth:`ExtensionArray._from_sequence_of_strings` (:issue:`56519`) - Updated :meth:`DataFrame.to_excel` so that the output spreadsheet has no styling. Custom styling can still be done using :meth:`Styler.to_excel` (:issue:`54154`) diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index 1f0fe0542a0c0..f0d4e2a71d003 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -173,8 +173,7 @@ def concat( objs : an iterable or mapping of Series or DataFrame objects If a mapping is passed, the sorted keys will be used as the `keys` argument, unless it is passed, in which case the values will be - selected (see below). Any None objects will be dropped silently unless - they are all None in which case a ValueError will be raised. + selected (see below). axis : {0/'index', 1/'columns'}, default 0 The axis to concatenate along. 
join : {'inner', 'outer'}, default 'outer' @@ -491,8 +490,6 @@ def _clean_keys_and_objs( objs_list = list(com.not_none(*objs_list)) else: # GH#1649 - key_indices = [] - clean_objs = [] if is_iterator(keys): keys = list(keys) if len(keys) != len(objs_list): @@ -501,18 +498,10 @@ def _clean_keys_and_objs( f"The length of the keys ({len(keys)}) must match " f"the length of the objects to concatenate ({len(objs_list)})" ) - for i, obj in enumerate(objs_list): - if obj is not None: - key_indices.append(i) - clean_objs.append(obj) - objs_list = clean_objs if not isinstance(keys, Index): keys = Index(keys) - if len(key_indices) < len(keys): - keys = keys.take(key_indices) - if len(objs_list) == 0: raise ValueError("All objects passed were None") diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index 9bd2c22788fac..a04bb067c1403 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -843,29 +843,6 @@ def test_func(x): tm.assert_frame_equal(result, expected) -def test_groupby_apply_none_first(): - # GH 12824. Tests if apply returns None first. 
- test_df1 = DataFrame({"groups": [1, 1, 1, 2], "vars": [0, 1, 2, 3]}) - test_df2 = DataFrame({"groups": [1, 2, 2, 2], "vars": [0, 1, 2, 3]}) - - def test_func(x): - if x.shape[0] < 2: - return None - return x.iloc[[0, -1]] - - msg = "DataFrameGroupBy.apply operated on the grouping columns" - with tm.assert_produces_warning(DeprecationWarning, match=msg): - result1 = test_df1.groupby("groups").apply(test_func) - with tm.assert_produces_warning(DeprecationWarning, match=msg): - result2 = test_df2.groupby("groups").apply(test_func) - index1 = MultiIndex.from_arrays([[1, 1], [0, 2]], names=["groups", None]) - index2 = MultiIndex.from_arrays([[2, 2], [1, 3]], names=["groups", None]) - expected1 = DataFrame({"groups": [1, 1], "vars": [0, 2]}, index=index1) - expected2 = DataFrame({"groups": [2, 2], "vars": [1, 3]}, index=index2) - tm.assert_frame_equal(result1, expected1) - tm.assert_frame_equal(result2, expected2) - - def test_groupby_apply_return_empty_chunk(): # GH 22221: apply filter which returns some empty groups df = DataFrame({"value": [0, 1], "group": ["filled", "empty"]}) diff --git a/pandas/tests/reshape/concat/test_concat.py b/pandas/tests/reshape/concat/test_concat.py index cf11bf237f615..4bc611f38e5fb 100644 --- a/pandas/tests/reshape/concat/test_concat.py +++ b/pandas/tests/reshape/concat/test_concat.py @@ -383,26 +383,20 @@ def test_concat_exclude_none(self): concat([None, None]) def test_concat_keys_with_none(self): - # #1649 + # #57846 df0 = DataFrame([[10, 20, 30], [10, 20, 30], [10, 20, 30]]) - result = concat({"a": None, "b": df0, "c": df0[:2], "d": df0[:1], "e": df0}) - expected = concat({"b": df0, "c": df0[:2], "d": df0[:1], "e": df0}) - tm.assert_frame_equal(result, expected) - - result = concat( - [None, df0, df0[:2], df0[:1], df0], keys=["a", "b", "c", "d", "e"] - ) - expected = concat([df0, df0[:2], df0[:1], df0], keys=["b", "c", "d", "e"]) - tm.assert_frame_equal(result, expected) + with pytest.raises( + TypeError, match="cannot concatenate 
object of type '<class 'NoneType'>'" + ): + concat({"a": None, "b": df0, "c": df0[:2], "d": df0[:1], "e": df0}) @pytest.mark.parametrize("klass", [range, RangeIndex]) - @pytest.mark.parametrize("include_none", [True, False]) - def test_concat_preserves_rangeindex(self, klass, include_none): + def test_concat_preserves_rangeindex(self, klass): df = DataFrame([1, 2]) df2 = DataFrame([3, 4]) - data = [df, None, df2, None] if include_none else [df, df2] - keys_length = 4 if include_none else 2 + data = [df, df2] + keys_length = 2 result = concat(data, keys=klass(keys_length)) expected = DataFrame( [1, 2, 3, 4],