diff --git a/asv_bench/benchmarks/frame_methods.py b/asv_bench/benchmarks/frame_methods.py index a28e20a636ce2..5bb87b8bb2663 100644 --- a/asv_bench/benchmarks/frame_methods.py +++ b/asv_bench/benchmarks/frame_methods.py @@ -28,26 +28,6 @@ def time_frame_get_numeric_data(self): self.df._get_numeric_data() -class Lookup: - def setup(self): - self.df = DataFrame(np.random.randn(10000, 8), columns=list("abcdefgh")) - self.df["foo"] = "bar" - self.row_labels = list(self.df.index[::10])[:900] - self.col_labels = list(self.df.columns) * 100 - self.row_labels_all = np.array( - list(self.df.index) * len(self.df.columns), dtype="object" - ) - self.col_labels_all = np.array( - list(self.df.columns) * len(self.df.index), dtype="object" - ) - - def time_frame_fancy_lookup(self): - self.df.lookup(self.row_labels, self.col_labels) - - def time_frame_fancy_lookup_all(self): - self.df.lookup(self.row_labels_all, self.col_labels_all) - - class Reindex: def setup(self): N = 10**3 diff --git a/asv_bench/benchmarks/join_merge.py b/asv_bench/benchmarks/join_merge.py index 2309347ac96d8..d9fb3c8a8ff89 100644 --- a/asv_bench/benchmarks/join_merge.py +++ b/asv_bench/benchmarks/join_merge.py @@ -22,26 +22,6 @@ from pandas import ordered_merge as merge_ordered -class Append: - def setup(self): - self.df1 = DataFrame(np.random.randn(10000, 4), columns=["A", "B", "C", "D"]) - self.df2 = self.df1.copy() - self.df2.index = np.arange(10000, 20000) - self.mdf1 = self.df1.copy() - self.mdf1["obj1"] = "bar" - self.mdf1["obj2"] = "bar" - self.mdf1["int1"] = 5 - self.mdf1 = self.mdf1._consolidate() - self.mdf2 = self.mdf1.copy() - self.mdf2.index = self.df2.index - - def time_append_homogenous(self): - self.df1.append(self.df2) - - def time_append_mixed(self): - self.mdf1.append(self.mdf2) - - class Concat: params = [0, 1] diff --git a/doc/redirects.csv b/doc/redirects.csv index f0fab09196f26..b177be0c5c321 100644 --- a/doc/redirects.csv +++ b/doc/redirects.csv @@ -315,7 +315,6 @@ 
generated/pandas.DataFrame.aggregate,../reference/api/pandas.DataFrame.aggregate generated/pandas.DataFrame.align,../reference/api/pandas.DataFrame.align generated/pandas.DataFrame.all,../reference/api/pandas.DataFrame.all generated/pandas.DataFrame.any,../reference/api/pandas.DataFrame.any -generated/pandas.DataFrame.append,../reference/api/pandas.DataFrame.append generated/pandas.DataFrame.apply,../reference/api/pandas.DataFrame.apply generated/pandas.DataFrame.applymap,../reference/api/pandas.DataFrame.applymap generated/pandas.DataFrame.as_blocks,../reference/api/pandas.DataFrame.as_blocks @@ -408,7 +407,6 @@ generated/pandas.DataFrame.last,../reference/api/pandas.DataFrame.last generated/pandas.DataFrame.last_valid_index,../reference/api/pandas.DataFrame.last_valid_index generated/pandas.DataFrame.le,../reference/api/pandas.DataFrame.le generated/pandas.DataFrame.loc,../reference/api/pandas.DataFrame.loc -generated/pandas.DataFrame.lookup,../reference/api/pandas.DataFrame.lookup generated/pandas.DataFrame.lt,../reference/api/pandas.DataFrame.lt generated/pandas.DataFrame.mask,../reference/api/pandas.DataFrame.mask generated/pandas.DataFrame.max,../reference/api/pandas.DataFrame.max @@ -917,7 +915,6 @@ generated/pandas.Series.aggregate,../reference/api/pandas.Series.aggregate generated/pandas.Series.align,../reference/api/pandas.Series.align generated/pandas.Series.all,../reference/api/pandas.Series.all generated/pandas.Series.any,../reference/api/pandas.Series.any -generated/pandas.Series.append,../reference/api/pandas.Series.append generated/pandas.Series.apply,../reference/api/pandas.Series.apply generated/pandas.Series.argmax,../reference/api/pandas.Series.argmax generated/pandas.Series.argmin,../reference/api/pandas.Series.argmin diff --git a/doc/source/reference/frame.rst b/doc/source/reference/frame.rst index 6c0b6a4752875..ea19bb6d85aed 100644 --- a/doc/source/reference/frame.rst +++ b/doc/source/reference/frame.rst @@ -66,7 +66,6 @@ Indexing, iteration 
DataFrame.keys DataFrame.iterrows DataFrame.itertuples - DataFrame.lookup DataFrame.pop DataFrame.tail DataFrame.xs @@ -250,7 +249,6 @@ Combining / comparing / joining / merging .. autosummary:: :toctree: api/ - DataFrame.append DataFrame.assign DataFrame.compare DataFrame.join diff --git a/doc/source/reference/series.rst b/doc/source/reference/series.rst index c8bbe922f5313..659385c611ff0 100644 --- a/doc/source/reference/series.rst +++ b/doc/source/reference/series.rst @@ -247,7 +247,6 @@ Combining / comparing / joining / merging .. autosummary:: :toctree: api/ - Series.append Series.compare Series.update diff --git a/doc/source/user_guide/indexing.rst b/doc/source/user_guide/indexing.rst index dbd6d2757e1be..6566a1d67d1c9 100644 --- a/doc/source/user_guide/indexing.rst +++ b/doc/source/user_guide/indexing.rst @@ -1559,7 +1559,7 @@ For instance: df.reindex(cols, axis=1).to_numpy()[np.arange(len(df)), idx] Formerly this could be achieved with the dedicated ``DataFrame.lookup`` method -which was deprecated in version 1.2.0. +which was deprecated in version 1.2.0 and removed in version 2.0.0. .. 
_indexing.class: diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index c6b5816d12061..0f2dfd3d4c8a1 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -265,6 +265,8 @@ Removal of prior version deprecations/changes - Enforced deprecation changing behavior when passing ``datetime64[ns]`` dtype data and timezone-aware dtype to :class:`Series`, interpreting the values as wall-times instead of UTC times, matching :class:`DatetimeIndex` behavior (:issue:`41662`) - Removed deprecated :meth:`DataFrame._AXIS_NUMBERS`, :meth:`DataFrame._AXIS_NAMES`, :meth:`Series._AXIS_NUMBERS`, :meth:`Series._AXIS_NAMES` (:issue:`33637`) - Removed deprecated :meth:`Index.to_native_types`, use ``obj.astype(str)`` instead (:issue:`36418`) +- Removed deprecated :meth:`DataFrame.lookup` (:issue:`35224`) +- Removed deprecated :meth:`Series.append`, :meth:`DataFrame.append`, use :func:`concat` instead (:issue:`35407`) - Removed deprecated :meth:`Series.iteritems`, :meth:`DataFrame.iteritems` and :meth:`HDFStore.iteritems` use ``obj.items`` instead (:issue:`45321`) - Removed deprecated :meth:`DatetimeIndex.union_many` (:issue:`45018`) - Removed deprecated ``weekofyear`` and ``week`` attributes of :class:`DatetimeArray`, :class:`DatetimeIndex` and ``dt`` accessor in favor of ``isocalendar().week`` (:issue:`33595`) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 2965baf837419..ec5ea5b9b19d5 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4870,69 +4870,6 @@ def _series(self): for idx, item in enumerate(self.columns) } - def lookup( - self, row_labels: Sequence[IndexLabel], col_labels: Sequence[IndexLabel] - ) -> np.ndarray: - """ - Label-based "fancy indexing" function for DataFrame. - - .. 
deprecated:: 1.2.0 - DataFrame.lookup is deprecated, - use pandas.factorize and NumPy indexing instead. - For further details see - :ref:`Looking up values by index/column labels `. - - Given equal-length arrays of row and column labels, return an - array of the values corresponding to each (row, col) pair. - - Parameters - ---------- - row_labels : sequence - The row labels to use for lookup. - col_labels : sequence - The column labels to use for lookup. - - Returns - ------- - numpy.ndarray - The found values. - """ - msg = ( - "The 'lookup' method is deprecated and will be " - "removed in a future version. " - "You can use DataFrame.melt and DataFrame.loc " - "as a substitute." - ) - warnings.warn(msg, FutureWarning, stacklevel=find_stack_level()) - - n = len(row_labels) - if n != len(col_labels): - raise ValueError("Row labels must have same size as column labels") - if not (self.index.is_unique and self.columns.is_unique): - # GH#33041 - raise ValueError("DataFrame.lookup requires unique index and columns") - - thresh = 1000 - if not self._is_mixed_type or n > thresh: - values = self.values - ridx = self.index.get_indexer(row_labels) - cidx = self.columns.get_indexer(col_labels) - if (ridx == -1).any(): - raise KeyError("One or more row labels was not found") - if (cidx == -1).any(): - raise KeyError("One or more column labels was not found") - flat_index = ridx * len(self.columns) + cidx - result = values.flat[flat_index] - else: - result = np.empty(n, dtype="O") - for i, (r, c) in enumerate(zip(row_labels, col_labels)): - result[i] = self._get_value(r, c) - - if is_object_dtype(result): - result = lib.maybe_convert_objects(result) - - return result - # ---------------------------------------------------------------------- # Reindexing and alignment @@ -9562,118 +9499,6 @@ def infer(x): # ---------------------------------------------------------------------- # Merging / joining methods - def append( - self, - other, - ignore_index: bool = False, - 
verify_integrity: bool = False, - sort: bool = False, - ) -> DataFrame: - """ - Append rows of `other` to the end of caller, returning a new object. - - .. deprecated:: 1.4.0 - Use :func:`concat` instead. For further details see - :ref:`whatsnew_140.deprecations.frame_series_append` - - Columns in `other` that are not in the caller are added as new columns. - - Parameters - ---------- - other : DataFrame or Series/dict-like object, or list of these - The data to append. - ignore_index : bool, default False - If True, the resulting axis will be labeled 0, 1, …, n - 1. - verify_integrity : bool, default False - If True, raise ValueError on creating index with duplicates. - sort : bool, default False - Sort columns if the columns of `self` and `other` are not aligned. - - .. versionchanged:: 1.0.0 - - Changed to not sort by default. - - Returns - ------- - DataFrame - A new DataFrame consisting of the rows of caller and the rows of `other`. - - See Also - -------- - concat : General function to concatenate DataFrame or Series objects. - - Notes - ----- - If a list of dict/series is passed and the keys are all contained in - the DataFrame's index, the order of the columns in the resulting - DataFrame will be unchanged. - - Iteratively appending rows to a DataFrame can be more computationally - intensive than a single concatenate. A better solution is to append - those rows to a list and then concatenate the list with the original - DataFrame all at once. 
- - Examples - -------- - >>> df = pd.DataFrame([[1, 2], [3, 4]], columns=list('AB'), index=['x', 'y']) - >>> df - A B - x 1 2 - y 3 4 - >>> df2 = pd.DataFrame([[5, 6], [7, 8]], columns=list('AB'), index=['x', 'y']) - >>> df.append(df2) - A B - x 1 2 - y 3 4 - x 5 6 - y 7 8 - - With `ignore_index` set to True: - - >>> df.append(df2, ignore_index=True) - A B - 0 1 2 - 1 3 4 - 2 5 6 - 3 7 8 - - The following, while not recommended methods for generating DataFrames, - show two ways to generate a DataFrame from multiple data sources. - - Less efficient: - - >>> df = pd.DataFrame(columns=['A']) - >>> for i in range(5): - ... df = df.append({'A': i}, ignore_index=True) - >>> df - A - 0 0 - 1 1 - 2 2 - 3 3 - 4 4 - - More efficient: - - >>> pd.concat([pd.DataFrame([i], columns=['A']) for i in range(5)], - ... ignore_index=True) - A - 0 0 - 1 1 - 2 2 - 3 3 - 4 4 - """ - warnings.warn( - "The frame.append method is deprecated " - "and will be removed from pandas in a future version. " - "Use pandas.concat instead.", - FutureWarning, - stacklevel=find_stack_level(), - ) - - return self._append(other, ignore_index, verify_integrity, sort) - def _append( self, other, diff --git a/pandas/core/series.py b/pandas/core/series.py index bba225bb91caf..8e2234a9e6f88 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2976,92 +2976,6 @@ def searchsorted( # type: ignore[override] # ------------------------------------------------------------------- # Combination - def append( - self, to_append, ignore_index: bool = False, verify_integrity: bool = False - ) -> Series: - """ - Concatenate two or more Series. - - .. deprecated:: 1.4.0 - Use :func:`concat` instead. For further details see - :ref:`whatsnew_140.deprecations.frame_series_append` - - Parameters - ---------- - to_append : Series or list/tuple of Series - Series to append with self. - ignore_index : bool, default False - If True, the resulting axis will be labeled 0, 1, …, n - 1. 
- verify_integrity : bool, default False - If True, raise Exception on creating index with duplicates. - - Returns - ------- - Series - Concatenated Series. - - See Also - -------- - concat : General function to concatenate DataFrame or Series objects. - - Notes - ----- - Iteratively appending to a Series can be more computationally intensive - than a single concatenate. A better solution is to append values to a - list and then concatenate the list with the original Series all at - once. - - Examples - -------- - >>> s1 = pd.Series([1, 2, 3]) - >>> s2 = pd.Series([4, 5, 6]) - >>> s3 = pd.Series([4, 5, 6], index=[3, 4, 5]) - >>> s1.append(s2) - 0 1 - 1 2 - 2 3 - 0 4 - 1 5 - 2 6 - dtype: int64 - - >>> s1.append(s3) - 0 1 - 1 2 - 2 3 - 3 4 - 4 5 - 5 6 - dtype: int64 - - With `ignore_index` set to True: - - >>> s1.append(s2, ignore_index=True) - 0 1 - 1 2 - 2 3 - 3 4 - 4 5 - 5 6 - dtype: int64 - - With `verify_integrity` set to True: - - >>> s1.append(s2, verify_integrity=True) - Traceback (most recent call last): - ... - ValueError: Indexes have overlapping values: [0, 1, 2] - """ - warnings.warn( - "The series.append method is deprecated " - "and will be removed from pandas in a future version. 
" - "Use pandas.concat instead.", - FutureWarning, - stacklevel=find_stack_level(), - ) - - return self._append(to_append, ignore_index, verify_integrity) - def _append( self, to_append, ignore_index: bool = False, verify_integrity: bool = False ): diff --git a/pandas/tests/frame/indexing/test_lookup.py b/pandas/tests/frame/indexing/test_lookup.py deleted file mode 100644 index caab5feea853b..0000000000000 --- a/pandas/tests/frame/indexing/test_lookup.py +++ /dev/null @@ -1,94 +0,0 @@ -import numpy as np -import pytest - -from pandas import ( - DataFrame, - Series, -) -import pandas._testing as tm - - -class TestLookup: - def test_lookup_float(self, float_frame): - df = float_frame - rows = list(df.index) * len(df.columns) - cols = list(df.columns) * len(df.index) - with tm.assert_produces_warning(FutureWarning): - result = df.lookup(rows, cols) - - expected = np.array([df.loc[r, c] for r, c in zip(rows, cols)]) - tm.assert_numpy_array_equal(result, expected) - - def test_lookup_mixed(self, float_string_frame): - df = float_string_frame - rows = list(df.index) * len(df.columns) - cols = list(df.columns) * len(df.index) - with tm.assert_produces_warning(FutureWarning): - result = df.lookup(rows, cols) - - expected = np.array( - [df.loc[r, c] for r, c in zip(rows, cols)], dtype=np.object_ - ) - tm.assert_almost_equal(result, expected) - - def test_lookup_bool(self): - df = DataFrame( - { - "label": ["a", "b", "a", "c"], - "mask_a": [True, True, False, True], - "mask_b": [True, False, False, False], - "mask_c": [False, True, False, True], - } - ) - with tm.assert_produces_warning(FutureWarning): - df["mask"] = df.lookup(df.index, "mask_" + df["label"]) - - exp_mask = np.array( - [df.loc[r, c] for r, c in zip(df.index, "mask_" + df["label"])] - ) - - tm.assert_series_equal(df["mask"], Series(exp_mask, name="mask")) - assert df["mask"].dtype == np.bool_ - - def test_lookup_raises(self, float_frame): - with pytest.raises(KeyError, match="'One or more row labels was not 
found'"): - with tm.assert_produces_warning(FutureWarning): - float_frame.lookup(["xyz"], ["A"]) - - with pytest.raises(KeyError, match="'One or more column labels was not found'"): - with tm.assert_produces_warning(FutureWarning): - float_frame.lookup([float_frame.index[0]], ["xyz"]) - - with pytest.raises(ValueError, match="same size"): - with tm.assert_produces_warning(FutureWarning): - float_frame.lookup(["a", "b", "c"], ["a"]) - - def test_lookup_requires_unique_axes(self): - # GH#33041 raise with a helpful error message - df = DataFrame(np.random.randn(6).reshape(3, 2), columns=["A", "A"]) - - rows = [0, 1] - cols = ["A", "A"] - - # homogeneous-dtype case - with pytest.raises(ValueError, match="requires unique index and columns"): - with tm.assert_produces_warning(FutureWarning): - df.lookup(rows, cols) - with pytest.raises(ValueError, match="requires unique index and columns"): - with tm.assert_produces_warning(FutureWarning): - df.T.lookup(cols, rows) - - # heterogeneous dtype - df["B"] = 0 - with pytest.raises(ValueError, match="requires unique index and columns"): - with tm.assert_produces_warning(FutureWarning): - df.lookup(rows, cols) - - -def test_lookup_deprecated(): - # GH#18262 - df = DataFrame( - {"col": ["A", "A", "B", "B"], "A": [80, 23, np.nan, 22], "B": [80, 55, 76, 67]} - ) - with tm.assert_produces_warning(FutureWarning): - df.lookup(df.index, df["col"]) diff --git a/pandas/tests/frame/methods/test_append.py b/pandas/tests/frame/methods/test_append.py deleted file mode 100644 index 54d7d95ed4570..0000000000000 --- a/pandas/tests/frame/methods/test_append.py +++ /dev/null @@ -1,287 +0,0 @@ -import numpy as np -import pytest - -import pandas as pd -from pandas import ( - DataFrame, - Series, - Timestamp, - date_range, - timedelta_range, -) -import pandas._testing as tm - - -class TestDataFrameAppend: - @pytest.mark.filterwarnings("ignore:.*append method is deprecated.*:FutureWarning") - def test_append_multiindex(self, 
multiindex_dataframe_random_data, frame_or_series): - obj = multiindex_dataframe_random_data - obj = tm.get_obj(obj, frame_or_series) - - a = obj[:5] - b = obj[5:] - - result = a.append(b) - tm.assert_equal(result, obj) - - def test_append_empty_list(self): - # GH 28769 - df = DataFrame() - result = df._append([]) - expected = df - tm.assert_frame_equal(result, expected) - assert result is not df - - df = DataFrame(np.random.randn(5, 4), columns=["foo", "bar", "baz", "qux"]) - result = df._append([]) - expected = df - tm.assert_frame_equal(result, expected) - assert result is not df # ._append() should return a new object - - def test_append_series_dict(self): - df = DataFrame(np.random.randn(5, 4), columns=["foo", "bar", "baz", "qux"]) - - series = df.loc[4] - msg = "Indexes have overlapping values" - with pytest.raises(ValueError, match=msg): - df._append(series, verify_integrity=True) - - series.name = None - msg = "Can only append a Series if ignore_index=True" - with pytest.raises(TypeError, match=msg): - df._append(series, verify_integrity=True) - - result = df._append(series[::-1], ignore_index=True) - expected = df._append( - DataFrame({0: series[::-1]}, index=df.columns).T, ignore_index=True - ) - tm.assert_frame_equal(result, expected) - - # dict - result = df._append(series.to_dict(), ignore_index=True) - tm.assert_frame_equal(result, expected) - - result = df._append(series[::-1][:3], ignore_index=True) - expected = df._append( - DataFrame({0: series[::-1][:3]}).T, ignore_index=True, sort=True - ) - tm.assert_frame_equal(result, expected.loc[:, result.columns]) - - msg = "Can only append a dict if ignore_index=True" - with pytest.raises(TypeError, match=msg): - df._append(series.to_dict()) - - # can append when name set - row = df.loc[4] - row.name = 5 - result = df._append(row) - expected = df._append(df[-1:], ignore_index=True) - tm.assert_frame_equal(result, expected) - - def test_append_list_of_series_dicts(self): - df = DataFrame(np.random.randn(5, 
4), columns=["foo", "bar", "baz", "qux"]) - - dicts = [x.to_dict() for idx, x in df.iterrows()] - - result = df._append(dicts, ignore_index=True) - expected = df._append(df, ignore_index=True) - tm.assert_frame_equal(result, expected) - - # different columns - dicts = [ - {"foo": 1, "bar": 2, "baz": 3, "peekaboo": 4}, - {"foo": 5, "bar": 6, "baz": 7, "peekaboo": 8}, - ] - result = df._append(dicts, ignore_index=True, sort=True) - expected = df._append(DataFrame(dicts), ignore_index=True, sort=True) - tm.assert_frame_equal(result, expected) - - def test_append_list_retain_index_name(self): - df = DataFrame( - [[1, 2], [3, 4]], index=pd.Index(["a", "b"], name="keepthisname") - ) - - serc = Series([5, 6], name="c") - - expected = DataFrame( - [[1, 2], [3, 4], [5, 6]], - index=pd.Index(["a", "b", "c"], name="keepthisname"), - ) - - # append series - result = df._append(serc) - tm.assert_frame_equal(result, expected) - - # append list of series - result = df._append([serc]) - tm.assert_frame_equal(result, expected) - - def test_append_missing_cols(self): - # GH22252 - # exercise the conditional branch in append method where the data - # to be appended is a list and does not contain all columns that are in - # the target DataFrame - df = DataFrame(np.random.randn(5, 4), columns=["foo", "bar", "baz", "qux"]) - - dicts = [{"foo": 9}, {"bar": 10}] - result = df._append(dicts, ignore_index=True, sort=True) - - expected = df._append(DataFrame(dicts), ignore_index=True, sort=True) - tm.assert_frame_equal(result, expected) - - def test_append_empty_dataframe(self): - - # Empty df append empty df - df1 = DataFrame() - df2 = DataFrame() - result = df1._append(df2) - expected = df1.copy() - tm.assert_frame_equal(result, expected) - - # Non-empty df append empty df - df1 = DataFrame(np.random.randn(5, 2)) - df2 = DataFrame() - result = df1._append(df2) - expected = df1.copy() - tm.assert_frame_equal(result, expected) - - # Empty df with columns append empty df - df1 = 
DataFrame(columns=["bar", "foo"]) - df2 = DataFrame() - result = df1._append(df2) - expected = df1.copy() - tm.assert_frame_equal(result, expected) - - # Non-Empty df with columns append empty df - df1 = DataFrame(np.random.randn(5, 2), columns=["bar", "foo"]) - df2 = DataFrame() - result = df1._append(df2) - expected = df1.copy() - tm.assert_frame_equal(result, expected) - - def test_append_dtypes(self, using_array_manager): - - # GH 5754 - # row appends of different dtypes (so need to do by-item) - # can sometimes infer the correct type - - df1 = DataFrame({"bar": Timestamp("20130101")}, index=range(5)) - df2 = DataFrame() - result = df1._append(df2) - expected = df1.copy() - tm.assert_frame_equal(result, expected) - - df1 = DataFrame({"bar": Timestamp("20130101")}, index=range(1)) - df2 = DataFrame({"bar": "foo"}, index=range(1, 2)) - result = df1._append(df2) - expected = DataFrame({"bar": [Timestamp("20130101"), "foo"]}) - tm.assert_frame_equal(result, expected) - - df1 = DataFrame({"bar": Timestamp("20130101")}, index=range(1)) - df2 = DataFrame({"bar": np.nan}, index=range(1, 2)) - result = df1._append(df2) - expected = DataFrame( - {"bar": Series([Timestamp("20130101"), np.nan], dtype="M8[ns]")} - ) - if using_array_manager: - # TODO(ArrayManager) decide on exact casting rules in concat - # With ArrayManager, all-NaN float is not ignored - expected = expected.astype(object) - tm.assert_frame_equal(result, expected) - - df1 = DataFrame({"bar": Timestamp("20130101")}, index=range(1)) - df2 = DataFrame({"bar": np.nan}, index=range(1, 2), dtype=object) - result = df1._append(df2) - expected = DataFrame( - {"bar": Series([Timestamp("20130101"), np.nan], dtype="M8[ns]")} - ) - if using_array_manager: - # With ArrayManager, all-NaN float is not ignored - expected = expected.astype(object) - tm.assert_frame_equal(result, expected) - - df1 = DataFrame({"bar": np.nan}, index=range(1)) - df2 = DataFrame({"bar": Timestamp("20130101")}, index=range(1, 2)) - result = 
df1._append(df2) - expected = DataFrame( - {"bar": Series([np.nan, Timestamp("20130101")], dtype="M8[ns]")} - ) - if using_array_manager: - # With ArrayManager, all-NaN float is not ignored - expected = expected.astype(object) - tm.assert_frame_equal(result, expected) - - df1 = DataFrame({"bar": Timestamp("20130101")}, index=range(1)) - df2 = DataFrame({"bar": 1}, index=range(1, 2), dtype=object) - result = df1._append(df2) - expected = DataFrame({"bar": Series([Timestamp("20130101"), 1])}) - tm.assert_frame_equal(result, expected) - - @pytest.mark.parametrize( - "timestamp", ["2019-07-19 07:04:57+0100", "2019-07-19 07:04:57"] - ) - def test_append_timestamps_aware_or_naive(self, tz_naive_fixture, timestamp): - # GH 30238 - tz = tz_naive_fixture - df = DataFrame([Timestamp(timestamp, tz=tz)]) - result = df._append(df.iloc[0]).iloc[-1] - expected = Series(Timestamp(timestamp, tz=tz), name=0) - tm.assert_series_equal(result, expected) - - @pytest.mark.parametrize( - "data, dtype", - [ - ([1], pd.Int64Dtype()), - ([1], pd.CategoricalDtype()), - ([pd.Interval(left=0, right=5)], pd.IntervalDtype()), - ([pd.Period("2000-03", freq="M")], pd.PeriodDtype("M")), - ([1], pd.SparseDtype()), - ], - ) - def test_other_dtypes(self, data, dtype, using_array_manager): - df = DataFrame(data, dtype=dtype) - - result = df._append(df.iloc[0]).iloc[-1] - - expected = Series(data, name=0, dtype=dtype) - tm.assert_series_equal(result, expected) - - @pytest.mark.parametrize("dtype", ["datetime64[ns]", "timedelta64[ns]"]) - def test_append_numpy_bug_1681(self, dtype): - # another datetime64 bug - if dtype == "datetime64[ns]": - index = date_range("2011/1/1", "2012/1/1", freq="W-FRI") - else: - index = timedelta_range("1 days", "10 days", freq="2D") - - df = DataFrame() - other = DataFrame({"A": "foo", "B": index}, index=index) - - result = df._append(other) - assert (result["B"] == index).all() - - @pytest.mark.filterwarnings("ignore:The values in the array:RuntimeWarning") - def 
test_multiindex_column_append_multiple(self): - # GH 29699 - df = DataFrame( - [[1, 11], [2, 12], [3, 13]], - columns=pd.MultiIndex.from_tuples( - [("multi", "col1"), ("multi", "col2")], names=["level1", None] - ), - ) - df2 = df.copy() - for i in range(1, 10): - df[i, "colA"] = 10 - df = df._append(df2, ignore_index=True) - result = df["multi"] - expected = DataFrame( - {"col1": [1, 2, 3] * (i + 1), "col2": [11, 12, 13] * (i + 1)} - ) - tm.assert_frame_equal(result, expected) - - def test_append_raises_future_warning(self): - # GH#35407 - df1 = DataFrame([[1, 2], [3, 4]]) - df2 = DataFrame([[5, 6], [7, 8]]) - with tm.assert_produces_warning(FutureWarning): - df1.append(df2) diff --git a/pandas/tests/generic/test_finalize.py b/pandas/tests/generic/test_finalize.py index 7634f783117d6..689caffe98a2d 100644 --- a/pandas/tests/generic/test_finalize.py +++ b/pandas/tests/generic/test_finalize.py @@ -161,26 +161,6 @@ pytest.param( (pd.DataFrame, frame_data, operator.methodcaller("applymap", lambda x: x)) ), - pytest.param( - ( - pd.DataFrame, - frame_data, - operator.methodcaller("append", pd.DataFrame({"A": [1]})), - ), - marks=pytest.mark.filterwarnings( - "ignore:.*append method is deprecated.*:FutureWarning" - ), - ), - pytest.param( - ( - pd.DataFrame, - frame_data, - operator.methodcaller("append", pd.DataFrame({"B": [1]})), - ), - marks=pytest.mark.filterwarnings( - "ignore:.*append method is deprecated.*:FutureWarning" - ), - ), pytest.param( ( pd.DataFrame, diff --git a/pandas/tests/series/indexing/test_indexing.py b/pandas/tests/series/indexing/test_indexing.py index fa9cf5215c0f7..eda27787afe1c 100644 --- a/pandas/tests/series/indexing/test_indexing.py +++ b/pandas/tests/series/indexing/test_indexing.py @@ -84,7 +84,6 @@ def test_getitem_setitem_ellipsis(): assert (result == 5).all() -@pytest.mark.filterwarnings("ignore:.*append method is deprecated.*:FutureWarning") @pytest.mark.parametrize( "result_1, duplicate_item, expected_1", [ @@ -102,8 +101,8 @@ def 
test_getitem_setitem_ellipsis(): ) def test_getitem_with_duplicates_indices(result_1, duplicate_item, expected_1): # GH 17610 - result = result_1.append(duplicate_item) - expected = expected_1.append(duplicate_item) + result = result_1._append(duplicate_item) + expected = expected_1._append(duplicate_item) tm.assert_series_equal(result[1], expected) assert result[2] == result_1[2] diff --git a/pandas/tests/series/methods/test_append.py b/pandas/tests/series/methods/test_append.py deleted file mode 100644 index 6f8852ade6408..0000000000000 --- a/pandas/tests/series/methods/test_append.py +++ /dev/null @@ -1,271 +0,0 @@ -import numpy as np -import pytest - -import pandas as pd -from pandas import ( - DataFrame, - DatetimeIndex, - Index, - Series, - Timestamp, - date_range, -) -import pandas._testing as tm - - -class TestSeriesAppend: - def test_append_preserve_name(self, datetime_series): - result = datetime_series[:5]._append(datetime_series[5:]) - assert result.name == datetime_series.name - - def test_append(self, datetime_series, string_series, object_series): - appended_series = string_series._append(object_series) - for idx, value in appended_series.items(): - if idx in string_series.index: - assert value == string_series[idx] - elif idx in object_series.index: - assert value == object_series[idx] - else: - raise AssertionError("orphaned index!") - - msg = "Indexes have overlapping values:" - with pytest.raises(ValueError, match=msg): - datetime_series._append(datetime_series, verify_integrity=True) - - def test_append_many(self, datetime_series): - pieces = [datetime_series[:5], datetime_series[5:10], datetime_series[10:]] - - result = pieces[0]._append(pieces[1:]) - tm.assert_series_equal(result, datetime_series) - - def test_append_duplicates(self): - # GH 13677 - s1 = Series([1, 2, 3]) - s2 = Series([4, 5, 6]) - exp = Series([1, 2, 3, 4, 5, 6], index=[0, 1, 2, 0, 1, 2]) - tm.assert_series_equal(s1._append(s2), exp) - tm.assert_series_equal(pd.concat([s1, 
s2]), exp) - - # the result must have RangeIndex - exp = Series([1, 2, 3, 4, 5, 6]) - tm.assert_series_equal( - s1._append(s2, ignore_index=True), exp, check_index_type=True - ) - tm.assert_series_equal( - pd.concat([s1, s2], ignore_index=True), exp, check_index_type=True - ) - - msg = "Indexes have overlapping values:" - with pytest.raises(ValueError, match=msg): - s1._append(s2, verify_integrity=True) - with pytest.raises(ValueError, match=msg): - pd.concat([s1, s2], verify_integrity=True) - - def test_append_tuples(self): - # GH 28410 - s = Series([1, 2, 3]) - list_input = [s, s] - tuple_input = (s, s) - - expected = s._append(list_input) - result = s._append(tuple_input) - - tm.assert_series_equal(expected, result) - - def test_append_dataframe_raises(self): - # GH 31413 - df = DataFrame({"A": [1, 2], "B": [3, 4]}) - - msg = "to_append should be a Series or list/tuple of Series, got DataFrame" - with pytest.raises(TypeError, match=msg): - df.A._append(df) - with pytest.raises(TypeError, match=msg): - df.A._append([df]) - - def test_append_raises_future_warning(self): - # GH#35407 - with tm.assert_produces_warning(FutureWarning): - Series([1, 2]).append(Series([3, 4])) - - -class TestSeriesAppendWithDatetimeIndex: - def test_append(self): - rng = date_range("5/8/2012 1:45", periods=10, freq="5T") - ts = Series(np.random.randn(len(rng)), rng) - df = DataFrame(np.random.randn(len(rng), 4), index=rng) - - result = ts._append(ts) - result_df = df._append(df) - ex_index = DatetimeIndex(np.tile(rng.values, 2)) - tm.assert_index_equal(result.index, ex_index) - tm.assert_index_equal(result_df.index, ex_index) - - appended = rng.append(rng) - tm.assert_index_equal(appended, ex_index) - - appended = rng.append([rng, rng]) - ex_index = DatetimeIndex(np.tile(rng.values, 3)) - tm.assert_index_equal(appended, ex_index) - - # different index names - rng1 = rng.copy() - rng2 = rng.copy() - rng1.name = "foo" - rng2.name = "bar" - - assert rng1.append(rng1).name == "foo" - assert 
rng1.append(rng2).name is None - - def test_append_tz(self): - # see gh-2938 - rng = date_range("5/8/2012 1:45", periods=10, freq="5T", tz="US/Eastern") - rng2 = date_range("5/8/2012 2:35", periods=10, freq="5T", tz="US/Eastern") - rng3 = date_range("5/8/2012 1:45", periods=20, freq="5T", tz="US/Eastern") - ts = Series(np.random.randn(len(rng)), rng) - df = DataFrame(np.random.randn(len(rng), 4), index=rng) - ts2 = Series(np.random.randn(len(rng2)), rng2) - df2 = DataFrame(np.random.randn(len(rng2), 4), index=rng2) - - result = ts._append(ts2) - result_df = df._append(df2) - tm.assert_index_equal(result.index, rng3) - tm.assert_index_equal(result_df.index, rng3) - - appended = rng.append(rng2) - tm.assert_index_equal(appended, rng3) - - def test_append_tz_explicit_pytz(self): - # see gh-2938 - from pytz import timezone as timezone - - rng = date_range( - "5/8/2012 1:45", periods=10, freq="5T", tz=timezone("US/Eastern") - ) - rng2 = date_range( - "5/8/2012 2:35", periods=10, freq="5T", tz=timezone("US/Eastern") - ) - rng3 = date_range( - "5/8/2012 1:45", periods=20, freq="5T", tz=timezone("US/Eastern") - ) - ts = Series(np.random.randn(len(rng)), rng) - df = DataFrame(np.random.randn(len(rng), 4), index=rng) - ts2 = Series(np.random.randn(len(rng2)), rng2) - df2 = DataFrame(np.random.randn(len(rng2), 4), index=rng2) - - result = ts._append(ts2) - result_df = df._append(df2) - tm.assert_index_equal(result.index, rng3) - tm.assert_index_equal(result_df.index, rng3) - - appended = rng.append(rng2) - tm.assert_index_equal(appended, rng3) - - def test_append_tz_dateutil(self): - # see gh-2938 - rng = date_range( - "5/8/2012 1:45", periods=10, freq="5T", tz="dateutil/US/Eastern" - ) - rng2 = date_range( - "5/8/2012 2:35", periods=10, freq="5T", tz="dateutil/US/Eastern" - ) - rng3 = date_range( - "5/8/2012 1:45", periods=20, freq="5T", tz="dateutil/US/Eastern" - ) - ts = Series(np.random.randn(len(rng)), rng) - df = DataFrame(np.random.randn(len(rng), 4), index=rng) - ts2 
= Series(np.random.randn(len(rng2)), rng2) - df2 = DataFrame(np.random.randn(len(rng2), 4), index=rng2) - - result = ts._append(ts2) - result_df = df._append(df2) - tm.assert_index_equal(result.index, rng3) - tm.assert_index_equal(result_df.index, rng3) - - appended = rng.append(rng2) - tm.assert_index_equal(appended, rng3) - - def test_series_append_aware(self): - rng1 = date_range("1/1/2011 01:00", periods=1, freq="H", tz="US/Eastern") - rng2 = date_range("1/1/2011 02:00", periods=1, freq="H", tz="US/Eastern") - ser1 = Series([1], index=rng1) - ser2 = Series([2], index=rng2) - ts_result = ser1._append(ser2) - - exp_index = DatetimeIndex( - ["2011-01-01 01:00", "2011-01-01 02:00"], tz="US/Eastern", freq="H" - ) - exp = Series([1, 2], index=exp_index) - tm.assert_series_equal(ts_result, exp) - assert ts_result.index.tz == rng1.tz - - rng1 = date_range("1/1/2011 01:00", periods=1, freq="H", tz="UTC") - rng2 = date_range("1/1/2011 02:00", periods=1, freq="H", tz="UTC") - ser1 = Series([1], index=rng1) - ser2 = Series([2], index=rng2) - ts_result = ser1._append(ser2) - - exp_index = DatetimeIndex( - ["2011-01-01 01:00", "2011-01-01 02:00"], tz="UTC", freq="H" - ) - exp = Series([1, 2], index=exp_index) - tm.assert_series_equal(ts_result, exp) - utc = rng1.tz - assert utc == ts_result.index.tz - - # GH#7795 - # different tz coerces to object dtype, not UTC - rng1 = date_range("1/1/2011 01:00", periods=1, freq="H", tz="US/Eastern") - rng2 = date_range("1/1/2011 02:00", periods=1, freq="H", tz="US/Central") - ser1 = Series([1], index=rng1) - ser2 = Series([2], index=rng2) - ts_result = ser1._append(ser2) - exp_index = Index( - [ - Timestamp("1/1/2011 01:00", tz="US/Eastern"), - Timestamp("1/1/2011 02:00", tz="US/Central"), - ] - ) - exp = Series([1, 2], index=exp_index) - tm.assert_series_equal(ts_result, exp) - - def test_series_append_aware_naive(self): - rng1 = date_range("1/1/2011 01:00", periods=1, freq="H") - rng2 = date_range("1/1/2011 02:00", periods=1, freq="H", 
tz="US/Eastern") - ser1 = Series(np.random.randn(len(rng1)), index=rng1) - ser2 = Series(np.random.randn(len(rng2)), index=rng2) - ts_result = ser1._append(ser2) - - expected = ser1.index.astype(object).append(ser2.index.astype(object)) - assert ts_result.index.equals(expected) - - # mixed - rng1 = date_range("1/1/2011 01:00", periods=1, freq="H") - rng2 = range(100) - ser1 = Series(np.random.randn(len(rng1)), index=rng1) - ser2 = Series(np.random.randn(len(rng2)), index=rng2) - ts_result = ser1._append(ser2) - - expected = ser1.index.astype(object).append(ser2.index) - assert ts_result.index.equals(expected) - - def test_series_append_dst(self): - rng1 = date_range("1/1/2016 01:00", periods=3, freq="H", tz="US/Eastern") - rng2 = date_range("8/1/2016 01:00", periods=3, freq="H", tz="US/Eastern") - ser1 = Series([1, 2, 3], index=rng1) - ser2 = Series([10, 11, 12], index=rng2) - ts_result = ser1._append(ser2) - - exp_index = DatetimeIndex( - [ - "2016-01-01 01:00", - "2016-01-01 02:00", - "2016-01-01 03:00", - "2016-08-01 01:00", - "2016-08-01 02:00", - "2016-08-01 03:00", - ], - tz="US/Eastern", - ) - exp = Series([1, 2, 3, 10, 11, 12], index=exp_index) - tm.assert_series_equal(ts_result, exp) - assert ts_result.index.tz == rng1.tz