From 704b422f333de60660d9d66cbc1e52b1b62041b6 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 26 Jul 2019 18:08:20 -0700 Subject: [PATCH 01/11] remove IX --- doc/source/reference/index.rst | 2 - pandas/core/indexing.py | 102 --- pandas/tests/frame/test_indexing.py | 626 +----------------- pandas/tests/indexing/common.py | 11 +- .../tests/indexing/multiindex/test_slice.py | 4 - .../indexing/test_chaining_and_caching.py | 6 - pandas/tests/indexing/test_floats.py | 3 +- pandas/tests/indexing/test_iloc.py | 67 +- pandas/tests/indexing/test_ix.py | 345 ---------- pandas/tests/indexing/test_loc.py | 6 - pandas/tests/indexing/test_partial.py | 71 +- pandas/tests/test_multilevel.py | 11 - 12 files changed, 14 insertions(+), 1240 deletions(-) delete mode 100644 pandas/tests/indexing/test_ix.py diff --git a/doc/source/reference/index.rst b/doc/source/reference/index.rst index 12ca318c815d3..9d5649c37e92f 100644 --- a/doc/source/reference/index.rst +++ b/doc/source/reference/index.rst @@ -49,7 +49,6 @@ public functions related to data types in pandas. api/pandas.DataFrame.blocks api/pandas.DataFrame.as_matrix - api/pandas.DataFrame.ix api/pandas.Index.asi8 api/pandas.Index.data api/pandas.Index.flags @@ -60,7 +59,6 @@ public functions related to data types in pandas. 
api/pandas.Series.asobject api/pandas.Series.blocks api/pandas.Series.from_array - api/pandas.Series.ix api/pandas.Series.imag api/pandas.Series.real diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index a1a8619fab892..640a4b2a3f346 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -10,10 +10,8 @@ from pandas.util._decorators import Appender from pandas.core.dtypes.common import ( - ensure_platform_int, is_float, is_integer, - is_integer_dtype, is_iterator, is_list_like, is_numeric_dtype, @@ -34,7 +32,6 @@ def get_indexers_list(): return [ - ("ix", _IXIndexer), ("iloc", _iLocIndexer), ("loc", _LocIndexer), ("at", _AtIndexer), @@ -901,9 +898,6 @@ def _getitem_lowerdim(self, tup: Tuple): if len(tup) > self.ndim: raise IndexingError("Too many indexers. handle elsewhere") - # to avoid wasted computation - # df.ix[d1:d2, 0] -> columns first (True) - # df.ix[0, ['C', 'B', A']] -> rows first (False) for i, key in enumerate(tup): if is_label_like(key) or isinstance(key, tuple): section = self._getitem_axis(key, axis=i) @@ -1275,102 +1269,6 @@ def _get_slice_axis(self, slice_obj: slice, axis: int): return self._slice(indexer, axis=axis, kind="iloc") -class _IXIndexer(_NDFrameIndexer): - """ - A primarily label-location based indexer, with integer position - fallback. - - Warning: Starting in 0.20.0, the .ix indexer is deprecated, in - favor of the more strict .iloc and .loc indexers. - - ``.ix[]`` supports mixed integer and label based access. It is - primarily label based, but will fall back to integer positional - access unless the corresponding axis is of integer type. - - ``.ix`` is the most general indexer and will support any of the - inputs in ``.loc`` and ``.iloc``. ``.ix`` also supports floating - point label schemes. ``.ix`` is exceptionally useful when dealing - with mixed positional and label based hierarchical indexes. - - However, when an axis is integer based, ONLY label based access - and not positional access is supported. 
Thus, in such cases, it's - usually better to be explicit and use ``.iloc`` or ``.loc``. - - See more at :ref:`Advanced Indexing `. - """ - - _ix_deprecation_warning = textwrap.dedent( - """ - .ix is deprecated. Please use - .loc for label based indexing or - .iloc for positional indexing - - See the documentation here: - http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated""" # noqa: E501 - ) - - def __init__(self, name, obj): - warnings.warn(self._ix_deprecation_warning, FutureWarning, stacklevel=2) - super().__init__(name, obj) - - @Appender(_NDFrameIndexer._validate_key.__doc__) - def _validate_key(self, key, axis: int): - if isinstance(key, slice): - return True - - elif com.is_bool_indexer(key): - return True - - elif is_list_like_indexer(key): - return True - - else: - - self._convert_scalar_indexer(key, axis) - - return True - - def _convert_for_reindex(self, key, axis: int): - """ - Transform a list of keys into a new array ready to be used as axis of - the object we return (e.g. including NaNs). - - Parameters - ---------- - key : list-like - Target labels - axis: int - Where the indexing is being made - - Returns - ------- - list-like of labels - """ - labels = self.obj._get_axis(axis) - - if com.is_bool_indexer(key): - key = check_bool_indexer(labels, key) - return labels[key] - - if isinstance(key, Index): - keyarr = labels._convert_index_indexer(key) - else: - # asarray can be unsafe, NumPy strings are weird - keyarr = com.asarray_tuplesafe(key) - - if is_integer_dtype(keyarr): - # Cast the indexer to uint64 if possible so - # that the values returned from indexing are - # also uint64. 
- keyarr = labels._convert_arr_indexer(keyarr) - - if not labels.is_integer(): - keyarr = ensure_platform_int(keyarr) - return labels.take(keyarr) - - return keyarr - - class _LocationIndexer(_NDFrameIndexer): _exception = Exception diff --git a/pandas/tests/frame/test_indexing.py b/pandas/tests/frame/test_indexing.py index 814a99701b703..fdd04d0b19946 100644 --- a/pandas/tests/frame/test_indexing.py +++ b/pandas/tests/frame/test_indexing.py @@ -1,6 +1,5 @@ from datetime import date, datetime, time, timedelta import re -from warnings import catch_warnings, simplefilter import numpy as np import pytest @@ -402,12 +401,6 @@ def test_getitem_ix_mixed_integer(self): expected = df.loc[df.index[:-1]] assert_frame_equal(result, expected) - with catch_warnings(record=True): - simplefilter("ignore", FutureWarning) - result = df.ix[[1, 10]] - expected = df.ix[Index([1, 10], dtype=object)] - assert_frame_equal(result, expected) - # 11320 df = pd.DataFrame( { @@ -425,53 +418,6 @@ def test_getitem_ix_mixed_integer(self): expected = df.iloc[:, [1]] assert_frame_equal(result, expected) - def test_getitem_setitem_ix_negative_integers(self, float_frame): - with catch_warnings(record=True): - simplefilter("ignore", FutureWarning) - result = float_frame.ix[:, -1] - assert_series_equal(result, float_frame["D"]) - - with catch_warnings(record=True): - simplefilter("ignore", FutureWarning) - result = float_frame.ix[:, [-1]] - assert_frame_equal(result, float_frame[["D"]]) - - with catch_warnings(record=True): - simplefilter("ignore", FutureWarning) - result = float_frame.ix[:, [-1, -2]] - assert_frame_equal(result, float_frame[["D", "C"]]) - - with catch_warnings(record=True): - simplefilter("ignore", FutureWarning) - float_frame.ix[:, [-1]] = 0 - assert (float_frame["D"] == 0).all() - - df = DataFrame(np.random.randn(8, 4)) - # ix does label-based indexing when having an integer index - msg = "\"None of [Int64Index([-1], dtype='int64')] are in the [index]\"" - with 
catch_warnings(record=True): - simplefilter("ignore", FutureWarning) - with pytest.raises(KeyError, match=re.escape(msg)): - df.ix[[-1]] - - msg = "\"None of [Int64Index([-1], dtype='int64')] are in the [columns]\"" - with catch_warnings(record=True): - simplefilter("ignore", FutureWarning) - with pytest.raises(KeyError, match=re.escape(msg)): - df.ix[:, [-1]] - - # #1942 - a = DataFrame(np.random.randn(20, 2), index=[chr(x + 65) for x in range(20)]) - with catch_warnings(record=True): - simplefilter("ignore", FutureWarning) - a.ix[-1] = a.ix[-2] - - with catch_warnings(record=True): - simplefilter("ignore", FutureWarning) - assert_series_equal(a.ix[-1], a.ix[-2], check_names=False) - assert a.ix[-1].name == "T" - assert a.ix[-2].name == "S" - def test_getattr(self, float_frame): assert_series_equal(float_frame.A, float_frame["A"]) msg = "'DataFrame' object has no attribute 'NONEXISTENT_NAME'" @@ -844,55 +790,6 @@ def test_delitem_corner(self, float_frame): del f["B"] assert len(f.columns) == 2 - def test_getitem_fancy_2d(self, float_frame): - f = float_frame - - with catch_warnings(record=True): - simplefilter("ignore", FutureWarning) - assert_frame_equal(f.ix[:, ["B", "A"]], f.reindex(columns=["B", "A"])) - - subidx = float_frame.index[[5, 4, 1]] - with catch_warnings(record=True): - simplefilter("ignore", FutureWarning) - assert_frame_equal( - f.ix[subidx, ["B", "A"]], f.reindex(index=subidx, columns=["B", "A"]) - ) - - # slicing rows, etc. - with catch_warnings(record=True): - simplefilter("ignore", FutureWarning) - assert_frame_equal(f.ix[5:10], f[5:10]) - assert_frame_equal(f.ix[5:10, :], f[5:10]) - assert_frame_equal( - f.ix[:5, ["A", "B"]], f.reindex(index=f.index[:5], columns=["A", "B"]) - ) - - # slice rows with labels, inclusive! 
- with catch_warnings(record=True): - simplefilter("ignore", FutureWarning) - expected = f.ix[5:11] - result = f.ix[f.index[5] : f.index[10]] - assert_frame_equal(expected, result) - - # slice columns - with catch_warnings(record=True): - simplefilter("ignore", FutureWarning) - assert_frame_equal(f.ix[:, :2], f.reindex(columns=["A", "B"])) - - # get view - with catch_warnings(record=True): - simplefilter("ignore", FutureWarning) - exp = f.copy() - f.ix[5:10].values[:] = 5 - exp.values[5:10] = 5 - assert_frame_equal(f, exp) - - with catch_warnings(record=True): - simplefilter("ignore", FutureWarning) - msg = "Cannot index with multidimensional key" - with pytest.raises(ValueError, match=msg): - f.ix[f > 0.5] - def test_slice_floats(self): index = [52195.504153, 52196.303147, 52198.369883] df = DataFrame(np.random.rand(3, 2), index=index) @@ -941,111 +838,7 @@ def test_getitem_setitem_integer_slice_keyerrors(self): with pytest.raises(KeyError, match=r"^3$"): df2.loc[3:11] = 0 - def test_setitem_fancy_2d(self, float_frame): - - # case 1 - frame = float_frame.copy() - expected = frame.copy() - - with catch_warnings(record=True): - simplefilter("ignore", FutureWarning) - frame.ix[:, ["B", "A"]] = 1 - expected["B"] = 1.0 - expected["A"] = 1.0 - assert_frame_equal(frame, expected) - - # case 2 - frame = float_frame.copy() - frame2 = float_frame.copy() - - expected = frame.copy() - - subidx = float_frame.index[[5, 4, 1]] - values = np.random.randn(3, 2) - - with catch_warnings(record=True): - simplefilter("ignore", FutureWarning) - frame.ix[subidx, ["B", "A"]] = values - frame2.ix[[5, 4, 1], ["B", "A"]] = values - - expected["B"].ix[subidx] = values[:, 0] - expected["A"].ix[subidx] = values[:, 1] - - assert_frame_equal(frame, expected) - assert_frame_equal(frame2, expected) - - # case 3: slicing rows, etc. 
- frame = float_frame.copy() - - with catch_warnings(record=True): - simplefilter("ignore", FutureWarning) - expected1 = float_frame.copy() - frame.ix[5:10] = 1.0 - expected1.values[5:10] = 1.0 - assert_frame_equal(frame, expected1) - - with catch_warnings(record=True): - simplefilter("ignore", FutureWarning) - expected2 = float_frame.copy() - arr = np.random.randn(5, len(frame.columns)) - frame.ix[5:10] = arr - expected2.values[5:10] = arr - assert_frame_equal(frame, expected2) - - # case 4 - with catch_warnings(record=True): - simplefilter("ignore", FutureWarning) - frame = float_frame.copy() - frame.ix[5:10, :] = 1.0 - assert_frame_equal(frame, expected1) - frame.ix[5:10, :] = arr - assert_frame_equal(frame, expected2) - - # case 5 - with catch_warnings(record=True): - simplefilter("ignore", FutureWarning) - frame = float_frame.copy() - frame2 = float_frame.copy() - - expected = float_frame.copy() - values = np.random.randn(5, 2) - - frame.ix[:5, ["A", "B"]] = values - expected["A"][:5] = values[:, 0] - expected["B"][:5] = values[:, 1] - assert_frame_equal(frame, expected) - - with catch_warnings(record=True): - simplefilter("ignore", FutureWarning) - frame2.ix[:5, [0, 1]] = values - assert_frame_equal(frame2, expected) - - # case 6: slice rows with labels, inclusive! 
- with catch_warnings(record=True): - simplefilter("ignore", FutureWarning) - frame = float_frame.copy() - expected = float_frame.copy() - - frame.ix[frame.index[5] : frame.index[10]] = 5.0 - expected.values[5:11] = 5 - assert_frame_equal(frame, expected) - - # case 7: slice columns - with catch_warnings(record=True): - simplefilter("ignore", FutureWarning) - frame = float_frame.copy() - frame2 = float_frame.copy() - expected = float_frame.copy() - - # slice indices - frame.ix[:, 1:3] = 4.0 - expected.values[:, 1:3] = 4.0 - assert_frame_equal(frame, expected) - - # slice with labels - frame.ix[:, "B":"C"] = 4.0 - assert_frame_equal(frame, expected) - + def test_setitem_fancy_2d(self): # new corner case of boolean slicing / setting frame = DataFrame(zip([2, 3, 9, 6, 7], [np.nan] * 5), columns=["a", "b"]) lst = [100] @@ -1067,194 +860,6 @@ def test_fancy_getitem_slice_mixed(self, float_frame, float_string_frame): assert (float_frame["C"] == 4).all() - def test_fancy_setitem_int_labels(self): - # integer index defers to label-based indexing - - df = DataFrame(np.random.randn(10, 5), index=np.arange(0, 20, 2)) - - with catch_warnings(record=True): - simplefilter("ignore", FutureWarning) - tmp = df.copy() - exp = df.copy() - tmp.ix[[0, 2, 4]] = 5 - exp.values[:3] = 5 - assert_frame_equal(tmp, exp) - - with catch_warnings(record=True): - simplefilter("ignore", FutureWarning) - tmp = df.copy() - exp = df.copy() - tmp.ix[6] = 5 - exp.values[3] = 5 - assert_frame_equal(tmp, exp) - - with catch_warnings(record=True): - simplefilter("ignore", FutureWarning) - tmp = df.copy() - exp = df.copy() - tmp.ix[:, 2] = 5 - - # tmp correctly sets the dtype - # so match the exp way - exp[2] = 5 - assert_frame_equal(tmp, exp) - - def test_fancy_getitem_int_labels(self): - df = DataFrame(np.random.randn(10, 5), index=np.arange(0, 20, 2)) - - with catch_warnings(record=True): - simplefilter("ignore", FutureWarning) - result = df.ix[[4, 2, 0], [2, 0]] - expected = df.reindex(index=[4, 2, 0], 
columns=[2, 0]) - assert_frame_equal(result, expected) - - with catch_warnings(record=True): - simplefilter("ignore", FutureWarning) - result = df.ix[[4, 2, 0]] - expected = df.reindex(index=[4, 2, 0]) - assert_frame_equal(result, expected) - - with catch_warnings(record=True): - simplefilter("ignore", FutureWarning) - result = df.ix[4] - expected = df.xs(4) - assert_series_equal(result, expected) - - with catch_warnings(record=True): - simplefilter("ignore", FutureWarning) - result = df.ix[:, 3] - expected = df[3] - assert_series_equal(result, expected) - - def test_fancy_index_int_labels_exceptions(self, float_frame): - df = DataFrame(np.random.randn(10, 5), index=np.arange(0, 20, 2)) - - with catch_warnings(record=True): - simplefilter("ignore", FutureWarning) - - # labels that aren't contained - with pytest.raises(KeyError, match=r"\[1\] not in index"): - df.ix[[0, 1, 2], [2, 3, 4]] = 5 - - # try to set indices not contained in frame - msg = ( - r"None of \[Index\(\['foo', 'bar', 'baz'\]," - r" dtype='object'\)\] are in the \[index\]" - ) - with pytest.raises(KeyError, match=msg): - float_frame.ix[["foo", "bar", "baz"]] = 1 - msg = ( - r"None of \[Index\(\['E'\], dtype='object'\)\] are in the" - r" \[columns\]" - ) - with pytest.raises(KeyError, match=msg): - float_frame.ix[:, ["E"]] = 1 - - # FIXME: don't leave commented-out - # partial setting now allows this GH2578 - # pytest.raises(KeyError, float_frame.ix.__setitem__, - # (slice(None, None), 'E'), 1) - - def test_setitem_fancy_mixed_2d(self, float_string_frame): - - with catch_warnings(record=True): - simplefilter("ignore", FutureWarning) - self.mixed_frame.ix[:5, ["C", "B", "A"]] = 5 - result = self.mixed_frame.ix[:5, ["C", "B", "A"]] - assert (result.values == 5).all() - - float_string_frame.ix[5] = np.nan - assert isna(float_string_frame.ix[5]).all() - - float_string_frame.ix[5] = float_string_frame.ix[6] - assert_series_equal( - float_string_frame.ix[5], float_string_frame.ix[6], check_names=False - ) 
- - # #1432 - with catch_warnings(record=True): - simplefilter("ignore", FutureWarning) - df = DataFrame({1: [1.0, 2.0, 3.0], 2: [3, 4, 5]}) - assert df._is_mixed_type - - df.ix[1] = [5, 10] - - expected = DataFrame({1: [1.0, 5.0, 3.0], 2: [3, 10, 5]}) - - assert_frame_equal(df, expected) - - def test_ix_align(self): - b = Series(np.random.randn(10), name=0).sort_values() - df_orig = DataFrame(np.random.randn(10, 4)) - df = df_orig.copy() - - with catch_warnings(record=True): - simplefilter("ignore", FutureWarning) - df.ix[:, 0] = b - assert_series_equal(df.ix[:, 0].reindex(b.index), b) - - with catch_warnings(record=True): - simplefilter("ignore", FutureWarning) - dft = df_orig.T - dft.ix[0, :] = b - assert_series_equal(dft.ix[0, :].reindex(b.index), b) - - with catch_warnings(record=True): - simplefilter("ignore", FutureWarning) - df = df_orig.copy() - df.ix[:5, 0] = b - s = df.ix[:5, 0] - assert_series_equal(s, b.reindex(s.index)) - - with catch_warnings(record=True): - simplefilter("ignore", FutureWarning) - dft = df_orig.T - dft.ix[0, :5] = b - s = dft.ix[0, :5] - assert_series_equal(s, b.reindex(s.index)) - - with catch_warnings(record=True): - simplefilter("ignore", FutureWarning) - df = df_orig.copy() - idx = [0, 1, 3, 5] - df.ix[idx, 0] = b - s = df.ix[idx, 0] - assert_series_equal(s, b.reindex(s.index)) - - with catch_warnings(record=True): - simplefilter("ignore", FutureWarning) - dft = df_orig.T - dft.ix[0, idx] = b - s = dft.ix[0, idx] - assert_series_equal(s, b.reindex(s.index)) - - def test_ix_frame_align(self): - b = DataFrame(np.random.randn(3, 4)) - df_orig = DataFrame(np.random.randn(10, 4)) - df = df_orig.copy() - - with catch_warnings(record=True): - simplefilter("ignore", FutureWarning) - df.ix[:3] = b - out = b.ix[:3] - assert_frame_equal(out, b) - - b.sort_index(inplace=True) - - with catch_warnings(record=True): - simplefilter("ignore", FutureWarning) - df = df_orig.copy() - df.ix[[0, 1, 2]] = b - out = df.ix[[0, 1, 2]].reindex(b.index) - 
assert_frame_equal(out, b) - - with catch_warnings(record=True): - simplefilter("ignore", FutureWarning) - df = df_orig.copy() - df.ix[:3] = b - out = df.ix[:3] - assert_frame_equal(out, b.reindex(out.index)) - def test_getitem_setitem_non_ix_labels(self): df = tm.makeTimeDataFrame() @@ -1281,6 +886,7 @@ def test_ix_multi_take(self): xp = df.reindex([0]) assert_frame_equal(rs, xp) + # FIXME: dont leave commented-out """ #1321 df = DataFrame(np.random.randn(3, 2)) rs = df.loc[df.index==0, df.columns==1] @@ -1288,168 +894,6 @@ def test_ix_multi_take(self): assert_frame_equal(rs, xp) """ - def test_ix_multi_take_nonint_index(self): - df = DataFrame(np.random.randn(3, 2), index=["x", "y", "z"], columns=["a", "b"]) - with catch_warnings(record=True): - simplefilter("ignore", FutureWarning) - rs = df.ix[[0], [0]] - xp = df.reindex(["x"], columns=["a"]) - assert_frame_equal(rs, xp) - - def test_ix_multi_take_multiindex(self): - df = DataFrame( - np.random.randn(3, 2), - index=["x", "y", "z"], - columns=[["a", "b"], ["1", "2"]], - ) - with catch_warnings(record=True): - simplefilter("ignore", FutureWarning) - rs = df.ix[[0], [0]] - xp = df.reindex(["x"], columns=[("a", "1")]) - assert_frame_equal(rs, xp) - - def test_ix_dup(self): - idx = Index(["a", "a", "b", "c", "d", "d"]) - df = DataFrame(np.random.randn(len(idx), 3), idx) - - with catch_warnings(record=True): - simplefilter("ignore", FutureWarning) - sub = df.ix[:"d"] - assert_frame_equal(sub, df) - - with catch_warnings(record=True): - simplefilter("ignore", FutureWarning) - sub = df.ix["a":"c"] - assert_frame_equal(sub, df.ix[0:4]) - - with catch_warnings(record=True): - simplefilter("ignore", FutureWarning) - sub = df.ix["b":"d"] - assert_frame_equal(sub, df.ix[2:]) - - def test_getitem_fancy_1d(self, float_frame, float_string_frame): - f = float_frame - - # return self if no slicing...for now - with catch_warnings(record=True): - simplefilter("ignore", FutureWarning) - assert f.ix[:, :] is f - - # low dimensional 
slice - with catch_warnings(record=True): - simplefilter("ignore", FutureWarning) - xs1 = f.ix[2, ["C", "B", "A"]] - xs2 = f.xs(f.index[2]).reindex(["C", "B", "A"]) - tm.assert_series_equal(xs1, xs2) - - with catch_warnings(record=True): - simplefilter("ignore", FutureWarning) - ts1 = f.ix[5:10, 2] - ts2 = f[f.columns[2]][5:10] - tm.assert_series_equal(ts1, ts2) - - # positional xs - with catch_warnings(record=True): - simplefilter("ignore", FutureWarning) - xs1 = f.ix[0] - xs2 = f.xs(f.index[0]) - tm.assert_series_equal(xs1, xs2) - - with catch_warnings(record=True): - simplefilter("ignore", FutureWarning) - xs1 = f.ix[f.index[5]] - xs2 = f.xs(f.index[5]) - tm.assert_series_equal(xs1, xs2) - - # single column - with catch_warnings(record=True): - simplefilter("ignore", FutureWarning) - assert_series_equal(f.ix[:, "A"], f["A"]) - - # return view - with catch_warnings(record=True): - simplefilter("ignore", FutureWarning) - exp = f.copy() - exp.values[5] = 4 - f.ix[5][:] = 4 - tm.assert_frame_equal(exp, f) - - with catch_warnings(record=True): - simplefilter("ignore", FutureWarning) - exp.values[:, 1] = 6 - f.ix[:, 1][:] = 6 - tm.assert_frame_equal(exp, f) - - # slice of mixed-frame - with catch_warnings(record=True): - simplefilter("ignore", FutureWarning) - xs = float_string_frame.ix[5] - exp = float_string_frame.xs(float_string_frame.index[5]) - tm.assert_series_equal(xs, exp) - - def test_setitem_fancy_1d(self, float_frame): - - # case 1: set cross-section for indices - frame = float_frame.copy() - expected = float_frame.copy() - - with catch_warnings(record=True): - simplefilter("ignore", FutureWarning) - frame.ix[2, ["C", "B", "A"]] = [1.0, 2.0, 3.0] - expected["C"][2] = 1.0 - expected["B"][2] = 2.0 - expected["A"][2] = 3.0 - assert_frame_equal(frame, expected) - - with catch_warnings(record=True): - simplefilter("ignore", FutureWarning) - frame2 = float_frame.copy() - frame2.ix[2, [3, 2, 1]] = [1.0, 2.0, 3.0] - assert_frame_equal(frame, expected) - - # case 2, 
set a section of a column - frame = float_frame.copy() - expected = float_frame.copy() - - with catch_warnings(record=True): - simplefilter("ignore", FutureWarning) - vals = np.random.randn(5) - expected.values[5:10, 2] = vals - frame.ix[5:10, 2] = vals - assert_frame_equal(frame, expected) - - with catch_warnings(record=True): - simplefilter("ignore", FutureWarning) - frame2 = float_frame.copy() - frame2.ix[5:10, "B"] = vals - assert_frame_equal(frame, expected) - - # case 3: full xs - frame = float_frame.copy() - expected = float_frame.copy() - - with catch_warnings(record=True): - simplefilter("ignore", FutureWarning) - frame.ix[4] = 5.0 - expected.values[4] = 5.0 - assert_frame_equal(frame, expected) - - with catch_warnings(record=True): - simplefilter("ignore", FutureWarning) - frame.ix[frame.index[4]] = 6.0 - expected.values[4] = 6.0 - assert_frame_equal(frame, expected) - - # single column - frame = float_frame.copy() - expected = float_frame.copy() - - with catch_warnings(record=True): - simplefilter("ignore", FutureWarning) - frame.ix[:, "A"] = 7.0 - expected["A"] = 7.0 - assert_frame_equal(frame, expected) - def test_getitem_fancy_scalar(self, float_frame): f = float_frame ix = f.loc @@ -1969,15 +1413,11 @@ def test_get_set_value_no_partial_indexing(self): with pytest.raises(KeyError, match=r"^0$"): df._get_value(0, 1) + # TODO: rename? remove? 
def test_single_element_ix_dont_upcast(self, float_frame): float_frame["E"] = 1 assert issubclass(float_frame["E"].dtype.type, (int, np.integer)) - with catch_warnings(record=True): - simplefilter("ignore", FutureWarning) - result = float_frame.ix[float_frame.index[5], "E"] - assert is_integer(result) - result = float_frame.loc[float_frame.index[5], "E"] assert is_integer(result) @@ -1985,18 +1425,10 @@ def test_single_element_ix_dont_upcast(self, float_frame): df = pd.DataFrame(dict(a=[1.23])) df["b"] = 666 - with catch_warnings(record=True): - simplefilter("ignore", FutureWarning) - result = df.ix[0, "b"] - assert is_integer(result) result = df.loc[0, "b"] assert is_integer(result) expected = Series([666], [0], name="b") - with catch_warnings(record=True): - simplefilter("ignore", FutureWarning) - result = df.ix[[0], "b"] - assert_series_equal(result, expected) result = df.loc[[0], "b"] assert_series_equal(result, expected) @@ -2064,45 +1496,12 @@ def test_iloc_duplicates(self): df = DataFrame(np.random.rand(3, 3), columns=list("ABC"), index=list("aab")) result = df.iloc[0] - with catch_warnings(record=True): - simplefilter("ignore", FutureWarning) - result2 = df.ix[0] assert isinstance(result, Series) assert_almost_equal(result.values, df.values[0]) - assert_series_equal(result, result2) - with catch_warnings(record=True): - simplefilter("ignore", FutureWarning) - result = df.T.iloc[:, 0] - result2 = df.T.ix[:, 0] + result = df.T.iloc[:, 0] assert isinstance(result, Series) assert_almost_equal(result.values, df.values[0]) - assert_series_equal(result, result2) - - # multiindex - df = DataFrame( - np.random.randn(3, 3), - columns=[["i", "i", "j"], ["A", "A", "B"]], - index=[["i", "i", "j"], ["X", "X", "Y"]], - ) - - with catch_warnings(record=True): - simplefilter("ignore", FutureWarning) - rs = df.iloc[0] - xp = df.ix[0] - assert_series_equal(rs, xp) - - with catch_warnings(record=True): - simplefilter("ignore", FutureWarning) - rs = df.iloc[:, 0] - xp = 
df.T.ix[0] - assert_series_equal(rs, xp) - - with catch_warnings(record=True): - simplefilter("ignore", FutureWarning) - rs = df.iloc[:, [0]] - xp = df.ix[:, [0]] - assert_frame_equal(rs, xp) # #2259 df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=[1, 1, 2]) @@ -2355,9 +1754,6 @@ def test_getitem_ix_float_duplicates(self): ) expect = df.iloc[1:] assert_frame_equal(df.loc[0.2], expect) - with catch_warnings(record=True): - simplefilter("ignore", FutureWarning) - assert_frame_equal(df.ix[0.2], expect) expect = df.iloc[1:, 0] assert_series_equal(df.loc[0.2, "a"], expect) @@ -2365,9 +1761,6 @@ def test_getitem_ix_float_duplicates(self): df.index = [1, 0.2, 0.2] expect = df.iloc[1:] assert_frame_equal(df.loc[0.2], expect) - with catch_warnings(record=True): - simplefilter("ignore", FutureWarning) - assert_frame_equal(df.ix[0.2], expect) expect = df.iloc[1:, 0] assert_series_equal(df.loc[0.2, "a"], expect) @@ -2377,9 +1770,6 @@ def test_getitem_ix_float_duplicates(self): ) expect = df.iloc[1:-1] assert_frame_equal(df.loc[0.2], expect) - with catch_warnings(record=True): - simplefilter("ignore", FutureWarning) - assert_frame_equal(df.ix[0.2], expect) expect = df.iloc[1:-1, 0] assert_series_equal(df.loc[0.2, "a"], expect) @@ -2387,9 +1777,6 @@ def test_getitem_ix_float_duplicates(self): df.index = [0.1, 0.2, 2, 0.2] expect = df.iloc[[1, -1]] assert_frame_equal(df.loc[0.2], expect) - with catch_warnings(record=True): - simplefilter("ignore", FutureWarning) - assert_frame_equal(df.ix[0.2], expect) expect = df.iloc[[1, -1], 0] assert_series_equal(df.loc[0.2, "a"], expect) @@ -2618,11 +2005,6 @@ def test_index_namedtuple(self): index = Index([idx1, idx2], name="composite_index", tupleize_cols=False) df = DataFrame([(1, 2), (3, 4)], index=index, columns=["A", "B"]) - with catch_warnings(record=True): - simplefilter("ignore", FutureWarning) - result = df.ix[IndexType("foo", "bar")]["A"] - assert result == 1 - result = df.loc[IndexType("foo", "bar")]["A"] assert result == 1 diff 
--git a/pandas/tests/indexing/common.py b/pandas/tests/indexing/common.py index 9ceeb06b6fd86..b9b459a10eeae 100644 --- a/pandas/tests/indexing/common.py +++ b/pandas/tests/indexing/common.py @@ -1,7 +1,7 @@ """ common utilities """ import itertools -from warnings import catch_warnings, filterwarnings +from warnings import catch_warnings import numpy as np @@ -154,9 +154,12 @@ def get_value(self, f, i, values=False): # for a in reversed(i): # v = v.__getitem__(a) # return v - with catch_warnings(record=True): - filterwarnings("ignore", "\\n.ix", FutureWarning) - return f.ix[i] + + # TODO: this used to be f.ix[i]; is loc-then-iloc correct here? + try: + return f.loc[i] + except KeyError: + return f.iloc[i] def check_values(self, f, func, values=False): diff --git a/pandas/tests/indexing/multiindex/test_slice.py b/pandas/tests/indexing/multiindex/test_slice.py index 692a86aa1a338..a67255a0fcf43 100644 --- a/pandas/tests/indexing/multiindex/test_slice.py +++ b/pandas/tests/indexing/multiindex/test_slice.py @@ -1,4 +1,3 @@ -from warnings import catch_warnings import numpy as np import pytest @@ -12,7 +11,6 @@ from pandas.util import testing as tm -@pytest.mark.filterwarnings("ignore:\\n.ix:FutureWarning") class TestMultiIndexSlicers: def test_per_axis_per_level_getitem(self): @@ -637,8 +635,6 @@ def test_multiindex_label_slicing_with_negative_step(self): def assert_slices_equivalent(l_slc, i_slc): tm.assert_series_equal(s.loc[l_slc], s.iloc[i_slc]) tm.assert_series_equal(s[l_slc], s.iloc[i_slc]) - with catch_warnings(record=True): - tm.assert_series_equal(s.ix[l_slc], s.iloc[i_slc]) assert_slices_equivalent(SLC[::-1], SLC[::-1]) diff --git a/pandas/tests/indexing/test_chaining_and_caching.py b/pandas/tests/indexing/test_chaining_and_caching.py index 702bf0b15dec9..dd9764beb49f3 100644 --- a/pandas/tests/indexing/test_chaining_and_caching.py +++ b/pandas/tests/indexing/test_chaining_and_caching.py @@ -365,12 +365,6 @@ def check(result, expected): def 
test_cache_updating(self): # GH 4939, make sure to update the cache on setitem - df = tm.makeDataFrame() - df["A"] # cache series - df.ix["Hello Friend"] = df.ix[0] - assert "Hello Friend" in df["A"].index - assert "Hello Friend" in df["B"].index - # 10264 df = DataFrame( np.zeros((5, 5), dtype="int64"), diff --git a/pandas/tests/indexing/test_floats.py b/pandas/tests/indexing/test_floats.py index 56a78081bc624..3a44f2479ef6f 100644 --- a/pandas/tests/indexing/test_floats.py +++ b/pandas/tests/indexing/test_floats.py @@ -134,8 +134,7 @@ def test_scalar_non_numeric(self): # these should prob work # and are inconsisten between series/dataframe ATM - # for idxr in [lambda x: x.ix, - # lambda x: x]: + # for idxr in [lambda x: x]: # s2 = s.copy() # # with pytest.raises(TypeError): diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index 85eab91af3c48..aa9ace1556b00 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -1,6 +1,6 @@ """ test positional based indexing with iloc """ -from warnings import catch_warnings, filterwarnings, simplefilter +from warnings import catch_warnings, simplefilter import numpy as np import pytest @@ -462,71 +462,6 @@ def test_iloc_setitem_dups(self): df.iloc[[1, 0], [0, 1]] = df.iloc[[1, 0], [0, 1]].reset_index(drop=True) tm.assert_frame_equal(df, expected) - def test_iloc_getitem_frame(self): - df = DataFrame( - np.random.randn(10, 4), index=range(0, 20, 2), columns=range(0, 8, 2) - ) - - result = df.iloc[2] - with catch_warnings(record=True): - filterwarnings("ignore", "\\n.ix", FutureWarning) - exp = df.ix[4] - tm.assert_series_equal(result, exp) - - result = df.iloc[2, 2] - with catch_warnings(record=True): - filterwarnings("ignore", "\\n.ix", FutureWarning) - exp = df.ix[4, 4] - assert result == exp - - # slice - result = df.iloc[4:8] - with catch_warnings(record=True): - filterwarnings("ignore", "\\n.ix", FutureWarning) - expected = df.ix[8:14] - 
tm.assert_frame_equal(result, expected) - - result = df.iloc[:, 2:3] - with catch_warnings(record=True): - filterwarnings("ignore", "\\n.ix", FutureWarning) - expected = df.ix[:, 4:5] - tm.assert_frame_equal(result, expected) - - # list of integers - result = df.iloc[[0, 1, 3]] - with catch_warnings(record=True): - filterwarnings("ignore", "\\n.ix", FutureWarning) - expected = df.ix[[0, 2, 6]] - tm.assert_frame_equal(result, expected) - - result = df.iloc[[0, 1, 3], [0, 1]] - with catch_warnings(record=True): - filterwarnings("ignore", "\\n.ix", FutureWarning) - expected = df.ix[[0, 2, 6], [0, 2]] - tm.assert_frame_equal(result, expected) - - # neg indices - result = df.iloc[[-1, 1, 3], [-1, 1]] - with catch_warnings(record=True): - filterwarnings("ignore", "\\n.ix", FutureWarning) - expected = df.ix[[18, 2, 6], [6, 2]] - tm.assert_frame_equal(result, expected) - - # dups indices - result = df.iloc[[-1, -1, 1, 3], [-1, 1]] - with catch_warnings(record=True): - filterwarnings("ignore", "\\n.ix", FutureWarning) - expected = df.ix[[18, 18, 2, 6], [6, 2]] - tm.assert_frame_equal(result, expected) - - # with index-like - s = Series(index=range(1, 5)) - result = df.iloc[s.index] - with catch_warnings(record=True): - filterwarnings("ignore", "\\n.ix", FutureWarning) - expected = df.ix[[2, 4, 6, 8]] - tm.assert_frame_equal(result, expected) - def test_iloc_getitem_labelled_frame(self): # try with labelled frame df = DataFrame( diff --git a/pandas/tests/indexing/test_ix.py b/pandas/tests/indexing/test_ix.py deleted file mode 100644 index 45ccd8d1b8fb3..0000000000000 --- a/pandas/tests/indexing/test_ix.py +++ /dev/null @@ -1,345 +0,0 @@ -""" test indexing with ix """ - -from warnings import catch_warnings - -import numpy as np -import pytest - -from pandas.core.dtypes.common import is_scalar - -import pandas as pd -from pandas import DataFrame, Series, option_context -from pandas.util import testing as tm - - -def test_ix_deprecation(): - # GH 15114 - - df = DataFrame({"A": 
[1, 2, 3]}) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=True): - df.ix[1, "A"] - - -@pytest.mark.filterwarnings("ignore:\\n.ix:FutureWarning") -class TestIX: - def test_ix_loc_setitem_consistency(self): - - # GH 5771 - # loc with slice and series - s = Series(0, index=[4, 5, 6]) - s.loc[4:5] += 1 - expected = Series([1, 1, 0], index=[4, 5, 6]) - tm.assert_series_equal(s, expected) - - # GH 5928 - # chained indexing assignment - df = DataFrame({"a": [0, 1, 2]}) - expected = df.copy() - with catch_warnings(record=True): - expected.ix[[0, 1, 2], "a"] = -expected.ix[[0, 1, 2], "a"] - - with catch_warnings(record=True): - df["a"].ix[[0, 1, 2]] = -df["a"].ix[[0, 1, 2]] - tm.assert_frame_equal(df, expected) - - df = DataFrame({"a": [0, 1, 2], "b": [0, 1, 2]}) - with catch_warnings(record=True): - df["a"].ix[[0, 1, 2]] = -df["a"].ix[[0, 1, 2]].astype("float64") + 0.5 - expected = DataFrame({"a": [0.5, -0.5, -1.5], "b": [0, 1, 2]}) - tm.assert_frame_equal(df, expected) - - # GH 8607 - # ix setitem consistency - df = DataFrame( - { - "delta": [1174, 904, 161], - "elapsed": [7673, 9277, 1470], - "timestamp": [1413840976, 1413842580, 1413760580], - } - ) - expected = DataFrame( - { - "delta": [1174, 904, 161], - "elapsed": [7673, 9277, 1470], - "timestamp": pd.to_datetime( - [1413840976, 1413842580, 1413760580], unit="s" - ), - } - ) - - df2 = df.copy() - df2["timestamp"] = pd.to_datetime(df["timestamp"], unit="s") - tm.assert_frame_equal(df2, expected) - - df2 = df.copy() - df2.loc[:, "timestamp"] = pd.to_datetime(df["timestamp"], unit="s") - tm.assert_frame_equal(df2, expected) - - df2 = df.copy() - with catch_warnings(record=True): - df2.ix[:, 2] = pd.to_datetime(df["timestamp"], unit="s") - tm.assert_frame_equal(df2, expected) - - def test_ix_loc_consistency(self): - - # GH 8613 - # some edge cases where ix/loc should return the same - # this is not an exhaustive case - - def compare(result, expected): - if is_scalar(expected): - assert result == 
expected - else: - assert expected.equals(result) - - # failure cases for .loc, but these work for .ix - df = DataFrame(np.random.randn(5, 4), columns=list("ABCD")) - for key in [ - slice(1, 3), - tuple([slice(0, 2), slice(0, 2)]), - tuple([slice(0, 2), df.columns[0:2]]), - ]: - - for index in [ - tm.makeStringIndex, - tm.makeUnicodeIndex, - tm.makeDateIndex, - tm.makePeriodIndex, - tm.makeTimedeltaIndex, - ]: - df.index = index(len(df.index)) - with catch_warnings(record=True): - df.ix[key] - - msg = ( - r"cannot do slice indexing" - r" on {klass} with these indexers \[(0|1)\] of" - r" {kind}".format(klass=type(df.index), kind=str(int)) - ) - with pytest.raises(TypeError, match=msg): - df.loc[key] - - df = DataFrame( - np.random.randn(5, 4), - columns=list("ABCD"), - index=pd.date_range("2012-01-01", periods=5), - ) - - for key in [ - "2012-01-03", - "2012-01-31", - slice("2012-01-03", "2012-01-03"), - slice("2012-01-03", "2012-01-04"), - slice("2012-01-03", "2012-01-06", 2), - slice("2012-01-03", "2012-01-31"), - tuple([[True, True, True, False, True]]), - ]: - - # getitem - - # if the expected raises, then compare the exceptions - try: - with catch_warnings(record=True): - expected = df.ix[key] - except KeyError: - with pytest.raises(KeyError, match=r"^'2012-01-31'$"): - df.loc[key] - continue - - result = df.loc[key] - compare(result, expected) - - # setitem - df1 = df.copy() - df2 = df.copy() - - with catch_warnings(record=True): - df1.ix[key] = 10 - df2.loc[key] = 10 - compare(df2, df1) - - # edge cases - s = Series([1, 2, 3, 4], index=list("abde")) - - result1 = s["a":"c"] - with catch_warnings(record=True): - result2 = s.ix["a":"c"] - result3 = s.loc["a":"c"] - tm.assert_series_equal(result1, result2) - tm.assert_series_equal(result1, result3) - - # now work rather than raising KeyError - s = Series(range(5), [-2, -1, 1, 2, 3]) - - with catch_warnings(record=True): - result1 = s.ix[-10:3] - result2 = s.loc[-10:3] - tm.assert_series_equal(result1, result2) - 
- with catch_warnings(record=True): - result1 = s.ix[0:3] - result2 = s.loc[0:3] - tm.assert_series_equal(result1, result2) - - def test_ix_weird_slicing(self): - # http://stackoverflow.com/q/17056560/1240268 - df = DataFrame({"one": [1, 2, 3, np.nan, np.nan], "two": [1, 2, 3, 4, 5]}) - df.loc[df["one"] > 1, "two"] = -df["two"] - - expected = DataFrame( - { - "one": {0: 1.0, 1: 2.0, 2: 3.0, 3: np.nan, 4: np.nan}, - "two": {0: 1, 1: -2, 2: -3, 3: 4, 4: 5}, - } - ) - tm.assert_frame_equal(df, expected) - - def test_ix_assign_column_mixed(self, float_frame): - # GH #1142 - df = float_frame - df["foo"] = "bar" - - orig = df.loc[:, "B"].copy() - df.loc[:, "B"] = df.loc[:, "B"] + 1 - tm.assert_series_equal(df.B, orig + 1) - - # GH 3668, mixed frame with series value - df = DataFrame({"x": np.arange(10), "y": np.arange(10, 20), "z": "bar"}) - expected = df.copy() - - for i in range(5): - indexer = i * 2 - v = 1000 + i * 200 - expected.loc[indexer, "y"] = v - assert expected.loc[indexer, "y"] == v - - df.loc[df.x % 2 == 0, "y"] = df.loc[df.x % 2 == 0, "y"] * 100 - tm.assert_frame_equal(df, expected) - - # GH 4508, making sure consistency of assignments - df = DataFrame({"a": [1, 2, 3], "b": [0, 1, 2]}) - df.loc[[0, 2], "b"] = [100, -100] - expected = DataFrame({"a": [1, 2, 3], "b": [100, 1, -100]}) - tm.assert_frame_equal(df, expected) - - df = DataFrame({"a": list(range(4))}) - df["b"] = np.nan - df.loc[[1, 3], "b"] = [100, -100] - expected = DataFrame({"a": [0, 1, 2, 3], "b": [np.nan, 100, np.nan, -100]}) - tm.assert_frame_equal(df, expected) - - # ok, but chained assignments are dangerous - # if we turn off chained assignment it will work - with option_context("chained_assignment", None): - df = DataFrame({"a": list(range(4))}) - df["b"] = np.nan - df["b"].loc[[1, 3]] = [100, -100] - tm.assert_frame_equal(df, expected) - - def test_ix_get_set_consistency(self): - - # GH 4544 - # ix/loc get/set not consistent when - # a mixed int/string index - df = DataFrame( - 
np.arange(16).reshape((4, 4)), - columns=["a", "b", 8, "c"], - index=["e", 7, "f", "g"], - ) - - with catch_warnings(record=True): - assert df.ix["e", 8] == 2 - assert df.loc["e", 8] == 2 - - with catch_warnings(record=True): - df.ix["e", 8] = 42 - assert df.ix["e", 8] == 42 - assert df.loc["e", 8] == 42 - - df.loc["e", 8] = 45 - with catch_warnings(record=True): - assert df.ix["e", 8] == 45 - assert df.loc["e", 8] == 45 - - def test_ix_slicing_strings(self): - # see gh-3836 - data = { - "Classification": ["SA EQUITY CFD", "bbb", "SA EQUITY", "SA SSF", "aaa"], - "Random": [1, 2, 3, 4, 5], - "X": ["correct", "wrong", "correct", "correct", "wrong"], - } - df = DataFrame(data) - x = df[~df.Classification.isin(["SA EQUITY CFD", "SA EQUITY", "SA SSF"])] - with catch_warnings(record=True): - df.ix[x.index, "X"] = df["Classification"] - - expected = DataFrame( - { - "Classification": { - 0: "SA EQUITY CFD", - 1: "bbb", - 2: "SA EQUITY", - 3: "SA SSF", - 4: "aaa", - }, - "Random": {0: 1, 1: 2, 2: 3, 3: 4, 4: 5}, - "X": {0: "correct", 1: "bbb", 2: "correct", 3: "correct", 4: "aaa"}, - } - ) # bug was 4: 'bbb' - - tm.assert_frame_equal(df, expected) - - def test_ix_setitem_out_of_bounds_axis_0(self): - df = DataFrame( - np.random.randn(2, 5), - index=["row{i}".format(i=i) for i in range(2)], - columns=["col{i}".format(i=i) for i in range(5)], - ) - with catch_warnings(record=True): - msg = "cannot set by positional indexing with enlargement" - with pytest.raises(ValueError, match=msg): - df.ix[2, 0] = 100 - - def test_ix_setitem_out_of_bounds_axis_1(self): - df = DataFrame( - np.random.randn(5, 2), - index=["row{i}".format(i=i) for i in range(5)], - columns=["col{i}".format(i=i) for i in range(2)], - ) - with catch_warnings(record=True): - msg = "cannot set by positional indexing with enlargement" - with pytest.raises(ValueError, match=msg): - df.ix[0, 2] = 100 - - def test_ix_empty_list_indexer_is_ok(self): - with catch_warnings(record=True): - from pandas.util.testing 
import makeCustomDataframe as mkdf - - df = mkdf(5, 2) - # vertical empty - tm.assert_frame_equal( - df.ix[:, []], - df.iloc[:, :0], - check_index_type=True, - check_column_type=True, - ) - # horizontal empty - tm.assert_frame_equal( - df.ix[[], :], - df.iloc[:0, :], - check_index_type=True, - check_column_type=True, - ) - # horizontal empty - tm.assert_frame_equal( - df.ix[[]], df.iloc[:0, :], check_index_type=True, check_column_type=True - ) - - def test_ix_duplicate_returns_series(self): - df = DataFrame( - np.random.randn(3, 3), index=[0.1, 0.2, 0.2], columns=list("abc") - ) - with catch_warnings(record=True): - r = df.ix[0.2, "a"] - e = df.loc[0.2, "a"] - tm.assert_series_equal(r, e) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index abe0cd86c90d7..2e9b149663d70 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -1,7 +1,6 @@ """ test label based indexing with loc """ from io import StringIO import re -from warnings import catch_warnings, filterwarnings import numpy as np import pytest @@ -956,11 +955,6 @@ def test_loc_name(self): result = df.iloc[[0, 1]].index.name assert result == "index_name" - with catch_warnings(record=True): - filterwarnings("ignore", "\\n.ix", FutureWarning) - result = df.ix[[0, 1]].index.name - assert result == "index_name" - result = df.loc[[0, 1]].index.name assert result == "index_name" diff --git a/pandas/tests/indexing/test_partial.py b/pandas/tests/indexing/test_partial.py index 68e93f06e43dc..7290354c84457 100644 --- a/pandas/tests/indexing/test_partial.py +++ b/pandas/tests/indexing/test_partial.py @@ -1,11 +1,9 @@ """ test setting *parts* of objects both positionally and label based -TOD: these should be split among the indexer tests +TODO: these should be split among the indexer tests """ -from warnings import catch_warnings - import numpy as np import pytest @@ -15,7 +13,6 @@ class TestPartialSetting: - 
@pytest.mark.filterwarnings("ignore:\\n.ix:FutureWarning") def test_partial_setting(self): # GH2578, allow ix and friends to partially set @@ -84,37 +81,6 @@ def test_partial_setting(self): df.loc[3] = df.loc[2] tm.assert_frame_equal(df, expected) - # single dtype frame, overwrite - expected = DataFrame(dict({"A": [0, 2, 4], "B": [0, 2, 4]})) - df = df_orig.copy() - with catch_warnings(record=True): - df.ix[:, "B"] = df.ix[:, "A"] - tm.assert_frame_equal(df, expected) - - # mixed dtype frame, overwrite - expected = DataFrame(dict({"A": [0, 2, 4], "B": Series([0, 2, 4])})) - df = df_orig.copy() - df["B"] = df["B"].astype(np.float64) - with catch_warnings(record=True): - df.ix[:, "B"] = df.ix[:, "A"] - tm.assert_frame_equal(df, expected) - - # single dtype frame, partial setting - expected = df_orig.copy() - expected["C"] = df["A"] - df = df_orig.copy() - with catch_warnings(record=True): - df.ix[:, "C"] = df.ix[:, "A"] - tm.assert_frame_equal(df, expected) - - # mixed frame, partial setting - expected = df_orig.copy() - expected["C"] = df["A"] - df = df_orig.copy() - with catch_warnings(record=True): - df.ix[:, "C"] = df.ix[:, "A"] - tm.assert_frame_equal(df, expected) - # GH 8473 dates = date_range("1/1/2000", periods=8) df_orig = DataFrame( @@ -364,41 +330,6 @@ def test_series_partial_set_with_name(self): result = ser.iloc[[1, 1, 0, 0]] tm.assert_series_equal(result, expected, check_index_type=True) - @pytest.mark.filterwarnings("ignore:\\n.ix") - def test_partial_set_invalid(self): - - # GH 4940 - # allow only setting of 'valid' values - - orig = tm.makeTimeDataFrame() - df = orig.copy() - - # don't allow not string inserts - with pytest.raises(TypeError): - with catch_warnings(record=True): - df.loc[100.0, :] = df.ix[0] - - with pytest.raises(TypeError): - with catch_warnings(record=True): - df.loc[100, :] = df.ix[0] - - with pytest.raises(TypeError): - with catch_warnings(record=True): - df.ix[100.0, :] = df.ix[0] - - with pytest.raises(ValueError): - with 
catch_warnings(record=True): - df.ix[100, :] = df.ix[0] - - # allow object conversion here - df = orig.copy() - with catch_warnings(record=True): - df.loc["a", :] = df.ix[0] - exp = orig.append(Series(df.ix[0], name="a")) - tm.assert_frame_equal(df, exp) - tm.assert_index_equal(df.index, Index(orig.index.tolist() + ["a"])) - assert df.index.dtype == "object" - def test_partial_set_empty_series(self): # GH5226 diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index c97c69c323b56..c155d5b56327a 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -2,7 +2,6 @@ from io import StringIO import itertools from itertools import product -from warnings import catch_warnings, simplefilter import numpy as np from numpy.random import randn @@ -209,11 +208,6 @@ def test_reindex(self): reindexed = self.frame.loc[[("foo", "one"), ("bar", "one")]] tm.assert_frame_equal(reindexed, expected) - with catch_warnings(record=True): - simplefilter("ignore", FutureWarning) - reindexed = self.frame.ix[[("foo", "one"), ("bar", "one")]] - tm.assert_frame_equal(reindexed, expected) - def test_reindex_preserve_levels(self): new_index = self.ymd.index[::10] chunk = self.ymd.reindex(new_index) @@ -222,11 +216,6 @@ def test_reindex_preserve_levels(self): chunk = self.ymd.loc[new_index] assert chunk.index is new_index - with catch_warnings(record=True): - simplefilter("ignore", FutureWarning) - chunk = self.ymd.ix[new_index] - assert chunk.index is new_index - ymdT = self.ymd.T chunk = ymdT.reindex(columns=new_index) assert chunk.columns is new_index From ec02d1f06bb3d71c449dd2e735030a2093aeabb2 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 26 Jul 2019 18:43:19 -0700 Subject: [PATCH 02/11] remove ix-only methods --- pandas/core/indexing.py | 66 --------------------------------- pandas/tests/test_downstream.py | 1 + 2 files changed, 1 insertion(+), 66 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 
640a4b2a3f346..e8e2e65f6268b 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -110,37 +110,6 @@ def __call__(self, axis=None): new_self.axis = axis return new_self - def __iter__(self): - raise NotImplementedError("ix is not iterable") - - def __getitem__(self, key): - if type(key) is tuple: - # Note: we check the type exactly instead of with isinstance - # because NamedTuple is checked separately. - key = tuple(com.apply_if_callable(x, self.obj) for x in key) - try: - values = self.obj._get_value(*key) - except (KeyError, TypeError, InvalidIndexError): - # TypeError occurs here if the key has non-hashable entries, - # generally slice or list. - # TODO(ix): most/all of the TypeError cases here are for ix, - # so this check can be removed once ix is removed. - # The InvalidIndexError is only catched for compatibility - # with geopandas, see - # https://github.com/pandas-dev/pandas/issues/27258 - pass - else: - if is_scalar(values): - return values - - return self._getitem_tuple(key) - else: - # we by definition only have the 0th axis - axis = self.axis or 0 - - key = com.apply_if_callable(key, self.obj) - return self._getitem_axis(key, axis=axis) - def _get_label(self, label, axis: int): if self.ndim == 1: # for perf reasons we want to try _xs first @@ -978,41 +947,6 @@ def _getitem_nested_tuple(self, tup: Tuple): return obj - def _getitem_axis(self, key, axis: int): - if is_iterator(key): - key = list(key) - self._validate_key(key, axis) - - labels = self.obj._get_axis(axis) - if isinstance(key, slice): - return self._get_slice_axis(key, axis=axis) - elif is_list_like_indexer(key) and not ( - isinstance(key, tuple) and isinstance(labels, MultiIndex) - ): - - if hasattr(key, "ndim") and key.ndim > 1: - raise ValueError("Cannot index with multidimensional key") - - return self._getitem_iterable(key, axis=axis) - else: - - # maybe coerce a float scalar to integer - key = labels._maybe_cast_indexer(key) - - if is_integer(key): - if axis == 0 and 
isinstance(labels, MultiIndex): - try: - return self._get_label(key, axis=axis) - except (KeyError, TypeError): - if self.obj.index.levels[0].is_integer(): - raise - - # this is the fallback! (for a non-float, non-integer index) - if not labels.is_floating() and not labels.is_integer(): - return self._get_loc(key, axis=axis) - - return self._get_label(key, axis=axis) - def _get_listlike_indexer(self, key, axis: int, raise_missing: bool = False): """ Transform a list-like of keys into a new index and an indexer. diff --git a/pandas/tests/test_downstream.py b/pandas/tests/test_downstream.py index 93baafddedeb4..4d8eecb042a20 100644 --- a/pandas/tests/test_downstream.py +++ b/pandas/tests/test_downstream.py @@ -123,6 +123,7 @@ def test_geopandas(): assert geopandas.read_file(fp) is not None +@pytest.mark.xfail(reason="ix-only methods have been removed from _NDFrameIndexer") def test_geopandas_coordinate_indexer(): # this test is included to have coverage of one case in the indexing.py # code that is only kept for compatibility with geopandas, see From db1567fb51138cf04de4f817e388b6626fc86e10 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 26 Jul 2019 20:24:46 -0700 Subject: [PATCH 03/11] lint fixups --- pandas/core/indexing.py | 2 +- pandas/tests/indexing/multiindex/test_slice.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index e8e2e65f6268b..cd505496634d4 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -24,7 +24,7 @@ from pandas.core.dtypes.missing import _infer_fill_value, isna import pandas.core.common as com -from pandas.core.index import Index, InvalidIndexError, MultiIndex +from pandas.core.index import Index, MultiIndex from pandas.core.indexers import is_list_like_indexer, length_of_indexer diff --git a/pandas/tests/indexing/multiindex/test_slice.py b/pandas/tests/indexing/multiindex/test_slice.py index a67255a0fcf43..d0f58cbbbcd9f 100644 --- 
a/pandas/tests/indexing/multiindex/test_slice.py +++ b/pandas/tests/indexing/multiindex/test_slice.py @@ -1,4 +1,3 @@ - import numpy as np import pytest From 23d2eca61bd21118b2cf26b28a5b887a2b75e1a0 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 21 Aug 2019 20:34:21 -0700 Subject: [PATCH 04/11] restore getitem needed by geopandas --- pandas/core/indexing.py | 71 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 70 insertions(+), 1 deletion(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 4d5b8b5c7ef7d..75b8652946c4b 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -24,7 +24,7 @@ from pandas.core.dtypes.missing import _infer_fill_value, isna import pandas.core.common as com -from pandas.core.index import Index, MultiIndex +from pandas.core.index import Index, InvalidIndexError, MultiIndex from pandas.core.indexers import is_list_like_indexer, length_of_indexer @@ -110,6 +110,75 @@ def __call__(self, axis=None): new_self.axis = axis return new_self + # TODO: remove once geopandas no longer needs this + def __getitem__(self, key): + # Used in ix and downstream in geopandas _CoordinateIndexer + if type(key) is tuple: + # Note: we check the type exactly instead of with isinstance + # because NamedTuple is checked separately. + key = tuple(com.apply_if_callable(x, self.obj) for x in key) + try: + values = self.obj._get_value(*key) + except (KeyError, TypeError, InvalidIndexError, AttributeError): + # TypeError occurs here if the key has non-hashable entries, + # generally slice or list. + # TODO(ix): most/all of the TypeError cases here are for ix, + # so this check can be removed once ix is removed. 
+ # The InvalidIndexError is only catched for compatibility + # with geopandas, see + # https://github.com/pandas-dev/pandas/issues/27258 + # TODO: The AttributeError is for IntervalIndex which + # incorrectly implements get_value, see + # https://github.com/pandas-dev/pandas/issues/27865 + pass + else: + if is_scalar(values): + return values + + return self._getitem_tuple(key) + else: + # we by definition only have the 0th axis + axis = self.axis or 0 + + key = com.apply_if_callable(key, self.obj) + return self._getitem_axis(key, axis=axis) + + # TODO: remove once geopandas no longer needs __getitem__ + def _getitem_axis(self, key, axis: int): + if is_iterator(key): + key = list(key) + self._validate_key(key, axis) + + labels = self.obj._get_axis(axis) + if isinstance(key, slice): + return self._get_slice_axis(key, axis=axis) + elif is_list_like_indexer(key) and not ( + isinstance(key, tuple) and isinstance(labels, MultiIndex) + ): + + if hasattr(key, "ndim") and key.ndim > 1: + raise ValueError("Cannot index with multidimensional key") + + return self._getitem_iterable(key, axis=axis) + else: + + # maybe coerce a float scalar to integer + key = labels._maybe_cast_indexer(key) + + if is_integer(key): + if axis == 0 and isinstance(labels, MultiIndex): + try: + return self._get_label(key, axis=axis) + except (KeyError, TypeError): + if self.obj.index.levels[0].is_integer(): + raise + + # this is the fallback! 
(for a non-float, non-integer index) + if not labels.is_floating() and not labels.is_integer(): + return self._get_loc(key, axis=axis) + + return self._get_label(key, axis=axis) + def _get_label(self, label, axis: int): if self.ndim == 1: # for perf reasons we want to try _xs first From ecc8b36f18a6e1e3b8e6d94086153582f46fb4c8 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 21 Aug 2019 20:35:10 -0700 Subject: [PATCH 05/11] remove xfail --- pandas/tests/test_downstream.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/tests/test_downstream.py b/pandas/tests/test_downstream.py index 4d8eecb042a20..93baafddedeb4 100644 --- a/pandas/tests/test_downstream.py +++ b/pandas/tests/test_downstream.py @@ -123,7 +123,6 @@ def test_geopandas(): assert geopandas.read_file(fp) is not None -@pytest.mark.xfail(reason="ix-only methods have been removed from _NDFrameIndexer") def test_geopandas_coordinate_indexer(): # this test is included to have coverage of one case in the indexing.py # code that is only kept for compatibility with geopandas, see From 6c5326f111592ca0915f9f338428b456b003a01d Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 22 Aug 2019 11:50:29 -0700 Subject: [PATCH 06/11] PERF: replace with list, closes #28084 --- pandas/core/internals/blocks.py | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index e24e6e088b92a..e8867b87ab458 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -743,6 +743,26 @@ def replace( return [self] return [self.copy()] + to_replace = [x for x in to_replace if self._can_hold_element(x)] + if not len(to_replace): + # GH#28084 avoid costly checks since we can infer + # that there is nothing to replace in this block + if inplace: + return [self] + return [self.copy()] + + if len(to_replace) == 1: + # _can_hold_element checks have reduced this back to the + # scalar case and we can 
avoid a costly object cast + return self.replace( + to_replace[0], + value, + inplace=inplace, + filter=filter, + regex=regex, + convert=convert, + ) + # GH 22083, TypeError or ValueError occurred within error handling # causes infinite loop. Cast and retry only if not objectblock. if is_object_dtype(self): @@ -751,7 +771,7 @@ def replace( # try again with a compatible block block = self.astype(object) return block.replace( - to_replace=original_to_replace, + to_replace=to_replace, value=value, inplace=inplace, filter=filter, From f68432f25558daf80e8351f8d43228128334e6bf Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 22 Aug 2019 11:59:44 -0700 Subject: [PATCH 07/11] revert changes from another branch not intended here --- doc/source/reference/index.rst | 2 + pandas/core/indexing.py | 177 +++-- pandas/tests/frame/test_indexing.py | 626 +++++++++++++++++- pandas/tests/indexing/common.py | 11 +- .../tests/indexing/multiindex/test_slice.py | 5 + .../indexing/test_chaining_and_caching.py | 6 + pandas/tests/indexing/test_floats.py | 3 +- pandas/tests/indexing/test_iloc.py | 67 +- pandas/tests/indexing/test_loc.py | 6 + pandas/tests/indexing/test_partial.py | 71 +- pandas/tests/test_multilevel.py | 11 + 11 files changed, 934 insertions(+), 51 deletions(-) diff --git a/doc/source/reference/index.rst b/doc/source/reference/index.rst index 9d5649c37e92f..12ca318c815d3 100644 --- a/doc/source/reference/index.rst +++ b/doc/source/reference/index.rst @@ -49,6 +49,7 @@ public functions related to data types in pandas. api/pandas.DataFrame.blocks api/pandas.DataFrame.as_matrix + api/pandas.DataFrame.ix api/pandas.Index.asi8 api/pandas.Index.data api/pandas.Index.flags @@ -59,6 +60,7 @@ public functions related to data types in pandas. 
api/pandas.Series.asobject api/pandas.Series.blocks api/pandas.Series.from_array + api/pandas.Series.ix api/pandas.Series.imag api/pandas.Series.real diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 75b8652946c4b..7bb5e2fa3018d 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -10,8 +10,10 @@ from pandas.util._decorators import Appender from pandas.core.dtypes.common import ( + ensure_platform_int, is_float, is_integer, + is_integer_dtype, is_iterator, is_list_like, is_numeric_dtype, @@ -32,6 +34,7 @@ def get_indexers_list(): return [ + ("ix", _IXIndexer), ("iloc", _iLocIndexer), ("loc", _LocIndexer), ("at", _AtIndexer), @@ -110,7 +113,9 @@ def __call__(self, axis=None): new_self.axis = axis return new_self - # TODO: remove once geopandas no longer needs this + def __iter__(self): + raise NotImplementedError("ix is not iterable") + def __getitem__(self, key): # Used in ix and downstream in geopandas _CoordinateIndexer if type(key) is tuple: @@ -143,42 +148,6 @@ def __getitem__(self, key): key = com.apply_if_callable(key, self.obj) return self._getitem_axis(key, axis=axis) - # TODO: remove once geopandas no longer needs __getitem__ - def _getitem_axis(self, key, axis: int): - if is_iterator(key): - key = list(key) - self._validate_key(key, axis) - - labels = self.obj._get_axis(axis) - if isinstance(key, slice): - return self._get_slice_axis(key, axis=axis) - elif is_list_like_indexer(key) and not ( - isinstance(key, tuple) and isinstance(labels, MultiIndex) - ): - - if hasattr(key, "ndim") and key.ndim > 1: - raise ValueError("Cannot index with multidimensional key") - - return self._getitem_iterable(key, axis=axis) - else: - - # maybe coerce a float scalar to integer - key = labels._maybe_cast_indexer(key) - - if is_integer(key): - if axis == 0 and isinstance(labels, MultiIndex): - try: - return self._get_label(key, axis=axis) - except (KeyError, TypeError): - if self.obj.index.levels[0].is_integer(): - raise - - # this is 
the fallback! (for a non-float, non-integer index) - if not labels.is_floating() and not labels.is_integer(): - return self._get_loc(key, axis=axis) - - return self._get_label(key, axis=axis) - def _get_label(self, label, axis: int): if self.ndim == 1: # for perf reasons we want to try _xs first @@ -943,6 +912,9 @@ def _getitem_lowerdim(self, tup: Tuple): if len(tup) > self.ndim: raise IndexingError("Too many indexers. handle elsewhere") + # to avoid wasted computation + # df.ix[d1:d2, 0] -> columns first (True) + # df.ix[0, ['C', 'B', A']] -> rows first (False) for i, key in enumerate(tup): if is_label_like(key) or isinstance(key, tuple): section = self._getitem_axis(key, axis=i) @@ -1023,6 +995,41 @@ def _getitem_nested_tuple(self, tup: Tuple): return obj + def _getitem_axis(self, key, axis: int): + if is_iterator(key): + key = list(key) + self._validate_key(key, axis) + + labels = self.obj._get_axis(axis) + if isinstance(key, slice): + return self._get_slice_axis(key, axis=axis) + elif is_list_like_indexer(key) and not ( + isinstance(key, tuple) and isinstance(labels, MultiIndex) + ): + + if hasattr(key, "ndim") and key.ndim > 1: + raise ValueError("Cannot index with multidimensional key") + + return self._getitem_iterable(key, axis=axis) + else: + + # maybe coerce a float scalar to integer + key = labels._maybe_cast_indexer(key) + + if is_integer(key): + if axis == 0 and isinstance(labels, MultiIndex): + try: + return self._get_label(key, axis=axis) + except (KeyError, TypeError): + if self.obj.index.levels[0].is_integer(): + raise + + # this is the fallback! (for a non-float, non-integer index) + if not labels.is_floating() and not labels.is_integer(): + return self._get_loc(key, axis=axis) + + return self._get_label(key, axis=axis) + def _get_listlike_indexer(self, key, axis: int, raise_missing: bool = False): """ Transform a list-like of keys into a new index and an indexer. 
@@ -1279,6 +1286,102 @@ def _get_slice_axis(self, slice_obj: slice, axis: int): return self._slice(indexer, axis=axis, kind="iloc") +class _IXIndexer(_NDFrameIndexer): + """ + A primarily label-location based indexer, with integer position + fallback. + + Warning: Starting in 0.20.0, the .ix indexer is deprecated, in + favor of the more strict .iloc and .loc indexers. + + ``.ix[]`` supports mixed integer and label based access. It is + primarily label based, but will fall back to integer positional + access unless the corresponding axis is of integer type. + + ``.ix`` is the most general indexer and will support any of the + inputs in ``.loc`` and ``.iloc``. ``.ix`` also supports floating + point label schemes. ``.ix`` is exceptionally useful when dealing + with mixed positional and label based hierarchical indexes. + + However, when an axis is integer based, ONLY label based access + and not positional access is supported. Thus, in such cases, it's + usually better to be explicit and use ``.iloc`` or ``.loc``. + + See more at :ref:`Advanced Indexing `. + """ + + _ix_deprecation_warning = textwrap.dedent( + """ + .ix is deprecated. Please use + .loc for label based indexing or + .iloc for positional indexing + + See the documentation here: + http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated""" # noqa: E501 + ) + + def __init__(self, name, obj): + warnings.warn(self._ix_deprecation_warning, FutureWarning, stacklevel=2) + super().__init__(name, obj) + + @Appender(_NDFrameIndexer._validate_key.__doc__) + def _validate_key(self, key, axis: int): + if isinstance(key, slice): + return True + + elif com.is_bool_indexer(key): + return True + + elif is_list_like_indexer(key): + return True + + else: + + self._convert_scalar_indexer(key, axis) + + return True + + def _convert_for_reindex(self, key, axis: int): + """ + Transform a list of keys into a new array ready to be used as axis of + the object we return (e.g. including NaNs). 
+ + Parameters + ---------- + key : list-like + Target labels + axis: int + Where the indexing is being made + + Returns + ------- + list-like of labels + """ + labels = self.obj._get_axis(axis) + + if com.is_bool_indexer(key): + key = check_bool_indexer(labels, key) + return labels[key] + + if isinstance(key, Index): + keyarr = labels._convert_index_indexer(key) + else: + # asarray can be unsafe, NumPy strings are weird + keyarr = com.asarray_tuplesafe(key) + + if is_integer_dtype(keyarr): + # Cast the indexer to uint64 if possible so + # that the values returned from indexing are + # also uint64. + keyarr = labels._convert_arr_indexer(keyarr) + + if not labels.is_integer(): + keyarr = ensure_platform_int(keyarr) + return labels.take(keyarr) + + return keyarr + + class _LocationIndexer(_NDFrameIndexer): _exception = Exception diff --git a/pandas/tests/frame/test_indexing.py b/pandas/tests/frame/test_indexing.py index a51a0ed0f2047..a78b2ab7d1c4c 100644 --- a/pandas/tests/frame/test_indexing.py +++ b/pandas/tests/frame/test_indexing.py @@ -1,5 +1,6 @@ from datetime import date, datetime, time, timedelta import re +from warnings import catch_warnings, simplefilter import numpy as np import pytest @@ -401,6 +402,12 @@ def test_getitem_ix_mixed_integer(self): expected = df.loc[df.index[:-1]] assert_frame_equal(result, expected) + with catch_warnings(record=True): + simplefilter("ignore", FutureWarning) + result = df.ix[[1, 10]] + expected = df.ix[Index([1, 10], dtype=object)] + assert_frame_equal(result, expected) + # 11320 df = pd.DataFrame( { @@ -418,6 +425,53 @@ def test_getitem_ix_mixed_integer(self): expected = df.iloc[:, [1]] assert_frame_equal(result, expected) + def test_getitem_setitem_ix_negative_integers(self, float_frame): + with catch_warnings(record=True): + simplefilter("ignore", FutureWarning) + result = float_frame.ix[:, -1] + assert_series_equal(result, float_frame["D"]) + + with catch_warnings(record=True): + simplefilter("ignore", FutureWarning) + 
result = float_frame.ix[:, [-1]] + assert_frame_equal(result, float_frame[["D"]]) + + with catch_warnings(record=True): + simplefilter("ignore", FutureWarning) + result = float_frame.ix[:, [-1, -2]] + assert_frame_equal(result, float_frame[["D", "C"]]) + + with catch_warnings(record=True): + simplefilter("ignore", FutureWarning) + float_frame.ix[:, [-1]] = 0 + assert (float_frame["D"] == 0).all() + + df = DataFrame(np.random.randn(8, 4)) + # ix does label-based indexing when having an integer index + msg = "\"None of [Int64Index([-1], dtype='int64')] are in the [index]\"" + with catch_warnings(record=True): + simplefilter("ignore", FutureWarning) + with pytest.raises(KeyError, match=re.escape(msg)): + df.ix[[-1]] + + msg = "\"None of [Int64Index([-1], dtype='int64')] are in the [columns]\"" + with catch_warnings(record=True): + simplefilter("ignore", FutureWarning) + with pytest.raises(KeyError, match=re.escape(msg)): + df.ix[:, [-1]] + + # #1942 + a = DataFrame(np.random.randn(20, 2), index=[chr(x + 65) for x in range(20)]) + with catch_warnings(record=True): + simplefilter("ignore", FutureWarning) + a.ix[-1] = a.ix[-2] + + with catch_warnings(record=True): + simplefilter("ignore", FutureWarning) + assert_series_equal(a.ix[-1], a.ix[-2], check_names=False) + assert a.ix[-1].name == "T" + assert a.ix[-2].name == "S" + def test_getattr(self, float_frame): assert_series_equal(float_frame.A, float_frame["A"]) msg = "'DataFrame' object has no attribute 'NONEXISTENT_NAME'" @@ -798,6 +852,55 @@ def test_delitem_corner(self, float_frame): del f["B"] assert len(f.columns) == 2 + def test_getitem_fancy_2d(self, float_frame): + f = float_frame + + with catch_warnings(record=True): + simplefilter("ignore", FutureWarning) + assert_frame_equal(f.ix[:, ["B", "A"]], f.reindex(columns=["B", "A"])) + + subidx = float_frame.index[[5, 4, 1]] + with catch_warnings(record=True): + simplefilter("ignore", FutureWarning) + assert_frame_equal( + f.ix[subidx, ["B", "A"]], 
f.reindex(index=subidx, columns=["B", "A"]) + ) + + # slicing rows, etc. + with catch_warnings(record=True): + simplefilter("ignore", FutureWarning) + assert_frame_equal(f.ix[5:10], f[5:10]) + assert_frame_equal(f.ix[5:10, :], f[5:10]) + assert_frame_equal( + f.ix[:5, ["A", "B"]], f.reindex(index=f.index[:5], columns=["A", "B"]) + ) + + # slice rows with labels, inclusive! + with catch_warnings(record=True): + simplefilter("ignore", FutureWarning) + expected = f.ix[5:11] + result = f.ix[f.index[5] : f.index[10]] + assert_frame_equal(expected, result) + + # slice columns + with catch_warnings(record=True): + simplefilter("ignore", FutureWarning) + assert_frame_equal(f.ix[:, :2], f.reindex(columns=["A", "B"])) + + # get view + with catch_warnings(record=True): + simplefilter("ignore", FutureWarning) + exp = f.copy() + f.ix[5:10].values[:] = 5 + exp.values[5:10] = 5 + assert_frame_equal(f, exp) + + with catch_warnings(record=True): + simplefilter("ignore", FutureWarning) + msg = "Cannot index with multidimensional key" + with pytest.raises(ValueError, match=msg): + f.ix[f > 0.5] + def test_slice_floats(self): index = [52195.504153, 52196.303147, 52198.369883] df = DataFrame(np.random.rand(3, 2), index=index) @@ -846,7 +949,111 @@ def test_getitem_setitem_integer_slice_keyerrors(self): with pytest.raises(KeyError, match=r"^3$"): df2.loc[3:11] = 0 - def test_setitem_fancy_2d(self): + def test_setitem_fancy_2d(self, float_frame): + + # case 1 + frame = float_frame.copy() + expected = frame.copy() + + with catch_warnings(record=True): + simplefilter("ignore", FutureWarning) + frame.ix[:, ["B", "A"]] = 1 + expected["B"] = 1.0 + expected["A"] = 1.0 + assert_frame_equal(frame, expected) + + # case 2 + frame = float_frame.copy() + frame2 = float_frame.copy() + + expected = frame.copy() + + subidx = float_frame.index[[5, 4, 1]] + values = np.random.randn(3, 2) + + with catch_warnings(record=True): + simplefilter("ignore", FutureWarning) + frame.ix[subidx, ["B", "A"]] = values 
+ frame2.ix[[5, 4, 1], ["B", "A"]] = values + + expected["B"].ix[subidx] = values[:, 0] + expected["A"].ix[subidx] = values[:, 1] + + assert_frame_equal(frame, expected) + assert_frame_equal(frame2, expected) + + # case 3: slicing rows, etc. + frame = float_frame.copy() + + with catch_warnings(record=True): + simplefilter("ignore", FutureWarning) + expected1 = float_frame.copy() + frame.ix[5:10] = 1.0 + expected1.values[5:10] = 1.0 + assert_frame_equal(frame, expected1) + + with catch_warnings(record=True): + simplefilter("ignore", FutureWarning) + expected2 = float_frame.copy() + arr = np.random.randn(5, len(frame.columns)) + frame.ix[5:10] = arr + expected2.values[5:10] = arr + assert_frame_equal(frame, expected2) + + # case 4 + with catch_warnings(record=True): + simplefilter("ignore", FutureWarning) + frame = float_frame.copy() + frame.ix[5:10, :] = 1.0 + assert_frame_equal(frame, expected1) + frame.ix[5:10, :] = arr + assert_frame_equal(frame, expected2) + + # case 5 + with catch_warnings(record=True): + simplefilter("ignore", FutureWarning) + frame = float_frame.copy() + frame2 = float_frame.copy() + + expected = float_frame.copy() + values = np.random.randn(5, 2) + + frame.ix[:5, ["A", "B"]] = values + expected["A"][:5] = values[:, 0] + expected["B"][:5] = values[:, 1] + assert_frame_equal(frame, expected) + + with catch_warnings(record=True): + simplefilter("ignore", FutureWarning) + frame2.ix[:5, [0, 1]] = values + assert_frame_equal(frame2, expected) + + # case 6: slice rows with labels, inclusive! 
+ with catch_warnings(record=True): + simplefilter("ignore", FutureWarning) + frame = float_frame.copy() + expected = float_frame.copy() + + frame.ix[frame.index[5] : frame.index[10]] = 5.0 + expected.values[5:11] = 5 + assert_frame_equal(frame, expected) + + # case 7: slice columns + with catch_warnings(record=True): + simplefilter("ignore", FutureWarning) + frame = float_frame.copy() + frame2 = float_frame.copy() + expected = float_frame.copy() + + # slice indices + frame.ix[:, 1:3] = 4.0 + expected.values[:, 1:3] = 4.0 + assert_frame_equal(frame, expected) + + # slice with labels + frame.ix[:, "B":"C"] = 4.0 + assert_frame_equal(frame, expected) + # new corner case of boolean slicing / setting frame = DataFrame(zip([2, 3, 9, 6, 7], [np.nan] * 5), columns=["a", "b"]) lst = [100] @@ -868,6 +1075,194 @@ def test_fancy_getitem_slice_mixed(self, float_frame, float_string_frame): assert (float_frame["C"] == 4).all() + def test_fancy_setitem_int_labels(self): + # integer index defers to label-based indexing + + df = DataFrame(np.random.randn(10, 5), index=np.arange(0, 20, 2)) + + with catch_warnings(record=True): + simplefilter("ignore", FutureWarning) + tmp = df.copy() + exp = df.copy() + tmp.ix[[0, 2, 4]] = 5 + exp.values[:3] = 5 + assert_frame_equal(tmp, exp) + + with catch_warnings(record=True): + simplefilter("ignore", FutureWarning) + tmp = df.copy() + exp = df.copy() + tmp.ix[6] = 5 + exp.values[3] = 5 + assert_frame_equal(tmp, exp) + + with catch_warnings(record=True): + simplefilter("ignore", FutureWarning) + tmp = df.copy() + exp = df.copy() + tmp.ix[:, 2] = 5 + + # tmp correctly sets the dtype + # so match the exp way + exp[2] = 5 + assert_frame_equal(tmp, exp) + + def test_fancy_getitem_int_labels(self): + df = DataFrame(np.random.randn(10, 5), index=np.arange(0, 20, 2)) + + with catch_warnings(record=True): + simplefilter("ignore", FutureWarning) + result = df.ix[[4, 2, 0], [2, 0]] + expected = df.reindex(index=[4, 2, 0], columns=[2, 0]) + 
assert_frame_equal(result, expected) + + with catch_warnings(record=True): + simplefilter("ignore", FutureWarning) + result = df.ix[[4, 2, 0]] + expected = df.reindex(index=[4, 2, 0]) + assert_frame_equal(result, expected) + + with catch_warnings(record=True): + simplefilter("ignore", FutureWarning) + result = df.ix[4] + expected = df.xs(4) + assert_series_equal(result, expected) + + with catch_warnings(record=True): + simplefilter("ignore", FutureWarning) + result = df.ix[:, 3] + expected = df[3] + assert_series_equal(result, expected) + + def test_fancy_index_int_labels_exceptions(self, float_frame): + df = DataFrame(np.random.randn(10, 5), index=np.arange(0, 20, 2)) + + with catch_warnings(record=True): + simplefilter("ignore", FutureWarning) + + # labels that aren't contained + with pytest.raises(KeyError, match=r"\[1\] not in index"): + df.ix[[0, 1, 2], [2, 3, 4]] = 5 + + # try to set indices not contained in frame + msg = ( + r"None of \[Index\(\['foo', 'bar', 'baz'\]," + r" dtype='object'\)\] are in the \[index\]" + ) + with pytest.raises(KeyError, match=msg): + float_frame.ix[["foo", "bar", "baz"]] = 1 + msg = ( + r"None of \[Index\(\['E'\], dtype='object'\)\] are in the" + r" \[columns\]" + ) + with pytest.raises(KeyError, match=msg): + float_frame.ix[:, ["E"]] = 1 + + # FIXME: don't leave commented-out + # partial setting now allows this GH2578 + # pytest.raises(KeyError, float_frame.ix.__setitem__, + # (slice(None, None), 'E'), 1) + + def test_setitem_fancy_mixed_2d(self, float_string_frame): + + with catch_warnings(record=True): + simplefilter("ignore", FutureWarning) + self.mixed_frame.ix[:5, ["C", "B", "A"]] = 5 + result = self.mixed_frame.ix[:5, ["C", "B", "A"]] + assert (result.values == 5).all() + + float_string_frame.ix[5] = np.nan + assert isna(float_string_frame.ix[5]).all() + + float_string_frame.ix[5] = float_string_frame.ix[6] + assert_series_equal( + float_string_frame.ix[5], float_string_frame.ix[6], check_names=False + ) + + # #1432 + with 
catch_warnings(record=True): + simplefilter("ignore", FutureWarning) + df = DataFrame({1: [1.0, 2.0, 3.0], 2: [3, 4, 5]}) + assert df._is_mixed_type + + df.ix[1] = [5, 10] + + expected = DataFrame({1: [1.0, 5.0, 3.0], 2: [3, 10, 5]}) + + assert_frame_equal(df, expected) + + def test_ix_align(self): + b = Series(np.random.randn(10), name=0).sort_values() + df_orig = DataFrame(np.random.randn(10, 4)) + df = df_orig.copy() + + with catch_warnings(record=True): + simplefilter("ignore", FutureWarning) + df.ix[:, 0] = b + assert_series_equal(df.ix[:, 0].reindex(b.index), b) + + with catch_warnings(record=True): + simplefilter("ignore", FutureWarning) + dft = df_orig.T + dft.ix[0, :] = b + assert_series_equal(dft.ix[0, :].reindex(b.index), b) + + with catch_warnings(record=True): + simplefilter("ignore", FutureWarning) + df = df_orig.copy() + df.ix[:5, 0] = b + s = df.ix[:5, 0] + assert_series_equal(s, b.reindex(s.index)) + + with catch_warnings(record=True): + simplefilter("ignore", FutureWarning) + dft = df_orig.T + dft.ix[0, :5] = b + s = dft.ix[0, :5] + assert_series_equal(s, b.reindex(s.index)) + + with catch_warnings(record=True): + simplefilter("ignore", FutureWarning) + df = df_orig.copy() + idx = [0, 1, 3, 5] + df.ix[idx, 0] = b + s = df.ix[idx, 0] + assert_series_equal(s, b.reindex(s.index)) + + with catch_warnings(record=True): + simplefilter("ignore", FutureWarning) + dft = df_orig.T + dft.ix[0, idx] = b + s = dft.ix[0, idx] + assert_series_equal(s, b.reindex(s.index)) + + def test_ix_frame_align(self): + b = DataFrame(np.random.randn(3, 4)) + df_orig = DataFrame(np.random.randn(10, 4)) + df = df_orig.copy() + + with catch_warnings(record=True): + simplefilter("ignore", FutureWarning) + df.ix[:3] = b + out = b.ix[:3] + assert_frame_equal(out, b) + + b.sort_index(inplace=True) + + with catch_warnings(record=True): + simplefilter("ignore", FutureWarning) + df = df_orig.copy() + df.ix[[0, 1, 2]] = b + out = df.ix[[0, 1, 2]].reindex(b.index) + 
assert_frame_equal(out, b) + + with catch_warnings(record=True): + simplefilter("ignore", FutureWarning) + df = df_orig.copy() + df.ix[:3] = b + out = df.ix[:3] + assert_frame_equal(out, b.reindex(out.index)) + def test_getitem_setitem_non_ix_labels(self): df = tm.makeTimeDataFrame() @@ -894,7 +1289,6 @@ def test_ix_multi_take(self): xp = df.reindex([0]) assert_frame_equal(rs, xp) - # FIXME: dont leave commented-out """ #1321 df = DataFrame(np.random.randn(3, 2)) rs = df.loc[df.index==0, df.columns==1] @@ -902,6 +1296,168 @@ def test_ix_multi_take(self): assert_frame_equal(rs, xp) """ + def test_ix_multi_take_nonint_index(self): + df = DataFrame(np.random.randn(3, 2), index=["x", "y", "z"], columns=["a", "b"]) + with catch_warnings(record=True): + simplefilter("ignore", FutureWarning) + rs = df.ix[[0], [0]] + xp = df.reindex(["x"], columns=["a"]) + assert_frame_equal(rs, xp) + + def test_ix_multi_take_multiindex(self): + df = DataFrame( + np.random.randn(3, 2), + index=["x", "y", "z"], + columns=[["a", "b"], ["1", "2"]], + ) + with catch_warnings(record=True): + simplefilter("ignore", FutureWarning) + rs = df.ix[[0], [0]] + xp = df.reindex(["x"], columns=[("a", "1")]) + assert_frame_equal(rs, xp) + + def test_ix_dup(self): + idx = Index(["a", "a", "b", "c", "d", "d"]) + df = DataFrame(np.random.randn(len(idx), 3), idx) + + with catch_warnings(record=True): + simplefilter("ignore", FutureWarning) + sub = df.ix[:"d"] + assert_frame_equal(sub, df) + + with catch_warnings(record=True): + simplefilter("ignore", FutureWarning) + sub = df.ix["a":"c"] + assert_frame_equal(sub, df.ix[0:4]) + + with catch_warnings(record=True): + simplefilter("ignore", FutureWarning) + sub = df.ix["b":"d"] + assert_frame_equal(sub, df.ix[2:]) + + def test_getitem_fancy_1d(self, float_frame, float_string_frame): + f = float_frame + + # return self if no slicing...for now + with catch_warnings(record=True): + simplefilter("ignore", FutureWarning) + assert f.ix[:, :] is f + + # low dimensional 
slice + with catch_warnings(record=True): + simplefilter("ignore", FutureWarning) + xs1 = f.ix[2, ["C", "B", "A"]] + xs2 = f.xs(f.index[2]).reindex(["C", "B", "A"]) + tm.assert_series_equal(xs1, xs2) + + with catch_warnings(record=True): + simplefilter("ignore", FutureWarning) + ts1 = f.ix[5:10, 2] + ts2 = f[f.columns[2]][5:10] + tm.assert_series_equal(ts1, ts2) + + # positional xs + with catch_warnings(record=True): + simplefilter("ignore", FutureWarning) + xs1 = f.ix[0] + xs2 = f.xs(f.index[0]) + tm.assert_series_equal(xs1, xs2) + + with catch_warnings(record=True): + simplefilter("ignore", FutureWarning) + xs1 = f.ix[f.index[5]] + xs2 = f.xs(f.index[5]) + tm.assert_series_equal(xs1, xs2) + + # single column + with catch_warnings(record=True): + simplefilter("ignore", FutureWarning) + assert_series_equal(f.ix[:, "A"], f["A"]) + + # return view + with catch_warnings(record=True): + simplefilter("ignore", FutureWarning) + exp = f.copy() + exp.values[5] = 4 + f.ix[5][:] = 4 + tm.assert_frame_equal(exp, f) + + with catch_warnings(record=True): + simplefilter("ignore", FutureWarning) + exp.values[:, 1] = 6 + f.ix[:, 1][:] = 6 + tm.assert_frame_equal(exp, f) + + # slice of mixed-frame + with catch_warnings(record=True): + simplefilter("ignore", FutureWarning) + xs = float_string_frame.ix[5] + exp = float_string_frame.xs(float_string_frame.index[5]) + tm.assert_series_equal(xs, exp) + + def test_setitem_fancy_1d(self, float_frame): + + # case 1: set cross-section for indices + frame = float_frame.copy() + expected = float_frame.copy() + + with catch_warnings(record=True): + simplefilter("ignore", FutureWarning) + frame.ix[2, ["C", "B", "A"]] = [1.0, 2.0, 3.0] + expected["C"][2] = 1.0 + expected["B"][2] = 2.0 + expected["A"][2] = 3.0 + assert_frame_equal(frame, expected) + + with catch_warnings(record=True): + simplefilter("ignore", FutureWarning) + frame2 = float_frame.copy() + frame2.ix[2, [3, 2, 1]] = [1.0, 2.0, 3.0] + assert_frame_equal(frame, expected) + + # case 2, 
set a section of a column + frame = float_frame.copy() + expected = float_frame.copy() + + with catch_warnings(record=True): + simplefilter("ignore", FutureWarning) + vals = np.random.randn(5) + expected.values[5:10, 2] = vals + frame.ix[5:10, 2] = vals + assert_frame_equal(frame, expected) + + with catch_warnings(record=True): + simplefilter("ignore", FutureWarning) + frame2 = float_frame.copy() + frame2.ix[5:10, "B"] = vals + assert_frame_equal(frame, expected) + + # case 3: full xs + frame = float_frame.copy() + expected = float_frame.copy() + + with catch_warnings(record=True): + simplefilter("ignore", FutureWarning) + frame.ix[4] = 5.0 + expected.values[4] = 5.0 + assert_frame_equal(frame, expected) + + with catch_warnings(record=True): + simplefilter("ignore", FutureWarning) + frame.ix[frame.index[4]] = 6.0 + expected.values[4] = 6.0 + assert_frame_equal(frame, expected) + + # single column + frame = float_frame.copy() + expected = float_frame.copy() + + with catch_warnings(record=True): + simplefilter("ignore", FutureWarning) + frame.ix[:, "A"] = 7.0 + expected["A"] = 7.0 + assert_frame_equal(frame, expected) + def test_getitem_fancy_scalar(self, float_frame): f = float_frame ix = f.loc @@ -1421,11 +1977,15 @@ def test_get_set_value_no_partial_indexing(self): with pytest.raises(KeyError, match=r"^0$"): df._get_value(0, 1) - # TODO: rename? remove? 
def test_single_element_ix_dont_upcast(self, float_frame): float_frame["E"] = 1 assert issubclass(float_frame["E"].dtype.type, (int, np.integer)) + with catch_warnings(record=True): + simplefilter("ignore", FutureWarning) + result = float_frame.ix[float_frame.index[5], "E"] + assert is_integer(result) + result = float_frame.loc[float_frame.index[5], "E"] assert is_integer(result) @@ -1433,10 +1993,18 @@ def test_single_element_ix_dont_upcast(self, float_frame): df = pd.DataFrame(dict(a=[1.23])) df["b"] = 666 + with catch_warnings(record=True): + simplefilter("ignore", FutureWarning) + result = df.ix[0, "b"] + assert is_integer(result) result = df.loc[0, "b"] assert is_integer(result) expected = Series([666], [0], name="b") + with catch_warnings(record=True): + simplefilter("ignore", FutureWarning) + result = df.ix[[0], "b"] + assert_series_equal(result, expected) result = df.loc[[0], "b"] assert_series_equal(result, expected) @@ -1504,12 +2072,45 @@ def test_iloc_duplicates(self): df = DataFrame(np.random.rand(3, 3), columns=list("ABC"), index=list("aab")) result = df.iloc[0] + with catch_warnings(record=True): + simplefilter("ignore", FutureWarning) + result2 = df.ix[0] assert isinstance(result, Series) assert_almost_equal(result.values, df.values[0]) + assert_series_equal(result, result2) - result = df.T.iloc[:, 0] + with catch_warnings(record=True): + simplefilter("ignore", FutureWarning) + result = df.T.iloc[:, 0] + result2 = df.T.ix[:, 0] assert isinstance(result, Series) assert_almost_equal(result.values, df.values[0]) + assert_series_equal(result, result2) + + # multiindex + df = DataFrame( + np.random.randn(3, 3), + columns=[["i", "i", "j"], ["A", "A", "B"]], + index=[["i", "i", "j"], ["X", "X", "Y"]], + ) + + with catch_warnings(record=True): + simplefilter("ignore", FutureWarning) + rs = df.iloc[0] + xp = df.ix[0] + assert_series_equal(rs, xp) + + with catch_warnings(record=True): + simplefilter("ignore", FutureWarning) + rs = df.iloc[:, 0] + xp = 
df.T.ix[0] + assert_series_equal(rs, xp) + + with catch_warnings(record=True): + simplefilter("ignore", FutureWarning) + rs = df.iloc[:, [0]] + xp = df.ix[:, [0]] + assert_frame_equal(rs, xp) # #2259 df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=[1, 1, 2]) @@ -1762,6 +2363,9 @@ def test_getitem_ix_float_duplicates(self): ) expect = df.iloc[1:] assert_frame_equal(df.loc[0.2], expect) + with catch_warnings(record=True): + simplefilter("ignore", FutureWarning) + assert_frame_equal(df.ix[0.2], expect) expect = df.iloc[1:, 0] assert_series_equal(df.loc[0.2, "a"], expect) @@ -1769,6 +2373,9 @@ def test_getitem_ix_float_duplicates(self): df.index = [1, 0.2, 0.2] expect = df.iloc[1:] assert_frame_equal(df.loc[0.2], expect) + with catch_warnings(record=True): + simplefilter("ignore", FutureWarning) + assert_frame_equal(df.ix[0.2], expect) expect = df.iloc[1:, 0] assert_series_equal(df.loc[0.2, "a"], expect) @@ -1778,6 +2385,9 @@ def test_getitem_ix_float_duplicates(self): ) expect = df.iloc[1:-1] assert_frame_equal(df.loc[0.2], expect) + with catch_warnings(record=True): + simplefilter("ignore", FutureWarning) + assert_frame_equal(df.ix[0.2], expect) expect = df.iloc[1:-1, 0] assert_series_equal(df.loc[0.2, "a"], expect) @@ -1785,6 +2395,9 @@ def test_getitem_ix_float_duplicates(self): df.index = [0.1, 0.2, 2, 0.2] expect = df.iloc[[1, -1]] assert_frame_equal(df.loc[0.2], expect) + with catch_warnings(record=True): + simplefilter("ignore", FutureWarning) + assert_frame_equal(df.ix[0.2], expect) expect = df.iloc[[1, -1], 0] assert_series_equal(df.loc[0.2, "a"], expect) @@ -2013,6 +2626,11 @@ def test_index_namedtuple(self): index = Index([idx1, idx2], name="composite_index", tupleize_cols=False) df = DataFrame([(1, 2), (3, 4)], index=index, columns=["A", "B"]) + with catch_warnings(record=True): + simplefilter("ignore", FutureWarning) + result = df.ix[IndexType("foo", "bar")]["A"] + assert result == 1 + result = df.loc[IndexType("foo", "bar")]["A"] assert result == 1 diff 
--git a/pandas/tests/indexing/common.py b/pandas/tests/indexing/common.py index b9b459a10eeae..9ceeb06b6fd86 100644 --- a/pandas/tests/indexing/common.py +++ b/pandas/tests/indexing/common.py @@ -1,7 +1,7 @@ """ common utilities """ import itertools -from warnings import catch_warnings +from warnings import catch_warnings, filterwarnings import numpy as np @@ -154,12 +154,9 @@ def get_value(self, f, i, values=False): # for a in reversed(i): # v = v.__getitem__(a) # return v - - # TODO: this used to be f.ix[i]; is loc-then-iloc correct here? - try: - return f.loc[i] - except KeyError: - return f.iloc[i] + with catch_warnings(record=True): + filterwarnings("ignore", "\\n.ix", FutureWarning) + return f.ix[i] def check_values(self, f, func, values=False): diff --git a/pandas/tests/indexing/multiindex/test_slice.py b/pandas/tests/indexing/multiindex/test_slice.py index d0f58cbbbcd9f..692a86aa1a338 100644 --- a/pandas/tests/indexing/multiindex/test_slice.py +++ b/pandas/tests/indexing/multiindex/test_slice.py @@ -1,3 +1,5 @@ +from warnings import catch_warnings + import numpy as np import pytest @@ -10,6 +12,7 @@ from pandas.util import testing as tm +@pytest.mark.filterwarnings("ignore:\\n.ix:FutureWarning") class TestMultiIndexSlicers: def test_per_axis_per_level_getitem(self): @@ -634,6 +637,8 @@ def test_multiindex_label_slicing_with_negative_step(self): def assert_slices_equivalent(l_slc, i_slc): tm.assert_series_equal(s.loc[l_slc], s.iloc[i_slc]) tm.assert_series_equal(s[l_slc], s.iloc[i_slc]) + with catch_warnings(record=True): + tm.assert_series_equal(s.ix[l_slc], s.iloc[i_slc]) assert_slices_equivalent(SLC[::-1], SLC[::-1]) diff --git a/pandas/tests/indexing/test_chaining_and_caching.py b/pandas/tests/indexing/test_chaining_and_caching.py index dd9764beb49f3..702bf0b15dec9 100644 --- a/pandas/tests/indexing/test_chaining_and_caching.py +++ b/pandas/tests/indexing/test_chaining_and_caching.py @@ -365,6 +365,12 @@ def check(result, expected): def 
test_cache_updating(self): # GH 4939, make sure to update the cache on setitem + df = tm.makeDataFrame() + df["A"] # cache series + df.ix["Hello Friend"] = df.ix[0] + assert "Hello Friend" in df["A"].index + assert "Hello Friend" in df["B"].index + # 10264 df = DataFrame( np.zeros((5, 5), dtype="int64"), diff --git a/pandas/tests/indexing/test_floats.py b/pandas/tests/indexing/test_floats.py index 3a44f2479ef6f..56a78081bc624 100644 --- a/pandas/tests/indexing/test_floats.py +++ b/pandas/tests/indexing/test_floats.py @@ -134,7 +134,8 @@ def test_scalar_non_numeric(self): # these should prob work # and are inconsisten between series/dataframe ATM - # for idxr in [lambda x: x]: + # for idxr in [lambda x: x.ix, + # lambda x: x]: # s2 = s.copy() # # with pytest.raises(TypeError): diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index aa9ace1556b00..85eab91af3c48 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -1,6 +1,6 @@ """ test positional based indexing with iloc """ -from warnings import catch_warnings, simplefilter +from warnings import catch_warnings, filterwarnings, simplefilter import numpy as np import pytest @@ -462,6 +462,71 @@ def test_iloc_setitem_dups(self): df.iloc[[1, 0], [0, 1]] = df.iloc[[1, 0], [0, 1]].reset_index(drop=True) tm.assert_frame_equal(df, expected) + def test_iloc_getitem_frame(self): + df = DataFrame( + np.random.randn(10, 4), index=range(0, 20, 2), columns=range(0, 8, 2) + ) + + result = df.iloc[2] + with catch_warnings(record=True): + filterwarnings("ignore", "\\n.ix", FutureWarning) + exp = df.ix[4] + tm.assert_series_equal(result, exp) + + result = df.iloc[2, 2] + with catch_warnings(record=True): + filterwarnings("ignore", "\\n.ix", FutureWarning) + exp = df.ix[4, 4] + assert result == exp + + # slice + result = df.iloc[4:8] + with catch_warnings(record=True): + filterwarnings("ignore", "\\n.ix", FutureWarning) + expected = df.ix[8:14] + 
tm.assert_frame_equal(result, expected) + + result = df.iloc[:, 2:3] + with catch_warnings(record=True): + filterwarnings("ignore", "\\n.ix", FutureWarning) + expected = df.ix[:, 4:5] + tm.assert_frame_equal(result, expected) + + # list of integers + result = df.iloc[[0, 1, 3]] + with catch_warnings(record=True): + filterwarnings("ignore", "\\n.ix", FutureWarning) + expected = df.ix[[0, 2, 6]] + tm.assert_frame_equal(result, expected) + + result = df.iloc[[0, 1, 3], [0, 1]] + with catch_warnings(record=True): + filterwarnings("ignore", "\\n.ix", FutureWarning) + expected = df.ix[[0, 2, 6], [0, 2]] + tm.assert_frame_equal(result, expected) + + # neg indices + result = df.iloc[[-1, 1, 3], [-1, 1]] + with catch_warnings(record=True): + filterwarnings("ignore", "\\n.ix", FutureWarning) + expected = df.ix[[18, 2, 6], [6, 2]] + tm.assert_frame_equal(result, expected) + + # dups indices + result = df.iloc[[-1, -1, 1, 3], [-1, 1]] + with catch_warnings(record=True): + filterwarnings("ignore", "\\n.ix", FutureWarning) + expected = df.ix[[18, 18, 2, 6], [6, 2]] + tm.assert_frame_equal(result, expected) + + # with index-like + s = Series(index=range(1, 5)) + result = df.iloc[s.index] + with catch_warnings(record=True): + filterwarnings("ignore", "\\n.ix", FutureWarning) + expected = df.ix[[2, 4, 6, 8]] + tm.assert_frame_equal(result, expected) + def test_iloc_getitem_labelled_frame(self): # try with labelled frame df = DataFrame( diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 2e9b149663d70..abe0cd86c90d7 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -1,6 +1,7 @@ """ test label based indexing with loc """ from io import StringIO import re +from warnings import catch_warnings, filterwarnings import numpy as np import pytest @@ -955,6 +956,11 @@ def test_loc_name(self): result = df.iloc[[0, 1]].index.name assert result == "index_name" + with catch_warnings(record=True): + 
filterwarnings("ignore", "\\n.ix", FutureWarning) + result = df.ix[[0, 1]].index.name + assert result == "index_name" + result = df.loc[[0, 1]].index.name assert result == "index_name" diff --git a/pandas/tests/indexing/test_partial.py b/pandas/tests/indexing/test_partial.py index 1fd96b01b0338..c4505231932c6 100644 --- a/pandas/tests/indexing/test_partial.py +++ b/pandas/tests/indexing/test_partial.py @@ -1,9 +1,11 @@ """ test setting *parts* of objects both positionally and label based -TODO: these should be split among the indexer tests +TOD: these should be split among the indexer tests """ +from warnings import catch_warnings + import numpy as np import pytest @@ -13,6 +15,7 @@ class TestPartialSetting: + @pytest.mark.filterwarnings("ignore:\\n.ix:FutureWarning") def test_partial_setting(self): # GH2578, allow ix and friends to partially set @@ -81,6 +84,37 @@ def test_partial_setting(self): df.loc[3] = df.loc[2] tm.assert_frame_equal(df, expected) + # single dtype frame, overwrite + expected = DataFrame(dict({"A": [0, 2, 4], "B": [0, 2, 4]})) + df = df_orig.copy() + with catch_warnings(record=True): + df.ix[:, "B"] = df.ix[:, "A"] + tm.assert_frame_equal(df, expected) + + # mixed dtype frame, overwrite + expected = DataFrame(dict({"A": [0, 2, 4], "B": Series([0, 2, 4])})) + df = df_orig.copy() + df["B"] = df["B"].astype(np.float64) + with catch_warnings(record=True): + df.ix[:, "B"] = df.ix[:, "A"] + tm.assert_frame_equal(df, expected) + + # single dtype frame, partial setting + expected = df_orig.copy() + expected["C"] = df["A"] + df = df_orig.copy() + with catch_warnings(record=True): + df.ix[:, "C"] = df.ix[:, "A"] + tm.assert_frame_equal(df, expected) + + # mixed frame, partial setting + expected = df_orig.copy() + expected["C"] = df["A"] + df = df_orig.copy() + with catch_warnings(record=True): + df.ix[:, "C"] = df.ix[:, "A"] + tm.assert_frame_equal(df, expected) + # GH 8473 dates = date_range("1/1/2000", periods=8) df_orig = DataFrame( @@ -330,6 +364,41 
@@ def test_series_partial_set_with_name(self): result = ser.iloc[[1, 1, 0, 0]] tm.assert_series_equal(result, expected, check_index_type=True) + @pytest.mark.filterwarnings("ignore:\\n.ix") + def test_partial_set_invalid(self): + + # GH 4940 + # allow only setting of 'valid' values + + orig = tm.makeTimeDataFrame() + df = orig.copy() + + # don't allow not string inserts + with pytest.raises(TypeError): + with catch_warnings(record=True): + df.loc[100.0, :] = df.ix[0] + + with pytest.raises(TypeError): + with catch_warnings(record=True): + df.loc[100, :] = df.ix[0] + + with pytest.raises(TypeError): + with catch_warnings(record=True): + df.ix[100.0, :] = df.ix[0] + + with pytest.raises(ValueError): + with catch_warnings(record=True): + df.ix[100, :] = df.ix[0] + + # allow object conversion here + df = orig.copy() + with catch_warnings(record=True): + df.loc["a", :] = df.ix[0] + exp = orig.append(Series(df.ix[0], name="a")) + tm.assert_frame_equal(df, exp) + tm.assert_index_equal(df.index, Index(orig.index.tolist() + ["a"])) + assert df.index.dtype == "object" + def test_partial_set_empty_series(self): # GH5226 diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index bce384c4b04de..dc4db6e7902a8 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -2,6 +2,7 @@ from io import StringIO import itertools from itertools import product +from warnings import catch_warnings, simplefilter import numpy as np from numpy.random import randn @@ -208,6 +209,11 @@ def test_reindex(self): reindexed = self.frame.loc[[("foo", "one"), ("bar", "one")]] tm.assert_frame_equal(reindexed, expected) + with catch_warnings(record=True): + simplefilter("ignore", FutureWarning) + reindexed = self.frame.ix[[("foo", "one"), ("bar", "one")]] + tm.assert_frame_equal(reindexed, expected) + def test_reindex_preserve_levels(self): new_index = self.ymd.index[::10] chunk = self.ymd.reindex(new_index) @@ -216,6 +222,11 @@ def 
test_reindex_preserve_levels(self): chunk = self.ymd.loc[new_index] assert chunk.index is new_index + with catch_warnings(record=True): + simplefilter("ignore", FutureWarning) + chunk = self.ymd.ix[new_index] + assert chunk.index is new_index + ymdT = self.ymd.T chunk = ymdT.reindex(columns=new_index) assert chunk.columns is new_index From d74e5a84d6d30c02c114d8f2e58d4e53153348f8 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 23 Aug 2019 11:20:44 -0700 Subject: [PATCH 08/11] Add asv --- asv_bench/benchmarks/replace.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/asv_bench/benchmarks/replace.py b/asv_bench/benchmarks/replace.py index 6137e944e6b9e..61cd8803690c7 100644 --- a/asv_bench/benchmarks/replace.py +++ b/asv_bench/benchmarks/replace.py @@ -36,6 +36,23 @@ def time_replace_series(self, inplace): self.s.replace(self.to_rep, inplace=inplace) +class ReplaceList: + # GH#28099 + + params = [True, False] + param_names = ["inplace"] + + def setup_cache(self): + self.df = pd.DataFrame({"A": 0, "B": 0}, index=range(4 * 10 ** 7)) + + def time_replace_list(self, inplace): + self.df.replace([np.inf, -np.inf], np.nan, inplace=inplace) + + def time_replace_list_one_match(self, inplace): + # the 1 can be held in self._df.blocks[0], while the inf and -inf can't + self.df.replace([np.inf, -np.inf, 1], np.nan, inplace=inplace) + + class Convert: params = (["DataFrame", "Series"], ["Timestamp", "Timedelta"]) From 21d3d7ffd91e14edd177da2515ce17a63d27bfc9 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 23 Aug 2019 12:59:29 -0700 Subject: [PATCH 09/11] troubleshoot setup_cache --- asv_bench/benchmarks/replace.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/asv_bench/benchmarks/replace.py b/asv_bench/benchmarks/replace.py index 61cd8803690c7..b05eb269bca56 100644 --- a/asv_bench/benchmarks/replace.py +++ b/asv_bench/benchmarks/replace.py @@ -42,7 +42,7 @@ class ReplaceList: params = [True, False] param_names = ["inplace"] - 
def setup_cache(self): + def setup_cache(self, inplace): self.df = pd.DataFrame({"A": 0, "B": 0}, index=range(4 * 10 ** 7)) def time_replace_list(self, inplace): From 4d1931913803259846ba95f52ffe9fec64042cff Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 23 Aug 2019 14:15:09 -0700 Subject: [PATCH 10/11] Fix param usage --- asv_bench/benchmarks/replace.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/asv_bench/benchmarks/replace.py b/asv_bench/benchmarks/replace.py index b05eb269bca56..8d664e235578a 100644 --- a/asv_bench/benchmarks/replace.py +++ b/asv_bench/benchmarks/replace.py @@ -39,7 +39,7 @@ def time_replace_series(self, inplace): class ReplaceList: # GH#28099 - params = [True, False] + params = [(True, False)] param_names = ["inplace"] def setup_cache(self, inplace): From 5156b95fdd98883d1f83cc68114e89efb7835016 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 24 Aug 2019 07:36:09 -0700 Subject: [PATCH 11/11] setup_cache->setup --- asv_bench/benchmarks/replace.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/asv_bench/benchmarks/replace.py b/asv_bench/benchmarks/replace.py index 8d664e235578a..f69ae15028525 100644 --- a/asv_bench/benchmarks/replace.py +++ b/asv_bench/benchmarks/replace.py @@ -42,7 +42,7 @@ class ReplaceList: params = [(True, False)] param_names = ["inplace"] - def setup_cache(self, inplace): + def setup(self, inplace): self.df = pd.DataFrame({"A": 0, "B": 0}, index=range(4 * 10 ** 7)) def time_replace_list(self, inplace):