From 4ff8737b32352a1b5c652c65605c467f85d274fe Mon Sep 17 00:00:00 2001 From: Brock Date: Sun, 5 Dec 2021 13:10:57 -0800 Subject: [PATCH 1/3] CLN: FIXMEs --- pandas/_config/config.py | 1 - pandas/core/generic.py | 2 +- pandas/core/indexes/datetimes.py | 3 +- pandas/core/internals/blocks.py | 4 +- pandas/core/internals/concat.py | 1 - pandas/io/clipboard/__init__.py | 2 +- .../arrays/floating/test_construction.py | 6 --- pandas/tests/extension/base/getitem.py | 21 ++++++++--- pandas/tests/indexes/multi/test_setops.py | 24 +++++++----- pandas/tests/indexes/period/test_indexing.py | 37 +++++++++---------- .../tests/indexing/multiindex/test_partial.py | 22 ----------- pandas/tests/indexing/test_coercion.py | 2 + pandas/tests/indexing/test_floats.py | 27 ++++---------- pandas/tests/io/excel/test_writers.py | 4 +- pandas/tests/series/methods/test_convert.py | 6 --- pandas/tests/series/test_missing.py | 28 ++++++++------ pandas/tests/series/test_ufunc.py | 6 +-- 17 files changed, 82 insertions(+), 114 deletions(-) diff --git a/pandas/_config/config.py b/pandas/_config/config.py index aef5c3049f295..b22a6840644ec 100644 --- a/pandas/_config/config.py +++ b/pandas/_config/config.py @@ -642,7 +642,6 @@ def _warn_if_deprecated(key: str) -> bool: d = _get_deprecated_option(key) if d: if d.msg: - print(d.msg) warnings.warn(d.msg, FutureWarning) else: msg = f"'{key}' is deprecated" diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 476e611eb20d6..d75d60ebcc1ba 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6023,7 +6023,7 @@ def _convert( timedelta: bool_t = False, ) -> NDFrameT: """ - Attempt to infer better dtype for object columns + Attempt to infer better dtype for object columns. 
Parameters ---------- diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index e283509206344..187fa2f3aa1aa 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -647,7 +647,8 @@ def get_loc(self, key, method=None, tolerance=None): try: key = self._maybe_cast_for_get_loc(key) except ValueError as err: - # FIXME: we get here because parse_with_reso doesn't raise on "t2m" + # FIXME(dateutil#1180): we get here because parse_with_reso + # doesn't raise on "t2m" raise KeyError(key) from err elif isinstance(key, timedelta): diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index f4b301c25c603..587b9593e58fc 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1797,9 +1797,7 @@ def fillna( value, limit, inplace, downcast ) - values = self.values - values = values if inplace else values.copy() - new_values = values.fillna(value=value, limit=limit) + new_values = self.values.fillna(value=value, limit=limit) return [self.make_block_same_class(values=new_values)] diff --git a/pandas/core/internals/concat.py b/pandas/core/internals/concat.py index 7687e60db8552..f14f3c4a38430 100644 --- a/pandas/core/internals/concat.py +++ b/pandas/core/internals/concat.py @@ -528,7 +528,6 @@ def _get_empty_dtype(join_units: Sequence[JoinUnit]) -> DtypeObj: return blk.dtype if _is_uniform_reindex(join_units): - # FIXME: integrate property empty_dtype = join_units[0].block.dtype return empty_dtype diff --git a/pandas/io/clipboard/__init__.py b/pandas/io/clipboard/__init__.py index 1f97b1af81b6e..94cda748e31e8 100644 --- a/pandas/io/clipboard/__init__.py +++ b/pandas/io/clipboard/__init__.py @@ -518,7 +518,7 @@ def determine_clipboard(): "cygwin" in platform.system().lower() ): # Cygwin has a variety of values returned by platform.system(), # such as 'CYGWIN_NT-6.1' - # FIXME: pyperclip currently does not support Cygwin, + # FIXME(pyperclip#55): pyperclip currently does 
not support Cygwin, # see https://github.com/asweigart/pyperclip/issues/55 if os.path.exists("/dev/clipboard"): warnings.warn( diff --git a/pandas/tests/arrays/floating/test_construction.py b/pandas/tests/arrays/floating/test_construction.py index 703bdb3dbd5bb..7749e138ccbea 100644 --- a/pandas/tests/arrays/floating/test_construction.py +++ b/pandas/tests/arrays/floating/test_construction.py @@ -56,12 +56,6 @@ def test_floating_array_disallows_float16(request): with pytest.raises(TypeError, match=msg): FloatingArray(arr, mask) - if not np_version_under1p19: - # Troubleshoot - # https://github.com/numpy/numpy/issues/20512#issuecomment-985807740 - lowered = np.core._type_aliases.english_lower("Float16") - assert lowered == "float16", lowered - if np_version_under1p19 or ( locale.getlocale()[0] != "en_US" and not is_platform_windows() ): diff --git a/pandas/tests/extension/base/getitem.py b/pandas/tests/extension/base/getitem.py index bf3985ad198dd..71e1e61b50256 100644 --- a/pandas/tests/extension/base/getitem.py +++ b/pandas/tests/extension/base/getitem.py @@ -2,6 +2,7 @@ import pytest import pandas as pd +import pandas._testing as tm from pandas.tests.extension.base.base import BaseExtensionTests @@ -258,12 +259,22 @@ def test_getitem_integer_with_missing_raises(self, data, idx): with pytest.raises(ValueError, match=msg): data[idx] - # FIXME: dont leave commented-out + @pytest.mark.xfail( + reason="Tries label-based and raises KeyError; " + "in some cases raises when calling np.asarray" + ) + @pytest.mark.parametrize( + "idx", + [[0, 1, 2, pd.NA], pd.array([0, 1, 2, pd.NA], dtype="Int64")], + ids=["list", "integer-array"], + ) + def test_getitem_series_integer_with_missing_raises(self, data, idx): + msg = "Cannot index with an integer indexer containing NA values" # TODO: this raises KeyError about labels not found (it tries label-based) - # import pandas._testing as tm - # ser = pd.Series(data, index=[tm.rands(4) for _ in range(len(data))]) - # with 
pytest.raises(ValueError, match=msg): - # ser[idx] + + ser = pd.Series(data, index=[tm.rands(4) for _ in range(len(data))]) + with pytest.raises(ValueError, match=msg): + ser[idx] def test_getitem_slice(self, data): # getitem[slice] should return an array diff --git a/pandas/tests/indexes/multi/test_setops.py b/pandas/tests/indexes/multi/test_setops.py index 507449eabfb6e..e95c3df079600 100644 --- a/pandas/tests/indexes/multi/test_setops.py +++ b/pandas/tests/indexes/multi/test_setops.py @@ -260,17 +260,22 @@ def test_union(idx, sort): else: assert result.equals(idx) - # FIXME: don't leave commented-out - # not valid for python 3 - # def test_union_with_regular_index(self): - # other = Index(['A', 'B', 'C']) - # result = other.union(idx) - # assert ('foo', 'one') in result - # assert 'B' in result +@pytest.mark.xfail( + # This test was commented out from Oct 2011 to Dec 2021, may no longer + # be relevant. + reason="Length of names must match number of levels in MultiIndex", + raises=ValueError, +) +def test_union_with_regular_index(idx): + other = Index(["A", "B", "C"]) + + result = other.union(idx) + assert ("foo", "one") in result + assert "B" in result - # result2 = _index.union(other) - # assert result.equals(result2) + result2 = idx.union(other) + assert result.equals(result2) def test_intersection(idx, sort): @@ -355,6 +360,7 @@ def test_union_sort_other_empty(slice_): other = idx[slice_] tm.assert_index_equal(idx.union(other), idx) # MultiIndex does not special case empty.union(idx) + # FIXME: don't leave commented-out # tm.assert_index_equal(other.union(idx), idx) # sort=False diff --git a/pandas/tests/indexes/period/test_indexing.py b/pandas/tests/indexes/period/test_indexing.py index df2f114e73df2..e193af062098b 100644 --- a/pandas/tests/indexes/period/test_indexing.py +++ b/pandas/tests/indexes/period/test_indexing.py @@ -193,20 +193,18 @@ def test_getitem_seconds(self): "2013/02/01 9H", "2013/02/01 09:00", ] - for v in values: + for val in values: # 
GH7116 # these show deprecations as we are trying # to slice with non-integer indexers - # FIXME: don't leave commented-out - # with pytest.raises(IndexError): - # idx[v] - continue + with pytest.raises(IndexError, match="only integers, slices"): + idx[val] - s = Series(np.random.rand(len(idx)), index=idx) - tm.assert_series_equal(s["2013/01/01 10:00"], s[3600:3660]) - tm.assert_series_equal(s["2013/01/01 9H"], s[:3600]) + ser = Series(np.random.rand(len(idx)), index=idx) + tm.assert_series_equal(ser["2013/01/01 10:00"], ser[3600:3660]) + tm.assert_series_equal(ser["2013/01/01 9H"], ser[:3600]) for d in ["2013/01/01", "2013/01", "2013"]: - tm.assert_series_equal(s[d], s) + tm.assert_series_equal(ser[d], ser) def test_getitem_day(self): # GH#6716 @@ -223,24 +221,23 @@ def test_getitem_day(self): "2013/02/01 9H", "2013/02/01 09:00", ] - for v in values: + for val in values: # GH7116 # these show deprecations as we are trying # to slice with non-integer indexers - # with pytest.raises(IndexError): - # idx[v] - continue + with pytest.raises(IndexError, match="only integers, slices"): + idx[val] - s = Series(np.random.rand(len(idx)), index=idx) - tm.assert_series_equal(s["2013/01"], s[0:31]) - tm.assert_series_equal(s["2013/02"], s[31:59]) - tm.assert_series_equal(s["2014"], s[365:]) + ser = Series(np.random.rand(len(idx)), index=idx) + tm.assert_series_equal(ser["2013/01"], ser[0:31]) + tm.assert_series_equal(ser["2013/02"], ser[31:59]) + tm.assert_series_equal(ser["2014"], ser[365:]) invalid = ["2013/02/01 9H", "2013/02/01 09:00"] - for v in invalid: - with pytest.raises(KeyError, match=v): - s[v] + for val in invalid: + with pytest.raises(KeyError, match=val): + ser[val] class TestGetLoc: diff --git a/pandas/tests/indexing/multiindex/test_partial.py b/pandas/tests/indexing/multiindex/test_partial.py index c77a731f5410d..9d5e65e692fdc 100644 --- a/pandas/tests/indexing/multiindex/test_partial.py +++ b/pandas/tests/indexing/multiindex/test_partial.py @@ -169,28 +169,6 
@@ def test_getitem_intkey_leading_level( with tm.assert_produces_warning(FutureWarning): mi.get_value(ser, 14) - # --------------------------------------------------------------------- - # AMBIGUOUS CASES! - - def test_partial_loc_missing(self, multiindex_year_month_day_dataframe_random_data): - pytest.skip("skipping for now") - - ymd = multiindex_year_month_day_dataframe_random_data - result = ymd.loc[2000, 0] - expected = ymd.loc[2000]["A"] - tm.assert_series_equal(result, expected) - - # need to put in some work here - # FIXME: dont leave commented-out - # self.ymd.loc[2000, 0] = 0 - # assert (self.ymd.loc[2000]['A'] == 0).all() - - # Pretty sure the second (and maybe even the first) is already wrong. - with pytest.raises(KeyError, match="6"): - ymd.loc[(2000, 6)] - with pytest.raises(KeyError, match="(2000, 6)"): - ymd.loc[(2000, 6), 0] - # --------------------------------------------------------------------- def test_setitem_multiple_partial(self, multiindex_dataframe_random_data): diff --git a/pandas/tests/indexing/test_coercion.py b/pandas/tests/indexing/test_coercion.py index 075007c6b9870..2366dd39c25f2 100644 --- a/pandas/tests/indexing/test_coercion.py +++ b/pandas/tests/indexing/test_coercion.py @@ -88,6 +88,8 @@ def _assert_setitem_series_conversion( # check dtype explicitly for sure assert temp.dtype == expected_dtype + # AFAICT the problem is in Series.__setitem__ where with integer dtype + # ser[1] = 2.2 casts 2.2 to 2 instead of casting the ser to floating # FIXME: dont leave commented-out # .loc works different rule, temporary disable # temp = original_series.copy() diff --git a/pandas/tests/indexing/test_floats.py b/pandas/tests/indexing/test_floats.py index 77c04ae34ea5f..902bd943584d9 100644 --- a/pandas/tests/indexing/test_floats.py +++ b/pandas/tests/indexing/test_floats.py @@ -68,27 +68,16 @@ def test_scalar_non_numeric(self, index_func, frame_or_series, indexer_sl): # contains assert 3.0 not in s - # setting with an indexer - if 
s.index.inferred_type in ["categorical"]: - # Value or Type Error - pass - elif s.index.inferred_type in ["datetime64", "timedelta64", "period"]: - - # FIXME: dont leave commented-out - # these should prob work - # and are inconsistent between series/dataframe ATM - # for idxr in [lambda x: x]: - # s2 = s.copy() - # - # with pytest.raises(TypeError): - # idxr(s2)[3.0] = 0 - pass + s2 = s.copy() + indexer_sl(s2)[3.0] = 10 + if indexer_sl is tm.setitem: + assert 3.0 in s2.axes[-1] + elif indexer_sl is tm.loc: + assert 3.0 in s2.axes[0] else: - - s2 = s.copy() - indexer_sl(s2)[3.0] = 10 - assert s2.index.is_object() + assert 3.0 not in s2.axes[0] + assert 3.0 not in s2.axes[-1] @pytest.mark.parametrize( "index_func", diff --git a/pandas/tests/io/excel/test_writers.py b/pandas/tests/io/excel/test_writers.py index 6f1431c6e410d..105da2ce49fca 100644 --- a/pandas/tests/io/excel/test_writers.py +++ b/pandas/tests/io/excel/test_writers.py @@ -1105,8 +1105,8 @@ def test_datetimes(self, path): write_frame = DataFrame({"A": datetimes}) write_frame.to_excel(path, "Sheet1") - if path.endswith("xlsx") or path.endswith("xlsm"): - pytest.skip("Defaults to openpyxl and fails - GH #38644") + # GH#38644 now fixed at one point there was an issue with openpyxl + # and xlsx/xlsm files involving floating point error on datetimes read_frame = pd.read_excel(path, sheet_name="Sheet1", header=0) tm.assert_series_equal(write_frame["A"], read_frame["A"]) diff --git a/pandas/tests/series/methods/test_convert.py b/pandas/tests/series/methods/test_convert.py index 178026c1efc09..346f74d798de9 100644 --- a/pandas/tests/series/methods/test_convert.py +++ b/pandas/tests/series/methods/test_convert.py @@ -113,12 +113,6 @@ def test_convert(self): result = ser._convert(datetime=True) tm.assert_series_equal(result, ser) - # FIXME: dont leave commented-out - # res = ser.copy() - # res[0] = np.nan - # result = res._convert(datetime=True, numeric=False) - # assert result.dtype == 'M8[ns]' - def 
test_convert_no_arg_error(self): ser = Series(["1.0", "2"]) msg = r"At least one of datetime, numeric or timedelta must be True\." diff --git a/pandas/tests/series/test_missing.py b/pandas/tests/series/test_missing.py index 87a0e5cb680c8..8052a81036e51 100644 --- a/pandas/tests/series/test_missing.py +++ b/pandas/tests/series/test_missing.py @@ -1,6 +1,7 @@ from datetime import timedelta import numpy as np +import pytest from pandas._libs import iNaT @@ -73,20 +74,23 @@ def test_timedelta64_nan(self): td1[2] = td[2] assert not isna(td1[2]) - # FIXME: don't leave commented-out # boolean setting - # this doesn't work, not sure numpy even supports it - # result = td[(td>np.timedelta64(timedelta(days=3))) & - # td<np.timedelta64(timedelta(days=7)))] = np.nan - # assert isna(result).sum() == 7 - + # GH#2899 boolean setting + td3 = np.timedelta64(timedelta(days=3)) + td7 = np.timedelta64(timedelta(days=7)) + td[(td > td3) & (td < td7)] = np.nan + assert isna(td).sum() == 3 + + @pytest.mark.xfail( + reason="Chained inequality raises when trying to define 'selector'" + ) + def test_logical_range_select(self, datetime_series): # NumPy limitation =( - - # def test_logical_range_select(self): - # np.random.seed(12345) - # selector = -0.5 <= datetime_series <= 0.5 - # expected = (datetime_series >= -0.5) & (datetime_series <= 0.5) - # tm.assert_series_equal(selector, expected) + # https://github.com/pandas-dev/pandas/commit/9030dc021f07c76809848925cb34828f6c8484f3 + np.random.seed(12345) + selector = -0.5 <= datetime_series <= 0.5 + expected = (datetime_series >= -0.5) & (datetime_series <= 0.5) + tm.assert_series_equal(selector, expected) def test_valid(self, datetime_series): ts = datetime_series.copy() diff --git a/pandas/tests/series/test_ufunc.py b/pandas/tests/series/test_ufunc.py index 11a03c364458e..5bc6e7ddf6fa8 100644 --- a/pandas/tests/series/test_ufunc.py +++ b/pandas/tests/series/test_ufunc.py @@ -286,11 +286,7 @@ def test_reduce(values, box, request): mark = pytest.mark.xfail(reason="IntervalArray.min/max not implemented") request.node.add_marker(mark) - if values.dtype == "i8" and box is pd.array: - # FIXME: pd.array casts to Int64 - obj = values - else: - obj = 
box(values) + obj = box(values) result = np.maximum.reduce(obj) expected = values[1] From c0f65effc40ef5a331e19d6a7f4255af42ff9f73 Mon Sep 17 00:00:00 2001 From: Brock Date: Sun, 5 Dec 2021 14:19:49 -0800 Subject: [PATCH 2/3] restore skip with updated message --- pandas/tests/io/excel/test_writers.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/pandas/tests/io/excel/test_writers.py b/pandas/tests/io/excel/test_writers.py index 105da2ce49fca..8291d0c85b50d 100644 --- a/pandas/tests/io/excel/test_writers.py +++ b/pandas/tests/io/excel/test_writers.py @@ -1105,8 +1105,11 @@ def test_datetimes(self, path): write_frame = DataFrame({"A": datetimes}) write_frame.to_excel(path, "Sheet1") - # GH#38644 now fixed at one point there was an issue with openpyxl - # and xlsx/xlsm files involving floating point error on datetimes + if path.endswith("xlsx") or path.endswith("xlsm"): + pytest.skip( + "Defaults to openpyxl and fails with floating point error on " + "datetimes; may be fixed on newer versions of openpyxl - GH #38644" + ) read_frame = pd.read_excel(path, sheet_name="Sheet1", header=0) tm.assert_series_equal(write_frame["A"], read_frame["A"]) From 764a5b2ae3aeb5d16c6d340ed1e7401a6d55fa11 Mon Sep 17 00:00:00 2001 From: Brock Date: Sun, 5 Dec 2021 15:27:20 -0800 Subject: [PATCH 3/3] catch warning --- pandas/tests/indexes/multi/test_setops.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/tests/indexes/multi/test_setops.py b/pandas/tests/indexes/multi/test_setops.py index e95c3df079600..4e6a0bb67cffe 100644 --- a/pandas/tests/indexes/multi/test_setops.py +++ b/pandas/tests/indexes/multi/test_setops.py @@ -274,7 +274,9 @@ def test_union_with_regular_index(idx): assert ("foo", "one") in result assert "B" in result - result2 = idx.union(other) + msg = "The values in the array are unorderable" + with tm.assert_produces_warning(RuntimeWarning, match=msg): + result2 = idx.union(other) assert result.equals(result2)