From 4d5b0f9c59cedbc96405d579105f06c8db578539 Mon Sep 17 00:00:00 2001 From: Moi Date: Sat, 2 Nov 2019 14:52:17 +0100 Subject: [PATCH 01/17] Fixing Issue 29128 --- pandas/core/arrays/integer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index 630c3e50f2c09..1899358bbea62 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -652,7 +652,7 @@ def _reduce(self, name, skipna=True, **kwargs): data[mask] = self._na_value op = getattr(nanops, "nan" + name) - result = op(data, axis=0, skipna=skipna, mask=mask) + result = op(data, axis=0, skipna=skipna, mask=mask, **kwargs) # if we have a boolean op, don't coerce if name in ["any", "all"]: From 35048922d02f6c4c96f1d5a9664e6bc519e5411e Mon Sep 17 00:00:00 2001 From: Moi Date: Sat, 2 Nov 2019 15:31:32 +0100 Subject: [PATCH 02/17] added test --- pandas/tests/arrays/test_integer.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pandas/tests/arrays/test_integer.py b/pandas/tests/arrays/test_integer.py index 793de66767cc3..98ee53e3cd042 100644 --- a/pandas/tests/arrays/test_integer.py +++ b/pandas/tests/arrays/test_integer.py @@ -828,6 +828,13 @@ def test_arrow_array(data): expected = pa.array(list(data), type=data.dtype.name.lower(), from_pandas=True) assert arr.equals(expected) +@pytest.mark.parametrize("ddof", [0, 1]) +def test_var_ddof(ddof): + s = pd.Series(data=[1, 2, 3, 4, 5, 6, np.nan, np.nan], dtype="Int64") + result = s.var(ddof=ddof) + expected = np.var([1, 2, 3, 4, 5, 6, np.nan, np.nan], ddof=ddof) + assert expected == result + # TODO(jreback) - these need testing / are broken From 897c2b470391b19774643cb41dd92923b3346bbd Mon Sep 17 00:00:00 2001 From: Moi Date: Sat, 2 Nov 2019 15:39:41 +0100 Subject: [PATCH 03/17] added whatnew reference --- doc/source/whatsnew/v0.25.3.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/doc/source/whatsnew/v0.25.3.rst b/doc/source/whatsnew/v0.25.3.rst index f73a3f956f42e..aceee7b9bf7f8 100644 --- a/doc/source/whatsnew/v0.25.3.rst +++ b/doc/source/whatsnew/v0.25.3.rst @@ -16,6 +16,11 @@ Groupby/resample/rolling - Bug in :meth:`DataFrameGroupBy.quantile` where NA values in the grouping could cause segfaults or incorrect results (:issue:`28882`) +Series +^^^^^^ + +- Bug in :meth:`Series.var` not computing the right value with an integer series with missing values when ddof = 0 + Contributors ~~~~~~~~~~~~ From b3a4aa6dacd344fefac83b372c70e489541b6c12 Mon Sep 17 00:00:00 2001 From: Moi Date: Sat, 2 Nov 2019 15:43:19 +0100 Subject: [PATCH 04/17] black pandas --- pandas/core/algorithms.py | 2 +- pandas/core/frame.py | 2 +- pandas/core/generic.py | 2 +- pandas/core/groupby/grouper.py | 6 +++++- pandas/core/indexes/base.py | 2 +- pandas/core/indexing.py | 6 +++--- pandas/core/internals/managers.py | 2 +- pandas/io/common.py | 4 ++-- pandas/io/parsers.py | 14 ++++++++++++-- pandas/io/stata.py | 2 +- pandas/tests/arrays/sparse/test_array.py | 12 ++++++++---- pandas/tests/arrays/test_integer.py | 1 + pandas/tests/dtypes/test_inference.py | 2 +- pandas/tests/frame/test_constructors.py | 6 +++--- pandas/tests/indexes/period/test_construction.py | 2 +- pandas/tests/indexing/multiindex/test_getitem.py | 2 +- pandas/tests/indexing/multiindex/test_xs.py | 2 +- pandas/tests/indexing/test_callable.py | 12 +++++++++--- pandas/tests/io/parser/test_index_col.py | 4 ++-- pandas/tests/reductions/test_reductions.py | 4 ++-- pandas/tests/test_algos.py | 10 +++++----- pandas/tests/test_nanops.py | 2 +- 22 files changed, 63 insertions(+), 38 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index c70e623778315..cc6c23929d49c 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -1155,7 +1155,7 @@ def compute(self, method): n = min(n, narr) kth_val = algos.kth_smallest(arr.copy(), n - 1) - ns, = np.nonzero(arr <= kth_val) + (ns,) = np.nonzero(arr <= kth_val) inds = ns[arr[ns].argsort(kind="mergesort")] if self.keep != "all": diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 40efc4c65476a..7d8cc0b731017 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4829,7 +4829,7 @@ def drop_duplicates(self, subset=None, keep="first", inplace=False): duplicated = self.duplicated(subset, keep=keep) if inplace: - inds, = (-duplicated)._ndarray_values.nonzero() + (inds,) = (-duplicated)._ndarray_values.nonzero() new_data = self._data.take(inds) self._update_inplace(new_data) else: diff --git a/pandas/core/generic.py b/pandas/core/generic.py index f88c26c7bc782..fa43206b86ec4 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3613,7 +3613,7 @@ class animal locomotion if isinstance(loc, np.ndarray): if loc.dtype == np.bool_: - inds, = loc.nonzero() + (inds,) = loc.nonzero() return self.take(inds, axis=axis) else: return self.take(loc, axis=axis) diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index d7eaaca5ac83a..d6beefbf31db7 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -284,7 +284,11 @@ def __init__( if self.name is None: self.name = index.names[level] - self.grouper, self._labels, self._group_index = index._get_grouper_for_level( # noqa: E501 + ( + self.grouper, + self._labels, + self._group_index, + ) = index._get_grouper_for_level( # noqa: E501 self.grouper, level ) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 187c7e2f3a7f7..954607a0dc6de 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1873,7 +1873,7 @@ def _isnan(self): @cache_readonly def _nan_idxs(self): if self._can_hold_na: - w, = self._isnan.nonzero() + (w,) = self._isnan.nonzero() return w else: return np.array([], dtype=np.int64) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 44c786f003369..1315e9d5b1c3f 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -320,7 +320,7 @@ def _setitem_with_indexer(self, indexer, value): # if there is only one block/type, still have to take split path # unless the block is one-dimensional or it can hold the value if not take_split_path and self.obj._data.blocks: - blk, = self.obj._data.blocks + (blk,) = self.obj._data.blocks if 1 < blk.ndim: # in case of dict, keys are indices val = list(value.values()) if isinstance(value, dict) else value take_split_path = not blk._can_hold_element(val) @@ -1120,7 +1120,7 @@ def _getitem_iterable(self, key, axis: int): if com.is_bool_indexer(key): # A boolean indexer key = check_bool_indexer(labels, key) - inds, = key.nonzero() + (inds,) = key.nonzero() return self.obj.take(inds, axis=axis) else: # A collection of keys @@ -1264,7 +1264,7 @@ def _convert_to_indexer(self, obj, axis: int, raise_missing: bool = False): if com.is_bool_indexer(obj): obj = check_bool_indexer(labels, obj) - inds, = obj.nonzero() + (inds,) = obj.nonzero() return inds else: # When setting, missing keys are not allowed, even with .loc: diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index c47aaf7c773c4..db782b4550907 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1868,7 +1868,7 @@ def _shape_compat(x): def _interleaved_dtype( - blocks: List[Block] + blocks: List[Block], ) -> Optional[Union[np.dtype, ExtensionDtype]]: """Find the common dtype for `blocks`. diff --git a/pandas/io/common.py b/pandas/io/common.py index 0bef14e4999c7..e08fd37e65ad9 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -109,7 +109,7 @@ def _is_url(url) -> bool: def _expand_user( - filepath_or_buffer: FilePathOrBuffer[AnyStr] + filepath_or_buffer: FilePathOrBuffer[AnyStr], ) -> FilePathOrBuffer[AnyStr]: """Return the argument with an initial component of ~ or ~user replaced by that user's home directory. @@ -139,7 +139,7 @@ def _validate_header_arg(header) -> None: def _stringify_path( - filepath_or_buffer: FilePathOrBuffer[AnyStr] + filepath_or_buffer: FilePathOrBuffer[AnyStr], ) -> FilePathOrBuffer[AnyStr]: """Attempt to convert a path-like object to a string. diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 058d65b9464ae..4ff988acfe4da 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -1918,7 +1918,12 @@ def __init__(self, src, **kwds): else: if len(self._reader.header) > 1: # we have a multi index in the columns - self.names, self.index_names, self.col_names, passed_names = self._extract_multi_indexer_columns( # noqa: E501 + ( + self.names, + self.index_names, + self.col_names, + passed_names, + ) = self._extract_multi_indexer_columns( # noqa: E501 self._reader.header, self.index_names, self.col_names, passed_names ) else: @@ -2307,7 +2312,12 @@ def __init__(self, f, **kwds): # The original set is stored in self.original_columns. if len(self.columns) > 1: # we are processing a multi index column - self.columns, self.index_names, self.col_names, _ = self._extract_multi_indexer_columns( # noqa: E501 + ( + self.columns, + self.index_names, + self.col_names, + _, + ) = self._extract_multi_indexer_columns( # noqa: E501 self.columns, self.index_names, self.col_names ) # Update list of original names to include all indices. diff --git a/pandas/io/stata.py b/pandas/io/stata.py index 07475f224bd5f..8e5fa48d460e8 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -614,7 +614,7 @@ def _cast_to_stata_types(data): data[col] = data[col].astype(np.int32) else: data[col] = data[col].astype(np.float64) - if data[col].max() >= 2 ** 53 or data[col].min() <= -2 ** 53: + if data[col].max() >= 2 ** 53 or data[col].min() <= -(2 ** 53): ws = precision_loss_doc % ("int64", "float64") elif dtype in (np.float32, np.float64): value = data[col].max() diff --git a/pandas/tests/arrays/sparse/test_array.py b/pandas/tests/arrays/sparse/test_array.py index f9bb4981df7df..755cbfb716fcd 100644 --- a/pandas/tests/arrays/sparse/test_array.py +++ b/pandas/tests/arrays/sparse/test_array.py @@ -658,12 +658,16 @@ def test_getslice_tuple(self): dense = np.array([np.nan, 0, 3, 4, 0, 5, np.nan, np.nan, 0]) sparse = SparseArray(dense) - res = sparse[4:,] # noqa: E231 + res = sparse[ + 4:, + ] # noqa: E231 exp = SparseArray(dense[4:,]) # noqa: E231 tm.assert_sp_array_equal(res, exp) sparse = SparseArray(dense, fill_value=0) - res = sparse[4:,] # noqa: E231 + res = sparse[ + 4:, + ] # noqa: E231 exp = SparseArray(dense[4:,], fill_value=0) # noqa: E231 tm.assert_sp_array_equal(res, exp) @@ -823,11 +827,11 @@ def test_nonzero(self): # Tests regression #21172. sa = pd.SparseArray([float("nan"), float("nan"), 1, 0, 0, 2, 0, 0, 0, 3, 0, 0]) expected = np.array([2, 5, 9], dtype=np.int32) - result, = sa.nonzero() + (result,) = sa.nonzero() tm.assert_numpy_array_equal(expected, result) sa = pd.SparseArray([0, 0, 1, 0, 0, 2, 0, 0, 0, 3, 0, 0]) - result, = sa.nonzero() + (result,) = sa.nonzero() tm.assert_numpy_array_equal(expected, result) diff --git a/pandas/tests/arrays/test_integer.py b/pandas/tests/arrays/test_integer.py index 98ee53e3cd042..9d1982093b793 100644 --- a/pandas/tests/arrays/test_integer.py +++ b/pandas/tests/arrays/test_integer.py @@ -828,6 +828,7 @@ def test_arrow_array(data): expected = pa.array(list(data), type=data.dtype.name.lower(), from_pandas=True) assert arr.equals(expected) + @pytest.mark.parametrize("ddof", [0, 1]) def test_var_ddof(ddof): s = pd.Series(data=[1, 2, 3, 4, 5, 6, np.nan, np.nan], dtype="Int64") diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index 62fb118f719e3..7d5b1891cbd32 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -505,7 +505,7 @@ def test_convert_numeric_int64_uint64(self, case, coerce): result = lib.maybe_convert_numeric(case, set(), coerce_numeric=coerce) tm.assert_almost_equal(result, expected) - @pytest.mark.parametrize("value", [-2 ** 63 - 1, 2 ** 64]) + @pytest.mark.parametrize("value", [-(2 ** 63) - 1, 2 ** 64]) def test_convert_int_overflow(self, value): # see gh-18584 arr = np.array([value], dtype=object) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index aa00cf234d9ee..f193f97aedb22 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -245,9 +245,9 @@ def test_constructor_overflow_int64(self): np.array([2 ** 64], dtype=object), np.array([2 ** 65]), [2 ** 64 + 1], - np.array([-2 ** 63 - 4], dtype=object), - np.array([-2 ** 64 - 1]), - [-2 ** 65 - 2], + np.array([-(2 ** 63) - 4], dtype=object), + np.array([-(2 ** 64) - 1]), + [-(2 ** 65) - 2], ], ) def test_constructor_int_overflow(self, values): diff --git a/pandas/tests/indexes/period/test_construction.py b/pandas/tests/indexes/period/test_construction.py index 8c75fbbae7de3..1973cb7f4740d 100644 --- a/pandas/tests/indexes/period/test_construction.py +++ b/pandas/tests/indexes/period/test_construction.py @@ -434,7 +434,7 @@ def test_constructor_range_based_deprecated_different_freq(self): with tm.assert_produces_warning(FutureWarning) as m: PeriodIndex(start="2000", periods=2) - warning, = m + (warning,) = m assert 'freq="A-DEC"' in str(warning.message) def test_constructor(self): diff --git a/pandas/tests/indexing/multiindex/test_getitem.py b/pandas/tests/indexing/multiindex/test_getitem.py index 4f95e6bd28989..519a1eb5b16d8 100644 --- a/pandas/tests/indexing/multiindex/test_getitem.py +++ b/pandas/tests/indexing/multiindex/test_getitem.py @@ -108,7 +108,7 @@ def test_series_getitem_indexing_errors( def test_series_getitem_corner_generator( - multiindex_year_month_day_dataframe_random_data + multiindex_year_month_day_dataframe_random_data, ): s = multiindex_year_month_day_dataframe_random_data["A"] result = s[(x > 0 for x in s)] diff --git a/pandas/tests/indexing/multiindex/test_xs.py b/pandas/tests/indexing/multiindex/test_xs.py index 99f343c2f4a7d..40483ffec4992 100644 --- a/pandas/tests/indexing/multiindex/test_xs.py +++ b/pandas/tests/indexing/multiindex/test_xs.py @@ -211,7 +211,7 @@ def test_xs_level_series_ymd(multiindex_year_month_day_dataframe_random_data): def test_xs_level_series_slice_not_implemented( - multiindex_year_month_day_dataframe_random_data + multiindex_year_month_day_dataframe_random_data, ): # this test is not explicitly testing .xs functionality # TODO: move to another module or refactor diff --git a/pandas/tests/indexing/test_callable.py b/pandas/tests/indexing/test_callable.py index aa73bd728595f..81dedfdc74409 100644 --- a/pandas/tests/indexing/test_callable.py +++ b/pandas/tests/indexing/test_callable.py @@ -17,10 +17,14 @@ def test_frame_loc_callable(self): res = df.loc[lambda x: x.A > 2] tm.assert_frame_equal(res, df.loc[df.A > 2]) - res = df.loc[lambda x: x.A > 2,] # noqa: E231 + res = df.loc[ + lambda x: x.A > 2, + ] # noqa: E231 tm.assert_frame_equal(res, df.loc[df.A > 2,]) # noqa: E231 - res = df.loc[lambda x: x.A > 2,] # noqa: E231 + res = df.loc[ + lambda x: x.A > 2, + ] # noqa: E231 tm.assert_frame_equal(res, df.loc[df.A > 2,]) # noqa: E231 res = df.loc[lambda x: x.B == "b", :] @@ -90,7 +94,9 @@ def test_frame_loc_callable_labels(self): res = df.loc[lambda x: ["A", "C"]] tm.assert_frame_equal(res, df.loc[["A", "C"]]) - res = df.loc[lambda x: ["A", "C"],] # noqa: E231 + res = df.loc[ + lambda x: ["A", "C"], + ] # noqa: E231 tm.assert_frame_equal(res, df.loc[["A", "C"],]) # noqa: E231 res = df.loc[lambda x: ["A", "C"], :] diff --git a/pandas/tests/io/parser/test_index_col.py b/pandas/tests/io/parser/test_index_col.py index 4dfb8d3bd2dc8..8122559997cce 100644 --- a/pandas/tests/io/parser/test_index_col.py +++ b/pandas/tests/io/parser/test_index_col.py @@ -22,8 +22,8 @@ def test_index_col_named(all_parsers, with_header): KORD5,19990127, 22:00:00, 21:56:00, -0.5900, 1.7100, 5.1000, 0.0000, 290.0000 KORD6,19990127, 23:00:00, 22:56:00, -0.5900, 1.7100, 4.6000, 0.0000, 280.0000""" # noqa header = ( - "ID,date,NominalTime,ActualTime,TDew,TAir,Windspeed,Precip,WindDir\n" - ) # noqa + "ID,date,NominalTime,ActualTime,TDew,TAir,Windspeed,Precip,WindDir\n" # noqa + ) if with_header: data = header + no_header diff --git a/pandas/tests/reductions/test_reductions.py b/pandas/tests/reductions/test_reductions.py index 05ebff4387908..9bd6fb41cf366 100644 --- a/pandas/tests/reductions/test_reductions.py +++ b/pandas/tests/reductions/test_reductions.py @@ -179,8 +179,8 @@ class TestIndexReductions: [ (0, 400, 3), (500, 0, -6), - (-10 ** 6, 10 ** 6, 4), - (10 ** 6, -10 ** 6, -4), + (-(10 ** 6), 10 ** 6, 4), + (10 ** 6, -(10 ** 6), -4), (0, 10, 20), ], ) diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index 9dd88fd5dd25b..885428e5146b2 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -216,10 +216,10 @@ def test_uint64_factorize(self, writable): tm.assert_numpy_array_equal(uniques, exp_uniques) def test_int64_factorize(self, writable): - data = np.array([2 ** 63 - 1, -2 ** 63, 2 ** 63 - 1], dtype=np.int64) + data = np.array([2 ** 63 - 1, -(2 ** 63), 2 ** 63 - 1], dtype=np.int64) data.setflags(write=writable) exp_labels = np.array([0, 1, 0], dtype=np.intp) - exp_uniques = np.array([2 ** 63 - 1, -2 ** 63], dtype=np.int64) + exp_uniques = np.array([2 ** 63 - 1, -(2 ** 63)], dtype=np.int64) labels, uniques = algos.factorize(data) tm.assert_numpy_array_equal(labels, exp_labels) @@ -258,7 +258,7 @@ def test_deprecate_order(self): "data", [ np.array([0, 1, 0], dtype="u8"), - np.array([-2 ** 63, 1, -2 ** 63], dtype="i8"), + np.array([-(2 ** 63), 1, -(2 ** 63)], dtype="i8"), np.array(["__nan__", "foo", "__nan__"], dtype="object"), ], ) @@ -275,8 +275,8 @@ def test_parametrized_factorize_na_value_default(self, data): [ (np.array([0, 1, 0, 2], dtype="u8"), 0), (np.array([1, 0, 1, 2], dtype="u8"), 1), - (np.array([-2 ** 63, 1, -2 ** 63, 0], dtype="i8"), -2 ** 63), - (np.array([1, -2 ** 63, 1, 0], dtype="i8"), 1), + (np.array([-(2 ** 63), 1, -(2 ** 63), 0], dtype="i8"), -(2 ** 63)), + (np.array([1, -(2 ** 63), 1, 0], dtype="i8"), 1), (np.array(["a", "", "a", "b"], dtype=object), "a"), (np.array([(), ("a", 1), (), ("a", 2)], dtype=object), ()), (np.array([("a", 1), (), ("a", 1), ("a", 2)], dtype=object), ("a", 1)), diff --git a/pandas/tests/test_nanops.py b/pandas/tests/test_nanops.py index 49d1777df0751..e6cff5c266bc6 100644 --- a/pandas/tests/test_nanops.py +++ b/pandas/tests/test_nanops.py @@ -302,7 +302,7 @@ def test_nanmean_overflow(self): # In the previous implementation mean can overflow for int dtypes, it # is now consistent with numpy - for a in [2 ** 55, -2 ** 55, 20150515061816532]: + for a in [2 ** 55, -(2 ** 55), 20150515061816532]: s = Series(a, index=range(500), dtype=np.int64) result = s.mean() np_result = s.values.mean() From 92c0bb1c85059d4277fdcc9ed03309afb8e8ea93 Mon Sep 17 00:00:00 2001 From: Moi Date: Sat, 2 Nov 2019 15:52:31 +0100 Subject: [PATCH 05/17] Minor Changes --- pandas/tests/arrays/test_integer.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/tests/arrays/test_integer.py b/pandas/tests/arrays/test_integer.py index 9d1982093b793..98ee53e3cd042 100644 --- a/pandas/tests/arrays/test_integer.py +++ b/pandas/tests/arrays/test_integer.py @@ -828,7 +828,6 @@ def test_arrow_array(data): expected = pa.array(list(data), type=data.dtype.name.lower(), from_pandas=True) assert arr.equals(expected) - @pytest.mark.parametrize("ddof", [0, 1]) def test_var_ddof(ddof): s = pd.Series(data=[1, 2, 3, 4, 5, 6, np.nan, np.nan], dtype="Int64") From 2da83e45252df7fe21f7bf6d34ea6b75654fdd3d Mon Sep 17 00:00:00 2001 From: Moi Date: Sat, 2 Nov 2019 16:53:25 +0100 Subject: [PATCH 06/17] added whatsnew --- doc/source/whatsnew/v1.0.0.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index fa1669b1f3343..486740f0ca3d0 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -324,7 +324,8 @@ Numeric - :class:`DataFrame` flex inequality comparisons methods (:meth:`DataFrame.lt`, :meth:`DataFrame.le`, :meth:`DataFrame.gt`, :meth: `DataFrame.ge`) with object-dtype and ``complex`` entries failing to raise ``TypeError`` like their :class:`Series` counterparts (:issue:`28079`) - Bug in :class:`DataFrame` logical operations (`&`, `|`, `^`) not matching :class:`Series` behavior by filling NA values (:issue:`28741`) - Bug in :meth:`DataFrame.interpolate` where specifying axis by name references variable before it is assigned (:issue:`29142`) -- +- Bug in :meth:`Series.var` not computing the right value with an integer series with missing values when ddof = 0 (:issue:`29128`) +- Conversion ^^^^^^^^^^ From 5e4f73d90dcf244a76f2c6a1cf4381d96efbd02a Mon Sep 17 00:00:00 2001 From: Moi Date: Sat, 2 Nov 2019 17:10:12 +0100 Subject: [PATCH 07/17] Revert "added test" This reverts commit 35048922d02f6c4c96f1d5a9664e6bc519e5411e. --- pandas/tests/arrays/test_integer.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/pandas/tests/arrays/test_integer.py b/pandas/tests/arrays/test_integer.py index 98ee53e3cd042..793de66767cc3 100644 --- a/pandas/tests/arrays/test_integer.py +++ b/pandas/tests/arrays/test_integer.py @@ -828,13 +828,6 @@ def test_arrow_array(data): expected = pa.array(list(data), type=data.dtype.name.lower(), from_pandas=True) assert arr.equals(expected) -@pytest.mark.parametrize("ddof", [0, 1]) -def test_var_ddof(ddof): - s = pd.Series(data=[1, 2, 3, 4, 5, 6, np.nan, np.nan], dtype="Int64") - result = s.var(ddof=ddof) - expected = np.var([1, 2, 3, 4, 5, 6, np.nan, np.nan], ddof=ddof) - assert expected == result - # TODO(jreback) - these need testing / are broken From 14808f063056d7eb23672ede1512f8e23169b9eb Mon Sep 17 00:00:00 2001 From: Moi Date: Sat, 2 Nov 2019 17:18:33 +0100 Subject: [PATCH 08/17] Revert "added whatsnew" This reverts commit 2da83e45252df7fe21f7bf6d34ea6b75654fdd3d. --- doc/source/whatsnew/v1.0.0.rst | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 486740f0ca3d0..fa1669b1f3343 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -324,8 +324,7 @@ Numeric - :class:`DataFrame` flex inequality comparisons methods (:meth:`DataFrame.lt`, :meth:`DataFrame.le`, :meth:`DataFrame.gt`, :meth: `DataFrame.ge`) with object-dtype and ``complex`` entries failing to raise ``TypeError`` like their :class:`Series` counterparts (:issue:`28079`) - Bug in :class:`DataFrame` logical operations (`&`, `|`, `^`) not matching :class:`Series` behavior by filling NA values (:issue:`28741`) - Bug in :meth:`DataFrame.interpolate` where specifying axis by name references variable before it is assigned (:issue:`29142`) -- Bug in :meth:`Series.var` not computing the right value with an integer series with missing values when ddof = 0 (:issue:`29128`) -- +- Conversion ^^^^^^^^^^ From 2bacae79767dd9f4890ec9c6b1be9dfb55b727a7 Mon Sep 17 00:00:00 2001 From: Moi Date: Sat, 2 Nov 2019 17:21:45 +0100 Subject: [PATCH 09/17] Revert "black pandas" This reverts commit b3a4aa6dacd344fefac83b372c70e489541b6c12. --- pandas/core/algorithms.py | 2 +- pandas/core/frame.py | 2 +- pandas/core/generic.py | 2 +- pandas/core/groupby/grouper.py | 6 +----- pandas/core/indexes/base.py | 2 +- pandas/core/indexing.py | 6 +++--- pandas/core/internals/managers.py | 2 +- pandas/io/common.py | 4 ++-- pandas/io/parsers.py | 14 ++------------ pandas/io/stata.py | 2 +- pandas/tests/arrays/sparse/test_array.py | 12 ++++-------- pandas/tests/arrays/test_integer.py | 8 ++++++++ pandas/tests/dtypes/test_inference.py | 2 +- pandas/tests/frame/test_constructors.py | 6 +++--- pandas/tests/indexes/period/test_construction.py | 2 +- pandas/tests/indexing/multiindex/test_getitem.py | 2 +- pandas/tests/indexing/multiindex/test_xs.py | 2 +- pandas/tests/indexing/test_callable.py | 12 +++--------- pandas/tests/io/parser/test_index_col.py | 4 ++-- pandas/tests/reductions/test_reductions.py | 4 ++-- pandas/tests/test_algos.py | 10 +++++----- pandas/tests/test_nanops.py | 2 +- 22 files changed, 46 insertions(+), 62 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index cc6c23929d49c..c70e623778315 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -1155,7 +1155,7 @@ def compute(self, method): n = min(n, narr) kth_val = algos.kth_smallest(arr.copy(), n - 1) - (ns,) = np.nonzero(arr <= kth_val) + ns, = np.nonzero(arr <= kth_val) inds = ns[arr[ns].argsort(kind="mergesort")] if self.keep != "all": diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 7d8cc0b731017..40efc4c65476a 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4829,7 +4829,7 @@ def drop_duplicates(self, subset=None, keep="first", inplace=False): duplicated = self.duplicated(subset, keep=keep) if inplace: - (inds,) = (-duplicated)._ndarray_values.nonzero() + inds, = (-duplicated)._ndarray_values.nonzero() new_data = self._data.take(inds) self._update_inplace(new_data) else: diff --git a/pandas/core/generic.py b/pandas/core/generic.py index fa43206b86ec4..f88c26c7bc782 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3613,7 +3613,7 @@ class animal locomotion if isinstance(loc, np.ndarray): if loc.dtype == np.bool_: - (inds,) = loc.nonzero() + inds, = loc.nonzero() return self.take(inds, axis=axis) else: return self.take(loc, axis=axis) diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index d6beefbf31db7..d7eaaca5ac83a 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -284,11 +284,7 @@ def __init__( if self.name is None: self.name = index.names[level] - ( - self.grouper, - self._labels, - self._group_index, - ) = index._get_grouper_for_level( # noqa: E501 + self.grouper, self._labels, self._group_index = index._get_grouper_for_level( # noqa: E501 self.grouper, level ) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 954607a0dc6de..187c7e2f3a7f7 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1873,7 +1873,7 @@ def _isnan(self): @cache_readonly def _nan_idxs(self): if self._can_hold_na: - (w,) = self._isnan.nonzero() + w, = self._isnan.nonzero() return w else: return np.array([], dtype=np.int64) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 1315e9d5b1c3f..44c786f003369 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -320,7 +320,7 @@ def _setitem_with_indexer(self, indexer, value): # if there is only one block/type, still have to take split path # unless the block is one-dimensional or it can hold the value if not take_split_path and self.obj._data.blocks: - (blk,) = self.obj._data.blocks + blk, = self.obj._data.blocks if 1 < blk.ndim: # in case of dict, keys are indices val = list(value.values()) if isinstance(value, dict) else value take_split_path = not blk._can_hold_element(val) @@ -1120,7 +1120,7 @@ def _getitem_iterable(self, key, axis: int): if com.is_bool_indexer(key): # A boolean indexer key = check_bool_indexer(labels, key) - (inds,) = key.nonzero() + inds, = key.nonzero() return self.obj.take(inds, axis=axis) else: # A collection of keys @@ -1264,7 +1264,7 @@ def _convert_to_indexer(self, obj, axis: int, raise_missing: bool = False): if com.is_bool_indexer(obj): obj = check_bool_indexer(labels, obj) - (inds,) = obj.nonzero() + inds, = obj.nonzero() return inds else: # When setting, missing keys are not allowed, even with .loc: diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index db782b4550907..c47aaf7c773c4 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1868,7 +1868,7 @@ def _shape_compat(x): def _interleaved_dtype( - blocks: List[Block], + blocks: List[Block] ) -> Optional[Union[np.dtype, ExtensionDtype]]: """Find the common dtype for `blocks`. diff --git a/pandas/io/common.py b/pandas/io/common.py index e08fd37e65ad9..0bef14e4999c7 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -109,7 +109,7 @@ def _is_url(url) -> bool: def _expand_user( - filepath_or_buffer: FilePathOrBuffer[AnyStr], + filepath_or_buffer: FilePathOrBuffer[AnyStr] ) -> FilePathOrBuffer[AnyStr]: """Return the argument with an initial component of ~ or ~user replaced by that user's home directory. @@ -139,7 +139,7 @@ def _validate_header_arg(header) -> None: def _stringify_path( - filepath_or_buffer: FilePathOrBuffer[AnyStr], + filepath_or_buffer: FilePathOrBuffer[AnyStr] ) -> FilePathOrBuffer[AnyStr]: """Attempt to convert a path-like object to a string. diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 4ff988acfe4da..058d65b9464ae 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -1918,12 +1918,7 @@ def __init__(self, src, **kwds): else: if len(self._reader.header) > 1: # we have a multi index in the columns - ( - self.names, - self.index_names, - self.col_names, - passed_names, - ) = self._extract_multi_indexer_columns( # noqa: E501 + self.names, self.index_names, self.col_names, passed_names = self._extract_multi_indexer_columns( # noqa: E501 self._reader.header, self.index_names, self.col_names, passed_names ) else: @@ -2312,12 +2307,7 @@ def __init__(self, f, **kwds): # The original set is stored in self.original_columns. if len(self.columns) > 1: # we are processing a multi index column - ( - self.columns, - self.index_names, - self.col_names, - _, - ) = self._extract_multi_indexer_columns( # noqa: E501 + self.columns, self.index_names, self.col_names, _ = self._extract_multi_indexer_columns( # noqa: E501 self.columns, self.index_names, self.col_names ) # Update list of original names to include all indices. diff --git a/pandas/io/stata.py b/pandas/io/stata.py index 8e5fa48d460e8..07475f224bd5f 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -614,7 +614,7 @@ def _cast_to_stata_types(data): data[col] = data[col].astype(np.int32) else: data[col] = data[col].astype(np.float64) - if data[col].max() >= 2 ** 53 or data[col].min() <= -(2 ** 53): + if data[col].max() >= 2 ** 53 or data[col].min() <= -2 ** 53: ws = precision_loss_doc % ("int64", "float64") elif dtype in (np.float32, np.float64): value = data[col].max() diff --git a/pandas/tests/arrays/sparse/test_array.py b/pandas/tests/arrays/sparse/test_array.py index 755cbfb716fcd..f9bb4981df7df 100644 --- a/pandas/tests/arrays/sparse/test_array.py +++ b/pandas/tests/arrays/sparse/test_array.py @@ -658,16 +658,12 @@ def test_getslice_tuple(self): dense = np.array([np.nan, 0, 3, 4, 0, 5, np.nan, np.nan, 0]) sparse = SparseArray(dense) - res = sparse[ - 4:, - ] # noqa: E231 + res = sparse[4:,] # noqa: E231 exp = SparseArray(dense[4:,]) # noqa: E231 tm.assert_sp_array_equal(res, exp) sparse = SparseArray(dense, fill_value=0) - res = sparse[ - 4:, - ] # noqa: E231 + res = sparse[4:,] # noqa: E231 exp = SparseArray(dense[4:,], fill_value=0) # noqa: E231 tm.assert_sp_array_equal(res, exp) @@ -827,11 +823,11 @@ def test_nonzero(self): # Tests regression #21172. sa = pd.SparseArray([float("nan"), float("nan"), 1, 0, 0, 2, 0, 0, 0, 3, 0, 0]) expected = np.array([2, 5, 9], dtype=np.int32) - (result,) = sa.nonzero() + result, = sa.nonzero() tm.assert_numpy_array_equal(expected, result) sa = pd.SparseArray([0, 0, 1, 0, 0, 2, 0, 0, 0, 3, 0, 0]) - (result,) = sa.nonzero() + result, = sa.nonzero() tm.assert_numpy_array_equal(expected, result) diff --git a/pandas/tests/arrays/test_integer.py b/pandas/tests/arrays/test_integer.py index 793de66767cc3..9d1982093b793 100644 --- a/pandas/tests/arrays/test_integer.py +++ b/pandas/tests/arrays/test_integer.py @@ -829,6 +829,14 @@ def test_arrow_array(data): assert arr.equals(expected) +@pytest.mark.parametrize("ddof", [0, 1]) +def test_var_ddof(ddof): + s = pd.Series(data=[1, 2, 3, 4, 5, 6, np.nan, np.nan], dtype="Int64") + result = s.var(ddof=ddof) + expected = np.var([1, 2, 3, 4, 5, 6, np.nan, np.nan], ddof=ddof) + assert expected == result + + # TODO(jreback) - these need testing / are broken # shift diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index 7d5b1891cbd32..62fb118f719e3 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -505,7 +505,7 @@ def test_convert_numeric_int64_uint64(self, case, coerce): result = lib.maybe_convert_numeric(case, set(), coerce_numeric=coerce) tm.assert_almost_equal(result, expected) - @pytest.mark.parametrize("value", [-(2 ** 63) - 1, 2 ** 64]) + @pytest.mark.parametrize("value", [-2 ** 63 - 1, 2 ** 64]) def test_convert_int_overflow(self, value): # see gh-18584 arr = np.array([value], dtype=object) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index f193f97aedb22..aa00cf234d9ee 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -245,9 +245,9 @@ def test_constructor_overflow_int64(self): np.array([2 ** 64], dtype=object), np.array([2 ** 65]), [2 ** 64 + 1], - np.array([-(2 ** 63) - 4], dtype=object), - np.array([-(2 ** 64) - 1]), - [-(2 ** 65) - 2], + np.array([-2 ** 63 - 4], dtype=object), + np.array([-2 ** 64 - 1]), + [-2 ** 65 - 2], ], ) def test_constructor_int_overflow(self, values): diff --git a/pandas/tests/indexes/period/test_construction.py b/pandas/tests/indexes/period/test_construction.py index 1973cb7f4740d..8c75fbbae7de3 100644 --- a/pandas/tests/indexes/period/test_construction.py +++ b/pandas/tests/indexes/period/test_construction.py @@ -434,7 +434,7 @@ def test_constructor_range_based_deprecated_different_freq(self): with tm.assert_produces_warning(FutureWarning) as m: PeriodIndex(start="2000", periods=2) - (warning,) = m + warning, = m assert 'freq="A-DEC"' in str(warning.message) def test_constructor(self): diff --git a/pandas/tests/indexing/multiindex/test_getitem.py b/pandas/tests/indexing/multiindex/test_getitem.py index 519a1eb5b16d8..4f95e6bd28989 100644 --- a/pandas/tests/indexing/multiindex/test_getitem.py +++ b/pandas/tests/indexing/multiindex/test_getitem.py @@ -108,7 +108,7 @@ def test_series_getitem_indexing_errors( def test_series_getitem_corner_generator( - multiindex_year_month_day_dataframe_random_data, + multiindex_year_month_day_dataframe_random_data ): s = multiindex_year_month_day_dataframe_random_data["A"] result = s[(x > 0 for x in s)] diff --git a/pandas/tests/indexing/multiindex/test_xs.py b/pandas/tests/indexing/multiindex/test_xs.py index 40483ffec4992..99f343c2f4a7d 100644 --- a/pandas/tests/indexing/multiindex/test_xs.py +++ b/pandas/tests/indexing/multiindex/test_xs.py @@ -211,7 +211,7 @@ def test_xs_level_series_ymd(multiindex_year_month_day_dataframe_random_data): def test_xs_level_series_slice_not_implemented( - multiindex_year_month_day_dataframe_random_data, + multiindex_year_month_day_dataframe_random_data ): # this test is not explicitly testing .xs functionality # TODO: move to another module or refactor diff --git a/pandas/tests/indexing/test_callable.py b/pandas/tests/indexing/test_callable.py index 81dedfdc74409..aa73bd728595f 100644 --- a/pandas/tests/indexing/test_callable.py +++ b/pandas/tests/indexing/test_callable.py @@ -17,14 +17,10 @@ def test_frame_loc_callable(self): res = df.loc[lambda x: x.A > 2] tm.assert_frame_equal(res, df.loc[df.A > 2]) - res = df.loc[ - lambda x: x.A > 2, - ] # noqa: E231 + res = df.loc[lambda x: x.A > 2,] # noqa: E231 tm.assert_frame_equal(res, df.loc[df.A > 2,]) # noqa: E231 - res = df.loc[ - lambda x: x.A > 2, - ] # noqa: E231 + res = df.loc[lambda x: x.A > 2,] # noqa: E231 tm.assert_frame_equal(res, df.loc[df.A > 2,]) # noqa: E231 res = df.loc[lambda x: x.B == "b", :] @@ -94,9 +90,7 @@ def test_frame_loc_callable_labels(self): res = df.loc[lambda x: ["A", "C"]] tm.assert_frame_equal(res, df.loc[["A", "C"]]) - res = df.loc[ - lambda x: ["A", "C"], - ] # noqa: E231 + res = df.loc[lambda x: ["A", "C"],] # noqa: E231 tm.assert_frame_equal(res, df.loc[["A", "C"],]) # noqa: E231 res = df.loc[lambda x: ["A", "C"], :] diff --git a/pandas/tests/io/parser/test_index_col.py b/pandas/tests/io/parser/test_index_col.py index 8122559997cce..4dfb8d3bd2dc8 100644 --- a/pandas/tests/io/parser/test_index_col.py +++ b/pandas/tests/io/parser/test_index_col.py @@ -22,8 +22,8 @@ def test_index_col_named(all_parsers, with_header): KORD5,19990127, 22:00:00, 21:56:00, -0.5900, 1.7100, 5.1000, 0.0000, 290.0000 KORD6,19990127, 23:00:00, 22:56:00, -0.5900, 1.7100, 4.6000, 0.0000, 280.0000""" # noqa header = ( - "ID,date,NominalTime,ActualTime,TDew,TAir,Windspeed,Precip,WindDir\n" # noqa - ) + "ID,date,NominalTime,ActualTime,TDew,TAir,Windspeed,Precip,WindDir\n" + ) # noqa if with_header: data = header + no_header diff --git a/pandas/tests/reductions/test_reductions.py b/pandas/tests/reductions/test_reductions.py index 9bd6fb41cf366..05ebff4387908 100644 --- a/pandas/tests/reductions/test_reductions.py +++ b/pandas/tests/reductions/test_reductions.py @@ -179,8 +179,8 @@ class TestIndexReductions: [ (0, 400, 3), (500, 0, -6), - (-(10 ** 6), 10 ** 6, 4), - (10 ** 6, -(10 ** 6), -4), + (-10 ** 6, 10 ** 6, 4), + (10 ** 6, -10 ** 6, -4), (0, 10, 20), ], ) diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index 885428e5146b2..9dd88fd5dd25b 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -216,10 +216,10 @@ def test_uint64_factorize(self, writable): tm.assert_numpy_array_equal(uniques, exp_uniques) def test_int64_factorize(self, writable): - data = np.array([2 ** 63 - 1, -(2 ** 63), 2 ** 63 - 1], dtype=np.int64) + data = np.array([2 ** 63 - 1, -2 ** 63, 2 ** 63 - 1], dtype=np.int64) data.setflags(write=writable) exp_labels = np.array([0, 1, 0], dtype=np.intp) - exp_uniques = np.array([2 ** 63 - 1, -(2 ** 63)], dtype=np.int64) + exp_uniques = np.array([2 ** 63 - 1, -2 ** 63], dtype=np.int64) labels, uniques = algos.factorize(data) tm.assert_numpy_array_equal(labels, exp_labels) @@ -258,7 +258,7 @@ def test_deprecate_order(self): "data", [ np.array([0, 1, 0], dtype="u8"), - np.array([-(2 ** 63), 1, -(2 ** 63)], dtype="i8"), + np.array([-2 ** 63, 1, -2 ** 63], dtype="i8"), np.array(["__nan__", "foo", "__nan__"], dtype="object"), ], ) @@ -275,8 +275,8 @@ def test_parametrized_factorize_na_value_default(self, data): [ (np.array([0, 1, 0, 2], dtype="u8"), 0), (np.array([1, 0, 1, 2], dtype="u8"), 1), - (np.array([-(2 ** 63), 1, -(2 ** 63), 0], dtype="i8"), -(2 ** 63)), - (np.array([1, -(2 ** 63), 1, 0], dtype="i8"), 1), + (np.array([-2 ** 63, 1, -2 ** 63, 0], dtype="i8"), -2 ** 63), + (np.array([1, -2 ** 63, 1, 0], dtype="i8"), 1), (np.array(["a", "", "a", "b"], dtype=object), "a"), (np.array([(), ("a", 1), (), ("a", 2)], dtype=object), ()), (np.array([("a", 1), (), ("a", 1), ("a", 2)], dtype=object), ("a", 1)), diff --git a/pandas/tests/test_nanops.py b/pandas/tests/test_nanops.py index e6cff5c266bc6..49d1777df0751 100644 --- a/pandas/tests/test_nanops.py +++ b/pandas/tests/test_nanops.py @@ -302,7 +302,7 @@ def test_nanmean_overflow(self): # In the previous implementation mean can overflow for int dtypes, it # is now consistent with numpy - for a in [2 ** 55, -(2 ** 55), 20150515061816532]: + for a in [2 ** 55, -2 ** 55, 20150515061816532]: s = Series(a, index=range(500), dtype=np.int64) result = s.mean() np_result = s.values.mean() From b5f52f47d81e639947e34fd2299c2c161932448e Mon Sep 17 00:00:00 2001 From: Moi Date: Sat, 2 Nov 2019 17:25:43 +0100 Subject: [PATCH 10/17] whatsnew fix --- doc/source/whatsnew/v0.25.3.rst | 4 ---- doc/source/whatsnew/v1.0.0.rst | 2 +- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/doc/source/whatsnew/v0.25.3.rst b/doc/source/whatsnew/v0.25.3.rst index aceee7b9bf7f8..9f7eac40ac2af 100644 --- a/doc/source/whatsnew/v0.25.3.rst +++ b/doc/source/whatsnew/v0.25.3.rst @@ -16,10 +16,6 @@ Groupby/resample/rolling - Bug in :meth:`DataFrameGroupBy.quantile` where NA values in the grouping could cause segfaults or incorrect results (:issue:`28882`) -Series -^^^^^^ - -- Bug in :meth:`Series.var` not computing the right value with an integer series with missing values when ddof = 0 Contributors ~~~~~~~~~~~~ diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index fa1669b1f3343..6b78845b5a305 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -324,7 +324,7 @@ Numeric - :class:`DataFrame` flex inequality comparisons methods (:meth:`DataFrame.lt`, :meth:`DataFrame.le`, :meth:`DataFrame.gt`, :meth: `DataFrame.ge`) with object-dtype and ``complex`` entries failing to raise ``TypeError`` like their :class:`Series` counterparts (:issue:`28079`) - Bug in :class:`DataFrame` logical operations (`&`, `|`, `^`) not matching :class:`Series` behavior by filling NA values (:issue:`28741`) - Bug in :meth:`DataFrame.interpolate` where specifying axis by name references variable before it is assigned (:issue:`29142`) -- +- Bug in :meth:`Series.var` not computing the right value with an integer series with missing values when ddof = 0 (:issue:`29128`) Conversion ^^^^^^^^^^ From 474a3621397ab670b062e9abe0ac5b3598a17c09 Mon Sep 17 00:00:00 2001 From: Moi Date: Sat, 2 Nov 2019 17:28:13 +0100 Subject: [PATCH 11/17] minor changes --- doc/source/whatsnew/v0.25.3.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/doc/source/whatsnew/v0.25.3.rst b/doc/source/whatsnew/v0.25.3.rst index 9f7eac40ac2af..f73a3f956f42e 100644 --- a/doc/source/whatsnew/v0.25.3.rst +++ b/doc/source/whatsnew/v0.25.3.rst @@ -16,7 +16,6 @@ Groupby/resample/rolling - Bug in :meth:`DataFrameGroupBy.quantile` where NA values in the grouping could cause segfaults or incorrect results (:issue:`28882`) - Contributors ~~~~~~~~~~~~ From b39d40dca85027b46e9f0b76c055a7c6f0fc55d2 Mon Sep 17 00:00:00 2001 From: Moi Date: Sun, 3 Nov 2019 17:51:07 +0100 Subject: [PATCH 12/17] test modified --- pandas/tests/arrays/test_integer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/arrays/test_integer.py b/pandas/tests/arrays/test_integer.py index 9d1982093b793..0571299e54cee 100644 --- a/pandas/tests/arrays/test_integer.py +++ b/pandas/tests/arrays/test_integer.py @@ -833,7 +833,7 @@ def test_arrow_array(data): def test_var_ddof(ddof): s = pd.Series(data=[1, 2, 3, 4, 5, 6, np.nan, np.nan], dtype="Int64") result = s.var(ddof=ddof) - expected = np.var([1, 2, 3, 4, 5, 6, np.nan, np.nan], ddof=ddof) + expected = np.nanvar([1, 2, 3, 4, 5, 6, np.nan, np.nan], ddof=ddof) assert expected == result From d38f87bfe6c43359bff49b4f852e62ed945ccfaf Mon Sep 17 00:00:00 2001 From: Moi Date: Tue, 5 Nov 2019 01:09:45 +0100 Subject: [PATCH 13/17] fixing whatsnew --- doc/source/whatsnew/v1.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 6b78845b5a305..fc1d2de37a1ce 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -324,7 +324,7 @@ Numeric - :class:`DataFrame` flex inequality comparisons methods (:meth:`DataFrame.lt`, :meth:`DataFrame.le`, :meth:`DataFrame.gt`, :meth: `DataFrame.ge`) with object-dtype and ``complex`` entries failing to raise ``TypeError`` like their :class:`Series` counterparts (:issue:`28079`) - Bug in :class:`DataFrame` logical operations (`&`, `|`, `^`) not matching :class:`Series` behavior by filling NA values (:issue:`28741`) - Bug in :meth:`DataFrame.interpolate` where specifying axis by name references variable before it is assigned (:issue:`29142`) -- Bug in :meth:`Series.var` not computing the right value with an integer series with missing values when ddof = 0 (:issue:`29128`) +- Bug in :meth:`Series.var` not computing the right value with a nullable integer dtype series not passing through ddof argument (:issue:`29128`) Conversion ^^^^^^^^^^ From c38623f5e947ab5f2b5d6c1c7c5909398b377ced Mon Sep 17 00:00:00 2001 From: Moi Date: Tue, 5 Nov 2019 02:26:04 +0100 Subject: [PATCH 14/17] test updated --- pandas/tests/arrays/test_integer.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/pandas/tests/arrays/test_integer.py b/pandas/tests/arrays/test_integer.py index 0571299e54cee..a98142673bf56 100644 --- a/pandas/tests/arrays/test_integer.py +++ b/pandas/tests/arrays/test_integer.py @@ -1,5 +1,6 @@ import numpy as np import pytest +from scipy.stats import kurtosis, skew, sem import pandas.util._test_decorators as td @@ -829,13 +830,18 @@ def test_arrow_array(data): assert arr.equals(expected) -@pytest.mark.parametrize("ddof", [0, 1]) -def test_var_ddof(ddof): +@pytest.mark.parametrize("pandasmethname, npfunction, kwargs, np_kwargs", + [("var", np.nanvar, {'ddof': 0}, {'ddof': 0}), + ("var", np.nanvar, {'ddof': 1}, {'ddof': 1}), + ("kurtosis", kurtosis, {}, {'bias': False, 'nan_policy': 'omit'}), + ("skew", skew, {}, {'nan_policy': 'omit'}), + ("sem", sem, {}, {'nan_policy': 'omit'})]) +def test_stat_method(pandasmethname, npfunction, kwargs, np_kwargs): s = pd.Series(data=[1, 2, 3, 4, 5, 6, np.nan, np.nan], dtype="Int64") - result = s.var(ddof=ddof) - expected = np.nanvar([1, 2, 3, 4, 5, 6, np.nan, np.nan], ddof=ddof) - assert expected == result - + pandasmeth = getattr(s, pandasmethname) + result = pandasmeth(**kwargs) + expected = npfunction([1, 2, 3, 4, 5, 6, np.nan, np.nan], **np_kwargs) + assert np.isclose(expected, result) # TODO(jreback) - these need testing / are broken From ef15fb94a415fe34d9ed3d1f68d537b6a7dd0c2b Mon Sep 17 00:00:00 2001 From: Moi Date: Tue, 5 Nov 2019 03:07:21 +0100 Subject: [PATCH 15/17] fix test --- pandas/tests/arrays/test_integer.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/pandas/tests/arrays/test_integer.py b/pandas/tests/arrays/test_integer.py index a98142673bf56..ad84cbbeb4930 100644 --- a/pandas/tests/arrays/test_integer.py +++ b/pandas/tests/arrays/test_integer.py @@ -1,6 +1,5 @@ import numpy as np import pytest -from scipy.stats import kurtosis, skew, sem import pandas.util._test_decorators as td @@ -830,18 +829,20 @@ def test_arrow_array(data): assert arr.equals(expected) -@pytest.mark.parametrize("pandasmethname, npfunction, kwargs, np_kwargs", - [("var", np.nanvar, {'ddof': 0}, {'ddof': 0}), - ("var", np.nanvar, {'ddof': 1}, {'ddof': 1}), - ("kurtosis", kurtosis, {}, {'bias': False, 'nan_policy': 'omit'}), - ("skew", skew, {}, {'nan_policy': 'omit'}), - ("sem", sem, {}, {'nan_policy': 'omit'})]) -def test_stat_method(pandasmethname, npfunction, kwargs, np_kwargs): +@pytest.mark.parametrize("pandasmethname, kwargs", + [("var", {'ddof': 0}), + ("var", {'ddof': 1}), + ("kurtosis", {}), + ("skew", {}), + ("sem", {})]) +def test_stat_method(pandasmethname, kwargs): s = pd.Series(data=[1, 2, 3, 4, 5, 6, np.nan, np.nan], dtype="Int64") pandasmeth = getattr(s, pandasmethname) result = pandasmeth(**kwargs) - expected = npfunction([1, 2, 3, 4, 5, 6, np.nan, np.nan], **np_kwargs) - assert np.isclose(expected, result) + s2 = pd.Series(data=[1, 2, 3, 4, 5, 6], dtype="Int64") + pandasmeth = getattr(s2, pandasmethname) + expected = pandasmeth(**kwargs) + assert expected == result # TODO(jreback) - these need testing / are broken From 0fe89699027ef38feb7572427283516ff852fb37 Mon Sep 17 00:00:00 2001 From: Moi Date: Tue, 5 Nov 2019 09:30:43 +0100 Subject: [PATCH 16/17] minor changes --- pandas/tests/arrays/test_integer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/arrays/test_integer.py b/pandas/tests/arrays/test_integer.py index ad84cbbeb4930..50a73501397d9 100644 --- a/pandas/tests/arrays/test_integer.py +++ b/pandas/tests/arrays/test_integer.py @@ -841,7 +841,7 @@ def test_stat_method(pandasmethname, kwargs): result = pandasmeth(**kwargs) s2 = pd.Series(data=[1, 2, 3, 4, 5, 6], dtype="Int64") pandasmeth = getattr(s2, pandasmethname) - expected = pandasmeth(**kwargs) + expected = pandasmeth(**kwargs) assert expected == result # TODO(jreback) - these need testing / are broken From bc6ccc43d720aa4df6ec23da16e74bd2487c5486 Mon Sep 17 00:00:00 2001 From: Moi Date: Tue, 5 Nov 2019 10:02:23 +0100 Subject: [PATCH 17/17] black pandas --- pandas/tests/arrays/test_integer.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/pandas/tests/arrays/test_integer.py b/pandas/tests/arrays/test_integer.py index 50a73501397d9..025366e5b210b 100644 --- a/pandas/tests/arrays/test_integer.py +++ b/pandas/tests/arrays/test_integer.py @@ -829,12 +829,16 @@ def test_arrow_array(data): assert arr.equals(expected) -@pytest.mark.parametrize("pandasmethname, kwargs", - [("var", {'ddof': 0}), - ("var", {'ddof': 1}), - ("kurtosis", {}), - ("skew", {}), - ("sem", {})]) +@pytest.mark.parametrize( + "pandasmethname, kwargs", + [ + ("var", {"ddof": 0}), + ("var", {"ddof": 1}), + ("kurtosis", {}), + ("skew", {}), + ("sem", {}), + ], +) def test_stat_method(pandasmethname, kwargs): s = pd.Series(data=[1, 2, 3, 4, 5, 6, np.nan, np.nan], dtype="Int64") pandasmeth = getattr(s, pandasmethname) @@ -844,6 +848,7 @@ def test_stat_method(pandasmethname, kwargs): expected = pandasmeth(**kwargs) assert expected == result + # TODO(jreback) - these need testing / are broken # shift