From b332b817acdd36f2df7cea53a8e00d4d38b569c4 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Tue, 17 Mar 2020 12:33:22 -0500 Subject: [PATCH 01/11] Add boolean test dir --- pandas/tests/arrays/boolean/__init__.py | 0 .../tests/arrays/boolean/test_arithmetic.py | 48 +++ pandas/tests/arrays/boolean/test_astype.py | 53 +++ .../tests/arrays/boolean/test_comparison.py | 127 ++++++ .../tests/arrays/boolean/test_construction.py | 381 ++++++++++++++++++ pandas/tests/arrays/boolean/test_function.py | 130 ++++++ pandas/tests/arrays/boolean/test_logical.py | 230 +++++++++++ pandas/tests/arrays/boolean/test_misc.py | 43 ++ 8 files changed, 1012 insertions(+) create mode 100644 pandas/tests/arrays/boolean/__init__.py create mode 100644 pandas/tests/arrays/boolean/test_arithmetic.py create mode 100644 pandas/tests/arrays/boolean/test_astype.py create mode 100644 pandas/tests/arrays/boolean/test_comparison.py create mode 100644 pandas/tests/arrays/boolean/test_construction.py create mode 100644 pandas/tests/arrays/boolean/test_function.py create mode 100644 pandas/tests/arrays/boolean/test_logical.py create mode 100644 pandas/tests/arrays/boolean/test_misc.py diff --git a/pandas/tests/arrays/boolean/__init__.py b/pandas/tests/arrays/boolean/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pandas/tests/arrays/boolean/test_arithmetic.py b/pandas/tests/arrays/boolean/test_arithmetic.py new file mode 100644 index 0000000000000..e6df27be5b5f4 --- /dev/null +++ b/pandas/tests/arrays/boolean/test_arithmetic.py @@ -0,0 +1,48 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas.tests.extension.base import BaseOpsUtil + + +def make_data(): + return [True, False] * 4 + [np.nan] + [True, False] * 44 + [np.nan] + [True, False] + + +@pytest.fixture +def dtype(): + return pd.BooleanDtype() + + +@pytest.fixture +def data(dtype): + return pd.array(make_data(), dtype=dtype) + + +class TestArithmeticOps(BaseOpsUtil): + def test_error(self, 
data, all_arithmetic_operators): + # invalid ops + + op = all_arithmetic_operators + s = pd.Series(data) + ops = getattr(s, op) + opa = getattr(data, op) + + # invalid scalars + with pytest.raises(TypeError): + ops("foo") + with pytest.raises(TypeError): + ops(pd.Timestamp("20180101")) + + # invalid array-likes + if op not in ("__mul__", "__rmul__"): + # TODO(extension) numpy's mul with object array sees booleans as numbers + with pytest.raises(TypeError): + ops(pd.Series("foo", index=s.index)) + + # 2d + result = opa(pd.DataFrame({"A": s})) + assert result is NotImplemented + + with pytest.raises(NotImplementedError): + opa(np.arange(len(s)).reshape(-1, len(s))) diff --git a/pandas/tests/arrays/boolean/test_astype.py b/pandas/tests/arrays/boolean/test_astype.py new file mode 100644 index 0000000000000..90fe9a6905d40 --- /dev/null +++ b/pandas/tests/arrays/boolean/test_astype.py @@ -0,0 +1,53 @@ +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm + + +def test_astype(): + # with missing values + arr = pd.array([True, False, None], dtype="boolean") + + with pytest.raises(ValueError, match="cannot convert NA to integer"): + arr.astype("int64") + + with pytest.raises(ValueError, match="cannot convert float NaN to"): + arr.astype("bool") + + result = arr.astype("float64") + expected = np.array([1, 0, np.nan], dtype="float64") + tm.assert_numpy_array_equal(result, expected) + + result = arr.astype("str") + expected = np.array(["True", "False", ""], dtype="object") + tm.assert_numpy_array_equal(result, expected) + + # no missing values + arr = pd.array([True, False, True], dtype="boolean") + result = arr.astype("int64") + expected = np.array([1, 0, 1], dtype="int64") + tm.assert_numpy_array_equal(result, expected) + + result = arr.astype("bool") + expected = np.array([True, False, True], dtype="bool") + tm.assert_numpy_array_equal(result, expected) + + +def test_astype_to_boolean_array(): + # astype to BooleanArray + arr = 
pd.array([True, False, None], dtype="boolean") + + result = arr.astype("boolean") + tm.assert_extension_array_equal(result, arr) + result = arr.astype(pd.BooleanDtype()) + tm.assert_extension_array_equal(result, arr) + + +def test_astype_to_integer_array(): + # astype to IntegerArray + arr = pd.array([True, False, None], dtype="boolean") + + result = arr.astype("Int64") + expected = pd.array([1, 0, None], dtype="Int64") + tm.assert_extension_array_equal(result, expected) diff --git a/pandas/tests/arrays/boolean/test_comparison.py b/pandas/tests/arrays/boolean/test_comparison.py new file mode 100644 index 0000000000000..40bebcc0c7fbf --- /dev/null +++ b/pandas/tests/arrays/boolean/test_comparison.py @@ -0,0 +1,127 @@ +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm +from pandas.arrays import BooleanArray +from pandas.tests.extension.base import BaseOpsUtil + + +def make_data(): + return [True, False] * 4 + [np.nan] + [True, False] * 44 + [np.nan] + [True, False] + + +@pytest.fixture +def dtype(): + return pd.BooleanDtype() + + +@pytest.fixture +def data(dtype): + return pd.array(make_data(), dtype=dtype) + + +class TestComparisonOps(BaseOpsUtil): + def _compare_other(self, data, op_name, other): + op = self.get_op_from_name(op_name) + + # array + result = pd.Series(op(data, other)) + expected = pd.Series(op(data._data, other), dtype="boolean") + # propagate NAs + expected[data._mask] = pd.NA + + tm.assert_series_equal(result, expected) + + # series + s = pd.Series(data) + result = op(s, other) + + expected = pd.Series(data._data) + expected = op(expected, other) + expected = expected.astype("boolean") + # propagate NAs + expected[data._mask] = pd.NA + + tm.assert_series_equal(result, expected) + + def test_compare_scalar(self, data, all_compare_operators): + op_name = all_compare_operators + self._compare_other(data, op_name, True) + + def test_compare_array(self, data, all_compare_operators): + op_name = all_compare_operators 
+ other = pd.array([True] * len(data), dtype="boolean") + self._compare_other(data, op_name, other) + other = np.array([True] * len(data)) + self._compare_other(data, op_name, other) + other = pd.Series([True] * len(data)) + self._compare_other(data, op_name, other) + + @pytest.mark.parametrize("other", [True, False, pd.NA]) + def test_scalar(self, other, all_compare_operators): + op = self.get_op_from_name(all_compare_operators) + a = pd.array([True, False, None], dtype="boolean") + + result = op(a, other) + + if other is pd.NA: + expected = pd.array([None, None, None], dtype="boolean") + else: + values = op(a._data, other) + expected = BooleanArray(values, a._mask, copy=True) + tm.assert_extension_array_equal(result, expected) + + # ensure we haven't mutated anything inplace + result[0] = None + tm.assert_extension_array_equal( + a, pd.array([True, False, None], dtype="boolean") + ) + + def test_array(self, all_compare_operators): + op = self.get_op_from_name(all_compare_operators) + a = pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean") + b = pd.array([True, False, None] * 3, dtype="boolean") + + result = op(a, b) + + values = op(a._data, b._data) + mask = a._mask | b._mask + expected = BooleanArray(values, mask) + tm.assert_extension_array_equal(result, expected) + + # ensure we haven't mutated anything inplace + result[0] = None + tm.assert_extension_array_equal( + a, pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean") + ) + tm.assert_extension_array_equal( + b, pd.array([True, False, None] * 3, dtype="boolean") + ) + + +@pytest.mark.parametrize( + "values, exp_any, exp_all, exp_any_noskip, exp_all_noskip", + [ + ([True, pd.NA], True, True, True, pd.NA), + ([False, pd.NA], False, False, pd.NA, False), + ([pd.NA], False, True, pd.NA, pd.NA), + ([], False, True, False, True), + ], +) +def test_any_all(values, exp_any, exp_all, exp_any_noskip, exp_all_noskip): + # the methods return numpy scalars + exp_any = pd.NA if exp_any is pd.NA 
else np.bool_(exp_any) + exp_all = pd.NA if exp_all is pd.NA else np.bool_(exp_all) + exp_any_noskip = pd.NA if exp_any_noskip is pd.NA else np.bool_(exp_any_noskip) + exp_all_noskip = pd.NA if exp_all_noskip is pd.NA else np.bool_(exp_all_noskip) + + for con in [pd.array, pd.Series]: + a = con(values, dtype="boolean") + assert a.any() is exp_any + assert a.all() is exp_all + assert a.any(skipna=False) is exp_any_noskip + assert a.all(skipna=False) is exp_all_noskip + + assert np.any(a.any()) is exp_any + assert np.all(a.all()) is exp_all diff --git a/pandas/tests/arrays/boolean/test_construction.py b/pandas/tests/arrays/boolean/test_construction.py new file mode 100644 index 0000000000000..8142b79a7bfce --- /dev/null +++ b/pandas/tests/arrays/boolean/test_construction.py @@ -0,0 +1,381 @@ +import numpy as np +import pytest + +import pandas.util._test_decorators as td + +import pandas as pd +import pandas._testing as tm +from pandas.arrays import BooleanArray +from pandas.core.arrays.boolean import coerce_to_array + + +def make_data(): + return [True, False] * 4 + [np.nan] + [True, False] * 44 + [np.nan] + [True, False] + + +@pytest.fixture +def dtype(): + return pd.BooleanDtype() + + +@pytest.fixture +def data(dtype): + return pd.array(make_data(), dtype=dtype) + + +def test_boolean_array_constructor(): + values = np.array([True, False, True, False], dtype="bool") + mask = np.array([False, False, False, True], dtype="bool") + + result = BooleanArray(values, mask) + expected = pd.array([True, False, True, None], dtype="boolean") + tm.assert_extension_array_equal(result, expected) + + with pytest.raises(TypeError, match="values should be boolean numpy array"): + BooleanArray(values.tolist(), mask) + + with pytest.raises(TypeError, match="mask should be boolean numpy array"): + BooleanArray(values, mask.tolist()) + + with pytest.raises(TypeError, match="values should be boolean numpy array"): + BooleanArray(values.astype(int), mask) + + with pytest.raises(TypeError, 
match="mask should be boolean numpy array"): + BooleanArray(values, None) + + with pytest.raises(ValueError, match="values must be a 1D array"): + BooleanArray(values.reshape(1, -1), mask) + + with pytest.raises(ValueError, match="mask must be a 1D array"): + BooleanArray(values, mask.reshape(1, -1)) + + +def test_boolean_array_constructor_copy(): + values = np.array([True, False, True, False], dtype="bool") + mask = np.array([False, False, False, True], dtype="bool") + + result = BooleanArray(values, mask) + assert result._data is values + assert result._mask is mask + + result = BooleanArray(values, mask, copy=True) + assert result._data is not values + assert result._mask is not mask + + +def test_to_boolean_array(): + expected = BooleanArray( + np.array([True, False, True]), np.array([False, False, False]) + ) + + result = pd.array([True, False, True], dtype="boolean") + tm.assert_extension_array_equal(result, expected) + result = pd.array(np.array([True, False, True]), dtype="boolean") + tm.assert_extension_array_equal(result, expected) + result = pd.array(np.array([True, False, True], dtype=object), dtype="boolean") + tm.assert_extension_array_equal(result, expected) + + # with missing values + expected = BooleanArray( + np.array([True, False, True]), np.array([False, False, True]) + ) + + result = pd.array([True, False, None], dtype="boolean") + tm.assert_extension_array_equal(result, expected) + result = pd.array(np.array([True, False, None], dtype=object), dtype="boolean") + tm.assert_extension_array_equal(result, expected) + + +def test_to_boolean_array_all_none(): + expected = BooleanArray(np.array([True, True, True]), np.array([True, True, True])) + + result = pd.array([None, None, None], dtype="boolean") + tm.assert_extension_array_equal(result, expected) + result = pd.array(np.array([None, None, None], dtype=object), dtype="boolean") + tm.assert_extension_array_equal(result, expected) + + +@pytest.mark.parametrize( + "a, b", + [ + ([True, False, None, 
np.nan, pd.NA], [True, False, None, None, None]), + ([True, np.nan], [True, None]), + ([True, pd.NA], [True, None]), + ([np.nan, np.nan], [None, None]), + (np.array([np.nan, np.nan], dtype=float), [None, None]), + ], +) +def test_to_boolean_array_missing_indicators(a, b): + result = pd.array(a, dtype="boolean") + expected = pd.array(b, dtype="boolean") + tm.assert_extension_array_equal(result, expected) + + +@pytest.mark.parametrize( + "values", + [ + ["foo", "bar"], + ["1", "2"], + # "foo", + [1, 2], + [1.0, 2.0], + pd.date_range("20130101", periods=2), + np.array(["foo"]), + np.array([1, 2]), + np.array([1.0, 2.0]), + [np.nan, {"a": 1}], + ], +) +def test_to_boolean_array_error(values): + # error in converting existing arrays to BooleanArray + msg = "Need to pass bool-like value" + with pytest.raises(TypeError, match=msg): + pd.array(values, dtype="boolean") + + +def test_to_boolean_array_from_integer_array(): + result = pd.array(np.array([1, 0, 1, 0]), dtype="boolean") + expected = pd.array([True, False, True, False], dtype="boolean") + tm.assert_extension_array_equal(result, expected) + + # with missing values + result = pd.array(np.array([1, 0, 1, None]), dtype="boolean") + expected = pd.array([True, False, True, None], dtype="boolean") + tm.assert_extension_array_equal(result, expected) + + +def test_to_boolean_array_from_float_array(): + result = pd.array(np.array([1.0, 0.0, 1.0, 0.0]), dtype="boolean") + expected = pd.array([True, False, True, False], dtype="boolean") + tm.assert_extension_array_equal(result, expected) + + # with missing values + result = pd.array(np.array([1.0, 0.0, 1.0, np.nan]), dtype="boolean") + expected = pd.array([True, False, True, None], dtype="boolean") + tm.assert_extension_array_equal(result, expected) + + +def test_to_boolean_array_integer_like(): + # integers of 0's and 1's + result = pd.array([1, 0, 1, 0], dtype="boolean") + expected = pd.array([True, False, True, False], dtype="boolean") + 
tm.assert_extension_array_equal(result, expected) + + # with missing values + result = pd.array([1, 0, 1, None], dtype="boolean") + expected = pd.array([True, False, True, None], dtype="boolean") + tm.assert_extension_array_equal(result, expected) + + +def test_coerce_to_array(): + # TODO this is currently not public API + values = np.array([True, False, True, False], dtype="bool") + mask = np.array([False, False, False, True], dtype="bool") + result = BooleanArray(*coerce_to_array(values, mask=mask)) + expected = BooleanArray(values, mask) + tm.assert_extension_array_equal(result, expected) + assert result._data is values + assert result._mask is mask + result = BooleanArray(*coerce_to_array(values, mask=mask, copy=True)) + expected = BooleanArray(values, mask) + tm.assert_extension_array_equal(result, expected) + assert result._data is not values + assert result._mask is not mask + + # mixed missing from values and mask + values = [True, False, None, False] + mask = np.array([False, False, False, True], dtype="bool") + result = BooleanArray(*coerce_to_array(values, mask=mask)) + expected = BooleanArray( + np.array([True, False, True, True]), np.array([False, False, True, True]) + ) + tm.assert_extension_array_equal(result, expected) + result = BooleanArray(*coerce_to_array(np.array(values, dtype=object), mask=mask)) + tm.assert_extension_array_equal(result, expected) + result = BooleanArray(*coerce_to_array(values, mask=mask.tolist())) + tm.assert_extension_array_equal(result, expected) + + # raise errors for wrong dimension + values = np.array([True, False, True, False], dtype="bool") + mask = np.array([False, False, False, True], dtype="bool") + + with pytest.raises(ValueError, match="values must be a 1D list-like"): + coerce_to_array(values.reshape(1, -1)) + + with pytest.raises(ValueError, match="mask must be a 1D list-like"): + coerce_to_array(values, mask=mask.reshape(1, -1)) + + +def test_coerce_to_array_from_boolean_array(): + # passing BooleanArray to 
coerce_to_array + values = np.array([True, False, True, False], dtype="bool") + mask = np.array([False, False, False, True], dtype="bool") + arr = BooleanArray(values, mask) + result = BooleanArray(*coerce_to_array(arr)) + tm.assert_extension_array_equal(result, arr) + # no copy + assert result._data is arr._data + assert result._mask is arr._mask + + result = BooleanArray(*coerce_to_array(arr), copy=True) + tm.assert_extension_array_equal(result, arr) + assert result._data is not arr._data + assert result._mask is not arr._mask + + with pytest.raises(ValueError, match="cannot pass mask for BooleanArray input"): + coerce_to_array(arr, mask=mask) + + +def test_coerce_to_numpy_array(): + # with missing values -> object dtype + arr = pd.array([True, False, None], dtype="boolean") + result = np.array(arr) + expected = np.array([True, False, pd.NA], dtype="object") + tm.assert_numpy_array_equal(result, expected) + + # also with no missing values -> object dtype + arr = pd.array([True, False, True], dtype="boolean") + result = np.array(arr) + expected = np.array([True, False, True], dtype="object") + tm.assert_numpy_array_equal(result, expected) + + # force bool dtype + result = np.array(arr, dtype="bool") + expected = np.array([True, False, True], dtype="bool") + tm.assert_numpy_array_equal(result, expected) + # with missing values will raise error + arr = pd.array([True, False, None], dtype="boolean") + msg = ( + "cannot convert to 'bool'-dtype NumPy array with missing values. " + "Specify an appropriate 'na_value' for this dtype." 
+ ) + with pytest.raises(ValueError, match=msg): + np.array(arr, dtype="bool") + + +def test_to_boolean_array_from_strings(): + result = BooleanArray._from_sequence_of_strings( + np.array(["True", "False", np.nan], dtype=object) + ) + expected = BooleanArray( + np.array([True, False, False]), np.array([False, False, True]) + ) + + tm.assert_extension_array_equal(result, expected) + + +def test_to_boolean_array_from_strings_invalid_string(): + with pytest.raises(ValueError, match="cannot be cast"): + BooleanArray._from_sequence_of_strings(["donkey"]) + + +@pytest.mark.parametrize("box", [True, False], ids=["series", "array"]) +def test_to_numpy(box): + con = pd.Series if box else pd.array + # default (with or without missing values) -> object dtype + arr = con([True, False, True], dtype="boolean") + result = arr.to_numpy() + expected = np.array([True, False, True], dtype="object") + tm.assert_numpy_array_equal(result, expected) + + arr = con([True, False, None], dtype="boolean") + result = arr.to_numpy() + expected = np.array([True, False, pd.NA], dtype="object") + tm.assert_numpy_array_equal(result, expected) + + arr = con([True, False, None], dtype="boolean") + result = arr.to_numpy(dtype="str") + expected = np.array([True, False, pd.NA], dtype="<U5") + tm.assert_numpy_array_equal(result, expected) + + # no missing values -> can convert to bool, otherwise raises + arr = con([True, False, True], dtype="boolean") + result = arr.to_numpy(dtype="bool") + expected = np.array([True, False, True], dtype="bool") + tm.assert_numpy_array_equal(result, expected) + + arr = con([True, False, None], dtype="boolean") + with pytest.raises(ValueError, match="cannot convert to 'bool'-dtype"): + result = arr.to_numpy(dtype="bool") + + # specify dtype and na_value + arr = con([True, False, None], dtype="boolean") + result = arr.to_numpy(dtype=object, na_value=None) + expected = np.array([True, False, None], dtype="object") + tm.assert_numpy_array_equal(result, expected) + + result = arr.to_numpy(dtype=bool, na_value=False) + expected = np.array([True, False, 
False], dtype="bool") + tm.assert_numpy_array_equal(result, expected) + + result = arr.to_numpy(dtype="int64", na_value=-99) + expected = np.array([1, 0, -99], dtype="int64") + tm.assert_numpy_array_equal(result, expected) + + result = arr.to_numpy(dtype="float64", na_value=np.nan) + expected = np.array([1, 0, np.nan], dtype="float64") + tm.assert_numpy_array_equal(result, expected) + + # converting to int or float without specifying na_value raises + with pytest.raises(ValueError, match="cannot convert to 'int64'-dtype"): + arr.to_numpy(dtype="int64") + with pytest.raises(ValueError, match="cannot convert to 'float64'-dtype"): + arr.to_numpy(dtype="float64") + + +def test_to_numpy_copy(): + # to_numpy can be zero-copy if no missing values + arr = pd.array([True, False, True], dtype="boolean") + result = arr.to_numpy(dtype=bool) + result[0] = False + tm.assert_extension_array_equal( + arr, pd.array([False, False, True], dtype="boolean") + ) + + arr = pd.array([True, False, True], dtype="boolean") + result = arr.to_numpy(dtype=bool, copy=True) + result[0] = False + tm.assert_extension_array_equal(arr, pd.array([True, False, True], dtype="boolean")) + + +# TODO when BooleanArray coerces to object dtype numpy array, need to do conversion +# manually in the indexing code +# def test_indexing_boolean_mask(): +# arr = pd.array([1, 2, 3, 4], dtype="Int64") +# mask = pd.array([True, False, True, False], dtype="boolean") +# result = arr[mask] +# expected = pd.array([1, 3], dtype="Int64") +# tm.assert_extension_array_equal(result, expected) + +# # missing values -> error +# mask = pd.array([True, False, True, None], dtype="boolean") +# with pytest.raises(IndexError): +# result = arr[mask] + + +@td.skip_if_no("pyarrow", min_version="0.15.0") +def test_arrow_array(data): + # protocol added in 0.15.0 + import pyarrow as pa + + arr = pa.array(data) + + # TODO use to_numpy(na_value=None) here + data_object = np.array(data, dtype=object) + data_object[data.isna()] = None + 
expected = pa.array(data_object, type=pa.bool_(), from_pandas=True) + assert arr.equals(expected) + + +@td.skip_if_no("pyarrow", min_version="0.15.1.dev") +def test_arrow_roundtrip(): + # roundtrip possible from arrow 1.0.0 + import pyarrow as pa + + data = pd.array([True, False, None], dtype="boolean") + df = pd.DataFrame({"a": data}) + table = pa.table(df) + assert table.field("a").type == "bool" + result = table.to_pandas() + assert isinstance(result["a"].dtype, pd.BooleanDtype) + tm.assert_frame_equal(result, df) diff --git a/pandas/tests/arrays/boolean/test_function.py b/pandas/tests/arrays/boolean/test_function.py new file mode 100644 index 0000000000000..13a7eb3a789fa --- /dev/null +++ b/pandas/tests/arrays/boolean/test_function.py @@ -0,0 +1,130 @@ +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm +from pandas.core.arrays.boolean import coerce_to_array + + +def make_data(): + return [True, False] * 4 + [np.nan] + [True, False] * 44 + [np.nan] + [True, False] + + +@pytest.fixture +def dtype(): + return pd.BooleanDtype() + + +@pytest.fixture +def data(dtype): + return pd.array(make_data(), dtype=dtype) + + +@pytest.mark.parametrize( + "ufunc", [np.add, np.logical_or, np.logical_and, np.logical_xor] +) +def test_ufuncs_binary(ufunc): + # two BooleanArrays + a = pd.array([True, False, None], dtype="boolean") + result = ufunc(a, a) + expected = pd.array(ufunc(a._data, a._data), dtype="boolean") + expected[a._mask] = np.nan + tm.assert_extension_array_equal(result, expected) + + s = pd.Series(a) + result = ufunc(s, a) + expected = pd.Series(ufunc(a._data, a._data), dtype="boolean") + expected[a._mask] = np.nan + tm.assert_series_equal(result, expected) + + # Boolean with numpy array + arr = np.array([True, True, False]) + result = ufunc(a, arr) + expected = pd.array(ufunc(a._data, arr), dtype="boolean") + expected[a._mask] = np.nan + tm.assert_extension_array_equal(result, expected) + + result = ufunc(arr, a) + expected = 
pd.array(ufunc(arr, a._data), dtype="boolean") + expected[a._mask] = np.nan + tm.assert_extension_array_equal(result, expected) + + # BooleanArray with scalar + result = ufunc(a, True) + expected = pd.array(ufunc(a._data, True), dtype="boolean") + expected[a._mask] = np.nan + tm.assert_extension_array_equal(result, expected) + + result = ufunc(True, a) + expected = pd.array(ufunc(True, a._data), dtype="boolean") + expected[a._mask] = np.nan + tm.assert_extension_array_equal(result, expected) + + # not handled types + with pytest.raises(TypeError): + ufunc(a, "test") + + +@pytest.mark.parametrize("ufunc", [np.logical_not]) +def test_ufuncs_unary(ufunc): + a = pd.array([True, False, None], dtype="boolean") + result = ufunc(a) + expected = pd.array(ufunc(a._data), dtype="boolean") + expected[a._mask] = np.nan + tm.assert_extension_array_equal(result, expected) + + s = pd.Series(a) + result = ufunc(s) + expected = pd.Series(ufunc(a._data), dtype="boolean") + expected[a._mask] = np.nan + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("values", [[True, False], [True, None]]) +def test_ufunc_reduce_raises(values): + a = pd.array(values, dtype="boolean") + with pytest.raises(NotImplementedError): + np.add.reduce(a) + + +def test_value_counts_na(): + arr = pd.array([True, False, pd.NA], dtype="boolean") + result = arr.value_counts(dropna=False) + expected = pd.Series([1, 1, 1], index=[True, False, pd.NA], dtype="Int64") + tm.assert_series_equal(result, expected) + + result = arr.value_counts(dropna=True) + expected = pd.Series([1, 1], index=[True, False], dtype="Int64") + tm.assert_series_equal(result, expected) + + +def test_diff(): + a = pd.array( + [True, True, False, False, True, None, True, None, False], dtype="boolean" + ) + result = pd.core.algorithms.diff(a, 1) + expected = pd.array( + [None, False, True, False, True, None, None, None, None], dtype="boolean" + ) + tm.assert_extension_array_equal(result, expected) + + s = pd.Series(a) + result 
= s.diff() + expected = pd.Series(expected) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("dropna", [True, False]) +def test_reductions_return_types(dropna, data, all_numeric_reductions): + op = all_numeric_reductions + s = pd.Series(data) + if dropna: + s = s.dropna() + + if op in ("sum", "prod"): + assert isinstance(getattr(s, op)(), np.int64) + elif op in ("min", "max"): + assert isinstance(getattr(s, op)(), np.bool_) + else: + # "mean", "std", "var", "median", "kurt", "skew" + assert isinstance(getattr(s, op)(), np.float64) diff --git a/pandas/tests/arrays/boolean/test_logical.py b/pandas/tests/arrays/boolean/test_logical.py new file mode 100644 index 0000000000000..6cfe19e2fe3eb --- /dev/null +++ b/pandas/tests/arrays/boolean/test_logical.py @@ -0,0 +1,230 @@ +import operator + +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm +from pandas.arrays import BooleanArray +from pandas.tests.extension.base import BaseOpsUtil + + +class TestLogicalOps(BaseOpsUtil): + def test_numpy_scalars_ok(self, all_logical_operators): + a = pd.array([True, False, None], dtype="boolean") + op = getattr(a, all_logical_operators) + + tm.assert_extension_array_equal(op(True), op(np.bool(True))) + tm.assert_extension_array_equal(op(False), op(np.bool(False))) + + def get_op_from_name(self, op_name): + short_opname = op_name.strip("_") + short_opname = short_opname if "xor" in short_opname else short_opname + "_" + try: + op = getattr(operator, short_opname) + except AttributeError: + # Assume it is the reverse operator + rop = getattr(operator, short_opname[1:]) + op = lambda x, y: rop(y, x) + + return op + + def test_empty_ok(self, all_logical_operators): + a = pd.array([], dtype="boolean") + op_name = all_logical_operators + result = getattr(a, op_name)(True) + tm.assert_extension_array_equal(a, result) + + result = getattr(a, op_name)(False) + tm.assert_extension_array_equal(a, result) + + # TODO: pd.NA + # result = 
getattr(a, op_name)(pd.NA) + # tm.assert_extension_array_equal(a, result) + + def test_logical_length_mismatch_raises(self, all_logical_operators): + op_name = all_logical_operators + a = pd.array([True, False, None], dtype="boolean") + msg = "Lengths must match to compare" + + with pytest.raises(ValueError, match=msg): + getattr(a, op_name)([True, False]) + + with pytest.raises(ValueError, match=msg): + getattr(a, op_name)(np.array([True, False])) + + with pytest.raises(ValueError, match=msg): + getattr(a, op_name)(pd.array([True, False], dtype="boolean")) + + def test_logical_nan_raises(self, all_logical_operators): + op_name = all_logical_operators + a = pd.array([True, False, None], dtype="boolean") + msg = "Got float instead" + + with pytest.raises(TypeError, match=msg): + getattr(a, op_name)(np.nan) + + @pytest.mark.parametrize("other", ["a", 1]) + def test_non_bool_or_na_other_raises(self, other, all_logical_operators): + a = pd.array([True, False], dtype="boolean") + with pytest.raises(TypeError, match=str(type(other).__name__)): + getattr(a, all_logical_operators)(other) + + def test_kleene_or(self): + # A clear test of behavior. 
+ a = pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean") + b = pd.array([True, False, None] * 3, dtype="boolean") + result = a | b + expected = pd.array( + [True, True, True, True, False, None, True, None, None], dtype="boolean" + ) + tm.assert_extension_array_equal(result, expected) + + result = b | a + tm.assert_extension_array_equal(result, expected) + + # ensure we haven't mutated anything inplace + tm.assert_extension_array_equal( + a, pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean") + ) + tm.assert_extension_array_equal( + b, pd.array([True, False, None] * 3, dtype="boolean") + ) + + @pytest.mark.parametrize( + "other, expected", + [ + (pd.NA, [True, None, None]), + (True, [True, True, True]), + (np.bool_(True), [True, True, True]), + (False, [True, False, None]), + (np.bool_(False), [True, False, None]), + ], + ) + def test_kleene_or_scalar(self, other, expected): + # TODO: test True & False + a = pd.array([True, False, None], dtype="boolean") + result = a | other + expected = pd.array(expected, dtype="boolean") + tm.assert_extension_array_equal(result, expected) + + result = other | a + tm.assert_extension_array_equal(result, expected) + + # ensure we haven't mutated anything inplace + tm.assert_extension_array_equal( + a, pd.array([True, False, None], dtype="boolean") + ) + + def test_kleene_and(self): + # A clear test of behavior. 
+ a = pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean") + b = pd.array([True, False, None] * 3, dtype="boolean") + result = a & b + expected = pd.array( + [True, False, None, False, False, False, None, False, None], dtype="boolean" + ) + tm.assert_extension_array_equal(result, expected) + + result = b & a + tm.assert_extension_array_equal(result, expected) + + # ensure we haven't mutated anything inplace + tm.assert_extension_array_equal( + a, pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean") + ) + tm.assert_extension_array_equal( + b, pd.array([True, False, None] * 3, dtype="boolean") + ) + + @pytest.mark.parametrize( + "other, expected", + [ + (pd.NA, [None, False, None]), + (True, [True, False, None]), + (False, [False, False, False]), + (np.bool_(True), [True, False, None]), + (np.bool_(False), [False, False, False]), + ], + ) + def test_kleene_and_scalar(self, other, expected): + a = pd.array([True, False, None], dtype="boolean") + result = a & other + expected = pd.array(expected, dtype="boolean") + tm.assert_extension_array_equal(result, expected) + + result = other & a + tm.assert_extension_array_equal(result, expected) + + # ensure we haven't mutated anything inplace + tm.assert_extension_array_equal( + a, pd.array([True, False, None], dtype="boolean") + ) + + def test_kleene_xor(self): + a = pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean") + b = pd.array([True, False, None] * 3, dtype="boolean") + result = a ^ b + expected = pd.array( + [False, True, None, True, False, None, None, None, None], dtype="boolean" + ) + tm.assert_extension_array_equal(result, expected) + + result = b ^ a + tm.assert_extension_array_equal(result, expected) + + # ensure we haven't mutated anything inplace + tm.assert_extension_array_equal( + a, pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean") + ) + tm.assert_extension_array_equal( + b, pd.array([True, False, None] * 3, dtype="boolean") + ) + + 
@pytest.mark.parametrize( + "other, expected", + [ + (pd.NA, [None, None, None]), + (True, [False, True, None]), + (np.bool_(True), [False, True, None]), + (np.bool_(False), [True, False, None]), + ], + ) + def test_kleene_xor_scalar(self, other, expected): + a = pd.array([True, False, None], dtype="boolean") + result = a ^ other + expected = pd.array(expected, dtype="boolean") + tm.assert_extension_array_equal(result, expected) + + result = other ^ a + tm.assert_extension_array_equal(result, expected) + + # ensure we haven't mutated anything inplace + tm.assert_extension_array_equal( + a, pd.array([True, False, None], dtype="boolean") + ) + + @pytest.mark.parametrize( + "other", [True, False, pd.NA, [True, False, None] * 3], + ) + def test_no_masked_assumptions(self, other, all_logical_operators): + # The logical operations should not assume that masked values are False! + a = pd.arrays.BooleanArray( + np.array([True, True, True, False, False, False, True, False, True]), + np.array([False] * 6 + [True, True, True]), + ) + b = pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean") + if isinstance(other, list): + other = pd.array(other, dtype="boolean") + + result = getattr(a, all_logical_operators)(other) + expected = getattr(b, all_logical_operators)(other) + tm.assert_extension_array_equal(result, expected) + + if isinstance(other, BooleanArray): + other._data[other._mask] = True + a._data[a._mask] = False + + result = getattr(a, all_logical_operators)(other) + expected = getattr(b, all_logical_operators)(other) + tm.assert_extension_array_equal(result, expected) diff --git a/pandas/tests/arrays/boolean/test_misc.py b/pandas/tests/arrays/boolean/test_misc.py new file mode 100644 index 0000000000000..b6fedcecd94eb --- /dev/null +++ b/pandas/tests/arrays/boolean/test_misc.py @@ -0,0 +1,43 @@ +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm + + +class TestUnaryOps: + def test_invert(self): + a = pd.array([True, False, 
None], dtype="boolean") + expected = pd.array([False, True, None], dtype="boolean") + tm.assert_extension_array_equal(~a, expected) + + expected = pd.Series(expected, index=["a", "b", "c"], name="name") + result = ~pd.Series(a, index=["a", "b", "c"], name="name") + tm.assert_series_equal(result, expected) + + df = pd.DataFrame({"A": a, "B": [True, False, False]}, index=["a", "b", "c"]) + result = ~df + expected = pd.DataFrame( + {"A": expected, "B": [False, True, True]}, index=["a", "b", "c"] + ) + tm.assert_frame_equal(result, expected) + + +def test_repr(): + df = pd.DataFrame({"A": pd.array([True, False, None], dtype="boolean")}) + expected = " A\n0 True\n1 False\n2 " + assert repr(df) == expected + + expected = "0 True\n1 False\n2 \nName: A, dtype: boolean" + assert repr(df.A) == expected + + expected = "\n[True, False, ]\nLength: 3, dtype: boolean" + assert repr(df.A.array) == expected + + +@pytest.mark.parametrize("na", [None, np.nan, pd.NA]) +def test_setitem_missing_values(na): + arr = pd.array([True, False, None], dtype="boolean") + expected = pd.array([True, None, None], dtype="boolean") + arr[1] = na + tm.assert_extension_array_equal(arr, expected) From e0ddb1b15e72a9f2fe323b8dfa280e6927ac7bdc Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Tue, 17 Mar 2020 12:34:03 -0500 Subject: [PATCH 02/11] Remove test_boolean.py --- pandas/tests/arrays/test_boolean.py | 936 ---------------------------- 1 file changed, 936 deletions(-) delete mode 100644 pandas/tests/arrays/test_boolean.py diff --git a/pandas/tests/arrays/test_boolean.py b/pandas/tests/arrays/test_boolean.py deleted file mode 100644 index f4b466f4804c7..0000000000000 --- a/pandas/tests/arrays/test_boolean.py +++ /dev/null @@ -1,936 +0,0 @@ -import operator - -import numpy as np -import pytest - -import pandas.util._test_decorators as td - -import pandas as pd -import pandas._testing as tm -from pandas.arrays import BooleanArray -from pandas.core.arrays.boolean import coerce_to_array -from 
pandas.tests.extension.base import BaseOpsUtil - - -def make_data(): - return [True, False] * 4 + [np.nan] + [True, False] * 44 + [np.nan] + [True, False] - - -@pytest.fixture -def dtype(): - return pd.BooleanDtype() - - -@pytest.fixture -def data(dtype): - return pd.array(make_data(), dtype=dtype) - - -def test_boolean_array_constructor(): - values = np.array([True, False, True, False], dtype="bool") - mask = np.array([False, False, False, True], dtype="bool") - - result = BooleanArray(values, mask) - expected = pd.array([True, False, True, None], dtype="boolean") - tm.assert_extension_array_equal(result, expected) - - with pytest.raises(TypeError, match="values should be boolean numpy array"): - BooleanArray(values.tolist(), mask) - - with pytest.raises(TypeError, match="mask should be boolean numpy array"): - BooleanArray(values, mask.tolist()) - - with pytest.raises(TypeError, match="values should be boolean numpy array"): - BooleanArray(values.astype(int), mask) - - with pytest.raises(TypeError, match="mask should be boolean numpy array"): - BooleanArray(values, None) - - with pytest.raises(ValueError, match="values must be a 1D array"): - BooleanArray(values.reshape(1, -1), mask) - - with pytest.raises(ValueError, match="mask must be a 1D array"): - BooleanArray(values, mask.reshape(1, -1)) - - -def test_boolean_array_constructor_copy(): - values = np.array([True, False, True, False], dtype="bool") - mask = np.array([False, False, False, True], dtype="bool") - - result = BooleanArray(values, mask) - assert result._data is values - assert result._mask is mask - - result = BooleanArray(values, mask, copy=True) - assert result._data is not values - assert result._mask is not mask - - -def test_to_boolean_array(): - expected = BooleanArray( - np.array([True, False, True]), np.array([False, False, False]) - ) - - result = pd.array([True, False, True], dtype="boolean") - tm.assert_extension_array_equal(result, expected) - result = pd.array(np.array([True, False, 
True]), dtype="boolean") - tm.assert_extension_array_equal(result, expected) - result = pd.array(np.array([True, False, True], dtype=object), dtype="boolean") - tm.assert_extension_array_equal(result, expected) - - # with missing values - expected = BooleanArray( - np.array([True, False, True]), np.array([False, False, True]) - ) - - result = pd.array([True, False, None], dtype="boolean") - tm.assert_extension_array_equal(result, expected) - result = pd.array(np.array([True, False, None], dtype=object), dtype="boolean") - tm.assert_extension_array_equal(result, expected) - - -def test_to_boolean_array_all_none(): - expected = BooleanArray(np.array([True, True, True]), np.array([True, True, True])) - - result = pd.array([None, None, None], dtype="boolean") - tm.assert_extension_array_equal(result, expected) - result = pd.array(np.array([None, None, None], dtype=object), dtype="boolean") - tm.assert_extension_array_equal(result, expected) - - -@pytest.mark.parametrize( - "a, b", - [ - ([True, False, None, np.nan, pd.NA], [True, False, None, None, None]), - ([True, np.nan], [True, None]), - ([True, pd.NA], [True, None]), - ([np.nan, np.nan], [None, None]), - (np.array([np.nan, np.nan], dtype=float), [None, None]), - ], -) -def test_to_boolean_array_missing_indicators(a, b): - result = pd.array(a, dtype="boolean") - expected = pd.array(b, dtype="boolean") - tm.assert_extension_array_equal(result, expected) - - -@pytest.mark.parametrize( - "values", - [ - ["foo", "bar"], - ["1", "2"], - # "foo", - [1, 2], - [1.0, 2.0], - pd.date_range("20130101", periods=2), - np.array(["foo"]), - np.array([1, 2]), - np.array([1.0, 2.0]), - [np.nan, {"a": 1}], - ], -) -def test_to_boolean_array_error(values): - # error in converting existing arrays to BooleanArray - msg = "Need to pass bool-like value" - with pytest.raises(TypeError, match=msg): - pd.array(values, dtype="boolean") - - -def test_to_boolean_array_from_integer_array(): - result = pd.array(np.array([1, 0, 1, 0]), 
dtype="boolean") - expected = pd.array([True, False, True, False], dtype="boolean") - tm.assert_extension_array_equal(result, expected) - - # with missing values - result = pd.array(np.array([1, 0, 1, None]), dtype="boolean") - expected = pd.array([True, False, True, None], dtype="boolean") - tm.assert_extension_array_equal(result, expected) - - -def test_to_boolean_array_from_float_array(): - result = pd.array(np.array([1.0, 0.0, 1.0, 0.0]), dtype="boolean") - expected = pd.array([True, False, True, False], dtype="boolean") - tm.assert_extension_array_equal(result, expected) - - # with missing values - result = pd.array(np.array([1.0, 0.0, 1.0, np.nan]), dtype="boolean") - expected = pd.array([True, False, True, None], dtype="boolean") - tm.assert_extension_array_equal(result, expected) - - -def test_to_boolean_array_integer_like(): - # integers of 0's and 1's - result = pd.array([1, 0, 1, 0], dtype="boolean") - expected = pd.array([True, False, True, False], dtype="boolean") - tm.assert_extension_array_equal(result, expected) - - # with missing values - result = pd.array([1, 0, 1, None], dtype="boolean") - expected = pd.array([True, False, True, None], dtype="boolean") - tm.assert_extension_array_equal(result, expected) - - -def test_coerce_to_array(): - # TODO this is currently not public API - values = np.array([True, False, True, False], dtype="bool") - mask = np.array([False, False, False, True], dtype="bool") - result = BooleanArray(*coerce_to_array(values, mask=mask)) - expected = BooleanArray(values, mask) - tm.assert_extension_array_equal(result, expected) - assert result._data is values - assert result._mask is mask - result = BooleanArray(*coerce_to_array(values, mask=mask, copy=True)) - expected = BooleanArray(values, mask) - tm.assert_extension_array_equal(result, expected) - assert result._data is not values - assert result._mask is not mask - - # mixed missing from values and mask - values = [True, False, None, False] - mask = np.array([False, 
False, False, True], dtype="bool") - result = BooleanArray(*coerce_to_array(values, mask=mask)) - expected = BooleanArray( - np.array([True, False, True, True]), np.array([False, False, True, True]) - ) - tm.assert_extension_array_equal(result, expected) - result = BooleanArray(*coerce_to_array(np.array(values, dtype=object), mask=mask)) - tm.assert_extension_array_equal(result, expected) - result = BooleanArray(*coerce_to_array(values, mask=mask.tolist())) - tm.assert_extension_array_equal(result, expected) - - # raise errors for wrong dimension - values = np.array([True, False, True, False], dtype="bool") - mask = np.array([False, False, False, True], dtype="bool") - - with pytest.raises(ValueError, match="values must be a 1D list-like"): - coerce_to_array(values.reshape(1, -1)) - - with pytest.raises(ValueError, match="mask must be a 1D list-like"): - coerce_to_array(values, mask=mask.reshape(1, -1)) - - -def test_coerce_to_array_from_boolean_array(): - # passing BooleanArray to coerce_to_array - values = np.array([True, False, True, False], dtype="bool") - mask = np.array([False, False, False, True], dtype="bool") - arr = BooleanArray(values, mask) - result = BooleanArray(*coerce_to_array(arr)) - tm.assert_extension_array_equal(result, arr) - # no copy - assert result._data is arr._data - assert result._mask is arr._mask - - result = BooleanArray(*coerce_to_array(arr), copy=True) - tm.assert_extension_array_equal(result, arr) - assert result._data is not arr._data - assert result._mask is not arr._mask - - with pytest.raises(ValueError, match="cannot pass mask for BooleanArray input"): - coerce_to_array(arr, mask=mask) - - -def test_coerce_to_numpy_array(): - # with missing values -> object dtype - arr = pd.array([True, False, None], dtype="boolean") - result = np.array(arr) - expected = np.array([True, False, pd.NA], dtype="object") - tm.assert_numpy_array_equal(result, expected) - - # also with no missing values -> object dtype - arr = pd.array([True, False, 
True], dtype="boolean") - result = np.array(arr) - expected = np.array([True, False, True], dtype="object") - tm.assert_numpy_array_equal(result, expected) - - # force bool dtype - result = np.array(arr, dtype="bool") - expected = np.array([True, False, True], dtype="bool") - tm.assert_numpy_array_equal(result, expected) - # with missing values will raise error - arr = pd.array([True, False, None], dtype="boolean") - msg = ( - "cannot convert to 'bool'-dtype NumPy array with missing values. " - "Specify an appropriate 'na_value' for this dtype." - ) - with pytest.raises(ValueError, match=msg): - np.array(arr, dtype="bool") - - -def test_to_boolean_array_from_strings(): - result = BooleanArray._from_sequence_of_strings( - np.array(["True", "False", np.nan], dtype=object) - ) - expected = BooleanArray( - np.array([True, False, False]), np.array([False, False, True]) - ) - - tm.assert_extension_array_equal(result, expected) - - -def test_to_boolean_array_from_strings_invalid_string(): - with pytest.raises(ValueError, match="cannot be cast"): - BooleanArray._from_sequence_of_strings(["donkey"]) - - -def test_repr(): - df = pd.DataFrame({"A": pd.array([True, False, None], dtype="boolean")}) - expected = " A\n0 True\n1 False\n2 " - assert repr(df) == expected - - expected = "0 True\n1 False\n2 \nName: A, dtype: boolean" - assert repr(df.A) == expected - - expected = "\n[True, False, ]\nLength: 3, dtype: boolean" - assert repr(df.A.array) == expected - - -@pytest.mark.parametrize("box", [True, False], ids=["series", "array"]) -def test_to_numpy(box): - con = pd.Series if box else pd.array - # default (with or without missing values) -> object dtype - arr = con([True, False, True], dtype="boolean") - result = arr.to_numpy() - expected = np.array([True, False, True], dtype="object") - tm.assert_numpy_array_equal(result, expected) - - arr = con([True, False, None], dtype="boolean") - result = arr.to_numpy() - expected = np.array([True, False, pd.NA], dtype="object") - 
tm.assert_numpy_array_equal(result, expected) - - arr = con([True, False, None], dtype="boolean") - result = arr.to_numpy(dtype="str") - expected = np.array([True, False, pd.NA], dtype=" can convert to bool, otherwise raises - arr = con([True, False, True], dtype="boolean") - result = arr.to_numpy(dtype="bool") - expected = np.array([True, False, True], dtype="bool") - tm.assert_numpy_array_equal(result, expected) - - arr = con([True, False, None], dtype="boolean") - with pytest.raises(ValueError, match="cannot convert to 'bool'-dtype"): - result = arr.to_numpy(dtype="bool") - - # specify dtype and na_value - arr = con([True, False, None], dtype="boolean") - result = arr.to_numpy(dtype=object, na_value=None) - expected = np.array([True, False, None], dtype="object") - tm.assert_numpy_array_equal(result, expected) - - result = arr.to_numpy(dtype=bool, na_value=False) - expected = np.array([True, False, False], dtype="bool") - tm.assert_numpy_array_equal(result, expected) - - result = arr.to_numpy(dtype="int64", na_value=-99) - expected = np.array([1, 0, -99], dtype="int64") - tm.assert_numpy_array_equal(result, expected) - - result = arr.to_numpy(dtype="float64", na_value=np.nan) - expected = np.array([1, 0, np.nan], dtype="float64") - tm.assert_numpy_array_equal(result, expected) - - # converting to int or float without specifying na_value raises - with pytest.raises(ValueError, match="cannot convert to 'int64'-dtype"): - arr.to_numpy(dtype="int64") - with pytest.raises(ValueError, match="cannot convert to 'float64'-dtype"): - arr.to_numpy(dtype="float64") - - -def test_to_numpy_copy(): - # to_numpy can be zero-copy if no missing values - arr = pd.array([True, False, True], dtype="boolean") - result = arr.to_numpy(dtype=bool) - result[0] = False - tm.assert_extension_array_equal( - arr, pd.array([False, False, True], dtype="boolean") - ) - - arr = pd.array([True, False, True], dtype="boolean") - result = arr.to_numpy(dtype=bool, copy=True) - result[0] = False - 
tm.assert_extension_array_equal(arr, pd.array([True, False, True], dtype="boolean")) - - -def test_astype(): - # with missing values - arr = pd.array([True, False, None], dtype="boolean") - - with pytest.raises(ValueError, match="cannot convert NA to integer"): - arr.astype("int64") - - with pytest.raises(ValueError, match="cannot convert float NaN to"): - arr.astype("bool") - - result = arr.astype("float64") - expected = np.array([1, 0, np.nan], dtype="float64") - tm.assert_numpy_array_equal(result, expected) - - result = arr.astype("str") - expected = np.array(["True", "False", ""], dtype="object") - tm.assert_numpy_array_equal(result, expected) - - # no missing values - arr = pd.array([True, False, True], dtype="boolean") - result = arr.astype("int64") - expected = np.array([1, 0, 1], dtype="int64") - tm.assert_numpy_array_equal(result, expected) - - result = arr.astype("bool") - expected = np.array([True, False, True], dtype="bool") - tm.assert_numpy_array_equal(result, expected) - - -def test_astype_to_boolean_array(): - # astype to BooleanArray - arr = pd.array([True, False, None], dtype="boolean") - - result = arr.astype("boolean") - tm.assert_extension_array_equal(result, arr) - result = arr.astype(pd.BooleanDtype()) - tm.assert_extension_array_equal(result, arr) - - -def test_astype_to_integer_array(): - # astype to IntegerArray - arr = pd.array([True, False, None], dtype="boolean") - - result = arr.astype("Int64") - expected = pd.array([1, 0, None], dtype="Int64") - tm.assert_extension_array_equal(result, expected) - - -@pytest.mark.parametrize("na", [None, np.nan, pd.NA]) -def test_setitem_missing_values(na): - arr = pd.array([True, False, None], dtype="boolean") - expected = pd.array([True, None, None], dtype="boolean") - arr[1] = na - tm.assert_extension_array_equal(arr, expected) - - -@pytest.mark.parametrize( - "ufunc", [np.add, np.logical_or, np.logical_and, np.logical_xor] -) -def test_ufuncs_binary(ufunc): - # two BooleanArrays - a = 
pd.array([True, False, None], dtype="boolean") - result = ufunc(a, a) - expected = pd.array(ufunc(a._data, a._data), dtype="boolean") - expected[a._mask] = np.nan - tm.assert_extension_array_equal(result, expected) - - s = pd.Series(a) - result = ufunc(s, a) - expected = pd.Series(ufunc(a._data, a._data), dtype="boolean") - expected[a._mask] = np.nan - tm.assert_series_equal(result, expected) - - # Boolean with numpy array - arr = np.array([True, True, False]) - result = ufunc(a, arr) - expected = pd.array(ufunc(a._data, arr), dtype="boolean") - expected[a._mask] = np.nan - tm.assert_extension_array_equal(result, expected) - - result = ufunc(arr, a) - expected = pd.array(ufunc(arr, a._data), dtype="boolean") - expected[a._mask] = np.nan - tm.assert_extension_array_equal(result, expected) - - # BooleanArray with scalar - result = ufunc(a, True) - expected = pd.array(ufunc(a._data, True), dtype="boolean") - expected[a._mask] = np.nan - tm.assert_extension_array_equal(result, expected) - - result = ufunc(True, a) - expected = pd.array(ufunc(True, a._data), dtype="boolean") - expected[a._mask] = np.nan - tm.assert_extension_array_equal(result, expected) - - # not handled types - with pytest.raises(TypeError): - ufunc(a, "test") - - -@pytest.mark.parametrize("ufunc", [np.logical_not]) -def test_ufuncs_unary(ufunc): - a = pd.array([True, False, None], dtype="boolean") - result = ufunc(a) - expected = pd.array(ufunc(a._data), dtype="boolean") - expected[a._mask] = np.nan - tm.assert_extension_array_equal(result, expected) - - s = pd.Series(a) - result = ufunc(s) - expected = pd.Series(ufunc(a._data), dtype="boolean") - expected[a._mask] = np.nan - tm.assert_series_equal(result, expected) - - -@pytest.mark.parametrize("values", [[True, False], [True, None]]) -def test_ufunc_reduce_raises(values): - a = pd.array(values, dtype="boolean") - with pytest.raises(NotImplementedError): - np.add.reduce(a) - - -class TestUnaryOps: - def test_invert(self): - a = pd.array([True, 
False, None], dtype="boolean") - expected = pd.array([False, True, None], dtype="boolean") - tm.assert_extension_array_equal(~a, expected) - - expected = pd.Series(expected, index=["a", "b", "c"], name="name") - result = ~pd.Series(a, index=["a", "b", "c"], name="name") - tm.assert_series_equal(result, expected) - - df = pd.DataFrame({"A": a, "B": [True, False, False]}, index=["a", "b", "c"]) - result = ~df - expected = pd.DataFrame( - {"A": expected, "B": [False, True, True]}, index=["a", "b", "c"] - ) - tm.assert_frame_equal(result, expected) - - -class TestLogicalOps(BaseOpsUtil): - def test_numpy_scalars_ok(self, all_logical_operators): - a = pd.array([True, False, None], dtype="boolean") - op = getattr(a, all_logical_operators) - - tm.assert_extension_array_equal(op(True), op(np.bool(True))) - tm.assert_extension_array_equal(op(False), op(np.bool(False))) - - def get_op_from_name(self, op_name): - short_opname = op_name.strip("_") - short_opname = short_opname if "xor" in short_opname else short_opname + "_" - try: - op = getattr(operator, short_opname) - except AttributeError: - # Assume it is the reverse operator - rop = getattr(operator, short_opname[1:]) - op = lambda x, y: rop(y, x) - - return op - - def test_empty_ok(self, all_logical_operators): - a = pd.array([], dtype="boolean") - op_name = all_logical_operators - result = getattr(a, op_name)(True) - tm.assert_extension_array_equal(a, result) - - result = getattr(a, op_name)(False) - tm.assert_extension_array_equal(a, result) - - # TODO: pd.NA - # result = getattr(a, op_name)(pd.NA) - # tm.assert_extension_array_equal(a, result) - - def test_logical_length_mismatch_raises(self, all_logical_operators): - op_name = all_logical_operators - a = pd.array([True, False, None], dtype="boolean") - msg = "Lengths must match to compare" - - with pytest.raises(ValueError, match=msg): - getattr(a, op_name)([True, False]) - - with pytest.raises(ValueError, match=msg): - getattr(a, op_name)(np.array([True, False])) 
- - with pytest.raises(ValueError, match=msg): - getattr(a, op_name)(pd.array([True, False], dtype="boolean")) - - def test_logical_nan_raises(self, all_logical_operators): - op_name = all_logical_operators - a = pd.array([True, False, None], dtype="boolean") - msg = "Got float instead" - - with pytest.raises(TypeError, match=msg): - getattr(a, op_name)(np.nan) - - @pytest.mark.parametrize("other", ["a", 1]) - def test_non_bool_or_na_other_raises(self, other, all_logical_operators): - a = pd.array([True, False], dtype="boolean") - with pytest.raises(TypeError, match=str(type(other).__name__)): - getattr(a, all_logical_operators)(other) - - def test_kleene_or(self): - # A clear test of behavior. - a = pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean") - b = pd.array([True, False, None] * 3, dtype="boolean") - result = a | b - expected = pd.array( - [True, True, True, True, False, None, True, None, None], dtype="boolean" - ) - tm.assert_extension_array_equal(result, expected) - - result = b | a - tm.assert_extension_array_equal(result, expected) - - # ensure we haven't mutated anything inplace - tm.assert_extension_array_equal( - a, pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean") - ) - tm.assert_extension_array_equal( - b, pd.array([True, False, None] * 3, dtype="boolean") - ) - - @pytest.mark.parametrize( - "other, expected", - [ - (pd.NA, [True, None, None]), - (True, [True, True, True]), - (np.bool_(True), [True, True, True]), - (False, [True, False, None]), - (np.bool_(False), [True, False, None]), - ], - ) - def test_kleene_or_scalar(self, other, expected): - # TODO: test True & False - a = pd.array([True, False, None], dtype="boolean") - result = a | other - expected = pd.array(expected, dtype="boolean") - tm.assert_extension_array_equal(result, expected) - - result = other | a - tm.assert_extension_array_equal(result, expected) - - # ensure we haven't mutated anything inplace - tm.assert_extension_array_equal( - a, 
pd.array([True, False, None], dtype="boolean") - ) - - def test_kleene_and(self): - # A clear test of behavior. - a = pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean") - b = pd.array([True, False, None] * 3, dtype="boolean") - result = a & b - expected = pd.array( - [True, False, None, False, False, False, None, False, None], dtype="boolean" - ) - tm.assert_extension_array_equal(result, expected) - - result = b & a - tm.assert_extension_array_equal(result, expected) - - # ensure we haven't mutated anything inplace - tm.assert_extension_array_equal( - a, pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean") - ) - tm.assert_extension_array_equal( - b, pd.array([True, False, None] * 3, dtype="boolean") - ) - - @pytest.mark.parametrize( - "other, expected", - [ - (pd.NA, [None, False, None]), - (True, [True, False, None]), - (False, [False, False, False]), - (np.bool_(True), [True, False, None]), - (np.bool_(False), [False, False, False]), - ], - ) - def test_kleene_and_scalar(self, other, expected): - a = pd.array([True, False, None], dtype="boolean") - result = a & other - expected = pd.array(expected, dtype="boolean") - tm.assert_extension_array_equal(result, expected) - - result = other & a - tm.assert_extension_array_equal(result, expected) - - # ensure we haven't mutated anything inplace - tm.assert_extension_array_equal( - a, pd.array([True, False, None], dtype="boolean") - ) - - def test_kleene_xor(self): - a = pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean") - b = pd.array([True, False, None] * 3, dtype="boolean") - result = a ^ b - expected = pd.array( - [False, True, None, True, False, None, None, None, None], dtype="boolean" - ) - tm.assert_extension_array_equal(result, expected) - - result = b ^ a - tm.assert_extension_array_equal(result, expected) - - # ensure we haven't mutated anything inplace - tm.assert_extension_array_equal( - a, pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean") - ) - 
tm.assert_extension_array_equal( - b, pd.array([True, False, None] * 3, dtype="boolean") - ) - - @pytest.mark.parametrize( - "other, expected", - [ - (pd.NA, [None, None, None]), - (True, [False, True, None]), - (np.bool_(True), [False, True, None]), - (np.bool_(False), [True, False, None]), - ], - ) - def test_kleene_xor_scalar(self, other, expected): - a = pd.array([True, False, None], dtype="boolean") - result = a ^ other - expected = pd.array(expected, dtype="boolean") - tm.assert_extension_array_equal(result, expected) - - result = other ^ a - tm.assert_extension_array_equal(result, expected) - - # ensure we haven't mutated anything inplace - tm.assert_extension_array_equal( - a, pd.array([True, False, None], dtype="boolean") - ) - - @pytest.mark.parametrize( - "other", [True, False, pd.NA, [True, False, None] * 3], - ) - def test_no_masked_assumptions(self, other, all_logical_operators): - # The logical operations should not assume that masked values are False! - a = pd.arrays.BooleanArray( - np.array([True, True, True, False, False, False, True, False, True]), - np.array([False] * 6 + [True, True, True]), - ) - b = pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean") - if isinstance(other, list): - other = pd.array(other, dtype="boolean") - - result = getattr(a, all_logical_operators)(other) - expected = getattr(b, all_logical_operators)(other) - tm.assert_extension_array_equal(result, expected) - - if isinstance(other, BooleanArray): - other._data[other._mask] = True - a._data[a._mask] = False - - result = getattr(a, all_logical_operators)(other) - expected = getattr(b, all_logical_operators)(other) - tm.assert_extension_array_equal(result, expected) - - -class TestComparisonOps(BaseOpsUtil): - def _compare_other(self, data, op_name, other): - op = self.get_op_from_name(op_name) - - # array - result = pd.Series(op(data, other)) - expected = pd.Series(op(data._data, other), dtype="boolean") - # propagate NAs - expected[data._mask] = pd.NA - - 
tm.assert_series_equal(result, expected) - - # series - s = pd.Series(data) - result = op(s, other) - - expected = pd.Series(data._data) - expected = op(expected, other) - expected = expected.astype("boolean") - # propagate NAs - expected[data._mask] = pd.NA - - tm.assert_series_equal(result, expected) - - def test_compare_scalar(self, data, all_compare_operators): - op_name = all_compare_operators - self._compare_other(data, op_name, True) - - def test_compare_array(self, data, all_compare_operators): - op_name = all_compare_operators - other = pd.array([True] * len(data), dtype="boolean") - self._compare_other(data, op_name, other) - other = np.array([True] * len(data)) - self._compare_other(data, op_name, other) - other = pd.Series([True] * len(data)) - self._compare_other(data, op_name, other) - - @pytest.mark.parametrize("other", [True, False, pd.NA]) - def test_scalar(self, other, all_compare_operators): - op = self.get_op_from_name(all_compare_operators) - a = pd.array([True, False, None], dtype="boolean") - - result = op(a, other) - - if other is pd.NA: - expected = pd.array([None, None, None], dtype="boolean") - else: - values = op(a._data, other) - expected = BooleanArray(values, a._mask, copy=True) - tm.assert_extension_array_equal(result, expected) - - # ensure we haven't mutated anything inplace - result[0] = None - tm.assert_extension_array_equal( - a, pd.array([True, False, None], dtype="boolean") - ) - - def test_array(self, all_compare_operators): - op = self.get_op_from_name(all_compare_operators) - a = pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean") - b = pd.array([True, False, None] * 3, dtype="boolean") - - result = op(a, b) - - values = op(a._data, b._data) - mask = a._mask | b._mask - expected = BooleanArray(values, mask) - tm.assert_extension_array_equal(result, expected) - - # ensure we haven't mutated anything inplace - result[0] = None - tm.assert_extension_array_equal( - a, pd.array([True] * 3 + [False] * 3 + [None] * 3, 
dtype="boolean") - ) - tm.assert_extension_array_equal( - b, pd.array([True, False, None] * 3, dtype="boolean") - ) - - -class TestArithmeticOps(BaseOpsUtil): - def test_error(self, data, all_arithmetic_operators): - # invalid ops - - op = all_arithmetic_operators - s = pd.Series(data) - ops = getattr(s, op) - opa = getattr(data, op) - - # invalid scalars - with pytest.raises(TypeError): - ops("foo") - with pytest.raises(TypeError): - ops(pd.Timestamp("20180101")) - - # invalid array-likes - if op not in ("__mul__", "__rmul__"): - # TODO(extension) numpy's mul with object array sees booleans as numbers - with pytest.raises(TypeError): - ops(pd.Series("foo", index=s.index)) - - # 2d - result = opa(pd.DataFrame({"A": s})) - assert result is NotImplemented - - with pytest.raises(NotImplementedError): - opa(np.arange(len(s)).reshape(-1, len(s))) - - -@pytest.mark.parametrize("dropna", [True, False]) -def test_reductions_return_types(dropna, data, all_numeric_reductions): - op = all_numeric_reductions - s = pd.Series(data) - if dropna: - s = s.dropna() - - if op in ("sum", "prod"): - assert isinstance(getattr(s, op)(), np.int64) - elif op in ("min", "max"): - assert isinstance(getattr(s, op)(), np.bool_) - else: - # "mean", "std", "var", "median", "kurt", "skew" - assert isinstance(getattr(s, op)(), np.float64) - - -@pytest.mark.parametrize( - "values, exp_any, exp_all, exp_any_noskip, exp_all_noskip", - [ - ([True, pd.NA], True, True, True, pd.NA), - ([False, pd.NA], False, False, pd.NA, False), - ([pd.NA], False, True, pd.NA, pd.NA), - ([], False, True, False, True), - ], -) -def test_any_all(values, exp_any, exp_all, exp_any_noskip, exp_all_noskip): - # the methods return numpy scalars - exp_any = pd.NA if exp_any is pd.NA else np.bool_(exp_any) - exp_all = pd.NA if exp_all is pd.NA else np.bool_(exp_all) - exp_any_noskip = pd.NA if exp_any_noskip is pd.NA else np.bool_(exp_any_noskip) - exp_all_noskip = pd.NA if exp_all_noskip is pd.NA else np.bool_(exp_all_noskip) 
- - for con in [pd.array, pd.Series]: - a = con(values, dtype="boolean") - assert a.any() is exp_any - assert a.all() is exp_all - assert a.any(skipna=False) is exp_any_noskip - assert a.all(skipna=False) is exp_all_noskip - - assert np.any(a.any()) is exp_any - assert np.all(a.all()) is exp_all - - -# TODO when BooleanArray coerces to object dtype numpy array, need to do conversion -# manually in the indexing code -# def test_indexing_boolean_mask(): -# arr = pd.array([1, 2, 3, 4], dtype="Int64") -# mask = pd.array([True, False, True, False], dtype="boolean") -# result = arr[mask] -# expected = pd.array([1, 3], dtype="Int64") -# tm.assert_extension_array_equal(result, expected) - -# # missing values -> error -# mask = pd.array([True, False, True, None], dtype="boolean") -# with pytest.raises(IndexError): -# result = arr[mask] - - -@td.skip_if_no("pyarrow", min_version="0.15.0") -def test_arrow_array(data): - # protocol added in 0.15.0 - import pyarrow as pa - - arr = pa.array(data) - - # TODO use to_numpy(na_value=None) here - data_object = np.array(data, dtype=object) - data_object[data.isna()] = None - expected = pa.array(data_object, type=pa.bool_(), from_pandas=True) - assert arr.equals(expected) - - -@td.skip_if_no("pyarrow", min_version="0.15.1.dev") -def test_arrow_roundtrip(): - # roundtrip possible from arrow 1.0.0 - import pyarrow as pa - - data = pd.array([True, False, None], dtype="boolean") - df = pd.DataFrame({"a": data}) - table = pa.table(df) - assert table.field("a").type == "bool" - result = table.to_pandas() - assert isinstance(result["a"].dtype, pd.BooleanDtype) - tm.assert_frame_equal(result, df) - - -def test_value_counts_na(): - arr = pd.array([True, False, pd.NA], dtype="boolean") - result = arr.value_counts(dropna=False) - expected = pd.Series([1, 1, 1], index=[True, False, pd.NA], dtype="Int64") - tm.assert_series_equal(result, expected) - - result = arr.value_counts(dropna=True) - expected = pd.Series([1, 1], index=[True, False], 
dtype="Int64") - tm.assert_series_equal(result, expected) - - -def test_diff(): - a = pd.array( - [True, True, False, False, True, None, True, None, False], dtype="boolean" - ) - result = pd.core.algorithms.diff(a, 1) - expected = pd.array( - [None, False, True, False, True, None, None, None, None], dtype="boolean" - ) - tm.assert_extension_array_equal(result, expected) - - s = pd.Series(a) - result = s.diff() - expected = pd.Series(expected) - tm.assert_series_equal(result, expected) From 1887e966311d9dd8553ca348a66b35d59ed069ce Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Tue, 17 Mar 2020 13:37:31 -0500 Subject: [PATCH 03/11] Don't import unused --- pandas/tests/arrays/boolean/test_function.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/tests/arrays/boolean/test_function.py b/pandas/tests/arrays/boolean/test_function.py index 13a7eb3a789fa..6c1cbc46c2d09 100644 --- a/pandas/tests/arrays/boolean/test_function.py +++ b/pandas/tests/arrays/boolean/test_function.py @@ -3,7 +3,6 @@ import pandas as pd import pandas._testing as tm -from pandas.core.arrays.boolean import coerce_to_array def make_data(): From 189d53dd3fbe72412ba275184aa38126221f289b Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Tue, 17 Mar 2020 14:46:21 -0500 Subject: [PATCH 04/11] Add new test files --- pandas/tests/arrays/boolean/test_reduction.py | 61 +++++++++++++++++++ pandas/tests/arrays/boolean/test_repr.py | 17 ++++++ pandas/tests/arrays/boolean/test_setitem.py | 13 ++++ pandas/tests/arrays/boolean/test_unary_ops.py | 23 +++++++ 4 files changed, 114 insertions(+) create mode 100644 pandas/tests/arrays/boolean/test_reduction.py create mode 100644 pandas/tests/arrays/boolean/test_repr.py create mode 100644 pandas/tests/arrays/boolean/test_setitem.py create mode 100644 pandas/tests/arrays/boolean/test_unary_ops.py diff --git a/pandas/tests/arrays/boolean/test_reduction.py b/pandas/tests/arrays/boolean/test_reduction.py new file mode 100644 index 0000000000000..9c17ddc73a71f 
--- /dev/null +++ b/pandas/tests/arrays/boolean/test_reduction.py @@ -0,0 +1,61 @@ +import numpy as np +import pytest + +import pandas as pd + + +def make_data(): + return [True, False] * 4 + [np.nan] + [True, False] * 44 + [np.nan] + [True, False] + + +@pytest.fixture +def dtype(): + return pd.BooleanDtype() + + +@pytest.fixture +def data(dtype): + return pd.array(make_data(), dtype=dtype) + + +@pytest.mark.parametrize( + "values, exp_any, exp_all, exp_any_noskip, exp_all_noskip", + [ + ([True, pd.NA], True, True, True, pd.NA), + ([False, pd.NA], False, False, pd.NA, False), + ([pd.NA], False, True, pd.NA, pd.NA), + ([], False, True, False, True), + ], +) +def test_any_all(values, exp_any, exp_all, exp_any_noskip, exp_all_noskip): + # the methods return numpy scalars + exp_any = pd.NA if exp_any is pd.NA else np.bool_(exp_any) + exp_all = pd.NA if exp_all is pd.NA else np.bool_(exp_all) + exp_any_noskip = pd.NA if exp_any_noskip is pd.NA else np.bool_(exp_any_noskip) + exp_all_noskip = pd.NA if exp_all_noskip is pd.NA else np.bool_(exp_all_noskip) + + for con in [pd.array, pd.Series]: + a = con(values, dtype="boolean") + assert a.any() is exp_any + assert a.all() is exp_all + assert a.any(skipna=False) is exp_any_noskip + assert a.all(skipna=False) is exp_all_noskip + + assert np.any(a.any()) is exp_any + assert np.all(a.all()) is exp_all + + +@pytest.mark.parametrize("dropna", [True, False]) +def test_reductions_return_types(dropna, data, all_numeric_reductions): + op = all_numeric_reductions + s = pd.Series(data) + if dropna: + s = s.dropna() + + if op in ("sum", "prod"): + assert isinstance(getattr(s, op)(), np.int64) + elif op in ("min", "max"): + assert isinstance(getattr(s, op)(), np.bool_) + else: + # "mean", "std", "var", "median", "kurt", "skew" + assert isinstance(getattr(s, op)(), np.float64) diff --git a/pandas/tests/arrays/boolean/test_repr.py b/pandas/tests/arrays/boolean/test_repr.py new file mode 100644 index 0000000000000..964c05914424c --- 
/dev/null +++ b/pandas/tests/arrays/boolean/test_repr.py @@ -0,0 +1,17 @@ +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm + + +def test_repr(): + df = pd.DataFrame({"A": pd.array([True, False, None], dtype="boolean")}) + expected = "       A\n0   True\n1  False\n2   <NA>" + assert repr(df) == expected + + expected = "0     True\n1    False\n2     <NA>\nName: A, dtype: boolean" + assert repr(df.A) == expected + + expected = "<BooleanArray>\n[True, False, <NA>]\nLength: 3, dtype: boolean" + assert repr(df.A.array) == expected diff --git a/pandas/tests/arrays/boolean/test_setitem.py b/pandas/tests/arrays/boolean/test_setitem.py new file mode 100644 index 0000000000000..6a7daea16963c --- /dev/null +++ b/pandas/tests/arrays/boolean/test_setitem.py @@ -0,0 +1,13 @@ +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm + + +@pytest.mark.parametrize("na", [None, np.nan, pd.NA]) +def test_setitem_missing_values(na): + arr = pd.array([True, False, None], dtype="boolean") + expected = pd.array([True, None, None], dtype="boolean") + arr[1] = na + tm.assert_extension_array_equal(arr, expected) diff --git a/pandas/tests/arrays/boolean/test_unary_ops.py b/pandas/tests/arrays/boolean/test_unary_ops.py new file mode 100644 index 0000000000000..6b9e97b3c6687 --- /dev/null +++ b/pandas/tests/arrays/boolean/test_unary_ops.py @@ -0,0 +1,23 @@ +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm + + +class TestUnaryOps: + def test_invert(self): + a = pd.array([True, False, None], dtype="boolean") + expected = pd.array([False, True, None], dtype="boolean") + tm.assert_extension_array_equal(~a, expected) + + expected = pd.Series(expected, index=["a", "b", "c"], name="name") + result = ~pd.Series(a, index=["a", "b", "c"], name="name") + tm.assert_series_equal(result, expected) + + df = pd.DataFrame({"A": a, "B": [True, False, False]}, index=["a", "b", "c"]) + result = ~df + expected = pd.DataFrame( + {"A": expected, "B": [False,
True, True]}, index=["a", "b", "c"] + ) + tm.assert_frame_equal(result, expected) From 42dd4c2e9aa1eb49e3f221249ecb838ca05734b1 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Tue, 17 Mar 2020 14:47:35 -0500 Subject: [PATCH 05/11] Fixme --- pandas/tests/arrays/boolean/test_construction.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/arrays/boolean/test_construction.py b/pandas/tests/arrays/boolean/test_construction.py index 8142b79a7bfce..f2190c0f1dabb 100644 --- a/pandas/tests/arrays/boolean/test_construction.py +++ b/pandas/tests/arrays/boolean/test_construction.py @@ -338,6 +338,7 @@ def test_to_numpy_copy(): tm.assert_extension_array_equal(arr, pd.array([True, False, True], dtype="boolean")) +# FIXME: don't leave commented out # TODO when BooleanArray coerces to object dtype numpy array, need to do conversion # manually in the indexing code # def test_indexing_boolean_mask(): From d45764863e484b3b3b6dbd296b96d975387552b6 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Tue, 17 Mar 2020 14:48:07 -0500 Subject: [PATCH 06/11] Move to reductions --- pandas/tests/arrays/boolean/test_function.py | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/pandas/tests/arrays/boolean/test_function.py b/pandas/tests/arrays/boolean/test_function.py index 6c1cbc46c2d09..4ad365d5d32c5 100644 --- a/pandas/tests/arrays/boolean/test_function.py +++ b/pandas/tests/arrays/boolean/test_function.py @@ -111,19 +111,3 @@ def test_diff(): result = s.diff() expected = pd.Series(expected) tm.assert_series_equal(result, expected) - - -@pytest.mark.parametrize("dropna", [True, False]) -def test_reductions_return_types(dropna, data, all_numeric_reductions): - op = all_numeric_reductions - s = pd.Series(data) - if dropna: - s = s.dropna() - - if op in ("sum", "prod"): - assert isinstance(getattr(s, op)(), np.int64) - elif op in ("min", "max"): - assert isinstance(getattr(s, op)(), np.bool_) - else: - # "mean", "std", "var", "median", "kurt", "skew" - assert 
isinstance(getattr(s, op)(), np.float64) From d2d4e10d50a15322dcbce98422b5761e9612f06a Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Tue, 17 Mar 2020 14:48:33 -0500 Subject: [PATCH 07/11] Move to reductions --- .../tests/arrays/boolean/test_comparison.py | 27 ------------------- 1 file changed, 27 deletions(-) diff --git a/pandas/tests/arrays/boolean/test_comparison.py b/pandas/tests/arrays/boolean/test_comparison.py index 40bebcc0c7fbf..6fcbcfc9b0fc8 100644 --- a/pandas/tests/arrays/boolean/test_comparison.py +++ b/pandas/tests/arrays/boolean/test_comparison.py @@ -98,30 +98,3 @@ def test_array(self, all_compare_operators): tm.assert_extension_array_equal( b, pd.array([True, False, None] * 3, dtype="boolean") ) - - -@pytest.mark.parametrize( - "values, exp_any, exp_all, exp_any_noskip, exp_all_noskip", - [ - ([True, pd.NA], True, True, True, pd.NA), - ([False, pd.NA], False, False, pd.NA, False), - ([pd.NA], False, True, pd.NA, pd.NA), - ([], False, True, False, True), - ], -) -def test_any_all(values, exp_any, exp_all, exp_any_noskip, exp_all_noskip): - # the methods return numpy scalars - exp_any = pd.NA if exp_any is pd.NA else np.bool_(exp_any) - exp_all = pd.NA if exp_all is pd.NA else np.bool_(exp_all) - exp_any_noskip = pd.NA if exp_any_noskip is pd.NA else np.bool_(exp_any_noskip) - exp_all_noskip = pd.NA if exp_all_noskip is pd.NA else np.bool_(exp_all_noskip) - - for con in [pd.array, pd.Series]: - a = con(values, dtype="boolean") - assert a.any() is exp_any - assert a.all() is exp_all - assert a.any(skipna=False) is exp_any_noskip - assert a.all(skipna=False) is exp_all_noskip - - assert np.any(a.any()) is exp_any - assert np.all(a.all()) is exp_all From 39c1ef2dbfcbc26dd0eba63408b15f4d543bb423 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Tue, 17 Mar 2020 14:49:16 -0500 Subject: [PATCH 08/11] Delete test_misc.py --- pandas/tests/arrays/boolean/test_misc.py | 43 ------------------------ 1 file changed, 43 deletions(-) delete mode 100644 
pandas/tests/arrays/boolean/test_misc.py diff --git a/pandas/tests/arrays/boolean/test_misc.py b/pandas/tests/arrays/boolean/test_misc.py deleted file mode 100644 index b6fedcecd94eb..0000000000000 --- a/pandas/tests/arrays/boolean/test_misc.py +++ /dev/null @@ -1,43 +0,0 @@ -import numpy as np -import pytest - -import pandas as pd -import pandas._testing as tm - - -class TestUnaryOps: - def test_invert(self): - a = pd.array([True, False, None], dtype="boolean") - expected = pd.array([False, True, None], dtype="boolean") - tm.assert_extension_array_equal(~a, expected) - - expected = pd.Series(expected, index=["a", "b", "c"], name="name") - result = ~pd.Series(a, index=["a", "b", "c"], name="name") - tm.assert_series_equal(result, expected) - - df = pd.DataFrame({"A": a, "B": [True, False, False]}, index=["a", "b", "c"]) - result = ~df - expected = pd.DataFrame( - {"A": expected, "B": [False, True, True]}, index=["a", "b", "c"] - ) - tm.assert_frame_equal(result, expected) - - -def test_repr(): - df = pd.DataFrame({"A": pd.array([True, False, None], dtype="boolean")}) - expected = "       A\n0   True\n1  False\n2   <NA>" - assert repr(df) == expected - - expected = "0     True\n1    False\n2     <NA>\nName: A, dtype: boolean" - assert repr(df.A) == expected - - expected = "<BooleanArray>\n[True, False, <NA>]\nLength: 3, dtype: boolean" - assert repr(df.A.array) == expected - - -@pytest.mark.parametrize("na", [None, np.nan, pd.NA]) -def test_setitem_missing_values(na): - arr = pd.array([True, False, None], dtype="boolean") - expected = pd.array([True, None, None], dtype="boolean") - arr[1] = na - tm.assert_extension_array_equal(arr, expected) From bb003fe77cf90ed58690916b83c59b01352d7b11 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Tue, 17 Mar 2020 14:56:32 -0500 Subject: [PATCH 09/11] Don't import unused --- pandas/tests/arrays/boolean/test_repr.py | 4 ---- pandas/tests/arrays/boolean/test_unary_ops.py | 3 --- 2 files changed, 7 deletions(-) diff --git a/pandas/tests/arrays/boolean/test_repr.py
b/pandas/tests/arrays/boolean/test_repr.py index 964c05914424c..0ee904b18cc9e 100644 --- a/pandas/tests/arrays/boolean/test_repr.py +++ b/pandas/tests/arrays/boolean/test_repr.py @@ -1,8 +1,4 @@ -import numpy as np -import pytest - import pandas as pd -import pandas._testing as tm def test_repr(): diff --git a/pandas/tests/arrays/boolean/test_unary_ops.py b/pandas/tests/arrays/boolean/test_unary_ops.py index 6b9e97b3c6687..52f602258a049 100644 --- a/pandas/tests/arrays/boolean/test_unary_ops.py +++ b/pandas/tests/arrays/boolean/test_unary_ops.py @@ -1,6 +1,3 @@ -import numpy as np -import pytest - import pandas as pd import pandas._testing as tm From 33c35ebfe07502298f052b21bcbadfacc679e419 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Tue, 17 Mar 2020 15:01:03 -0500 Subject: [PATCH 10/11] Replace bloated fixture --- pandas/tests/arrays/boolean/test_arithmetic.py | 16 +++++----------- pandas/tests/arrays/boolean/test_comparison.py | 16 +++++----------- pandas/tests/arrays/boolean/test_construction.py | 16 +++++----------- pandas/tests/arrays/boolean/test_function.py | 16 +++++----------- pandas/tests/arrays/boolean/test_reduction.py | 16 +++++----------- 5 files changed, 25 insertions(+), 55 deletions(-) diff --git a/pandas/tests/arrays/boolean/test_arithmetic.py b/pandas/tests/arrays/boolean/test_arithmetic.py index e6df27be5b5f4..df4c218cbf9bf 100644 --- a/pandas/tests/arrays/boolean/test_arithmetic.py +++ b/pandas/tests/arrays/boolean/test_arithmetic.py @@ -5,18 +5,12 @@ from pandas.tests.extension.base import BaseOpsUtil -def make_data(): - return [True, False] * 4 + [np.nan] + [True, False] * 44 + [np.nan] + [True, False] - - -@pytest.fixture -def dtype(): - return pd.BooleanDtype() - - @pytest.fixture -def data(dtype): - return pd.array(make_data(), dtype=dtype) +def data(): + return pd.array( + [True, False] * 4 + [np.nan] + [True, False] * 44 + [np.nan] + [True, False], + dtype="boolean", + ) class TestArithmeticOps(BaseOpsUtil): diff --git 
a/pandas/tests/arrays/boolean/test_comparison.py b/pandas/tests/arrays/boolean/test_comparison.py index 6fcbcfc9b0fc8..726b78fbd43bd 100644 --- a/pandas/tests/arrays/boolean/test_comparison.py +++ b/pandas/tests/arrays/boolean/test_comparison.py @@ -7,18 +7,12 @@ from pandas.tests.extension.base import BaseOpsUtil -def make_data(): - return [True, False] * 4 + [np.nan] + [True, False] * 44 + [np.nan] + [True, False] - - -@pytest.fixture -def dtype(): - return pd.BooleanDtype() - - @pytest.fixture -def data(dtype): - return pd.array(make_data(), dtype=dtype) +def data(): + return pd.array( + [True, False] * 4 + [np.nan] + [True, False] * 44 + [np.nan] + [True, False], + dtype="boolean", + ) class TestComparisonOps(BaseOpsUtil): diff --git a/pandas/tests/arrays/boolean/test_construction.py b/pandas/tests/arrays/boolean/test_construction.py index f2190c0f1dabb..bf1aba190f3e2 100644 --- a/pandas/tests/arrays/boolean/test_construction.py +++ b/pandas/tests/arrays/boolean/test_construction.py @@ -9,18 +9,12 @@ from pandas.core.arrays.boolean import coerce_to_array -def make_data(): - return [True, False] * 4 + [np.nan] + [True, False] * 44 + [np.nan] + [True, False] - - -@pytest.fixture -def dtype(): - return pd.BooleanDtype() - - @pytest.fixture -def data(dtype): - return pd.array(make_data(), dtype=dtype) +def data(): + return pd.array( + [True, False] * 4 + [np.nan] + [True, False] * 44 + [np.nan] + [True, False], + dtype="boolean", + ) def test_boolean_array_constructor(): diff --git a/pandas/tests/arrays/boolean/test_function.py b/pandas/tests/arrays/boolean/test_function.py index 4ad365d5d32c5..c2987dc37b960 100644 --- a/pandas/tests/arrays/boolean/test_function.py +++ b/pandas/tests/arrays/boolean/test_function.py @@ -5,18 +5,12 @@ import pandas._testing as tm -def make_data(): - return [True, False] * 4 + [np.nan] + [True, False] * 44 + [np.nan] + [True, False] - - -@pytest.fixture -def dtype(): - return pd.BooleanDtype() - - @pytest.fixture -def data(dtype): - 
return pd.array(make_data(), dtype=dtype) +def data(): + return pd.array( + [True, False] * 4 + [np.nan] + [True, False] * 44 + [np.nan] + [True, False], + dtype="boolean", + ) @pytest.mark.parametrize( diff --git a/pandas/tests/arrays/boolean/test_reduction.py b/pandas/tests/arrays/boolean/test_reduction.py index 9c17ddc73a71f..7a8146ef14de0 100644 --- a/pandas/tests/arrays/boolean/test_reduction.py +++ b/pandas/tests/arrays/boolean/test_reduction.py @@ -4,18 +4,12 @@ import pandas as pd -def make_data(): - return [True, False] * 4 + [np.nan] + [True, False] * 44 + [np.nan] + [True, False] - - -@pytest.fixture -def dtype(): - return pd.BooleanDtype() - - @pytest.fixture -def data(dtype): - return pd.array(make_data(), dtype=dtype) +def data(): + return pd.array( + [True, False] * 4 + [np.nan] + [True, False] * 44 + [np.nan] + [True, False], + dtype="boolean", + ) @pytest.mark.parametrize( From 13e69963b99a52573644d5e191f2bef26af8ee07 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Wed, 18 Mar 2020 10:32:52 -0500 Subject: [PATCH 11/11] Rename --- pandas/tests/arrays/boolean/{test_setitem.py => test_indexing.py} | 0 pandas/tests/arrays/boolean/{test_unary_ops.py => test_ops.py} | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename pandas/tests/arrays/boolean/{test_setitem.py => test_indexing.py} (100%) rename pandas/tests/arrays/boolean/{test_unary_ops.py => test_ops.py} (100%) diff --git a/pandas/tests/arrays/boolean/test_setitem.py b/pandas/tests/arrays/boolean/test_indexing.py similarity index 100% rename from pandas/tests/arrays/boolean/test_setitem.py rename to pandas/tests/arrays/boolean/test_indexing.py diff --git a/pandas/tests/arrays/boolean/test_unary_ops.py b/pandas/tests/arrays/boolean/test_ops.py similarity index 100% rename from pandas/tests/arrays/boolean/test_unary_ops.py rename to pandas/tests/arrays/boolean/test_ops.py