From 5f002db4661f8d637312862b5aabaed19e43fb46 Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Sat, 19 Nov 2022 11:38:25 +0000 Subject: [PATCH 1/3] TST: add tests/arrays/interval/test_constructors.py --- .../arrays/interval/test_constructors.py | 483 ++++++++++++++++++ 1 file changed, 483 insertions(+) create mode 100644 pandas/tests/arrays/interval/test_constructors.py diff --git a/pandas/tests/arrays/interval/test_constructors.py b/pandas/tests/arrays/interval/test_constructors.py new file mode 100644 index 0000000000000..bc2cf439882a9 --- /dev/null +++ b/pandas/tests/arrays/interval/test_constructors.py @@ -0,0 +1,483 @@ +from functools import partial + +import numpy as np +import pytest + +from pandas.core.dtypes.common import is_categorical_dtype +from pandas.core.dtypes.dtypes import IntervalDtype + +from pandas import ( + Categorical, + CategoricalIndex, + Index, + Interval, + Series, + date_range, + notna, + period_range, + timedelta_range, +) +import pandas._testing as tm +from pandas.api.types import is_unsigned_integer_dtype +from pandas.core.api import ( + Float64Index, + Int64Index, + UInt64Index, +) +from pandas.core.arrays import IntervalArray +import pandas.core.common as com + + +@pytest.fixture(params=[None, "foo"]) +def name(request): + return request.param + + +class ConstructorTests: + """ + Common tests for all variations of IntervalArray construction. Input data + to be supplied in breaks format, then converted by the subclass method + get_kwargs_from_breaks to the expected format. + """ + + # get_kwargs_from_breaks in TestFromTuples and TestClassconstructors just return + # tuples of ints, so IntervalArray can't know the original dtype was uint + _use_dtype_in_test_constructor_uint = False + + @pytest.mark.parametrize( + "breaks", + [ + [3, 14, 15, 92, 653], + np.arange(10, dtype="int64"), + np.arange(10, dtype="uint64"), + np.arange(20, 30, 0.5), + date_range("20180101", periods=10), + date_range("20180101", periods=10, tz="US/Eastern"), + timedelta_range("1 day", periods=10), + ], + ) + @pytest.mark.parametrize("use_dtype", [True, False]) + def test_constructor(self, constructor, breaks, closed, use_dtype): + breaks_dtype = getattr(breaks, "dtype", "int64") + result_kwargs = self.get_kwargs_from_breaks(breaks, closed) + is_uint = is_unsigned_integer_dtype(breaks_dtype) + if use_dtype or (self._use_dtype_in_test_constructor_uint and is_uint): + result_kwargs["dtype"] = IntervalDtype(breaks_dtype, closed=closed) + + result = constructor(closed=closed, **result_kwargs) + + assert result.closed == closed + assert result.dtype.subtype == breaks_dtype + tm.assert_index_equal(result.left, Index(breaks[:-1])) + tm.assert_index_equal(result.right, Index(breaks[1:])) + + @pytest.mark.parametrize( + "breaks, subtype", + [ + (Int64Index([0, 1, 2, 3, 4]), "float64"), + (Int64Index([0, 1, 2, 3, 4]), "datetime64[ns]"), + (Int64Index([0, 1, 2, 3, 4]), "timedelta64[ns]"), + (Float64Index([0, 1, 2, 3, 4]), "int64"), + (date_range("2017-01-01", periods=5), "int64"), + (timedelta_range("1 day", periods=5), "int64"), + ], + ) + def test_constructor_dtype(self, constructor, breaks, subtype): + # GH 19262: conversion via dtype parameter + expected_kwargs = self.get_kwargs_from_breaks(breaks.astype(subtype)) + expected = constructor(**expected_kwargs) + + result_kwargs = self.get_kwargs_from_breaks(breaks) + iv_dtype = IntervalDtype(subtype, "right") + for dtype in (iv_dtype, str(iv_dtype)): + result = constructor(dtype=dtype, **result_kwargs) + tm.assert_interval_array_equal(result, expected) + + @pytest.mark.parametrize( + "breaks", + [ + Int64Index([0, 1, 2, 3, 4]), + UInt64Index([0, 1, 2, 3, 4]), + Float64Index([0, 1, 2, 3, 4]), + date_range("2017-01-01", periods=5), + timedelta_range("1 day", periods=5), + ], + ) + def test_constructor_pass_closed(self, constructor, breaks): + # not passing closed to IntervalDtype, but to IntervalArray constructor + iv_dtype = IntervalDtype(breaks.dtype) + + result_kwargs = self.get_kwargs_from_breaks(breaks) + + for dtype in (iv_dtype, str(iv_dtype)): + with tm.assert_produces_warning(None): + + result = constructor(dtype=dtype, closed="left", **result_kwargs) + assert result.dtype.closed == "left" + + @pytest.mark.parametrize("breaks", [[np.nan] * 2, [np.nan] * 4, [np.nan] * 50]) + def test_constructor_nan(self, constructor, breaks, closed): + # GH 18421 + result_kwargs = self.get_kwargs_from_breaks(breaks) + result = constructor(closed=closed, **result_kwargs) + + expected_subtype = np.float64 + expected_values = np.array(breaks[:-1], dtype=object) + + assert result.closed == closed + assert result.dtype.subtype == expected_subtype + tm.assert_numpy_array_equal(np.array(result), expected_values) + + @pytest.mark.parametrize( + "breaks", + [ + [], + np.array([], dtype="int64"), + np.array([], dtype="uint64"), + np.array([], dtype="float64"), + np.array([], dtype="datetime64[ns]"), + np.array([], dtype="timedelta64[ns]"), + ], + ) + def test_constructor_empty(self, constructor, breaks, closed): + # GH 18421 + result_kwargs = self.get_kwargs_from_breaks(breaks) + result = constructor(closed=closed, **result_kwargs) + + expected_values = np.array([], dtype=object) + expected_subtype = getattr(breaks, "dtype", np.int64) + + assert len(result) == 0 + assert result.closed == closed + assert result.dtype.subtype == expected_subtype + tm.assert_numpy_array_equal(np.array(result), expected_values) + + @pytest.mark.parametrize( + "breaks", + [ + tuple("0123456789"), + list("abcdefghij"), + np.array(list("abcdefghij"), dtype=object), + np.array(list("abcdefghij"), dtype=" with value 5 is not an interval" + with pytest.raises(TypeError, match=msg): + klass(5) + + # not an interval; dtype depends on 32bit/windows builds + msg = "type with value 0 is not an interval" + with pytest.raises(TypeError, match=msg): + klass([0, 1]) + + @pytest.mark.parametrize( + "data, closed", + [ + ([], "both"), + ([np.nan, np.nan], "neither"), + ( + [Interval(0, 3, closed="neither"), Interval(2, 5, closed="neither")], + "left", + ), + ( + [Interval(0, 3, closed="left"), Interval(2, 5, closed="right")], + "neither", + ), + (IntervalArray.from_breaks(range(5), closed="both"), "right"), + ], + ) + def test_override_inferred_closed(self, constructor, data, closed): + # GH 19370 + if isinstance(data, IntervalArray): + tuples = data.to_tuples() + else: + tuples = [(iv.left, iv.right) if notna(iv) else iv for iv in data] + expected = IntervalArray.from_tuples(tuples, closed=closed) + result = constructor(data, closed=closed) + tm.assert_interval_array_equal(result, expected) + + @pytest.mark.parametrize( + "values_constructor", [list, np.array, IntervalArray, Series] + ) + def test_index_object_dtype(self, values_constructor): + intervals = [Interval(0, 1), Interval(1, 2), Interval(2, 3)] + values = values_constructor(intervals) + result = Series(values, dtype="interval") + + assert result.dtype == "interval" + tm.assert_numpy_array_equal(result.values, np.array(values)) + + def test_array_mixed_closed(self): + # GH27172 + intervals = [ + Interval(0, 1, closed="left"), + Interval(1, 2, closed="right"), + Interval(2, 3, closed="neither"), + Interval(3, 4, closed="both"), + ] + msg = "intervals must all be closed on the same side" + with pytest.raises(ValueError, match=msg): + IntervalArray(intervals) + + +def test_dtype_closed_mismatch(): + # GH#38394 closed specified in both dtype and IntervalArray constructor + + dtype = IntervalDtype(np.int64, "left") + + msg = "closed keyword does not match dtype.closed" + with pytest.raises(ValueError, match=msg): + IntervalArray([], dtype=dtype, closed="neither") From 87229fc47836c741b0083442d024348a3f7ff92e Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Sat, 19 Nov 2022 12:17:13 +0000 Subject: [PATCH 2/3] fix spelling --- pandas/tests/arrays/interval/test_constructors.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/arrays/interval/test_constructors.py b/pandas/tests/arrays/interval/test_constructors.py index bc2cf439882a9..10d712b12c240 100644 --- a/pandas/tests/arrays/interval/test_constructors.py +++ b/pandas/tests/arrays/interval/test_constructors.py @@ -353,7 +353,7 @@ def test_na_tuples(self): class TestClassConstructors(ConstructorTests): - """Tests specific to the IntervalArray/Serie constructors""" + """Tests specific to the IntervalArray/Series constructors""" _use_dtype_in_test_constructor_uint = True From e3d99e2c483c66a7d4cf82fcb27194da71b6cb38 Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Sat, 19 Nov 2022 13:27:41 +0000 Subject: [PATCH 3/3] fix 32bit error msg --- pandas/tests/arrays/interval/test_constructors.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/arrays/interval/test_constructors.py b/pandas/tests/arrays/interval/test_constructors.py index 10d712b12c240..136ded26c9955 100644 --- a/pandas/tests/arrays/interval/test_constructors.py +++ b/pandas/tests/arrays/interval/test_constructors.py @@ -414,7 +414,7 @@ def test_constructor_errors(self, klass): "some kind, 5 was passed" ) else: - msg = r"type with value 5 is not an interval" + msg = "type with value 5 is not an interval" with pytest.raises(TypeError, match=msg): klass(5)