diff --git a/doc/source/whatsnew/v0.15.1.txt b/doc/source/whatsnew/v0.15.1.txt index e96adc2bd9559..486ba9cbadd7f 100644 --- a/doc/source/whatsnew/v0.15.1.txt +++ b/doc/source/whatsnew/v0.15.1.txt @@ -186,6 +186,7 @@ Bug Fixes - Bug in selecting from a ``Categorical`` with ``.iloc`` (:issue:`8623`) - Bug in groupby-transform with a Categorical (:issue:`8623`) - Bug in duplicated/drop_duplicates with a Categorical (:issue:`8623`) +- Bug in ``Categorical`` reflected comparison operator raising if the first argument was a numpy array scalar (e.g. np.int64) (:issue:`8658`) diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index 598b29bf77e47..150da65580223 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -42,7 +42,16 @@ def f(self, other): # In other series, the leads to False, so do that here too ret[na_mask] = False return ret - elif lib.isscalar(other): + + # Numpy-1.9 and earlier may convert a scalar to a zerodim array during + # a comparison operation when the second arg has higher priority, e.g. + # + # cat[0] < cat + # + # For example, ``cat[0]`` is ``np.int64(1)``, but by the time it gets + # into this function it has become ``np.array(1)``. 
+ other = lib.item_from_zerodim(other) + if lib.isscalar(other): if other in self.categories: i = self.categories.get_loc(other) return getattr(self._codes, op)(i) diff --git a/pandas/core/common.py b/pandas/core/common.py index 51464e1809e75..1c117e1cae7dd 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -84,7 +84,7 @@ def _check(cls, inst): ABCSparseArray = create_pandas_abc_type("ABCSparseArray", "_subtyp", ('sparse_array', 'sparse_series')) ABCCategorical = create_pandas_abc_type("ABCCategorical","_typ",("categorical")) - +ABCPeriod = create_pandas_abc_type("ABCPeriod", "_typ", ("period",)) class _ABCGeneric(type): diff --git a/pandas/lib.pyx b/pandas/lib.pyx index 88c458ce95226..221ffe24a713b 100644 --- a/pandas/lib.pyx +++ b/pandas/lib.pyx @@ -4,6 +4,7 @@ import numpy as np from numpy cimport * +np.import_array() cdef extern from "numpy/arrayobject.h": cdef enum NPY_TYPES: @@ -234,8 +235,54 @@ cpdef checknull_old(object val): else: return util._checknull(val) +# ABCPeriod cannot be imported right away from pandas.core.common. +ABCPeriod = None def isscalar(object val): - return np.isscalar(val) or val is None or PyDateTime_Check(val) or PyDelta_Check(val) + """ + Return True if given value is scalar. + + This includes: + - numpy array scalar (e.g. np.int64) + - Python builtin numerics + - Python builtin byte arrays and strings + - None + - instances of datetime.datetime, datetime.date and datetime.time + - instances of datetime.timedelta + - instances of Period (matched via the ``ABCPeriod`` abstract type) + + """ + global ABCPeriod + if ABCPeriod is None: + from pandas.core.common import ABCPeriod as _ABCPeriod + ABCPeriod = _ABCPeriod + + return (np.PyArray_IsAnyScalar(val) + # As of numpy-1.9, PyArray_IsAnyScalar misses bytearrays on Py3. 
+ or PyBytes_Check(val) + or val is None + or PyDate_Check(val) + or PyDelta_Check(val) + or PyTime_Check(val) + or isinstance(val, ABCPeriod)) + + +def item_from_zerodim(object val): + """ + If the value is a zerodim array, return the item it contains. + + Examples + -------- + >>> item_from_zerodim(1) + 1 + >>> item_from_zerodim('foobar') + 'foobar' + >>> item_from_zerodim(np.array(1)) + 1 + >>> item_from_zerodim(np.array([1])) + array([1]) + + """ + return util.unbox_if_zerodim(val) @cython.wraparound(False) diff --git a/pandas/src/numpy_helper.h b/pandas/src/numpy_helper.h index 69b849de47fe7..8b79bbe79ff2f 100644 --- a/pandas/src/numpy_helper.h +++ b/pandas/src/numpy_helper.h @@ -167,6 +167,21 @@ void set_array_not_contiguous(PyArrayObject *ao) { } +// If arr is zerodim array, return a proper array scalar (e.g. np.int64). +// Otherwise, return arr as is. +PANDAS_INLINE PyObject* +unbox_if_zerodim(PyObject* arr) { + if (PyArray_IsZeroDim(arr)) { + PyObject *ret; + ret = PyArray_ToScalar(PyArray_DATA(arr), arr); + return ret; + } else { + Py_INCREF(arr); + return arr; + } +} + + // PANDAS_INLINE PyObject* // get_base_ndarray(PyObject* ap) { // // if (!ap || (NULL == ap)) { diff --git a/pandas/src/util.pxd b/pandas/src/util.pxd index cc1921e6367c5..eff1728c6921a 100644 --- a/pandas/src/util.pxd +++ b/pandas/src/util.pxd @@ -22,6 +22,7 @@ cdef extern from "numpy_helper.h": inline void transfer_object_column(char *dst, char *src, size_t stride, size_t length) object sarr_from_data(cnp.dtype, int length, void* data) + inline object unbox_if_zerodim(object arr) cdef inline object get_value_at(ndarray arr, object loc): cdef: @@ -64,7 +65,6 @@ cdef inline int is_contiguous(ndarray arr): cdef inline is_array(object o): return cnp.PyArray_Check(o) - cdef inline bint _checknull(object val): try: return val is None or (cpython.PyFloat_Check(val) and val != val) diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py index 
444eb87a399e5..4bc7084c93b6b 100644 --- a/pandas/tests/test_categorical.py +++ b/pandas/tests/test_categorical.py @@ -917,6 +917,12 @@ def test_datetime_categorical_comparison(self): self.assert_numpy_array_equal(dt_cat > dt_cat[0], [False, True, True]) self.assert_numpy_array_equal(dt_cat[0] < dt_cat, [False, True, True]) + def test_reflected_comparison_with_scalars(self): + # GH8658 + cat = pd.Categorical([1, 2, 3]) + self.assert_numpy_array_equal(cat > cat[0], [False, True, True]) + self.assert_numpy_array_equal(cat[0] < cat, [False, True, True]) + class TestCategoricalAsBlock(tm.TestCase): _multiprocess_can_split_ = True diff --git a/pandas/tests/test_lib.py b/pandas/tests/test_lib.py new file mode 100644 index 0000000000000..1b7b6c5c5ee4e --- /dev/null +++ b/pandas/tests/test_lib.py @@ -0,0 +1,72 @@ +from datetime import datetime, timedelta, date, time + +import numpy as np + +import pandas as pd +from pandas.lib import isscalar, item_from_zerodim +import pandas.util.testing as tm + + +class TestIsscalar(tm.TestCase): + def test_isscalar_builtin_scalars(self): + self.assertTrue(isscalar(None)) + self.assertTrue(isscalar(True)) + self.assertTrue(isscalar(False)) + self.assertTrue(isscalar(0.)) + self.assertTrue(isscalar(np.nan)) + self.assertTrue(isscalar('foobar')) + self.assertTrue(isscalar(b'foobar')) + self.assertTrue(isscalar(u'foobar')) + self.assertTrue(isscalar(datetime(2014, 1, 1))) + self.assertTrue(isscalar(date(2014, 1, 1))) + self.assertTrue(isscalar(time(12, 0))) + self.assertTrue(isscalar(timedelta(hours=1))) + self.assertTrue(isscalar(pd.NaT)) + + def test_isscalar_builtin_nonscalars(self): + self.assertFalse(isscalar({})) + self.assertFalse(isscalar([])) + self.assertFalse(isscalar([1])) + self.assertFalse(isscalar(())) + self.assertFalse(isscalar((1,))) + self.assertFalse(isscalar(slice(None))) + self.assertFalse(isscalar(Ellipsis)) + + def test_isscalar_numpy_array_scalars(self): + self.assertTrue(isscalar(np.int64(1))) + 
self.assertTrue(isscalar(np.float64(1.))) + self.assertTrue(isscalar(np.int32(1))) + self.assertTrue(isscalar(np.object_('foobar'))) + self.assertTrue(isscalar(np.str_('foobar'))) + self.assertTrue(isscalar(np.unicode_(u'foobar'))) + self.assertTrue(isscalar(np.bytes_(b'foobar'))) + self.assertTrue(isscalar(np.datetime64('2014-01-01'))) + self.assertTrue(isscalar(np.timedelta64(1, 'h'))) + + def test_isscalar_numpy_zerodim_arrays(self): + for zerodim in [np.array(1), + np.array('foobar'), + np.array(np.datetime64('2014-01-01')), + np.array(np.timedelta64(1, 'h'))]: + self.assertFalse(isscalar(zerodim)) + self.assertTrue(isscalar(item_from_zerodim(zerodim))) + + def test_isscalar_numpy_arrays(self): + self.assertFalse(isscalar(np.array([]))) + self.assertFalse(isscalar(np.array([[]]))) + self.assertFalse(isscalar(np.matrix('1; 2'))) + + def test_isscalar_pandas_scalars(self): + self.assertTrue(isscalar(pd.Timestamp('2014-01-01'))) + self.assertTrue(isscalar(pd.Timedelta(hours=1))) + self.assertTrue(isscalar(pd.Period('2014-01-01'))) + + def test_isscalar_pandas_containers(self): + self.assertFalse(isscalar(pd.Series())) + self.assertFalse(isscalar(pd.Series([1]))) + self.assertFalse(isscalar(pd.DataFrame())) + self.assertFalse(isscalar(pd.DataFrame([[1]]))) + self.assertFalse(isscalar(pd.Panel())) + self.assertFalse(isscalar(pd.Panel([[[1]]]))) + self.assertFalse(isscalar(pd.Index([]))) + self.assertFalse(isscalar(pd.Index([1]))) diff --git a/pandas/tseries/period.py b/pandas/tseries/period.py index cba449b9596e1..742d8651a4035 100644 --- a/pandas/tseries/period.py +++ b/pandas/tseries/period.py @@ -63,6 +63,7 @@ class Period(PandasObject): """ __slots__ = ['freq', 'ordinal'] _comparables = ['name','freqstr'] + _typ = 'period' @classmethod def _from_ordinal(cls, ordinal, freq): @@ -498,7 +499,6 @@ def strftime(self, fmt): base, mult = _gfc(self.freq) return tslib.period_format(self.ordinal, base, fmt) - def _get_ordinals(data, freq): f = lambda x: Period(x, 
freq=freq).ordinal if isinstance(data[0], Period):