diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 3f300deddebeb..ff3cdf60a2f8c 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -344,6 +344,7 @@ Reshaping - Bug in :func:`DataFrame.stack` which fails trying to sort mixed type levels under Python 3 (:issue:`18310`) - Fixed construction of a :class:`Series` from a ``dict`` containing ``NaN`` as key (:issue:`18480`) - Bug in :func:`Series.rank` where ``Series`` containing ``NaT`` modifies the ``Series`` inplace (:issue:`18521`) +- Bug in :func:`cut` which fails when using readonly arrays (:issue:`18773`) - Bug in :func:`Dataframe.pivot_table` which fails when the ``aggfunc`` arg is of type string. The behavior is now consistent with other methods like ``agg`` and ``apply`` (:issue:`18713`) diff --git a/pandas/_libs/hashtable_class_helper.pxi.in b/pandas/_libs/hashtable_class_helper.pxi.in index 6e1c4397810b7..bd9dd1f9bae37 100644 --- a/pandas/_libs/hashtable_class_helper.pxi.in +++ b/pandas/_libs/hashtable_class_helper.pxi.in @@ -255,10 +255,56 @@ dtypes = [('Float64', 'float64', 'val != val', True), ('UInt64', 'uint64', 'False', False), ('Int64', 'int64', 'val == iNaT', False)] +def get_dispatch(dtypes): + for (name, dtype, null_condition, float_group) in dtypes: + unique_template = """\ + cdef: + Py_ssize_t i, n = len(values) + int ret = 0 + {dtype}_t val + khiter_t k + bint seen_na = 0 + {name}Vector uniques = {name}Vector() + {name}VectorData *ud + + ud = uniques.data + + with nogil: + for i in range(n): + val = values[i] + IF {float_group}: + if val == val: + k = kh_get_{dtype}(self.table, val) + if k == self.table.n_buckets: + kh_put_{dtype}(self.table, val, &ret) + if needs_resize(ud): + with gil: + uniques.resize() + append_data_{dtype}(ud, val) + elif not seen_na: + seen_na = 1 + if needs_resize(ud): + with gil: + uniques.resize() + append_data_{dtype}(ud, NAN) + ELSE: + k = kh_get_{dtype}(self.table, val) + if k == self.table.n_buckets: + kh_put_{dtype}(self.table, val, &ret) + if needs_resize(ud): + with gil: + uniques.resize() + append_data_{dtype}(ud, val) + return uniques.to_array() + """ + + unique_template = unique_template.format(name=name, dtype=dtype, null_condition=null_condition, float_group=float_group) + + yield (name, dtype, null_condition, float_group, unique_template) }} -{{for name, dtype, null_condition, float_group in dtypes}} +{{for name, dtype, null_condition, float_group, unique_template in get_dispatch(dtypes)}} cdef class {{name}}HashTable(HashTable): @@ -450,48 +496,20 @@ cdef class {{name}}HashTable(HashTable): return np.asarray(labels), arr_uniques @cython.boundscheck(False) - def unique(self, {{dtype}}_t[:] values): - cdef: - Py_ssize_t i, n = len(values) - int ret = 0 - {{dtype}}_t val - khiter_t k - bint seen_na = 0 - {{name}}Vector uniques = {{name}}Vector() - {{name}}VectorData *ud + def unique(self, ndarray[{{dtype}}_t, ndim=1] values): + if values.flags.writeable: + # If the value is writeable (mutable) then use memview + return self.unique_memview(values) - ud = uniques.data - - with nogil: - for i in range(n): - val = values[i] - - {{if float_group}} - if val == val: - k = kh_get_{{dtype}}(self.table, val) - if k == self.table.n_buckets: - kh_put_{{dtype}}(self.table, val, &ret) - if needs_resize(ud): - with gil: - uniques.resize() - append_data_{{dtype}}(ud, val) - elif not seen_na: - seen_na = 1 - if needs_resize(ud): - with gil: - uniques.resize() - append_data_{{dtype}}(ud, NAN) - {{else}} - k = kh_get_{{dtype}}(self.table, val) - if k == self.table.n_buckets: - kh_put_{{dtype}}(self.table, val, &ret) - if needs_resize(ud): - with gil: - uniques.resize() - append_data_{{dtype}}(ud, val) - {{endif}} + # We cannot use the memoryview version on readonly-buffers due to + # a limitation of Cython's typed memoryviews. Instead we can use + # the slightly slower Cython ndarray type directly. + # see https://github.com/cython/cython/issues/1605 +{{unique_template}} - return uniques.to_array() + @cython.boundscheck(False) + def unique_memview(self, {{dtype}}_t[:] values): +{{unique_template}} {{endfor}} diff --git a/pandas/tests/reshape/test_tile.py b/pandas/tests/reshape/test_tile.py index c27af7a5bf8e4..48f25112b45cf 100644 --- a/pandas/tests/reshape/test_tile.py +++ b/pandas/tests/reshape/test_tile.py @@ -512,7 +512,18 @@ def f(): tm.assert_numpy_array_equal( mask, np.array([False, True, True, True, True])) + @pytest.mark.parametrize( + "array_1_writeable, array_2_writeable", + [(True, True), (True, False), (False, False)]) + def test_cut_read_only(self, array_1_writeable, array_2_writeable): + # issue 18773 + array_1 = np.arange(0, 100, 10) + array_1.flags.writeable = array_1_writeable -def curpath(): - pth, _ = os.path.split(os.path.abspath(__file__)) - return pth + array_2 = np.arange(0, 100, 10) + array_2.flags.writeable = array_2_writeable + + hundred_elements = np.arange(100) + + tm.assert_categorical_equal(cut(hundred_elements, array_1), + cut(hundred_elements, array_2))