Skip to content

Commit 5232375

Browse files
committed
Merge remote-tracking branch 'upstream/main'
2 parents 84b119f + eaefc5c commit 5232375

File tree

8 files changed

+108
-91
lines changed

8 files changed

+108
-91
lines changed

doc/source/user_guide/index.rst

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,43 @@ For a high level summary of the pandas fundamentals, see :ref:`dsintro` and :ref
1717
Further information on any specific method can be obtained in the
1818
:ref:`api`.
1919

20+
How to read these guides
21+
------------------------
22+
In these guides you will see input code inside code blocks such as:
23+
24+
::
25+
26+
import pandas as pd
27+
pd.DataFrame({'A': [1, 2, 3]})
28+
29+
30+
or:
31+
32+
.. ipython:: python
33+
34+
import pandas as pd
35+
pd.DataFrame({'A': [1, 2, 3]})
36+
37+
The first block is standard Python input, while in the second the ``In [1]:`` prompt indicates that the input is inside a `notebook <https://jupyter.org>`__. In Jupyter notebooks the last line is printed and plots are shown inline.
38+
39+
For example:
40+
41+
.. ipython:: python
42+
43+
a = 1
44+
a
45+
is equivalent to:
46+
47+
::
48+
49+
a = 1
50+
print(a)
51+
52+
53+
54+
Guides
55+
-------
56+
2057
.. If you update this toctree, also update the manual toctree in the
2158
main index.rst.template
2259

doc/source/user_guide/indexing.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,7 @@ Getting values from an object with multi-axes selection uses the following
8989
notation (using ``.loc`` as an example, but the following applies to ``.iloc`` as
9090
well). Any of the axes accessors may be the null slice ``:``. Axes left out of
9191
the specification are assumed to be ``:``, e.g. ``p.loc['a']`` is equivalent to
92-
``p.loc['a', :, :]``.
92+
``p.loc['a', :]``.
9393

9494
.. csv-table::
9595
:header: "Object Type", "Indexers"

pandas/_libs/hashtable_func_helper.pxi.in

Lines changed: 24 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ dtypes = [('Complex128', 'complex128', 'complex128',
1717
('UInt32', 'uint32', 'uint32', 'uint32_t', ''),
1818
('UInt16', 'uint16', 'uint16', 'uint16_t', ''),
1919
('UInt8', 'uint8', 'uint8', 'uint8_t', ''),
20-
('Object', 'object', 'pymap', 'object', ''),
20+
('Object', 'object', 'pymap', 'object', '<PyObject*>'),
2121
('Int64', 'int64', 'int64', 'int64_t', ''),
2222
('Int32', 'int32', 'int32', 'int32_t', ''),
2323
('Int16', 'int16', 'int16', 'int16_t', ''),
@@ -61,11 +61,11 @@ cdef value_count_{{dtype}}(const {{dtype}}_t[:] values, bint dropna):
6161
for i in range(n):
6262
val = values[i]
6363
if not dropna or not checknull(val):
64-
k = kh_get_{{ttype}}(table, <PyObject*>val)
64+
k = kh_get_{{ttype}}(table, {{to_c_type}}val)
6565
if k != table.n_buckets:
6666
table.vals[k] += 1
6767
else:
68-
k = kh_put_{{ttype}}(table, <PyObject*>val, &ret)
68+
k = kh_put_{{ttype}}(table, {{to_c_type}}val, &ret)
6969
table.vals[k] = 1
7070
result_keys.append(val)
7171
{{else}}
@@ -110,6 +110,8 @@ cdef duplicated_{{dtype}}(const {{dtype}}_t[:] values, object keep='first'):
110110
int ret = 0
111111
{{if dtype != 'object'}}
112112
{{c_type}} value
113+
{{else}}
114+
PyObject* value
113115
{{endif}}
114116
Py_ssize_t i, n = len(values)
115117
khiter_t k
@@ -123,44 +125,33 @@ cdef duplicated_{{dtype}}(const {{dtype}}_t[:] values, object keep='first'):
123125

124126
if keep == 'last':
125127
{{if dtype == 'object'}}
126-
for i in range(n - 1, -1, -1):
127-
# equivalent: range(n)[::-1], which cython doesn't like in nogil
128-
kh_put_{{ttype}}(table, <PyObject*>values[i], &ret)
129-
out[i] = ret == 0
128+
if True:
130129
{{else}}
131130
with nogil:
131+
{{endif}}
132132
for i in range(n - 1, -1, -1):
133133
# equivalent: range(n)[::-1], which cython doesn't like in nogil
134134
value = {{to_c_type}}(values[i])
135135
kh_put_{{ttype}}(table, value, &ret)
136136
out[i] = ret == 0
137-
{{endif}}
137+
138138
elif keep == 'first':
139139
{{if dtype == 'object'}}
140-
for i in range(n):
141-
kh_put_{{ttype}}(table, <PyObject*>values[i], &ret)
142-
out[i] = ret == 0
140+
if True:
143141
{{else}}
144142
with nogil:
143+
{{endif}}
145144
for i in range(n):
146145
value = {{to_c_type}}(values[i])
147146
kh_put_{{ttype}}(table, value, &ret)
148147
out[i] = ret == 0
149-
{{endif}}
148+
150149
else:
151150
{{if dtype == 'object'}}
152-
for i in range(n):
153-
value = values[i]
154-
k = kh_get_{{ttype}}(table, <PyObject*>value)
155-
if k != table.n_buckets:
156-
out[table.vals[k]] = 1
157-
out[i] = 1
158-
else:
159-
k = kh_put_{{ttype}}(table, <PyObject*>value, &ret)
160-
table.vals[k] = i
161-
out[i] = 0
151+
if True:
162152
{{else}}
163153
with nogil:
154+
{{endif}}
164155
for i in range(n):
165156
value = {{to_c_type}}(values[i])
166157
k = kh_get_{{ttype}}(table, value)
@@ -171,7 +162,7 @@ cdef duplicated_{{dtype}}(const {{dtype}}_t[:] values, object keep='first'):
171162
k = kh_put_{{ttype}}(table, value, &ret)
172163
table.vals[k] = i
173164
out[i] = 0
174-
{{endif}}
165+
175166
kh_destroy_{{ttype}}(table)
176167
return out
177168

@@ -206,39 +197,41 @@ cdef ismember_{{dtype}}(const {{dtype}}_t[:] arr, const {{dtype}}_t[:] values):
206197
khiter_t k
207198
int ret = 0
208199
ndarray[uint8_t] result
200+
201+
{{if dtype == "object"}}
202+
PyObject* val
203+
{{else}}
209204
{{c_type}} val
205+
{{endif}}
206+
210207
kh_{{ttype}}_t *table = kh_init_{{ttype}}()
211208

212209
# construct the table
213210
n = len(values)
214211
kh_resize_{{ttype}}(table, n)
215212

216213
{{if dtype == 'object'}}
217-
for i in range(n):
218-
kh_put_{{ttype}}(table, <PyObject*>values[i], &ret)
214+
if True:
219215
{{else}}
220216
with nogil:
217+
{{endif}}
221218
for i in range(n):
222219
val = {{to_c_type}}(values[i])
223220
kh_put_{{ttype}}(table, val, &ret)
224-
{{endif}}
225221

226222
# test membership
227223
n = len(arr)
228224
result = np.empty(n, dtype=np.uint8)
229225

230226
{{if dtype == 'object'}}
231-
for i in range(n):
232-
val = arr[i]
233-
k = kh_get_{{ttype}}(table, <PyObject*>val)
234-
result[i] = (k != table.n_buckets)
227+
if True:
235228
{{else}}
236229
with nogil:
230+
{{endif}}
237231
for i in range(n):
238232
val = {{to_c_type}}(arr[i])
239233
k = kh_get_{{ttype}}(table, val)
240234
result[i] = (k != table.n_buckets)
241-
{{endif}}
242235

243236
kh_destroy_{{ttype}}(table)
244237
return result.view(np.bool_)

pandas/tests/arrays/numpy_/__init__.py

Whitespace-only changes.
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
import numpy as np
2+
3+
from pandas.core.dtypes.common import is_scalar
4+
5+
import pandas as pd
6+
import pandas._testing as tm
7+
8+
9+
class TestSearchsorted:
10+
def test_searchsorted_numeric_dtypes_scalar(self, any_real_numpy_dtype):
11+
arr = pd.array([1, 3, 90], dtype=any_real_numpy_dtype)
12+
result = arr.searchsorted(30)
13+
assert is_scalar(result)
14+
assert result == 2
15+
16+
result = arr.searchsorted([30])
17+
expected = np.array([2], dtype=np.intp)
18+
tm.assert_numpy_array_equal(result, expected)
19+
20+
def test_searchsorted_numeric_dtypes_vector(self, any_real_numpy_dtype):
21+
arr = pd.array([1, 3, 90], dtype=any_real_numpy_dtype)
22+
result = arr.searchsorted([2, 30])
23+
expected = np.array([1, 2], dtype=np.intp)
24+
tm.assert_numpy_array_equal(result, expected)
25+
26+
def test_searchsorted_sorter(self, any_real_numpy_dtype):
27+
arr = pd.array([3, 1, 2], dtype=any_real_numpy_dtype)
28+
result = arr.searchsorted([0, 3], sorter=np.argsort(arr))
29+
expected = np.array([0, 2], dtype=np.intp)
30+
tm.assert_numpy_array_equal(result, expected)
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
from pandas.core.dtypes.common import is_scalar
2+
3+
import pandas as pd
4+
5+
6+
class TestSearchsorted:
7+
def test_searchsorted(self, string_dtype):
8+
arr = pd.array(["a", "b", "c"], dtype=string_dtype)
9+
10+
result = arr.searchsorted("a", side="left")
11+
assert is_scalar(result)
12+
assert result == 0
13+
14+
result = arr.searchsorted("a", side="right")
15+
assert is_scalar(result)
16+
assert result == 1

pandas/tests/arrays/test_array.py

Lines changed: 0 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@
1010
import pandas as pd
1111
import pandas._testing as tm
1212
from pandas.api.extensions import register_extension_dtype
13-
from pandas.api.types import is_scalar
1413
from pandas.arrays import (
1514
BooleanArray,
1615
DatetimeArray,
@@ -391,61 +390,3 @@ def test_array_not_registered(registry_without_decimal):
391390
result = pd.array(data, dtype=DecimalDtype)
392391
expected = DecimalArray._from_sequence(data)
393392
tm.assert_equal(result, expected)
394-
395-
396-
class TestArrayAnalytics:
397-
def test_searchsorted(self, string_dtype):
398-
arr = pd.array(["a", "b", "c"], dtype=string_dtype)
399-
400-
result = arr.searchsorted("a", side="left")
401-
assert is_scalar(result)
402-
assert result == 0
403-
404-
result = arr.searchsorted("a", side="right")
405-
assert is_scalar(result)
406-
assert result == 1
407-
408-
def test_searchsorted_numeric_dtypes_scalar(self, any_real_numpy_dtype):
409-
arr = pd.array([1, 3, 90], dtype=any_real_numpy_dtype)
410-
result = arr.searchsorted(30)
411-
assert is_scalar(result)
412-
assert result == 2
413-
414-
result = arr.searchsorted([30])
415-
expected = np.array([2], dtype=np.intp)
416-
tm.assert_numpy_array_equal(result, expected)
417-
418-
def test_searchsorted_numeric_dtypes_vector(self, any_real_numpy_dtype):
419-
arr = pd.array([1, 3, 90], dtype=any_real_numpy_dtype)
420-
result = arr.searchsorted([2, 30])
421-
expected = np.array([1, 2], dtype=np.intp)
422-
tm.assert_numpy_array_equal(result, expected)
423-
424-
@pytest.mark.parametrize(
425-
"arr, val",
426-
[
427-
[
428-
pd.date_range("20120101", periods=10, freq="2D"),
429-
pd.Timestamp("20120102"),
430-
],
431-
[
432-
pd.date_range("20120101", periods=10, freq="2D", tz="Asia/Hong_Kong"),
433-
pd.Timestamp("20120102", tz="Asia/Hong_Kong"),
434-
],
435-
[
436-
pd.timedelta_range(start="1 day", end="10 days", periods=10),
437-
pd.Timedelta("2 days"),
438-
],
439-
],
440-
)
441-
def test_search_sorted_datetime64_scalar(self, arr, val):
442-
arr = pd.array(arr)
443-
result = arr.searchsorted(val)
444-
assert is_scalar(result)
445-
assert result == 1
446-
447-
def test_searchsorted_sorter(self, any_real_numpy_dtype):
448-
arr = pd.array([3, 1, 2], dtype=any_real_numpy_dtype)
449-
result = arr.searchsorted([0, 3], sorter=np.argsort(arr))
450-
expected = np.array([0, 2], dtype=np.intp)
451-
tm.assert_numpy_array_equal(result, expected)

0 commit comments

Comments
 (0)