Skip to content

Commit 03f4472

Browse files
committed
Merge branch 'master' of https://github.com/pandas-dev/pandas into udfs_poc
2 parents 29181a5 + ae049ae commit 03f4472

File tree

10 files changed

+76
-45
lines changed

10 files changed

+76
-45
lines changed

asv_bench/benchmarks/indexing_engines.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ def setup(self, engine_and_dtype, index_type):
4848
"non_monotonic": np.array([1, 2, 3] * N, dtype=dtype),
4949
}[index_type]
5050

51-
self.data = engine(lambda: arr, len(arr))
51+
self.data = engine(arr)
5252
# code below avoids populating the mapping etc. while timing.
5353
self.data.get_loc(2)
5454

@@ -70,7 +70,7 @@ def setup(self, index_type):
7070
"non_monotonic": np.array(list("abc") * N, dtype=object),
7171
}[index_type]
7272

73-
self.data = libindex.ObjectEngine(lambda: arr, len(arr))
73+
self.data = libindex.ObjectEngine(arr)
7474
# code below avoids populating the mapping etc. while timing.
7575
self.data.get_loc("b")
7676

doc/source/whatsnew/v1.4.0.rst

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -334,7 +334,7 @@ Other Deprecations
334334
- Deprecated the 'include_start' and 'include_end' arguments in :meth:`DataFrame.between_time`; in a future version passing 'include_start' or 'include_end' will raise (:issue:`40245`)
335335
- Deprecated the ``squeeze`` argument to :meth:`read_csv`, :meth:`read_table`, and :meth:`read_excel`. Users should squeeze the DataFrame afterwards with ``.squeeze("columns")`` instead. (:issue:`43242`)
336336
- Deprecated the ``index`` argument to :class:`SparseArray` construction (:issue:`23089`)
337-
-
337+
- Deprecated :meth:`.Rolling.validate`, :meth:`.Expanding.validate`, and :meth:`.ExponentialMovingWindow.validate` (:issue:`43665`)
338338

339339
.. ---------------------------------------------------------------------------
340340
@@ -486,6 +486,7 @@ Reshaping
486486
- Bug in :meth:`DataFrame.append` failing to retain dtypes when appended columns do not match (:issue:`43392`)
487487
- Bug in :func:`concat` of ``bool`` and ``boolean`` dtypes resulting in ``object`` dtype instead of ``boolean`` dtype (:issue:`42800`)
488488
- Bug in :func:`crosstab` when inputs are categorical Series, there are categories that are not present in one or both of the Series, and ``margins=True``. Previously the margin value for missing categories was ``NaN``. It is now correctly reported as 0 (:issue:`43505`)
489+
- Bug in :func:`concat` where it would fail when all of the ``objs`` had the same index and the ``keys`` argument contained duplicates (:issue:`43595`)
489490

490491
Sparse
491492
^^^^^^

pandas/_libs/index.pyi

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ from pandas import MultiIndex
66

77
class IndexEngine:
88
over_size_threshold: bool
9-
def __init__(self, vgetter, n: int): ...
9+
def __init__(self, values: np.ndarray): ...
1010
def __contains__(self, val: object) -> bool: ...
1111
# -> int | slice | np.ndarray[bool]
1212
def get_loc(self, val: object) -> int | slice | np.ndarray: ...

pandas/_libs/index.pyx

Lines changed: 9 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -53,18 +53,18 @@ _SIZE_CUTOFF = 1_000_000
5353
cdef class IndexEngine:
5454

5555
cdef readonly:
56-
object vgetter
56+
ndarray values
5757
HashTable mapping
5858
bint over_size_threshold
5959

6060
cdef:
6161
bint unique, monotonic_inc, monotonic_dec
6262
bint need_monotonic_check, need_unique_check
6363

64-
def __init__(self, vgetter, n):
65-
self.vgetter = vgetter
64+
def __init__(self, ndarray values):
65+
self.values = values
6666

67-
self.over_size_threshold = n >= _SIZE_CUTOFF
67+
self.over_size_threshold = len(values) >= _SIZE_CUTOFF
6868
self.clear_mapping()
6969

7070
def __contains__(self, val: object) -> bool:
@@ -214,8 +214,8 @@ cdef class IndexEngine:
214214
self.unique = 1
215215
self.need_unique_check = 0
216216

217-
cdef _get_index_values(self):
218-
return self.vgetter()
217+
cdef ndarray _get_index_values(self):
218+
return self.values
219219

220220
cdef _call_monotonic(self, values):
221221
return algos.is_monotonic(values, timelike=False)
@@ -438,8 +438,8 @@ cdef class DatetimeEngine(Int64Engine):
438438
self._ensure_mapping_populated()
439439
return conv in self.mapping
440440

441-
cdef _get_index_values(self):
442-
return self.vgetter().view('i8')
441+
cdef ndarray _get_index_values(self):
442+
return self.values.view('i8')
443443

444444
cdef _call_monotonic(self, values):
445445
return algos.is_monotonic(values, timelike=True)
@@ -537,9 +537,6 @@ cdef class PeriodEngine(Int64Engine):
537537

538538
return Int64Engine.get_loc(self, conv)
539539

540-
cdef _get_index_values(self):
541-
return super(PeriodEngine, self).vgetter().view("i8")
542-
543540
cdef _call_monotonic(self, values):
544541
return algos.is_monotonic(values, timelike=True)
545542

@@ -598,7 +595,7 @@ cdef class BaseMultiIndexCodesEngine:
598595

599596
# Initialize underlying index (e.g. libindex.UInt64Engine) with
600597
# integers representing labels: we will use its get_loc and get_indexer
601-
self._base.__init__(self, lambda: lab_ints, len(lab_ints))
598+
self._base.__init__(self, lab_ints)
602599

603600
def _codes_to_ints(self, ndarray[uint64_t] codes) -> np.ndarray:
604601
raise NotImplementedError("Implemented by subclass")

pandas/_libs/index_class_helper.pxi.in

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,6 @@ dtypes = [('Float64', 'float64'),
2828

2929

3030
cdef class {{name}}Engine(IndexEngine):
31-
# constructor-caller is responsible for ensuring that vgetter()
32-
# returns an ndarray with dtype {{dtype}}_t
3331

3432
cdef _make_hash_table(self, Py_ssize_t n):
3533
return _hash.{{name}}HashTable(n)

pandas/core/indexes/base.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -819,7 +819,7 @@ def _engine(self) -> libindex.IndexEngine:
819819
# to avoid a reference cycle, bind `target_values` to a local variable, so
820820
# `self` is not passed into the lambda.
821821
target_values = self._get_engine_target()
822-
return self._engine_type(lambda: target_values, len(self))
822+
return self._engine_type(target_values)
823823

824824
@final
825825
@cache_readonly

pandas/core/window/rolling.py

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -156,7 +156,7 @@ def __init__(
156156
)
157157

158158
self._selection = selection
159-
self.validate()
159+
self._validate()
160160

161161
@property
162162
def win_type(self):
@@ -180,6 +180,14 @@ def is_datetimelike(self) -> bool:
180180
return self._win_freq_i8 is not None
181181

182182
def validate(self) -> None:
183+
warnings.warn(
184+
"validate is deprecated and will be removed in a future version.",
185+
FutureWarning,
186+
stacklevel=2,
187+
)
188+
return self._validate()
189+
190+
def _validate(self) -> None:
183191
if self.center is not None and not is_bool(self.center):
184192
raise ValueError("center must be a boolean")
185193
if self.min_periods is not None:
@@ -960,8 +968,8 @@ class Window(BaseWindow):
960968
"method",
961969
]
962970

963-
def validate(self):
964-
super().validate()
971+
def _validate(self):
972+
super()._validate()
965973

966974
if not isinstance(self.win_type, str):
967975
raise ValueError(f"Invalid win_type {self.win_type}")
@@ -1528,8 +1536,8 @@ class Rolling(RollingAndExpandingMixin):
15281536
"method",
15291537
]
15301538

1531-
def validate(self):
1532-
super().validate()
1539+
def _validate(self):
1540+
super()._validate()
15331541

15341542
# we allow rolling on a datetimelike index
15351543
if (

pandas/tests/indexes/test_engines.py

Lines changed: 20 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -96,18 +96,18 @@ def test_is_monotonic(self, numeric_indexing_engine_type_and_dtype):
9696
arr = np.array([1] * num + [2] * num + [3] * num, dtype=dtype)
9797

9898
# monotonic increasing
99-
engine = engine_type(lambda: arr, len(arr))
99+
engine = engine_type(arr)
100100
assert engine.is_monotonic_increasing is True
101101
assert engine.is_monotonic_decreasing is False
102102

103103
# monotonic decreasing
104-
engine = engine_type(lambda: arr[::-1], len(arr))
104+
engine = engine_type(arr[::-1])
105105
assert engine.is_monotonic_increasing is False
106106
assert engine.is_monotonic_decreasing is True
107107

108108
# neither monotonic increasing nor decreasing
109109
arr = np.array([1] * num + [2] * num + [1] * num, dtype=dtype)
110-
engine = engine_type(lambda: arr[::-1], len(arr))
110+
engine = engine_type(arr[::-1])
111111
assert engine.is_monotonic_increasing is False
112112
assert engine.is_monotonic_decreasing is False
113113

@@ -116,31 +116,31 @@ def test_is_unique(self, numeric_indexing_engine_type_and_dtype):
116116

117117
# unique
118118
arr = np.array([1, 3, 2], dtype=dtype)
119-
engine = engine_type(lambda: arr, len(arr))
119+
engine = engine_type(arr)
120120
assert engine.is_unique is True
121121

122122
# not unique
123123
arr = np.array([1, 2, 1], dtype=dtype)
124-
engine = engine_type(lambda: arr, len(arr))
124+
engine = engine_type(arr)
125125
assert engine.is_unique is False
126126

127127
def test_get_loc(self, numeric_indexing_engine_type_and_dtype):
128128
engine_type, dtype = numeric_indexing_engine_type_and_dtype
129129

130130
# unique
131131
arr = np.array([1, 2, 3], dtype=dtype)
132-
engine = engine_type(lambda: arr, len(arr))
132+
engine = engine_type(arr)
133133
assert engine.get_loc(2) == 1
134134

135135
# monotonic
136136
num = 1000
137137
arr = np.array([1] * num + [2] * num + [3] * num, dtype=dtype)
138-
engine = engine_type(lambda: arr, len(arr))
138+
engine = engine_type(arr)
139139
assert engine.get_loc(2) == slice(1000, 2000)
140140

141141
# not monotonic
142142
arr = np.array([1, 2, 3] * num, dtype=dtype)
143-
engine = engine_type(lambda: arr, len(arr))
143+
engine = engine_type(arr)
144144
expected = np.array([False, True, False] * num, dtype=bool)
145145
result = engine.get_loc(2)
146146
assert (result == expected).all()
@@ -149,7 +149,7 @@ def test_get_backfill_indexer(self, numeric_indexing_engine_type_and_dtype):
149149
engine_type, dtype = numeric_indexing_engine_type_and_dtype
150150

151151
arr = np.array([1, 5, 10], dtype=dtype)
152-
engine = engine_type(lambda: arr, len(arr))
152+
engine = engine_type(arr)
153153

154154
new = np.arange(12, dtype=dtype)
155155
result = engine.get_backfill_indexer(new)
@@ -161,7 +161,7 @@ def test_get_pad_indexer(self, numeric_indexing_engine_type_and_dtype):
161161
engine_type, dtype = numeric_indexing_engine_type_and_dtype
162162

163163
arr = np.array([1, 5, 10], dtype=dtype)
164-
engine = engine_type(lambda: arr, len(arr))
164+
engine = engine_type(arr)
165165

166166
new = np.arange(12, dtype=dtype)
167167
result = engine.get_pad_indexer(new)
@@ -181,54 +181,54 @@ def test_is_monotonic(self):
181181
arr = np.array(["a"] * num + ["a"] * num + ["c"] * num, dtype=self.dtype)
182182

183183
# monotonic increasing
184-
engine = self.engine_type(lambda: arr, len(arr))
184+
engine = self.engine_type(arr)
185185
assert engine.is_monotonic_increasing is True
186186
assert engine.is_monotonic_decreasing is False
187187

188188
# monotonic decreasing
189-
engine = self.engine_type(lambda: arr[::-1], len(arr))
189+
engine = self.engine_type(arr[::-1])
190190
assert engine.is_monotonic_increasing is False
191191
assert engine.is_monotonic_decreasing is True
192192

193193
# neither monotonic increasing nor decreasing
194194
arr = np.array(["a"] * num + ["b"] * num + ["a"] * num, dtype=self.dtype)
195-
engine = self.engine_type(lambda: arr[::-1], len(arr))
195+
engine = self.engine_type(arr[::-1])
196196
assert engine.is_monotonic_increasing is False
197197
assert engine.is_monotonic_decreasing is False
198198

199199
def test_is_unique(self):
200200
# unique
201201
arr = np.array(self.values, dtype=self.dtype)
202-
engine = self.engine_type(lambda: arr, len(arr))
202+
engine = self.engine_type(arr)
203203
assert engine.is_unique is True
204204

205205
# not unique
206206
arr = np.array(["a", "b", "a"], dtype=self.dtype)
207-
engine = self.engine_type(lambda: arr, len(arr))
207+
engine = self.engine_type(arr)
208208
assert engine.is_unique is False
209209

210210
def test_get_loc(self):
211211
# unique
212212
arr = np.array(self.values, dtype=self.dtype)
213-
engine = self.engine_type(lambda: arr, len(arr))
213+
engine = self.engine_type(arr)
214214
assert engine.get_loc("b") == 1
215215

216216
# monotonic
217217
num = 1000
218218
arr = np.array(["a"] * num + ["b"] * num + ["c"] * num, dtype=self.dtype)
219-
engine = self.engine_type(lambda: arr, len(arr))
219+
engine = self.engine_type(arr)
220220
assert engine.get_loc("b") == slice(1000, 2000)
221221

222222
# not monotonic
223223
arr = np.array(self.values * num, dtype=self.dtype)
224-
engine = self.engine_type(lambda: arr, len(arr))
224+
engine = self.engine_type(arr)
225225
expected = np.array([False, True, False] * num, dtype=bool)
226226
result = engine.get_loc("b")
227227
assert (result == expected).all()
228228

229229
def test_get_backfill_indexer(self):
230230
arr = np.array(["a", "e", "j"], dtype=self.dtype)
231-
engine = self.engine_type(lambda: arr, len(arr))
231+
engine = self.engine_type(arr)
232232

233233
new = np.array(list("abcdefghij"), dtype=self.dtype)
234234
result = engine.get_backfill_indexer(new)
@@ -238,7 +238,7 @@ def test_get_backfill_indexer(self):
238238

239239
def test_get_pad_indexer(self):
240240
arr = np.array(["a", "e", "j"], dtype=self.dtype)
241-
engine = self.engine_type(lambda: arr, len(arr))
241+
engine = self.engine_type(arr)
242242

243243
new = np.array(list("abcdefghij"), dtype=self.dtype)
244244
result = engine.get_pad_indexer(new)

pandas/tests/reshape/concat/test_concat.py

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,16 @@
33
deque,
44
)
55
from decimal import Decimal
6-
from warnings import catch_warnings
6+
from warnings import (
7+
catch_warnings,
8+
simplefilter,
9+
)
710

811
import numpy as np
912
import pytest
1013

14+
from pandas.errors import PerformanceWarning
15+
1116
import pandas as pd
1217
from pandas import (
1318
DataFrame,
@@ -560,6 +565,22 @@ def test_duplicate_keys(keys):
560565
tm.assert_frame_equal(result, expected)
561566

562567

568+
def test_duplicate_keys_same_frame():
569+
# GH 43595
570+
keys = ["e", "e"]
571+
df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
572+
result = concat([df, df], axis=1, keys=keys)
573+
expected_values = [[1, 4, 1, 4], [2, 5, 2, 5], [3, 6, 3, 6]]
574+
expected_columns = MultiIndex.from_tuples(
575+
[(keys[0], "a"), (keys[0], "b"), (keys[1], "a"), (keys[1], "b")]
576+
)
577+
expected = DataFrame(expected_values, columns=expected_columns)
578+
with catch_warnings():
579+
# result.columns not sorted, resulting in performance warning
580+
simplefilter("ignore", PerformanceWarning)
581+
tm.assert_frame_equal(result, expected)
582+
583+
563584
@pytest.mark.parametrize(
564585
"obj",
565586
[

pandas/tests/window/test_api.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -339,6 +339,12 @@ def test_is_datetimelike_deprecated():
339339
assert not s.is_datetimelike
340340

341341

342+
def test_validate_deprecated():
343+
s = Series(range(1)).rolling(1)
344+
with tm.assert_produces_warning(FutureWarning):
345+
assert s.validate() is None
346+
347+
342348
@pytest.mark.filterwarnings("ignore:min_periods:FutureWarning")
343349
def test_dont_modify_attributes_after_methods(
344350
arithmetic_win_operators, closed, center, min_periods

0 commit comments

Comments
 (0)