
Commit 575645f

fix conflict with master

2 parents: f8191f8 + 4007513

Note: for large commits some file diffs are hidden by default, so only a subset of the changed files is shown below.

50 files changed: +325 lines, -207 lines

.github/workflows/ci.yml

Lines changed: 8 additions & 11 deletions
@@ -155,25 +155,16 @@ jobs:
       run: |
         source activate pandas-dev

-        pytest pandas/tests/frame/methods
-        pytest pandas/tests/frame/test_constructors.py
-        pytest pandas/tests/frame/test_*
-        pytest pandas/tests/frame/test_reductions.py
+        pytest pandas/tests/frame/
         pytest pandas/tests/reductions/
         pytest pandas/tests/generic/test_generic.py
         pytest pandas/tests/arithmetic/
         pytest pandas/tests/groupby/
         pytest pandas/tests/resample/
         pytest pandas/tests/reshape/merge
-
-        pytest pandas/tests/series/methods
-        pytest pandas/tests/series/test_*
+        pytest pandas/tests/series/

         # indexing subset (temporary since other tests don't pass yet)
-        pytest pandas/tests/frame/indexing/test_indexing.py::TestDataFrameIndexing::test_setitem_boolean
-        pytest pandas/tests/frame/indexing/test_where.py
-        pytest pandas/tests/frame/indexing/test_setitem.py::TestDataFrameSetItem::test_setitem_multi_index
-        pytest pandas/tests/frame/indexing/test_setitem.py::TestDataFrameSetItem::test_setitem_listlike_indexer_duplicate_columns
         pytest pandas/tests/indexing/multiindex/test_setitem.py::TestMultiIndexSetItem::test_astype_assignment_with_dups
         pytest pandas/tests/indexing/multiindex/test_setitem.py::TestMultiIndexSetItem::test_frame_setitem_multi_column

@@ -185,6 +176,12 @@ jobs:
         pytest pandas/tests/dtypes/
         pytest pandas/tests/generic/
         pytest pandas/tests/indexes/
+        pytest pandas/tests/io/test_* -m "not slow and not clipboard"
+        pytest pandas/tests/io/excel/ -m "not slow and not clipboard"
+        pytest pandas/tests/io/formats/ -m "not slow and not clipboard"
+        pytest pandas/tests/io/parser/ -m "not slow and not clipboard"
+        pytest pandas/tests/io/sas/ -m "not slow and not clipboard"
+        pytest pandas/tests/io/xml/ -m "not slow and not clipboard"
         pytest pandas/tests/libs/
         pytest pandas/tests/plotting/
         pytest pandas/tests/scalar/
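The consolidated io test runs rely on pytest marker expressions: -m "not slow and not clipboard" deselects every test carrying the slow or clipboard marker. A minimal sketch of how such markers attach to tests, assuming the markers are registered in the project's pytest configuration (the test names here are made up for illustration):

import pytest


@pytest.mark.slow
def test_large_roundtrip():
    # Deselected by -m "not slow and not clipboard".
    ...


@pytest.mark.clipboard
def test_clipboard_roundtrip():
    # Also deselected by the marker expression used in CI.
    ...


def test_fast_path():
    # Unmarked tests still run under the filtered invocation.
    assert 1 + 1 == 2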

doc/source/_static/css/pandas.css

Lines changed: 1 addition & 1 deletion
@@ -2,7 +2,7 @@

 :root {
   /* Use softer blue from bootstrap's default info color */
-  --color-info: 23, 162, 184;
+  --pst-color-info: 23, 162, 184;
 }

 /* Getting started index page */

environment.yml

Lines changed: 1 addition & 1 deletion
@@ -113,5 +113,5 @@ dependencies:
   - tabulate>=0.8.3  # DataFrame.to_markdown
   - natsort  # DataFrame.sort_values
   - pip:
-      - git+https://github.com/pandas-dev/pydata-sphinx-theme.git@2488b7defbd3d753dd5fcfc890fc4a7e79d25103
+      - git+https://github.com/pydata/pydata-sphinx-theme.git@master
       - numpydoc < 1.2  # 2021-02-09 1.2dev breaking CI

pandas/core/algorithms.py

Lines changed: 10 additions & 30 deletions
@@ -1635,10 +1635,10 @@ def diff(arr, n: int, axis: int = 0, stacklevel=3):

     Parameters
     ----------
-    arr : ndarray
+    arr : ndarray or ExtensionArray
     n : int
         number of periods
-    axis : int
+    axis : {0, 1}
         axis to shift on
     stacklevel : int
         The stacklevel for the lost dtype warning.

@@ -1652,7 +1652,8 @@ def diff(arr, n: int, axis: int = 0, stacklevel=3):
     na = np.nan
     dtype = arr.dtype

-    if dtype.kind == "b":
+    is_bool = is_bool_dtype(dtype)
+    if is_bool:
         op = operator.xor
     else:
         op = operator.sub

@@ -1678,17 +1679,15 @@ def diff(arr, n: int, axis: int = 0, stacklevel=3):
     dtype = arr.dtype

     is_timedelta = False
-    is_bool = False
     if needs_i8_conversion(arr.dtype):
         dtype = np.int64
         arr = arr.view("i8")
         na = iNaT
         is_timedelta = True

-    elif is_bool_dtype(dtype):
+    elif is_bool:
         # We have to cast in order to be able to hold np.nan
         dtype = np.object_
-        is_bool = True

     elif is_integer_dtype(dtype):
         # We have to cast in order to be able to hold np.nan

@@ -1709,45 +1708,26 @@ def diff(arr, n: int, axis: int = 0, stacklevel=3):
     dtype = np.dtype(dtype)
     out_arr = np.empty(arr.shape, dtype=dtype)

-    na_indexer = [slice(None)] * arr.ndim
+    na_indexer = [slice(None)] * 2
     na_indexer[axis] = slice(None, n) if n >= 0 else slice(n, None)
     out_arr[tuple(na_indexer)] = na

-    if arr.ndim == 2 and arr.dtype.name in _diff_special:
+    if arr.dtype.name in _diff_special:
         # TODO: can diff_2d dtype specialization troubles be fixed by defining
         # out_arr inside diff_2d?
         algos.diff_2d(arr, out_arr, n, axis, datetimelike=is_timedelta)
     else:
         # To keep mypy happy, _res_indexer is a list while res_indexer is
         # a tuple, ditto for lag_indexer.
-        _res_indexer = [slice(None)] * arr.ndim
+        _res_indexer = [slice(None)] * 2
         _res_indexer[axis] = slice(n, None) if n >= 0 else slice(None, n)
         res_indexer = tuple(_res_indexer)

-        _lag_indexer = [slice(None)] * arr.ndim
+        _lag_indexer = [slice(None)] * 2
         _lag_indexer[axis] = slice(None, -n) if n > 0 else slice(-n, None)
         lag_indexer = tuple(_lag_indexer)

-        # need to make sure that we account for na for datelike/timedelta
-        # we don't actually want to subtract these i8 numbers
-        if is_timedelta:
-            res = arr[res_indexer]
-            lag = arr[lag_indexer]
-
-            mask = (arr[res_indexer] == na) | (arr[lag_indexer] == na)
-            if mask.any():
-                res = res.copy()
-                res[mask] = 0
-                lag = lag.copy()
-                lag[mask] = 0
-
-            result = res - lag
-            result[mask] = na
-            out_arr[res_indexer] = result
-        elif is_bool:
-            out_arr[res_indexer] = arr[res_indexer] ^ arr[lag_indexer]
-        else:
-            out_arr[res_indexer] = arr[res_indexer] - arr[lag_indexer]
+        out_arr[res_indexer] = op(arr[res_indexer], arr[lag_indexer])

     if is_timedelta:
         out_arr = out_arr.view("timedelta64[ns]")
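The algorithms.py change collapses the separate timedelta/bool/other branches into a single operator chosen up front: XOR for boolean input, where subtraction is not meaningful, and plain subtraction otherwise. A toy 1-D sketch of that idea, not the pandas implementation itself (simple_diff is a hypothetical helper, n >= 1 only):

import operator

import numpy as np


def simple_diff(arr: np.ndarray, n: int = 1) -> np.ndarray:
    """Toy 1-D diff: XOR for boolean input, subtraction otherwise."""
    op = operator.xor if arr.dtype.kind == "b" else operator.sub
    # Object dtype so the leading n slots can hold np.nan regardless of input dtype.
    out = np.empty(arr.shape, dtype=object)
    out[:n] = np.nan
    out[n:] = op(arr[n:], arr[:-n])
    return out


simple_diff(np.array([True, False, False, True]))  # [nan, True, False, True]
simple_diff(np.array([1, 3, 6, 10]))               # [nan, 2, 3, 4]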

pandas/core/frame.py

Lines changed: 3 additions & 3 deletions
@@ -3784,7 +3784,7 @@ def _box_col_values(self, values, loc: int) -> Series:
     # Unsorted

     def query(self, expr: str, inplace: bool = False, **kwargs):
-        """
+        r"""
         Query the columns of a DataFrame with a boolean expression.

         Parameters

@@ -3799,8 +3799,8 @@ def query(self, expr: str, inplace: bool = False, **kwargs):
         You can refer to column names that are not valid Python variable names
         by surrounding them in backticks. Thus, column names containing spaces
         or punctuations (besides underscores) or starting with digits must be
-        surrounded by backticks. (For example, a column named "Area (cm^2) would
-        be referenced as `Area (cm^2)`). Column names which are Python keywords
+        surrounded by backticks. (For example, a column named "Area (cm^2)" would
+        be referenced as \`Area (cm^2)\`). Column names which are Python keywords
         (like "list", "for", "import", etc) cannot be used.

         For example, if one of your columns is called ``a a`` and you want
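The docstring fix above concerns backtick-quoting of non-identifier column names in DataFrame.query; a quick example of the behavior it describes:

import pandas as pd

df = pd.DataFrame({"Area (cm^2)": [1.0, 4.0, 9.0], "id": [1, 2, 3]})

# Column names that are not valid Python identifiers are wrapped in backticks.
df.query("`Area (cm^2)` > 2.0")
#    Area (cm^2)  id
# 1          4.0   2
# 2          9.0   3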

pandas/core/groupby/groupby.py

Lines changed: 18 additions & 23 deletions
@@ -46,6 +46,7 @@ class providing the base-class of operations.
 )
 import pandas._libs.groupby as libgroupby
 from pandas._typing import (
+    ArrayLike,
     F,
     FrameOrSeries,
     FrameOrSeriesUnion,

@@ -68,7 +69,6 @@ class providing the base-class of operations.
     ensure_float,
     is_bool_dtype,
     is_datetime64_dtype,
-    is_extension_array_dtype,
     is_integer_dtype,
     is_numeric_dtype,
     is_object_dtype,

@@ -85,6 +85,7 @@ class providing the base-class of operations.
 from pandas.core.arrays import (
     Categorical,
     DatetimeArray,
+    ExtensionArray,
 )
 from pandas.core.base import (
     DataError,

@@ -2265,37 +2266,31 @@ def quantile(self, q=0.5, interpolation: str = "linear"):
         """
         from pandas import concat

-        def pre_processor(vals: np.ndarray) -> Tuple[np.ndarray, Optional[Type]]:
+        def pre_processor(vals: ArrayLike) -> Tuple[np.ndarray, Optional[np.dtype]]:
             if is_object_dtype(vals):
                 raise TypeError(
                     "'quantile' cannot be performed against 'object' dtypes!"
                 )

-            inference = None
+            inference: Optional[np.dtype] = None
             if is_integer_dtype(vals.dtype):
-                if is_extension_array_dtype(vals.dtype):
-                    # error: "ndarray" has no attribute "to_numpy"
-                    vals = vals.to_numpy(  # type: ignore[attr-defined]
-                        dtype=float, na_value=np.nan
-                    )
-                inference = np.int64
-            elif is_bool_dtype(vals.dtype) and is_extension_array_dtype(vals.dtype):
-                # error: "ndarray" has no attribute "to_numpy"
-                vals = vals.to_numpy(  # type: ignore[attr-defined]
-                    dtype=float, na_value=np.nan
-                )
+                if isinstance(vals, ExtensionArray):
+                    out = vals.to_numpy(dtype=float, na_value=np.nan)
+                else:
+                    out = vals
+                inference = np.dtype(np.int64)
+            elif is_bool_dtype(vals.dtype) and isinstance(vals, ExtensionArray):
+                out = vals.to_numpy(dtype=float, na_value=np.nan)
             elif is_datetime64_dtype(vals.dtype):
-                # error: Incompatible types in assignment (expression has type
-                # "str", variable has type "Optional[Type[int64]]")
-                inference = "datetime64[ns]"  # type: ignore[assignment]
-                vals = np.asarray(vals).astype(float)
+                inference = np.dtype("datetime64[ns]")
+                out = np.asarray(vals).astype(float)
             elif is_timedelta64_dtype(vals.dtype):
-                # error: Incompatible types in assignment (expression has type "str",
-                # variable has type "Optional[Type[signedinteger[Any]]]")
-                inference = "timedelta64[ns]"  # type: ignore[assignment]
-                vals = np.asarray(vals).astype(float)
+                inference = np.dtype("timedelta64[ns]")
+                out = np.asarray(vals).astype(float)
+            else:
+                out = np.asarray(vals)

-            return vals, inference
+            return out, inference

         def post_processor(vals: np.ndarray, inference: Optional[Type]) -> np.ndarray:
             if inference:
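The rewritten pre_processor replaces the is_extension_array_dtype checks (and their type: ignore comments) with a plain isinstance(vals, ExtensionArray) check, converting nullable arrays to a float ndarray with NaN for missing values. A short illustration of that conversion step in isolation:

import numpy as np
import pandas as pd

vals = pd.array([1, 2, None], dtype="Int64")  # nullable integer ExtensionArray

if isinstance(vals, pd.api.extensions.ExtensionArray):
    # Missing values become np.nan in the resulting float ndarray.
    out = vals.to_numpy(dtype=float, na_value=np.nan)
else:
    out = np.asarray(vals)

out        # array([ 1.,  2., nan])
out.dtype  # dtype('float64')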

pandas/core/groupby/ops.py

Lines changed: 8 additions & 15 deletions
@@ -65,6 +65,7 @@
     is_timedelta64_dtype,
     needs_i8_conversion,
 )
+from pandas.core.dtypes.dtypes import ExtensionDtype
 from pandas.core.dtypes.generic import ABCCategoricalIndex
 from pandas.core.dtypes.missing import (
     isna,

@@ -522,7 +523,7 @@ def _disallow_invalid_ops(self, values: ArrayLike, how: str):
     @final
     def _ea_wrap_cython_operation(
         self, kind: str, values, how: str, axis: int, min_count: int = -1, **kwargs
-    ) -> Tuple[np.ndarray, Optional[List[str]]]:
+    ) -> np.ndarray:
         """
         If we have an ExtensionArray, unwrap, call _cython_operation, and
         re-wrap if appropriate.

@@ -539,10 +540,7 @@ def _ea_wrap_cython_operation(
             )
             if how in ["rank"]:
                 # preserve float64 dtype
-
-                # error: Incompatible return value type (got "ndarray", expected
-                # "Tuple[ndarray, Optional[List[str]]]")
-                return res_values  # type: ignore[return-value]
+                return res_values

             res_values = res_values.astype("i8", copy=False)
             result = type(orig_values)(res_values, dtype=orig_values.dtype)

@@ -555,14 +553,11 @@ def _ea_wrap_cython_operation(
                 kind, values, how, axis, min_count, **kwargs
             )
             dtype = maybe_cast_result_dtype(orig_values.dtype, how)
-            if is_extension_array_dtype(dtype):
-                # error: Item "dtype[Any]" of "Union[dtype[Any], ExtensionDtype]" has no
-                # attribute "construct_array_type"
-                cls = dtype.construct_array_type()  # type: ignore[union-attr]
+            if isinstance(dtype, ExtensionDtype):
+                cls = dtype.construct_array_type()
                 return cls._from_sequence(res_values, dtype=dtype)
-            # error: Incompatible return value type (got "ndarray", expected
-            # "Tuple[ndarray, Optional[List[str]]]")
-            return res_values  # type: ignore[return-value]
+
+            return res_values

         elif is_float_dtype(values.dtype):
             # FloatingArray

@@ -599,9 +594,7 @@ def _cython_operation(
         self._disallow_invalid_ops(values, how)

         if is_extension_array_dtype(values.dtype):
-            # error: Incompatible return value type (got "Tuple[ndarray,
-            # Optional[List[str]]]", expected "ndarray")
-            return self._ea_wrap_cython_operation(  # type: ignore[return-value]
+            return self._ea_wrap_cython_operation(
                 kind, values, how, axis, min_count, **kwargs
             )
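Swapping is_extension_array_dtype(dtype) for isinstance(dtype, ExtensionDtype) lets mypy see construct_array_type without a type: ignore. A small sketch of the re-wrapping pattern outside its groupby context (the values here are made up; _from_sequence is the same private constructor the diff itself uses):

import numpy as np
import pandas as pd
from pandas.api.extensions import ExtensionDtype

res_values = np.array([1, 2, 3], dtype="int64")
dtype = pd.Int64Dtype()  # an ExtensionDtype instance

if isinstance(dtype, ExtensionDtype):
    # For Int64Dtype this returns IntegerArray.
    cls = dtype.construct_array_type()
    result = cls._from_sequence(res_values, dtype=dtype)
else:
    result = res_values

result  # an IntegerArray holding [1, 2, 3] with dtype Int64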

pandas/core/indexers.py

Lines changed: 7 additions & 4 deletions
@@ -235,7 +235,7 @@ def validate_indices(indices: np.ndarray, n: int) -> None:
 # Indexer Conversion


-def maybe_convert_indices(indices, n: int):
+def maybe_convert_indices(indices, n: int, verify: bool = True):
     """
     Attempt to convert indices into valid, positive indices.


@@ -248,6 +248,8 @@ def maybe_convert_indices(indices, n: int):
         Array of indices that we are to convert.
     n : int
         Number of elements in the array that we are indexing.
+    verify : bool, default True
+        Check that all entries are between 0 and n - 1, inclusive.

     Returns
     -------

@@ -273,9 +275,10 @@ def maybe_convert_indices(indices, n: int):
         indices = indices.copy()
         indices[mask] += n

-    mask = (indices >= n) | (indices < 0)
-    if mask.any():
-        raise IndexError("indices are out-of-bounds")
+    if verify:
+        mask = (indices >= n) | (indices < 0)
+        if mask.any():
+            raise IndexError("indices are out-of-bounds")
     return indices
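The new verify flag lets callers skip the bounds check when the indices are already known to be valid. A rough standalone sketch of the conversion logic (convert_indices is a hypothetical stand-in, not the pandas function):

import numpy as np


def convert_indices(indices: np.ndarray, n: int, verify: bool = True) -> np.ndarray:
    """Map negative indices into [0, n) and optionally bounds-check them."""
    mask = indices < 0
    if mask.any():
        indices = indices.copy()
        indices[mask] += n
    if verify:
        bad = (indices >= n) | (indices < 0)
        if bad.any():
            raise IndexError("indices are out-of-bounds")
    return indices


convert_indices(np.array([0, -1, 2]), n=3)            # array([0, 2, 2])
convert_indices(np.array([0, 5]), n=3, verify=False)  # array([0, 5]), unchecked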

pandas/core/indexes/base.py

Lines changed: 8 additions & 1 deletion
@@ -3876,7 +3876,14 @@ def _reindex_non_unique(self, target):
     # --------------------------------------------------------------------
     # Join Methods

-    def join(self, other, how="left", level=None, return_indexers=False, sort=False):
+    def join(
+        self,
+        other,
+        how: str_t = "left",
+        level=None,
+        return_indexers: bool = False,
+        sort: bool = False,
+    ):
         """
         Compute join_index and indexers to conform data
         structures to the new index.
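The annotated signature above is the public Index.join; with return_indexers=True it returns the joined index plus indexer arrays into the two original indexes (None where no reindexing is needed). A quick example:

import pandas as pd

left = pd.Index([1, 2, 3, 4])
right = pd.Index([3, 4, 5, 6])

joined, lidx, ridx = left.join(right, how="inner", return_indexers=True)
joined  # Index([3, 4]), the common labels
lidx    # array([2, 3]), positions of 3 and 4 in left
ridx    # array([0, 1]), positions of 3 and 4 in right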

pandas/core/indexes/datetimelike.py

Lines changed: 6 additions & 1 deletion
@@ -827,7 +827,12 @@ def _union(self, other, sort):
     _join_precedence = 10

     def join(
-        self, other, how: str = "left", level=None, return_indexers=False, sort=False
+        self,
+        other,
+        how: str = "left",
+        level=None,
+        return_indexers: bool = False,
+        sort: bool = False,
     ):
         """
         See Index.join
