Skip to content

Commit 05864f7

Browse files
committed
fix: merge in upstream/main
2 parents d10fedc + e660f2c commit 05864f7

File tree

136 files changed

+1973
-1474
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

136 files changed

+1973
-1474
lines changed

.github/workflows/package-checks.yml

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
name: Package Checks
2+
3+
on:
4+
push:
5+
branches:
6+
- main
7+
- 1.5.x
8+
pull_request:
9+
branches:
10+
- main
11+
- 1.5.x
12+
13+
permissions:
14+
contents: read
15+
16+
jobs:
17+
pip:
18+
runs-on: ubuntu-latest
19+
strategy:
20+
matrix:
21+
extra: ["test", "performance", "timezone", "computation", "fss", "aws", "gcp", "excel", "parquet", "feather", "hdf5", "spss", "postgresql", "mysql", "sql-other", "html", "xml", "plot", "output_formatting", "clipboard", "compression", "all"]
22+
fail-fast: false
23+
name: Install Extras - ${{ matrix.extra }}
24+
concurrency:
25+
# https://github.community/t/concurrecy-not-work-for-push/183068/7
26+
group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-pip-extras-${{ matrix.extra }}
27+
cancel-in-progress: true
28+
29+
steps:
30+
- name: Checkout
31+
uses: actions/checkout@v3
32+
with:
33+
fetch-depth: 0
34+
35+
- name: Setup Python
36+
id: setup_python
37+
uses: actions/setup-python@v3
38+
with:
39+
python-version: '3.8'
40+
41+
# Hacky patch to disable building cython extensions.
42+
# This job should only check that the extras successfully install.
43+
- name: Disable building ext_modules
44+
run: |
45+
sed -i '/ext_modules=/d' setup.py
46+
shell: bash -el {0}
47+
48+
- name: Install required dependencies
49+
run: |
50+
python -m pip install --upgrade pip setuptools wheel python-dateutil pytz numpy cython
51+
shell: bash -el {0}
52+
53+
- name: Pip install with extra
54+
run: |
55+
python -m pip install -e .[${{ matrix.extra }}] --no-build-isolation
56+
shell: bash -el {0}

asv_bench/benchmarks/hash_functions.py

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -57,14 +57,14 @@ def time_unique(self, exponent):
5757
class NumericSeriesIndexing:
5858

5959
params = [
60-
(pd.Int64Index, pd.UInt64Index, pd.Float64Index),
60+
(np.int64, np.uint64, np.float64),
6161
(10**4, 10**5, 5 * 10**5, 10**6, 5 * 10**6),
6262
]
63-
param_names = ["index_dtype", "N"]
63+
param_names = ["dtype", "N"]
6464

65-
def setup(self, index, N):
66-
vals = np.array(list(range(55)) + [54] + list(range(55, N - 1)))
67-
indices = index(vals)
65+
def setup(self, dtype, N):
66+
vals = np.array(list(range(55)) + [54] + list(range(55, N - 1)), dtype=dtype)
67+
indices = pd.Index(vals)
6868
self.data = pd.Series(np.arange(N), index=indices)
6969

7070
def time_loc_slice(self, index, N):
@@ -75,15 +75,15 @@ def time_loc_slice(self, index, N):
7575
class NumericSeriesIndexingShuffled:
7676

7777
params = [
78-
(pd.Int64Index, pd.UInt64Index, pd.Float64Index),
78+
(np.int64, np.uint64, np.float64),
7979
(10**4, 10**5, 5 * 10**5, 10**6, 5 * 10**6),
8080
]
81-
param_names = ["index_dtype", "N"]
81+
param_names = ["dtype", "N"]
8282

83-
def setup(self, index, N):
84-
vals = np.array(list(range(55)) + [54] + list(range(55, N - 1)))
83+
def setup(self, dtype, N):
84+
vals = np.array(list(range(55)) + [54] + list(range(55, N - 1)), dtype=dtype)
8585
np.random.shuffle(vals)
86-
indices = index(vals)
86+
indices = pd.Index(vals)
8787
self.data = pd.Series(np.arange(N), index=indices)
8888

8989
def time_loc_slice(self, index, N):

asv_bench/benchmarks/index_cached_properties.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ def setup(self, index_type):
3030
elif index_type == "DatetimeIndex":
3131
self.idx = pd.date_range("1/1/2000", freq="T", periods=N)
3232
elif index_type == "Int64Index":
33-
self.idx = pd.Index(range(N))
33+
self.idx = pd.Index(range(N), dtype="int64")
3434
elif index_type == "PeriodIndex":
3535
self.idx = pd.period_range("1/1/2000", freq="T", periods=N)
3636
elif index_type == "RangeIndex":
@@ -40,9 +40,9 @@ def setup(self, index_type):
4040
elif index_type == "TimedeltaIndex":
4141
self.idx = pd.TimedeltaIndex(range(N))
4242
elif index_type == "Float64Index":
43-
self.idx = pd.Float64Index(range(N))
43+
self.idx = pd.Index(range(N), dtype="float64")
4444
elif index_type == "UInt64Index":
45-
self.idx = pd.UInt64Index(range(N))
45+
self.idx = pd.Index(range(N), dtype="uint64")
4646
elif index_type == "CategoricalIndex":
4747
self.idx = pd.CategoricalIndex(range(N), range(N))
4848
else:

asv_bench/benchmarks/index_object.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44

55
from pandas import (
66
DatetimeIndex,
7-
Float64Index,
87
Index,
98
IntervalIndex,
109
MultiIndex,
@@ -202,8 +201,8 @@ class Float64IndexMethod:
202201
# GH 13166
203202
def setup(self):
204203
N = 100_000
205-
a = np.arange(N)
206-
self.ind = Float64Index(a * 4.8000000418824129e-08)
204+
a = np.arange(N, dtype=np.float64)
205+
self.ind = Index(a * 4.8000000418824129e-08)
207206

208207
def time_get_loc(self):
209208
self.ind.get_loc(0)

asv_bench/benchmarks/indexing.py

Lines changed: 14 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -12,12 +12,10 @@
1212
from pandas import (
1313
CategoricalIndex,
1414
DataFrame,
15-
Float64Index,
16-
Int64Index,
15+
Index,
1716
IntervalIndex,
1817
MultiIndex,
1918
Series,
20-
UInt64Index,
2119
concat,
2220
date_range,
2321
option_context,
@@ -30,17 +28,17 @@
3028
class NumericSeriesIndexing:
3129

3230
params = [
33-
(Int64Index, UInt64Index, Float64Index),
31+
(np.int64, np.uint64, np.float64),
3432
("unique_monotonic_inc", "nonunique_monotonic_inc"),
3533
]
36-
param_names = ["index_dtype", "index_structure"]
34+
param_names = ["dtype", "index_structure"]
3735

38-
def setup(self, index, index_structure):
36+
def setup(self, dtype, index_structure):
3937
N = 10**6
4038
indices = {
41-
"unique_monotonic_inc": index(range(N)),
42-
"nonunique_monotonic_inc": index(
43-
list(range(55)) + [54] + list(range(55, N - 1))
39+
"unique_monotonic_inc": Index(range(N), dtype=dtype),
40+
"nonunique_monotonic_inc": Index(
41+
list(range(55)) + [54] + list(range(55, N - 1)), dtype=dtype
4442
),
4543
}
4644
self.data = Series(np.random.rand(N), index=indices[index_structure])
@@ -159,17 +157,17 @@ def time_boolean_rows_boolean(self):
159157
class DataFrameNumericIndexing:
160158

161159
params = [
162-
(Int64Index, UInt64Index, Float64Index),
160+
(np.int64, np.uint64, np.float64),
163161
("unique_monotonic_inc", "nonunique_monotonic_inc"),
164162
]
165-
param_names = ["index_dtype", "index_structure"]
163+
param_names = ["dtype", "index_structure"]
166164

167-
def setup(self, index, index_structure):
165+
def setup(self, dtype, index_structure):
168166
N = 10**5
169167
indices = {
170-
"unique_monotonic_inc": index(range(N)),
171-
"nonunique_monotonic_inc": index(
172-
list(range(55)) + [54] + list(range(55, N - 1))
168+
"unique_monotonic_inc": Index(range(N), dtype=dtype),
169+
"nonunique_monotonic_inc": Index(
170+
list(range(55)) + [54] + list(range(55, N - 1)), dtype=dtype
173171
),
174172
}
175173
self.idx_dupe = np.array(range(30)) * 99
@@ -201,7 +199,7 @@ class Take:
201199
def setup(self, index):
202200
N = 100000
203201
indexes = {
204-
"int": Int64Index(np.arange(N)),
202+
"int": Index(np.arange(N), dtype=np.int64),
205203
"datetime": date_range("2011-01-01", freq="S", periods=N),
206204
}
207205
index = indexes[index]

asv_bench/benchmarks/package.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ def time_import(self):
1111
# measurement of the import time we actually care about,
1212
# without the subprocess or interpreter overhead
1313
cmd = [sys.executable, "-X", "importtime", "-c", "import pandas as pd"]
14-
p = subprocess.run(cmd, stderr=subprocess.PIPE)
14+
p = subprocess.run(cmd, stderr=subprocess.PIPE, check=True)
1515

1616
line = p.stderr.splitlines()[-1]
1717
field = line.split(b"|")[-2].strip()

doc/source/conf.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -583,7 +583,14 @@ class AccessorCallableDocumenter(AccessorLevelDocumenter, MethodDocumenter):
583583
priority = 0.5
584584

585585
def format_name(self):
586-
return MethodDocumenter.format_name(self).rstrip(".__call__")
586+
if sys.version_info < (3, 9):
587+
# NOTE pyupgrade will remove this when we run it with --py39-plus
588+
# so don't remove the unnecessary `else` statement below
589+
from pandas.util._str_methods import removesuffix
590+
591+
return removesuffix(MethodDocumenter.format_name(self), ".__call__")
592+
else:
593+
return MethodDocumenter.format_name(self).removesuffix(".__call__")
587594

588595

589596
class PandasAutosummary(Autosummary):

doc/source/getting_started/install.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -415,7 +415,7 @@ depending on required compatibility.
415415
Dependency Minimum Version optional_extra Notes
416416
========================= ================== ================ =============================================================
417417
PyTables 3.6.1 hdf5 HDF5-based reading / writing
418-
blosc 1.21.0 hdf5 Compression for HDF5
418+
blosc 1.21.0 hdf5 Compression for HDF5; only available on ``conda``
419419
zlib hdf5 Compression for HDF5
420420
fastparquet 0.6.3 - Parquet reading / writing (pyarrow is default)
421421
pyarrow 6.0.0 parquet, feather Parquet, ORC, and feather reading / writing

doc/source/reference/indexing.rst

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -166,9 +166,6 @@ Numeric Index
166166
:template: autosummary/class_without_autosummary.rst
167167

168168
RangeIndex
169-
Int64Index
170-
UInt64Index
171-
Float64Index
172169

173170
.. We need this autosummary so that the methods are generated.
174171
.. Separate block, since they aren't classes.

doc/source/user_guide/merging.rst

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -151,8 +151,7 @@ It's not a stretch to see how this can be very useful. More detail on this
151151
functionality below.
152152

153153
.. note::
154-
It is worth noting that :func:`~pandas.concat` (and therefore
155-
:func:`~pandas.append`) makes a full copy of the data, and that constantly
154+
It is worth noting that :func:`~pandas.concat` makes a full copy of the data, and that constantly
156155
reusing this function can create a significant performance hit. If you need
157156
to use the operation over several datasets, use a list comprehension.
158157

doc/source/whatsnew/v1.5.0.rst

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1155,7 +1155,6 @@ Plotting
11551155
- Bug in :meth:`DataFrame.boxplot` that prevented passing in ``xlabel`` and ``ylabel`` (:issue:`45463`)
11561156
- Bug in :meth:`DataFrame.boxplot` that prevented specifying ``vert=False`` (:issue:`36918`)
11571157
- Bug in :meth:`DataFrame.plot.scatter` that prevented specifying ``norm`` (:issue:`45809`)
1158-
- The function :meth:`DataFrame.plot.scatter` now accepts ``color`` as an alias for ``c`` and ``size`` as an alias for ``s`` for consistency to other plotting functions (:issue:`44670`)
11591158
- Fix showing "None" as ylabel in :meth:`Series.plot` when not setting ylabel (:issue:`46129`)
11601159
- Bug in :meth:`DataFrame.plot` that led to xticks and vertical grids being improperly placed when plotting a quarterly series (:issue:`47602`)
11611160
- Bug in :meth:`DataFrame.plot` that prevented setting y-axis label, limits and ticks for a secondary y-axis (:issue:`47753`)

doc/source/whatsnew/v1.5.2.rst

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,18 +15,20 @@ Fixed regressions
1515
~~~~~~~~~~~~~~~~~
1616
- Fixed regression in :meth:`MultiIndex.join` for extension array dtypes (:issue:`49277`)
1717
- Fixed regression in :meth:`Series.replace` raising ``RecursionError`` with numeric dtype when specifying ``value=None`` (:issue:`45725`)
18+
- Fixed regression in arithmetic operations for :class:`DataFrame` with :class:`MultiIndex` columns with different dtypes (:issue:`49769`)
1819
- Fixed regression in :meth:`DataFrame.plot` preventing :class:`~matplotlib.colors.Colormap` instance
1920
from being passed using the ``colormap`` argument if Matplotlib 3.6+ is used (:issue:`49374`)
2021
- Fixed regression in :func:`date_range` returning an invalid set of periods for ``CustomBusinessDay`` frequency and ``start`` date with timezone (:issue:`49441`)
2122
- Fixed performance regression in groupby operations (:issue:`49676`)
22-
-
23+
- Fixed regression in :class:`Timedelta` constructor returning object of wrong type when subclassing ``Timedelta`` (:issue:`49579`)
2324

2425
.. ---------------------------------------------------------------------------
2526
.. _whatsnew_152.bug_fixes:
2627

2728
Bug fixes
2829
~~~~~~~~~
2930
- Bug in the Copy-on-Write implementation losing track of views in certain chained indexing cases (:issue:`48996`)
31+
- Fixed memory leak in :meth:`.Styler.to_excel` (:issue:`49751`)
3032
- Fixed bug when instantiating a :class:`DataFrame` subclass inheriting from ``typing.Generic`` that triggered a ``UserWarning`` on python 3.11 (:issue:`49649`)
3133
-
3234

@@ -35,7 +37,7 @@ Bug fixes
3537

3638
Other
3739
~~~~~
38-
-
40+
- Reverted ``color`` as an alias for ``c`` and ``size`` as an alias for ``s`` in function :meth:`DataFrame.plot.scatter` (:issue:`49732`)
3941
-
4042

4143
.. ---------------------------------------------------------------------------

0 commit comments

Comments
 (0)