Skip to content

Commit 1d9f274

Browse files
authored
Merge branch 'main' into implementation-pdep-4
2 parents 060835d + 8564b70 commit 1d9f274

File tree

282 files changed

+2384
-5788
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

282 files changed

+2384
-5788
lines changed

.pre-commit-config.yaml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -102,7 +102,7 @@ repos:
102102
types: [python]
103103
stages: [manual]
104104
additional_dependencies: &pyright_dependencies
105-
- pyright@1.1.264
105+
- pyright@1.1.276
106106
- id: pyright_reportGeneralTypeIssues
107107
# note: assumes python env is setup and activated
108108
name: pyright reportGeneralTypeIssues

asv_bench/asv.conf.json

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -54,7 +54,6 @@
5454
"openpyxl": [],
5555
"xlsxwriter": [],
5656
"xlrd": [],
57-
"xlwt": [],
5857
"odfpy": [],
5958
"jinja2": [],
6059
},

asv_bench/benchmarks/groupby.py

Lines changed: 29 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,7 @@
1414
Timestamp,
1515
date_range,
1616
period_range,
17+
to_timedelta,
1718
)
1819

1920
from .pandas_vb_common import tm
@@ -35,7 +36,6 @@
3536
"pct_change",
3637
"min",
3738
"var",
38-
"mad",
3939
"describe",
4040
"std",
4141
"quantile",
@@ -52,7 +52,6 @@
5252
"cummax",
5353
"pct_change",
5454
"var",
55-
"mad",
5655
"describe",
5756
"std",
5857
},
@@ -437,7 +436,6 @@ class GroupByMethods:
437436
"first",
438437
"head",
439438
"last",
440-
"mad",
441439
"max",
442440
"min",
443441
"median",
@@ -483,7 +481,7 @@ def setup(self, dtype, method, application, ncols):
483481

484482
if method == "describe":
485483
ngroups = 20
486-
elif method in ["mad", "skew"]:
484+
elif method == "skew":
487485
ngroups = 100
488486
else:
489487
ngroups = 1000
@@ -990,4 +988,31 @@ def time_sample_weights(self):
990988
self.df.groupby(self.groups).sample(n=1, weights=self.weights)
991989

992990

991+
class Resample:
992+
# GH 28635
993+
def setup(self):
994+
num_timedeltas = 20_000
995+
num_groups = 3
996+
997+
index = MultiIndex.from_product(
998+
[
999+
np.arange(num_groups),
1000+
to_timedelta(np.arange(num_timedeltas), unit="s"),
1001+
],
1002+
names=["groups", "timedeltas"],
1003+
)
1004+
data = np.random.randint(0, 1000, size=(len(index)))
1005+
1006+
self.df = DataFrame(data, index=index).reset_index("timedeltas")
1007+
self.df_multiindex = DataFrame(data, index=index)
1008+
1009+
def time_resample(self):
1010+
self.df.groupby(level="groups").resample("10s", on="timedeltas").mean()
1011+
1012+
def time_resample_multiindex(self):
1013+
self.df_multiindex.groupby(level="groups").resample(
1014+
"10s", level="timedeltas"
1015+
).mean()
1016+
1017+
9931018
from .pandas_vb_common import setup # noqa: F401 isort:skip

asv_bench/benchmarks/io/excel.py

Lines changed: 4 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -33,7 +33,7 @@ def _generate_dataframe():
3333

3434
class WriteExcel:
3535

36-
params = ["openpyxl", "xlsxwriter", "xlwt"]
36+
params = ["openpyxl", "xlsxwriter"]
3737
param_names = ["engine"]
3838

3939
def setup(self, engine):
@@ -68,10 +68,9 @@ def time_write_excel_style(self, engine):
6868

6969
class ReadExcel:
7070

71-
params = ["xlrd", "openpyxl", "odf"]
71+
params = ["openpyxl", "odf"]
7272
param_names = ["engine"]
7373
fname_excel = "spreadsheet.xlsx"
74-
fname_excel_xls = "spreadsheet.xls"
7574
fname_odf = "spreadsheet.ods"
7675

7776
def _create_odf(self):
@@ -92,13 +91,10 @@ def setup_cache(self):
9291
self.df = _generate_dataframe()
9392

9493
self.df.to_excel(self.fname_excel, sheet_name="Sheet1")
95-
self.df.to_excel(self.fname_excel_xls, sheet_name="Sheet1")
9694
self._create_odf()
9795

9896
def time_read_excel(self, engine):
99-
if engine == "xlrd":
100-
fname = self.fname_excel_xls
101-
elif engine == "odf":
97+
if engine == "odf":
10298
fname = self.fname_odf
10399
else:
104100
fname = self.fname_excel
@@ -107,9 +103,7 @@ def time_read_excel(self, engine):
107103

108104
class ReadExcelNRows(ReadExcel):
109105
def time_read_excel(self, engine):
110-
if engine == "xlrd":
111-
fname = self.fname_excel_xls
112-
elif engine == "odf":
106+
if engine == "odf":
113107
fname = self.fname_odf
114108
else:
115109
fname = self.fname_excel

asv_bench/benchmarks/io/stata.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -38,13 +38,13 @@ def setup(self, convert_dates):
3838
)
3939
self.df["float32_"] = np.array(np.random.randn(N), dtype=np.float32)
4040
self.convert_dates = {"index": convert_dates}
41-
self.df.to_stata(self.fname, self.convert_dates)
41+
self.df.to_stata(self.fname, convert_dates=self.convert_dates)
4242

4343
def time_read_stata(self, convert_dates):
4444
read_stata(self.fname)
4545

4646
def time_write_stata(self, convert_dates):
47-
self.df.to_stata(self.fname, self.convert_dates)
47+
self.df.to_stata(self.fname, convert_dates=self.convert_dates)
4848

4949

5050
class StataMissing(Stata):
@@ -54,7 +54,7 @@ def setup(self, convert_dates):
5454
missing_data = np.random.randn(self.N)
5555
missing_data[missing_data < 0] = np.nan
5656
self.df[f"missing_{i}"] = missing_data
57-
self.df.to_stata(self.fname, self.convert_dates)
57+
self.df.to_stata(self.fname, convert_dates=self.convert_dates)
5858

5959

6060
from ..pandas_vb_common import setup # noqa: F401 isort:skip

asv_bench/benchmarks/join_merge.py

Lines changed: 14 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -97,13 +97,13 @@ class ConcatIndexDtype:
9797

9898
params = (
9999
["datetime64[ns]", "int64", "Int64", "string[python]", "string[pyarrow]"],
100+
["monotonic", "non_monotonic", "has_na"],
100101
[0, 1],
101102
[True, False],
102-
[True, False],
103103
)
104-
param_names = ["dtype", "axis", "sort", "is_monotonic"]
104+
param_names = ["dtype", "structure", "axis", "sort"]
105105

106-
def setup(self, dtype, axis, sort, is_monotonic):
106+
def setup(self, dtype, structure, axis, sort):
107107
N = 10_000
108108
if dtype == "datetime64[ns]":
109109
vals = date_range("1970-01-01", periods=N)
@@ -115,14 +115,21 @@ def setup(self, dtype, axis, sort, is_monotonic):
115115
raise NotImplementedError
116116

117117
idx = Index(vals, dtype=dtype)
118-
if is_monotonic:
118+
119+
if structure == "monotonic":
119120
idx = idx.sort_values()
120-
else:
121+
elif structure == "non_monotonic":
121122
idx = idx[::-1]
123+
elif structure == "has_na":
124+
if not idx._can_hold_na:
125+
raise NotImplementedError
126+
idx = Index([None], dtype=dtype).append(idx)
127+
else:
128+
raise NotImplementedError
122129

123-
self.series = [Series(i, idx[i:]) for i in range(5)]
130+
self.series = [Series(i, idx[:-i]) for i in range(1, 6)]
124131

125-
def time_concat_series(self, dtype, axis, sort, is_monotonic):
132+
def time_concat_series(self, dtype, structure, axis, sort):
126133
concat(self.series, axis=axis, sort=sort)
127134

128135

asv_bench/benchmarks/stat_ops.py

Lines changed: 1 addition & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@
22

33
import pandas as pd
44

5-
ops = ["mean", "sum", "median", "std", "skew", "kurt", "mad", "prod", "sem", "var"]
5+
ops = ["mean", "sum", "median", "std", "skew", "kurt", "prod", "sem", "var"]
66

77

88
class FrameOps:
@@ -11,9 +11,6 @@ class FrameOps:
1111
param_names = ["op", "dtype", "axis"]
1212

1313
def setup(self, op, dtype, axis):
14-
if op == "mad" and dtype == "Int64":
15-
# GH-33036, GH#33600
16-
raise NotImplementedError
1714
values = np.random.randn(100000, 4)
1815
if dtype == "Int64":
1916
values = values.astype(int)

asv_bench/benchmarks/tslibs/offsets.py

Lines changed: 2 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -71,11 +71,8 @@ def setup(self, offset):
7171
self.date = datetime(2011, 1, 1)
7272
self.dt64 = np.datetime64("2011-01-01 09:00Z")
7373

74-
def time_apply(self, offset):
75-
offset.apply(self.date)
76-
77-
def time_apply_np_dt64(self, offset):
78-
offset.apply(self.dt64)
74+
def time_add_np_dt64(self, offset):
75+
offset + self.dt64
7976

8077
def time_add(self, offset):
8178
self.date + offset

ci/code_checks.sh

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -47,7 +47,7 @@ import pandas
4747
4848
blocklist = {'bs4', 'gcsfs', 'html5lib', 'http', 'ipython', 'jinja2', 'hypothesis',
4949
'lxml', 'matplotlib', 'openpyxl', 'py', 'pytest', 's3fs', 'scipy',
50-
'tables', 'urllib.request', 'xlrd', 'xlsxwriter', 'xlwt'}
50+
'tables', 'urllib.request', 'xlrd', 'xlsxwriter'}
5151
5252
# GH#28227 for some of these check for top-level modules, while others are
5353
# more specific (e.g. urllib.request)

ci/deps/actions-310.yaml

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -51,5 +51,4 @@ dependencies:
5151
- xarray
5252
- xlrd
5353
- xlsxwriter
54-
- xlwt
5554
- zstandard

ci/deps/actions-38-downstream_compat.yaml

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -51,7 +51,6 @@ dependencies:
5151
- xarray
5252
- xlrd
5353
- xlsxwriter
54-
- xlwt
5554
- zstandard
5655

5756
# downstream packages

ci/deps/actions-38-minimum_versions.yaml

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -53,5 +53,4 @@ dependencies:
5353
- xarray=0.19.0
5454
- xlrd=2.0.1
5555
- xlsxwriter=1.4.3
56-
- xlwt=1.3.0
5756
- zstandard=0.15.2

ci/deps/actions-38.yaml

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -50,5 +50,4 @@ dependencies:
5050
- xarray
5151
- xlrd
5252
- xlsxwriter
53-
- xlwt
5453
- zstandard

ci/deps/actions-39.yaml

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -51,5 +51,4 @@ dependencies:
5151
- xarray
5252
- xlrd
5353
- xlsxwriter
54-
- xlwt
5554
- zstandard

ci/deps/circle-38-arm64.yaml

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -51,5 +51,4 @@ dependencies:
5151
- xarray
5252
- xlrd
5353
- xlsxwriter
54-
- xlwt
5554
- zstandard

0 commit comments

Comments
 (0)