Skip to content

Commit 0439322

Browse files
Merge branch 'main' into raise-on-parse-int-overflow
2 parents a545602 + dec9be2 commit 0439322

File tree

490 files changed

+5511
-11972
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

490 files changed

+5511
-11972
lines changed

.github/workflows/scorecards.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ jobs:
2929
persist-credentials: false
3030

3131
- name: "Run analysis"
32-
uses: ossf/scorecard-action@v2.0.3
32+
uses: ossf/scorecard-action@v2.0.6
3333
with:
3434
results_file: results.sarif
3535
results_format: sarif

.github/workflows/wheels.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ jobs:
5252
- [windows-2019, win_amd64]
5353
- [windows-2019, win32]
5454
# TODO: support PyPy?
55-
python: [["cp38", "3.8"], ["cp39", "3.9"], ["cp310", "3.10"], ["cp311", "3.11-dev"]]# "pp38", "pp39"]
55+
python: [["cp38", "3.8"], ["cp39", "3.9"], ["cp310", "3.10"], ["cp311", "3.11"]]# "pp38", "pp39"]
5656
env:
5757
IS_PUSH: ${{ github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v') }}
5858
IS_SCHEDULE_DISPATCH: ${{ github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' }}
@@ -73,7 +73,7 @@ jobs:
7373
CIBW_BUILD: ${{ matrix.python[0] }}-${{ matrix.buildplat[1] }}
7474

7575
# Used to test the built wheels
76-
- uses: actions/setup-python@v3
76+
- uses: actions/setup-python@v4
7777
with:
7878
python-version: ${{ matrix.python[1] }}
7979

.pre-commit-config.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,7 @@ repos:
102102
types: [python]
103103
stages: [manual]
104104
additional_dependencies: &pyright_dependencies
105-
- pyright@1.1.264
105+
- pyright@1.1.276
106106
- id: pyright_reportGeneralTypeIssues
107107
# note: assumes python env is setup and activated
108108
name: pyright reportGeneralTypeIssues

asv_bench/asv.conf.json

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,6 @@
5454
"openpyxl": [],
5555
"xlsxwriter": [],
5656
"xlrd": [],
57-
"xlwt": [],
5857
"odfpy": [],
5958
"jinja2": [],
6059
},

asv_bench/benchmarks/array.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,24 @@ def time_from_integer_array(self):
4444
pd.array(self.values_integer, dtype="Int64")
4545

4646

47+
class StringArray:
48+
def setup(self):
49+
N = 100_000
50+
values = tm.rands_array(3, N)
51+
self.values_obj = np.array(values, dtype="object")
52+
self.values_str = np.array(values, dtype="U")
53+
self.values_list = values.tolist()
54+
55+
def time_from_np_object_array(self):
56+
pd.array(self.values_obj, dtype="string")
57+
58+
def time_from_np_str_array(self):
59+
pd.array(self.values_str, dtype="string")
60+
61+
def time_from_list(self):
62+
pd.array(self.values_list, dtype="string")
63+
64+
4765
class ArrowStringArray:
4866

4967
params = [False, True]

asv_bench/benchmarks/groupby.py

Lines changed: 31 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
Timestamp,
1515
date_range,
1616
period_range,
17+
to_timedelta,
1718
)
1819

1920
from .pandas_vb_common import tm
@@ -35,7 +36,6 @@
3536
"pct_change",
3637
"min",
3738
"var",
38-
"mad",
3939
"describe",
4040
"std",
4141
"quantile",
@@ -52,7 +52,6 @@
5252
"cummax",
5353
"pct_change",
5454
"var",
55-
"mad",
5655
"describe",
5756
"std",
5857
},
@@ -311,7 +310,7 @@ def time_different_python_functions_multicol(self, df):
311310
df.groupby(["key1", "key2"]).agg([sum, min, max])
312311

313312
def time_different_python_functions_singlecol(self, df):
314-
df.groupby("key1").agg([sum, min, max])
313+
df.groupby("key1")[["value1", "value2", "value3"]].agg([sum, min, max])
315314

316315

317316
class GroupStrings:
@@ -437,7 +436,6 @@ class GroupByMethods:
437436
"first",
438437
"head",
439438
"last",
440-
"mad",
441439
"max",
442440
"min",
443441
"median",
@@ -483,7 +481,7 @@ def setup(self, dtype, method, application, ncols):
483481

484482
if method == "describe":
485483
ngroups = 20
486-
elif method in ["mad", "skew"]:
484+
elif method == "skew":
487485
ngroups = 100
488486
else:
489487
ngroups = 1000
@@ -685,7 +683,7 @@ class String:
685683
def setup(self, dtype, method):
686684
cols = list("abcdefghjkl")
687685
self.df = DataFrame(
688-
np.random.randint(0, 100, size=(1_000_000, len(cols))),
686+
np.random.randint(0, 100, size=(10_000, len(cols))),
689687
columns=cols,
690688
dtype=dtype,
691689
)
@@ -990,4 +988,31 @@ def time_sample_weights(self):
990988
self.df.groupby(self.groups).sample(n=1, weights=self.weights)
991989

992990

991+
class Resample:
992+
# GH 28635
993+
def setup(self):
994+
num_timedeltas = 20_000
995+
num_groups = 3
996+
997+
index = MultiIndex.from_product(
998+
[
999+
np.arange(num_groups),
1000+
to_timedelta(np.arange(num_timedeltas), unit="s"),
1001+
],
1002+
names=["groups", "timedeltas"],
1003+
)
1004+
data = np.random.randint(0, 1000, size=(len(index)))
1005+
1006+
self.df = DataFrame(data, index=index).reset_index("timedeltas")
1007+
self.df_multiindex = DataFrame(data, index=index)
1008+
1009+
def time_resample(self):
1010+
self.df.groupby(level="groups").resample("10s", on="timedeltas").mean()
1011+
1012+
def time_resample_multiindex(self):
1013+
self.df_multiindex.groupby(level="groups").resample(
1014+
"10s", level="timedeltas"
1015+
).mean()
1016+
1017+
9931018
from .pandas_vb_common import setup # noqa: F401 isort:skip

asv_bench/benchmarks/io/excel.py

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ def _generate_dataframe():
3333

3434
class WriteExcel:
3535

36-
params = ["openpyxl", "xlsxwriter", "xlwt"]
36+
params = ["openpyxl", "xlsxwriter"]
3737
param_names = ["engine"]
3838

3939
def setup(self, engine):
@@ -68,10 +68,9 @@ def time_write_excel_style(self, engine):
6868

6969
class ReadExcel:
7070

71-
params = ["xlrd", "openpyxl", "odf"]
71+
params = ["openpyxl", "odf"]
7272
param_names = ["engine"]
7373
fname_excel = "spreadsheet.xlsx"
74-
fname_excel_xls = "spreadsheet.xls"
7574
fname_odf = "spreadsheet.ods"
7675

7776
def _create_odf(self):
@@ -92,13 +91,10 @@ def setup_cache(self):
9291
self.df = _generate_dataframe()
9392

9493
self.df.to_excel(self.fname_excel, sheet_name="Sheet1")
95-
self.df.to_excel(self.fname_excel_xls, sheet_name="Sheet1")
9694
self._create_odf()
9795

9896
def time_read_excel(self, engine):
99-
if engine == "xlrd":
100-
fname = self.fname_excel_xls
101-
elif engine == "odf":
97+
if engine == "odf":
10298
fname = self.fname_odf
10399
else:
104100
fname = self.fname_excel
@@ -107,9 +103,7 @@ def time_read_excel(self, engine):
107103

108104
class ReadExcelNRows(ReadExcel):
109105
def time_read_excel(self, engine):
110-
if engine == "xlrd":
111-
fname = self.fname_excel_xls
112-
elif engine == "odf":
106+
if engine == "odf":
113107
fname = self.fname_odf
114108
else:
115109
fname = self.fname_excel

asv_bench/benchmarks/io/sql.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ def setup(self, connection):
3838
},
3939
index=tm.makeStringIndex(N),
4040
)
41-
self.df.loc[1000:3000, "float_with_nan"] = np.nan
41+
self.df.iloc[1000:3000, 1] = np.nan
4242
self.df["date"] = self.df["datetime"].dt.date
4343
self.df["time"] = self.df["datetime"].dt.time
4444
self.df["datetime_string"] = self.df["datetime"].astype(str)
@@ -88,7 +88,7 @@ def setup(self, connection, dtype):
8888
},
8989
index=tm.makeStringIndex(N),
9090
)
91-
self.df.loc[1000:3000, "float_with_nan"] = np.nan
91+
self.df.iloc[1000:3000, 1] = np.nan
9292
self.df["date"] = self.df["datetime"].dt.date
9393
self.df["time"] = self.df["datetime"].dt.time
9494
self.df["datetime_string"] = self.df["datetime"].astype(str)
@@ -117,7 +117,7 @@ def setup(self):
117117
},
118118
index=tm.makeStringIndex(N),
119119
)
120-
self.df.loc[1000:3000, "float_with_nan"] = np.nan
120+
self.df.iloc[1000:3000, 1] = np.nan
121121
self.df["date"] = self.df["datetime"].dt.date
122122
self.df["time"] = self.df["datetime"].dt.time
123123
self.df["datetime_string"] = self.df["datetime"].astype(str)
@@ -164,7 +164,7 @@ def setup(self, dtype):
164164
},
165165
index=tm.makeStringIndex(N),
166166
)
167-
self.df.loc[1000:3000, "float_with_nan"] = np.nan
167+
self.df.iloc[1000:3000, 1] = np.nan
168168
self.df["date"] = self.df["datetime"].dt.date
169169
self.df["time"] = self.df["datetime"].dt.time
170170
self.df["datetime_string"] = self.df["datetime"].astype(str)

asv_bench/benchmarks/io/stata.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -38,13 +38,13 @@ def setup(self, convert_dates):
3838
)
3939
self.df["float32_"] = np.array(np.random.randn(N), dtype=np.float32)
4040
self.convert_dates = {"index": convert_dates}
41-
self.df.to_stata(self.fname, self.convert_dates)
41+
self.df.to_stata(self.fname, convert_dates=self.convert_dates)
4242

4343
def time_read_stata(self, convert_dates):
4444
read_stata(self.fname)
4545

4646
def time_write_stata(self, convert_dates):
47-
self.df.to_stata(self.fname, self.convert_dates)
47+
self.df.to_stata(self.fname, convert_dates=self.convert_dates)
4848

4949

5050
class StataMissing(Stata):
@@ -54,7 +54,7 @@ def setup(self, convert_dates):
5454
missing_data = np.random.randn(self.N)
5555
missing_data[missing_data < 0] = np.nan
5656
self.df[f"missing_{i}"] = missing_data
57-
self.df.to_stata(self.fname, self.convert_dates)
57+
self.df.to_stata(self.fname, convert_dates=self.convert_dates)
5858

5959

6060
from ..pandas_vb_common import setup # noqa: F401 isort:skip

asv_bench/benchmarks/io/style.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -83,11 +83,11 @@ def _style_format(self):
8383
def _style_apply_format_hide(self):
8484
self.st = self.df.style.applymap(lambda v: "color: red;")
8585
self.st.format("{:.3f}")
86-
self.st.hide_index(self.st.index[1:])
87-
self.st.hide_columns(self.st.columns[1:])
86+
self.st.hide(self.st.index[1:], axis=0)
87+
self.st.hide(self.st.columns[1:], axis=1)
8888

8989
def _style_tooltips(self):
9090
ttips = DataFrame("abc", index=self.df.index[::2], columns=self.df.columns[::2])
9191
self.st = self.df.style.set_tooltips(ttips)
92-
self.st.hide_index(self.st.index[12:])
93-
self.st.hide_columns(self.st.columns[12:])
92+
self.st.hide(self.st.index[12:], axis=0)
93+
self.st.hide(self.st.columns[12:], axis=1)

asv_bench/benchmarks/join_merge.py

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -97,13 +97,13 @@ class ConcatIndexDtype:
9797

9898
params = (
9999
["datetime64[ns]", "int64", "Int64", "string[python]", "string[pyarrow]"],
100+
["monotonic", "non_monotonic", "has_na"],
100101
[0, 1],
101102
[True, False],
102-
[True, False],
103103
)
104-
param_names = ["dtype", "axis", "sort", "is_monotonic"]
104+
param_names = ["dtype", "structure", "axis", "sort"]
105105

106-
def setup(self, dtype, axis, sort, is_monotonic):
106+
def setup(self, dtype, structure, axis, sort):
107107
N = 10_000
108108
if dtype == "datetime64[ns]":
109109
vals = date_range("1970-01-01", periods=N)
@@ -115,14 +115,21 @@ def setup(self, dtype, axis, sort, is_monotonic):
115115
raise NotImplementedError
116116

117117
idx = Index(vals, dtype=dtype)
118-
if is_monotonic:
118+
119+
if structure == "monotonic":
119120
idx = idx.sort_values()
120-
else:
121+
elif structure == "non_monotonic":
121122
idx = idx[::-1]
123+
elif structure == "has_na":
124+
if not idx._can_hold_na:
125+
raise NotImplementedError
126+
idx = Index([None], dtype=dtype).append(idx)
127+
else:
128+
raise NotImplementedError
122129

123-
self.series = [Series(i, idx[i:]) for i in range(5)]
130+
self.series = [Series(i, idx[:-i]) for i in range(1, 6)]
124131

125-
def time_concat_series(self, dtype, axis, sort, is_monotonic):
132+
def time_concat_series(self, dtype, structure, axis, sort):
126133
concat(self.series, axis=axis, sort=sort)
127134

128135

asv_bench/benchmarks/reshape.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ def setup(self):
3636
self.df = DataFrame(data)
3737

3838
def time_reshape_pivot_time_series(self):
39-
self.df.pivot("date", "variable", "value")
39+
self.df.pivot(index="date", columns="variable", values="value")
4040

4141

4242
class SimpleReshape:

asv_bench/benchmarks/stat_ops.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
import pandas as pd
44

5-
ops = ["mean", "sum", "median", "std", "skew", "kurt", "mad", "prod", "sem", "var"]
5+
ops = ["mean", "sum", "median", "std", "skew", "kurt", "prod", "sem", "var"]
66

77

88
class FrameOps:
@@ -11,9 +11,6 @@ class FrameOps:
1111
param_names = ["op", "dtype", "axis"]
1212

1313
def setup(self, op, dtype, axis):
14-
if op == "mad" and dtype == "Int64":
15-
# GH-33036, GH#33600
16-
raise NotImplementedError
1714
values = np.random.randn(100000, 4)
1815
if dtype == "Int64":
1916
values = values.astype(int)

asv_bench/benchmarks/tslibs/offsets.py

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -71,11 +71,8 @@ def setup(self, offset):
7171
self.date = datetime(2011, 1, 1)
7272
self.dt64 = np.datetime64("2011-01-01 09:00Z")
7373

74-
def time_apply(self, offset):
75-
offset.apply(self.date)
76-
77-
def time_apply_np_dt64(self, offset):
78-
offset.apply(self.dt64)
74+
def time_add_np_dt64(self, offset):
75+
offset + self.dt64
7976

8077
def time_add(self, offset):
8178
self.date + offset

ci/code_checks.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ import pandas
4747
4848
blocklist = {'bs4', 'gcsfs', 'html5lib', 'http', 'ipython', 'jinja2', 'hypothesis',
4949
'lxml', 'matplotlib', 'openpyxl', 'py', 'pytest', 's3fs', 'scipy',
50-
'tables', 'urllib.request', 'xlrd', 'xlsxwriter', 'xlwt'}
50+
'tables', 'urllib.request', 'xlrd', 'xlsxwriter'}
5151
5252
# GH#28227 for some of these check for top-level modules, while others are
5353
# more specific (e.g. urllib.request)

ci/deps/actions-310.yaml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,5 +51,4 @@ dependencies:
5151
- xarray
5252
- xlrd
5353
- xlsxwriter
54-
- xlwt
5554
- zstandard

ci/deps/actions-38-downstream_compat.yaml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,6 @@ dependencies:
5151
- xarray
5252
- xlrd
5353
- xlsxwriter
54-
- xlwt
5554
- zstandard
5655

5756
# downstream packages

0 commit comments

Comments
 (0)