Skip to content

Commit d88e60c

Browse files
committed
Merge branch 'main' into depr-sparse-astype
2 parents e1824cf + 3872572 commit d88e60c

File tree

168 files changed

+695
-2438
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

168 files changed

+695
-2438
lines changed

asv_bench/asv.conf.json

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,6 @@
5454
"openpyxl": [],
5555
"xlsxwriter": [],
5656
"xlrd": [],
57-
"xlwt": [],
5857
"odfpy": [],
5958
"jinja2": [],
6059
},

asv_bench/benchmarks/io/excel.py

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ def _generate_dataframe():
3333

3434
class WriteExcel:
3535

36-
params = ["openpyxl", "xlsxwriter", "xlwt"]
36+
params = ["openpyxl", "xlsxwriter"]
3737
param_names = ["engine"]
3838

3939
def setup(self, engine):
@@ -68,10 +68,9 @@ def time_write_excel_style(self, engine):
6868

6969
class ReadExcel:
7070

71-
params = ["xlrd", "openpyxl", "odf"]
71+
params = ["openpyxl", "odf"]
7272
param_names = ["engine"]
7373
fname_excel = "spreadsheet.xlsx"
74-
fname_excel_xls = "spreadsheet.xls"
7574
fname_odf = "spreadsheet.ods"
7675

7776
def _create_odf(self):
@@ -92,13 +91,10 @@ def setup_cache(self):
9291
self.df = _generate_dataframe()
9392

9493
self.df.to_excel(self.fname_excel, sheet_name="Sheet1")
95-
self.df.to_excel(self.fname_excel_xls, sheet_name="Sheet1")
9694
self._create_odf()
9795

9896
def time_read_excel(self, engine):
99-
if engine == "xlrd":
100-
fname = self.fname_excel_xls
101-
elif engine == "odf":
97+
if engine == "odf":
10298
fname = self.fname_odf
10399
else:
104100
fname = self.fname_excel
@@ -107,9 +103,7 @@ def time_read_excel(self, engine):
107103

108104
class ReadExcelNRows(ReadExcel):
109105
def time_read_excel(self, engine):
110-
if engine == "xlrd":
111-
fname = self.fname_excel_xls
112-
elif engine == "odf":
106+
if engine == "odf":
113107
fname = self.fname_odf
114108
else:
115109
fname = self.fname_excel

asv_bench/benchmarks/io/sql.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ def setup(self, connection):
3838
},
3939
index=tm.makeStringIndex(N),
4040
)
41-
self.df.loc[1000:3000, "float_with_nan"] = np.nan
41+
self.df.iloc[1000:3000, 1] = np.nan
4242
self.df["date"] = self.df["datetime"].dt.date
4343
self.df["time"] = self.df["datetime"].dt.time
4444
self.df["datetime_string"] = self.df["datetime"].astype(str)
@@ -88,7 +88,7 @@ def setup(self, connection, dtype):
8888
},
8989
index=tm.makeStringIndex(N),
9090
)
91-
self.df.loc[1000:3000, "float_with_nan"] = np.nan
91+
self.df.iloc[1000:3000, 1] = np.nan
9292
self.df["date"] = self.df["datetime"].dt.date
9393
self.df["time"] = self.df["datetime"].dt.time
9494
self.df["datetime_string"] = self.df["datetime"].astype(str)
@@ -117,7 +117,7 @@ def setup(self):
117117
},
118118
index=tm.makeStringIndex(N),
119119
)
120-
self.df.loc[1000:3000, "float_with_nan"] = np.nan
120+
self.df.iloc[1000:3000, 1] = np.nan
121121
self.df["date"] = self.df["datetime"].dt.date
122122
self.df["time"] = self.df["datetime"].dt.time
123123
self.df["datetime_string"] = self.df["datetime"].astype(str)
@@ -164,7 +164,7 @@ def setup(self, dtype):
164164
},
165165
index=tm.makeStringIndex(N),
166166
)
167-
self.df.loc[1000:3000, "float_with_nan"] = np.nan
167+
self.df.iloc[1000:3000, 1] = np.nan
168168
self.df["date"] = self.df["datetime"].dt.date
169169
self.df["time"] = self.df["datetime"].dt.time
170170
self.df["datetime_string"] = self.df["datetime"].astype(str)

asv_bench/benchmarks/reshape.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ def setup(self):
3636
self.df = DataFrame(data)
3737

3838
def time_reshape_pivot_time_series(self):
39-
self.df.pivot("date", "variable", "value")
39+
self.df.pivot(index="date", columns="variable", values="value")
4040

4141

4242
class SimpleReshape:

ci/code_checks.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ import pandas
4747
4848
blocklist = {'bs4', 'gcsfs', 'html5lib', 'http', 'ipython', 'jinja2', 'hypothesis',
4949
'lxml', 'matplotlib', 'openpyxl', 'py', 'pytest', 's3fs', 'scipy',
50-
'tables', 'urllib.request', 'xlrd', 'xlsxwriter', 'xlwt'}
50+
'tables', 'urllib.request', 'xlrd', 'xlsxwriter'}
5151
5252
# GH#28227 for some of these check for top-level modules, while others are
5353
# more specific (e.g. urllib.request)

ci/deps/actions-310.yaml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,5 +51,4 @@ dependencies:
5151
- xarray
5252
- xlrd
5353
- xlsxwriter
54-
- xlwt
5554
- zstandard

ci/deps/actions-38-downstream_compat.yaml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,6 @@ dependencies:
5151
- xarray
5252
- xlrd
5353
- xlsxwriter
54-
- xlwt
5554
- zstandard
5655

5756
# downstream packages

ci/deps/actions-38-minimum_versions.yaml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,5 +53,4 @@ dependencies:
5353
- xarray=0.19.0
5454
- xlrd=2.0.1
5555
- xlsxwriter=1.4.3
56-
- xlwt=1.3.0
5756
- zstandard=0.15.2

ci/deps/actions-38.yaml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,5 +50,4 @@ dependencies:
5050
- xarray
5151
- xlrd
5252
- xlsxwriter
53-
- xlwt
5453
- zstandard

ci/deps/actions-39.yaml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,5 +51,4 @@ dependencies:
5151
- xarray
5252
- xlrd
5353
- xlsxwriter
54-
- xlwt
5554
- zstandard

ci/deps/circle-38-arm64.yaml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,5 +51,4 @@ dependencies:
5151
- xarray
5252
- xlrd
5353
- xlsxwriter
54-
- xlwt
5554
- zstandard

doc/scripts/eval_performance.py

Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
from timeit import repeat as timeit
2+
3+
import numpy as np
4+
import seaborn as sns
5+
6+
from pandas import DataFrame
7+
8+
setup_common = """from pandas import DataFrame
9+
from numpy.random import randn
10+
df = DataFrame(randn(%d, 3), columns=list('abc'))
11+
%s"""
12+
13+
setup_with = "s = 'a + b * (c ** 2 + b ** 2 - a) / (a * c) ** 3'"
14+
15+
16+
def bench_with(n, times=10, repeat=3, engine="numexpr"):
17+
return (
18+
np.array(
19+
timeit(
20+
"df.eval(s, engine=%r)" % engine,
21+
setup=setup_common % (n, setup_with),
22+
repeat=repeat,
23+
number=times,
24+
)
25+
)
26+
/ times
27+
)
28+
29+
30+
setup_subset = "s = 'a <= b <= c ** 2 + b ** 2 - a and b > c'"
31+
32+
33+
def bench_subset(n, times=20, repeat=3, engine="numexpr"):
34+
return (
35+
np.array(
36+
timeit(
37+
"df.query(s, engine=%r)" % engine,
38+
setup=setup_common % (n, setup_subset),
39+
repeat=repeat,
40+
number=times,
41+
)
42+
)
43+
/ times
44+
)
45+
46+
47+
def bench(mn=3, mx=7, num=100, engines=("python", "numexpr"), verbose=False):
48+
r = np.logspace(mn, mx, num=num).round().astype(int)
49+
50+
ev = DataFrame(np.empty((num, len(engines))), columns=engines)
51+
qu = ev.copy(deep=True)
52+
53+
ev["size"] = qu["size"] = r
54+
55+
for engine in engines:
56+
for i, n in enumerate(r):
57+
if verbose & (i % 10 == 0):
58+
print("engine: %r, i == %d" % (engine, i))
59+
ev_times = bench_with(n, times=1, repeat=1, engine=engine)
60+
ev.loc[i, engine] = np.mean(ev_times)
61+
qu_times = bench_subset(n, times=1, repeat=1, engine=engine)
62+
qu.loc[i, engine] = np.mean(qu_times)
63+
64+
return ev, qu
65+
66+
67+
def plot_perf(df, engines, title, filename=None):
68+
from matplotlib.pyplot import figure
69+
70+
sns.set()
71+
sns.set_palette("Set2")
72+
73+
fig = figure(figsize=(4, 3), dpi=120)
74+
ax = fig.add_subplot(111)
75+
76+
for engine in engines:
77+
ax.loglog(df["size"], df[engine], label=engine, lw=2)
78+
79+
ax.set_xlabel("Number of Rows")
80+
ax.set_ylabel("Time (s)")
81+
ax.set_title(title)
82+
ax.legend(loc="best")
83+
ax.tick_params(top=False, right=False)
84+
85+
fig.tight_layout()
86+
87+
if filename is not None:
88+
fig.savefig(filename)
89+
90+
91+
if __name__ == "__main__":
92+
import os
93+
94+
pandas_dir = os.path.dirname(
95+
os.path.dirname(os.path.abspath(os.path.dirname(__file__)))
96+
)
97+
static_path = os.path.join(pandas_dir, "doc", "source", "_static")
98+
99+
join = lambda p: os.path.join(static_path, p)
100+
101+
fn = join("eval-query-perf-data.h5")
102+
103+
engines = "python", "numexpr"
104+
105+
ev, qu = bench(verbose=True) # only this one
106+
107+
plot_perf(ev, engines, "DataFrame.eval()", filename=join("eval-perf.png"))
108+
plot_perf(qu, engines, "DataFrame.query()", filename=join("query-perf.png"))
-24.7 KB
Binary file not shown.

doc/source/_static/eval-perf.png

10.8 KB
Loading
-21.2 KB
Binary file not shown.

doc/source/_static/query-perf.png

8.79 KB
Loading

doc/source/conf.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -236,7 +236,7 @@
236236
if ".dev" in version:
237237
switcher_version = "dev"
238238
elif "rc" in version:
239-
switcher_version = version.split("rc")[0] + " (rc)"
239+
switcher_version = version.split("rc", maxsplit=1)[0] + " (rc)"
240240

241241
html_theme_options = {
242242
"external_links": [],

doc/source/development/contributing_environment.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ To test out code changes, you'll need to build pandas from source, which
1010
requires a C/C++ compiler and Python environment. If you're making documentation
1111
changes, you can skip to :ref:`contributing to the documentation <contributing_documentation>` but if you skip
1212
creating the development environment you won't be able to build the documentation
13-
locally before pushing your changes.
13+
locally before pushing your changes. It's recommended to also install the :ref:`pre-commit hooks <contributing.pre-commit>`.
1414

1515
.. contents:: Table of contents:
1616
:local:

doc/source/getting_started/install.rst

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -336,7 +336,6 @@ Can be managed as optional_extra with ``pandas[excel]``.
336336
Dependency Minimum Version optional_extra Notes
337337
========================= ================== =============== =============================================================
338338
xlrd 2.0.1 excel Reading Excel
339-
xlwt 1.3.0 excel Writing Excel
340339
xlsxwriter 1.4.3 excel Writing Excel
341340
openpyxl 3.0.7 excel Reading / writing for xlsx files
342341
pyxlsb 1.0.8 excel Reading for xlsb files

doc/source/getting_started/intro_tutorials/09_timeseries.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -144,7 +144,7 @@ I want to add a new column to the ``DataFrame`` containing only the month of the
144144
145145
By using ``Timestamp`` objects for dates, a lot of time-related
146146
properties are provided by pandas. For example the ``month``, but also
147-
``year``, ``weekofyear``, ``quarter``,… All of these properties are
147+
``year``, ``quarter``,… All of these properties are
148148
accessible by the ``dt`` accessor.
149149

150150
.. raw:: html

doc/source/reference/indexing.rst

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -343,8 +343,6 @@ Time/date components
343343
DatetimeIndex.timetz
344344
DatetimeIndex.dayofyear
345345
DatetimeIndex.day_of_year
346-
DatetimeIndex.weekofyear
347-
DatetimeIndex.week
348346
DatetimeIndex.dayofweek
349347
DatetimeIndex.day_of_week
350348
DatetimeIndex.weekday

doc/source/reference/series.rst

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -311,8 +311,6 @@ Datetime properties
311311
Series.dt.second
312312
Series.dt.microsecond
313313
Series.dt.nanosecond
314-
Series.dt.week
315-
Series.dt.weekofyear
316314
Series.dt.dayofweek
317315
Series.dt.day_of_week
318316
Series.dt.weekday

doc/source/user_guide/categorical.rst

Lines changed: 2 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -353,11 +353,6 @@ Renaming categories is done by using the
353353

354354
In contrast to R's ``factor``, categorical data can have categories of other types than string.
355355

356-
.. note::
357-
358-
Be aware that assigning new categories is an inplace operation, while most other operations
359-
under ``Series.cat`` per default return a new ``Series`` of dtype ``category``.
360-
361356
Categories must be unique or a ``ValueError`` is raised:
362357

363358
.. ipython:: python
@@ -952,7 +947,6 @@ categorical (categories and ordering). So if you read back the CSV file you have
952947
relevant columns back to ``category`` and assign the right categories and categories ordering.
953948

954949
.. ipython:: python
955-
:okwarning:
956950
957951
import io
958952
@@ -969,8 +963,8 @@ relevant columns back to ``category`` and assign the right categories and catego
969963
df2["cats"]
970964
# Redo the category
971965
df2["cats"] = df2["cats"].astype("category")
972-
df2["cats"].cat.set_categories(
973-
["very bad", "bad", "medium", "good", "very good"], inplace=True
966+
df2["cats"] = df2["cats"].cat.set_categories(
967+
["very bad", "bad", "medium", "good", "very good"]
974968
)
975969
df2.dtypes
976970
df2["cats"]
@@ -1162,16 +1156,12 @@ Constructing a ``Series`` from a ``Categorical`` will not copy the input
11621156
change the original ``Categorical``:
11631157

11641158
.. ipython:: python
1165-
:okwarning:
11661159
11671160
cat = pd.Categorical([1, 2, 3, 10], categories=[1, 2, 3, 4, 10])
11681161
s = pd.Series(cat, name="cat")
11691162
cat
11701163
s.iloc[0:2] = 10
11711164
cat
1172-
df = pd.DataFrame(s)
1173-
df["cat"].cat.categories = [1, 2, 3, 4, 5]
1174-
cat
11751165
11761166
Use ``copy=True`` to prevent such a behaviour or simply don't reuse ``Categoricals``:
11771167

0 commit comments

Comments
 (0)