Skip to content

Commit 5c471b3

Browse files
Merge branch 'master' into GH36666
2 parents cdc5821 + 54fa3da commit 5c471b3

File tree

124 files changed

+2648
-1890
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

124 files changed

+2648
-1890
lines changed

.pre-commit-config.yaml

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,15 @@ repos:
5353
types: [rst]
5454
args: [--filename=*.rst]
5555
additional_dependencies: [flake8-rst==0.7.0, flake8==3.7.9]
56+
- id: incorrect-sphinx-directives
57+
name: Check for incorrect Sphinx directives
58+
language: pygrep
59+
entry: >-
60+
\.\. (autosummary|contents|currentmodule|deprecated
61+
|function|image|important|include|ipython|literalinclude
62+
|math|module|note|raw|seealso|toctree|versionadded
63+
|versionchanged|warning):[^:]
64+
files: \.(py|pyx|rst)$
5665
- repo: https://github.com/asottile/yesqa
5766
rev: v1.2.2
5867
hooks:
@@ -61,4 +70,6 @@ repos:
6170
rev: v3.2.0
6271
hooks:
6372
- id: end-of-file-fixer
64-
exclude: '.html$|^LICENSES/|.csv$|.txt$|.svg$|.py$'
73+
exclude: ^LICENSES/|\.(html|csv|txt|svg|py)$
74+
- id: trailing-whitespace
75+
exclude: \.(html|svg)$

asv_bench/benchmarks/groupby.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -358,6 +358,26 @@ def time_category_size(self):
358358
self.draws.groupby(self.cats).size()
359359

360360

361+
class FillNA:
362+
def setup(self):
363+
N = 100
364+
self.df = DataFrame(
365+
{"group": [1] * N + [2] * N, "value": [np.nan, 1.0] * N}
366+
).set_index("group")
367+
368+
def time_df_ffill(self):
369+
self.df.groupby("group").fillna(method="ffill")
370+
371+
def time_df_bfill(self):
372+
self.df.groupby("group").fillna(method="bfill")
373+
374+
def time_srs_ffill(self):
375+
self.df.groupby("group")["value"].fillna(method="ffill")
376+
377+
def time_srs_bfill(self):
378+
self.df.groupby("group")["value"].fillna(method="bfill")
379+
380+
361381
class GroupByMethods:
362382

363383
param_names = ["dtype", "method", "application"]

asv_bench/benchmarks/io/pickle.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,5 +24,11 @@ def time_read_pickle(self):
2424
def time_write_pickle(self):
2525
self.df.to_pickle(self.fname)
2626

27+
def peakmem_read_pickle(self):
28+
read_pickle(self.fname)
29+
30+
def peakmem_write_pickle(self):
31+
self.df.to_pickle(self.fname)
32+
2733

2834
from ..pandas_vb_common import setup # noqa: F401 isort:skip

asv_bench/benchmarks/rolling.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,12 +76,21 @@ class ExpandingMethods:
7676

7777
def setup(self, constructor, dtype, method):
7878
N = 10 ** 5
79+
N_groupby = 100
7980
arr = (100 * np.random.random(N)).astype(dtype)
8081
self.expanding = getattr(pd, constructor)(arr).expanding()
82+
self.expanding_groupby = (
83+
pd.DataFrame({"A": arr[:N_groupby], "B": range(N_groupby)})
84+
.groupby("B")
85+
.expanding()
86+
)
8187

8288
def time_expanding(self, constructor, dtype, method):
8389
getattr(self.expanding, method)()
8490

91+
def time_expanding_groupby(self, constructor, dtype, method):
92+
getattr(self.expanding_groupby, method)()
93+
8594

8695
class EWMMethods:
8796

asv_bench/benchmarks/timeseries.py

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,14 @@
33
import dateutil
44
import numpy as np
55

6-
from pandas import DataFrame, Series, date_range, period_range, to_datetime
6+
from pandas import (
7+
DataFrame,
8+
Series,
9+
date_range,
10+
period_range,
11+
timedelta_range,
12+
to_datetime,
13+
)
714

815
from pandas.tseries.frequencies import infer_freq
916

@@ -121,12 +128,15 @@ def time_convert(self):
121128

122129
class Iteration:
123130

124-
params = [date_range, period_range]
131+
params = [date_range, period_range, timedelta_range]
125132
param_names = ["time_index"]
126133

127134
def setup(self, time_index):
128135
N = 10 ** 6
129-
self.idx = time_index(start="20140101", freq="T", periods=N)
136+
if time_index is timedelta_range:
137+
self.idx = time_index(start=0, freq="T", periods=N)
138+
else:
139+
self.idx = time_index(start="20140101", freq="T", periods=N)
130140
self.exit = 10000
131141

132142
def time_iter(self, time_index):

ci/code_checks.sh

Lines changed: 4 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -180,14 +180,6 @@ if [[ -z "$CHECK" || "$CHECK" == "patterns" ]]; then
180180
invgrep -r -E --include '*.py' "[[:space:]] pytest.raises" pandas/tests/
181181
RET=$(($RET + $?)) ; echo $MSG "DONE"
182182

183-
MSG='Check for python2-style file encodings' ; echo $MSG
184-
invgrep -R --include="*.py" --include="*.pyx" -E "# -\*- coding: utf-8 -\*-" pandas scripts
185-
RET=$(($RET + $?)) ; echo $MSG "DONE"
186-
187-
MSG='Check for python2-style super usage' ; echo $MSG
188-
invgrep -R --include="*.py" -E "super\(\w*, (self|cls)\)" pandas
189-
RET=$(($RET + $?)) ; echo $MSG "DONE"
190-
191183
MSG='Check for use of builtin filter function' ; echo $MSG
192184
invgrep -R --include="*.py" -P '(?<!def)[\(\s]filter\(' pandas
193185
RET=$(($RET + $?)) ; echo $MSG "DONE"
@@ -206,18 +198,10 @@ if [[ -z "$CHECK" || "$CHECK" == "patterns" ]]; then
206198
invgrep -R --include="*.py" --include="*.pyx" -E "(DEPRECATED|DEPRECATE|Deprecated)(:|,|\.)" pandas
207199
RET=$(($RET + $?)) ; echo $MSG "DONE"
208200

209-
MSG='Check for python2 new-style classes and for empty parentheses' ; echo $MSG
210-
invgrep -R --include="*.py" --include="*.pyx" -E "class\s\S*\((object)?\):" pandas asv_bench/benchmarks scripts
211-
RET=$(($RET + $?)) ; echo $MSG "DONE"
212-
213201
MSG='Check for backticks incorrectly rendering because of missing spaces' ; echo $MSG
214202
invgrep -R --include="*.rst" -E "[a-zA-Z0-9]\`\`?[a-zA-Z0-9]" doc/source/
215203
RET=$(($RET + $?)) ; echo $MSG "DONE"
216204

217-
MSG='Check for incorrect sphinx directives' ; echo $MSG
218-
invgrep -R --include="*.py" --include="*.pyx" --include="*.rst" -E "\.\. (autosummary|contents|currentmodule|deprecated|function|image|important|include|ipython|literalinclude|math|module|note|raw|seealso|toctree|versionadded|versionchanged|warning):[^:]" ./pandas ./doc/source
219-
RET=$(($RET + $?)) ; echo $MSG "DONE"
220-
221205
# Check for the following code in testing: `unittest.mock`, `mock.Mock()` or `mock.patch`
222206
MSG='Check that unittest.mock is not used (pytest builtin monkeypatch fixture should be used instead)' ; echo $MSG
223207
invgrep -r -E --include '*.py' '(unittest(\.| import )mock|mock\.Mock\(\)|mock\.patch)' pandas/tests/
@@ -259,15 +243,15 @@ if [[ -z "$CHECK" || "$CHECK" == "patterns" ]]; then
259243
invgrep -R --include=*.{py,pyx} '\.__class__' pandas
260244
RET=$(($RET + $?)) ; echo $MSG "DONE"
261245

262-
MSG='Check for use of xrange instead of range' ; echo $MSG
263-
invgrep -R --include=*.{py,pyx} 'xrange' pandas
264-
RET=$(($RET + $?)) ; echo $MSG "DONE"
265-
266246
MSG='Check that no file in the repo contains trailing whitespaces' ; echo $MSG
267247
INVGREP_APPEND=" <- trailing whitespaces found"
268248
invgrep -RI --exclude=\*.{svg,c,cpp,html,js} --exclude-dir=env "\s$" *
269249
RET=$(($RET + $?)) ; echo $MSG "DONE"
270250
unset INVGREP_APPEND
251+
252+
MSG='Check code for instances of os.remove' ; echo $MSG
253+
invgrep -R --include="*.py*" --exclude "common.py" --exclude "test_writers.py" --exclude "test_store.py" -E "os\.remove" pandas/tests/
254+
RET=$(($RET + $?)) ; echo $MSG "DONE"
271255
fi
272256

273257
### CODE ###

doc/source/development/contributing.rst

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -206,7 +206,7 @@ You will need `Build Tools for Visual Studio 2017
206206
scrolling down to "All downloads" -> "Tools for Visual Studio 2019".
207207
In the installer, select the "C++ build tools" workload.
208208

209-
**Mac OS**
209+
**macOS**
210210

211211
Information about compiler installation can be found here:
212212
https://devguide.python.org/setup/#macos
@@ -299,7 +299,7 @@ Creating a Python environment (pip)
299299
If you aren't using conda for your development environment, follow these instructions.
300300
You'll need to have at least Python 3.6.1 installed on your system.
301301

302-
**Unix**/**Mac OS with virtualenv**
302+
**Unix**/**macOS with virtualenv**
303303

304304
.. code-block:: bash
305305
@@ -318,7 +318,7 @@ You'll need to have at least Python 3.6.1 installed on your system.
318318
python setup.py build_ext --inplace -j 4
319319
python -m pip install -e . --no-build-isolation --no-use-pep517
320320
321-
**Unix**/**Mac OS with pyenv**
321+
**Unix**/**macOS with pyenv**
322322

323323
Consult the docs for setting up pyenv `here <https://github.com/pyenv/pyenv>`__.
324324

doc/source/getting_started/install.rst

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -262,7 +262,7 @@ BeautifulSoup4 4.6.0 HTML parser for read_html (see :ref
262262
Jinja2 2.10 Conditional formatting with DataFrame.style
263263
PyQt4 Clipboard I/O
264264
PyQt5 Clipboard I/O
265-
PyTables 3.4.4 HDF5-based reading / writing
265+
PyTables 3.5.1 HDF5-based reading / writing
266266
SQLAlchemy 1.2.8 SQL support for databases other than sqlite
267267
SciPy 1.2.0 Miscellaneous statistical functions
268268
xlsxwriter 1.0.2 Excel writing
@@ -280,7 +280,6 @@ psycopg2 2.7 PostgreSQL engine for sqlalchemy
280280
pyarrow 0.15.0 Parquet, ORC, and feather reading / writing
281281
pymysql 0.7.11 MySQL engine for sqlalchemy
282282
pyreadstat SPSS files (.sav) reading
283-
pytables 3.5.1 HDF5 reading / writing
284283
pyxlsb 1.0.6 Reading for xlsb files
285284
qtpy Clipboard I/O
286285
s3fs 0.4.0 Amazon S3 access

doc/source/reference/series.rst

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -22,10 +22,6 @@ Attributes
2222
:toctree: api/
2323

2424
Series.index
25-
26-
.. autosummary::
27-
:toctree: api/
28-
2925
Series.array
3026
Series.values
3127
Series.dtype

doc/source/reference/window.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ Standard moving window functions
3232
Rolling.apply
3333
Rolling.aggregate
3434
Rolling.quantile
35+
Rolling.sem
3536
Window.mean
3637
Window.sum
3738
Window.var
@@ -61,6 +62,7 @@ Standard expanding window functions
6162
Expanding.apply
6263
Expanding.aggregate
6364
Expanding.quantile
65+
Expanding.sem
6466

6567
Exponentially-weighted moving window functions
6668
----------------------------------------------

doc/source/user_guide/computation.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -328,6 +328,7 @@ We provide a number of common statistical functions:
328328
:meth:`~Rolling.apply`, Generic apply
329329
:meth:`~Rolling.cov`, Sample covariance (binary)
330330
:meth:`~Rolling.corr`, Sample correlation (binary)
331+
:meth:`~Rolling.sem`, Standard error of mean
331332

332333
.. _computation.window_variance.caveats:
333334

@@ -938,6 +939,7 @@ Method summary
938939
:meth:`~Expanding.apply`, Generic apply
939940
:meth:`~Expanding.cov`, Sample covariance (binary)
940941
:meth:`~Expanding.corr`, Sample correlation (binary)
942+
:meth:`~Expanding.sem`, Standard error of mean
941943

942944
.. note::
943945

doc/source/user_guide/io.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ The pandas I/O API is a set of top level ``reader`` functions accessed like
2323
text;`JSON <https://www.json.org/>`__;:ref:`read_json<io.json_reader>`;:ref:`to_json<io.json_writer>`
2424
text;`HTML <https://en.wikipedia.org/wiki/HTML>`__;:ref:`read_html<io.read_html>`;:ref:`to_html<io.html>`
2525
text; Local clipboard;:ref:`read_clipboard<io.clipboard>`;:ref:`to_clipboard<io.clipboard>`
26-
;`MS Excel <https://en.wikipedia.org/wiki/Microsoft_Excel>`__;:ref:`read_excel<io.excel_reader>`;:ref:`to_excel<io.excel_writer>`
26+
binary;`MS Excel <https://en.wikipedia.org/wiki/Microsoft_Excel>`__;:ref:`read_excel<io.excel_reader>`;:ref:`to_excel<io.excel_writer>`
2727
binary;`OpenDocument <http://www.opendocumentformat.org>`__;:ref:`read_excel<io.ods>`;
2828
binary;`HDF5 Format <https://support.hdfgroup.org/HDF5/whatishdf5.html>`__;:ref:`read_hdf<io.hdf5>`;:ref:`to_hdf<io.hdf5>`
2929
binary;`Feather Format <https://github.com/wesm/feather>`__;:ref:`read_feather<io.feather>`;:ref:`to_feather<io.feather>`

doc/source/user_guide/text.rst

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -302,10 +302,10 @@ positional argument (a regex object) and return a string.
302302
return m.group(0)[::-1]
303303
304304
305-
pd.Series(
306-
["foo 123", "bar baz", np.nan],
307-
dtype="string"
308-
).str.replace(pat, repl, regex=True)
305+
pd.Series(["foo 123", "bar baz", np.nan], dtype="string").str.replace(
306+
pat, repl, regex=True
307+
)
308+
309309
310310
# Using regex groups
311311
pat = r"(?P<one>\w+) (?P<two>\w+) (?P<three>\w+)"

0 commit comments

Comments
 (0)