Skip to content

Commit dbc8253

Browse files
Merge remote-tracking branch 'upstream/master' into factorize
2 parents a251537 + 5bf5ae8 commit dbc8253

File tree

861 files changed

+9697
-3523
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

861 files changed

+9697
-3523
lines changed

.github/workflows/ci.yml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -153,3 +153,12 @@ jobs:
153153
run: |
154154
source activate pandas-dev
155155
pytest pandas/tests/frame/methods --array-manager
156+
pytest pandas/tests/arithmetic/ --array-manager
157+
158+
# indexing subset (temporary since other tests don't pass yet)
159+
pytest pandas/tests/frame/indexing/test_indexing.py::TestDataFrameIndexing::test_setitem_boolean --array-manager
160+
pytest pandas/tests/frame/indexing/test_where.py --array-manager
161+
pytest pandas/tests/frame/indexing/test_indexing.py::TestDataFrameIndexing::test_setitem_multi_index --array-manager
162+
pytest pandas/tests/frame/indexing/test_setitem.py::TestDataFrameSetItem::test_setitem_listlike_indexer_duplicate_columns --array-manager
163+
pytest pandas/tests/indexing/multiindex/test_setitem.py::TestMultiIndexSetItem::test_astype_assignment_with_dups --array-manager
164+
pytest pandas/tests/indexing/multiindex/test_setitem.py::TestMultiIndexSetItem::test_frame_setitem_multi_column --array-manager

.github/workflows/database.yml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -170,3 +170,11 @@ jobs:
170170

171171
- name: Print skipped tests
172172
run: python ci/print_skipped.py
173+
174+
- name: Upload coverage to Codecov
175+
uses: codecov/codecov-action@v1
176+
with:
177+
files: /tmp/test_coverage.xml
178+
flags: unittests
179+
name: codecov-pandas
180+
fail_ci_if_error: true

.pre-commit-config.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -180,6 +180,12 @@ repos:
180180
language: pygrep
181181
types: [python]
182182
files: ^pandas/tests/
183+
- id: title-capitalization
184+
name: Validate correct capitalization among titles in documentation
185+
entry: python scripts/validate_rst_title_capitalization.py
186+
language: python
187+
types: [rst]
188+
files: ^doc/source/(development|reference)/
183189
- repo: https://github.com/asottile/yesqa
184190
rev: v1.2.2
185191
hooks:

MANIFEST.in

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,4 @@
1-
include MANIFEST.in
2-
include LICENSE
31
include RELEASE.md
4-
include README.md
5-
include setup.py
6-
include pyproject.toml
72

83
graft doc
94
prune doc/build
@@ -16,10 +11,12 @@ global-exclude *.bz2
1611
global-exclude *.csv
1712
global-exclude *.dta
1813
global-exclude *.feather
14+
global-exclude *.tar
1915
global-exclude *.gz
2016
global-exclude *.h5
2117
global-exclude *.html
2218
global-exclude *.json
19+
global-exclude *.jsonl
2320
global-exclude *.pickle
2421
global-exclude *.png
2522
global-exclude *.pyc
@@ -40,6 +37,11 @@ global-exclude .DS_Store
4037
global-exclude .git*
4138
global-exclude \#*
4239

40+
# GH 39321
41+
# csv_dir_path fixture checks the existence of the directory
42+
# exclude the whole directory to avoid running related tests in sdist
43+
prune pandas/tests/io/parser/data
44+
4345
include versioneer.py
4446
include pandas/_version.py
4547
include pandas/io/formats/templates/*.tpl

asv_bench/benchmarks/arithmetic.py

Lines changed: 25 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,13 @@
44
import numpy as np
55

66
import pandas as pd
7-
from pandas import DataFrame, Series, Timestamp, date_range, to_timedelta
7+
from pandas import (
8+
DataFrame,
9+
Series,
10+
Timestamp,
11+
date_range,
12+
to_timedelta,
13+
)
814
import pandas._testing as tm
915
from pandas.core.algorithms import checked_add_with_arr
1016

@@ -110,16 +116,26 @@ class FrameWithFrameWide:
110116
operator.add,
111117
operator.floordiv,
112118
operator.gt,
113-
]
119+
],
120+
[
121+
# (n_rows, n_columns)
122+
(1_000_000, 10),
123+
(100_000, 100),
124+
(10_000, 1000),
125+
(1000, 10_000),
126+
],
114127
]
115-
param_names = ["op"]
128+
param_names = ["op", "shape"]
116129

117-
def setup(self, op):
130+
def setup(self, op, shape):
118131
# we choose dtypes so as to make the blocks
119132
# a) not perfectly match between right and left
120133
# b) appreciably bigger than single columns
121-
n_cols = 2000
122-
n_rows = 500
134+
n_rows, n_cols = shape
135+
136+
if op is operator.floordiv:
137+
# floordiv is much slower than the other operations -> use less data
138+
n_rows = n_rows // 10
123139

124140
# construct dataframe with 2 blocks
125141
arr1 = np.random.randn(n_rows, n_cols // 2).astype("f8")
@@ -131,7 +147,7 @@ def setup(self, op):
131147
df._consolidate_inplace()
132148

133149
# TODO: GH#33198 the setting here shouldn't need two steps
134-
arr1 = np.random.randn(n_rows, n_cols // 4).astype("f8")
150+
arr1 = np.random.randn(n_rows, max(n_cols // 4, 3)).astype("f8")
135151
arr2 = np.random.randn(n_rows, n_cols // 2).astype("i8")
136152
arr3 = np.random.randn(n_rows, n_cols // 4).astype("f8")
137153
df2 = pd.concat(
@@ -145,11 +161,11 @@ def setup(self, op):
145161
self.left = df
146162
self.right = df2
147163

148-
def time_op_different_blocks(self, op):
164+
def time_op_different_blocks(self, op, shape):
149165
# blocks (and dtypes) are not aligned
150166
op(self.left, self.right)
151167

152-
def time_op_same_blocks(self, op):
168+
def time_op_same_blocks(self, op, shape):
153169
# blocks (and dtypes) are aligned
154170
op(self.left, self.left)
155171

asv_bench/benchmarks/ctors.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,12 @@
11
import numpy as np
22

3-
from pandas import DatetimeIndex, Index, MultiIndex, Series, Timestamp
3+
from pandas import (
4+
DatetimeIndex,
5+
Index,
6+
MultiIndex,
7+
Series,
8+
Timestamp,
9+
)
410

511
from .pandas_vb_common import tm
612

asv_bench/benchmarks/dtypes.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,10 @@
55
import pandas as pd
66
from pandas import DataFrame
77
import pandas._testing as tm
8-
from pandas.api.types import is_extension_array_dtype, pandas_dtype
8+
from pandas.api.types import (
9+
is_extension_array_dtype,
10+
pandas_dtype,
11+
)
912

1013
from .pandas_vb_common import (
1114
datetime_dtypes,

asv_bench/benchmarks/frame_ctor.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,21 @@
11
import numpy as np
22

33
import pandas as pd
4-
from pandas import DataFrame, MultiIndex, Series, Timestamp, date_range
4+
from pandas import (
5+
DataFrame,
6+
MultiIndex,
7+
Series,
8+
Timestamp,
9+
date_range,
10+
)
511

612
from .pandas_vb_common import tm
713

814
try:
9-
from pandas.tseries.offsets import Hour, Nano
15+
from pandas.tseries.offsets import (
16+
Hour,
17+
Nano,
18+
)
1019
except ImportError:
1120
# For compatibility with older versions
1221
from pandas.core.datetools import * # noqa

asv_bench/benchmarks/frame_methods.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,15 @@
33

44
import numpy as np
55

6-
from pandas import DataFrame, MultiIndex, NaT, Series, date_range, isnull, period_range
6+
from pandas import (
7+
DataFrame,
8+
MultiIndex,
9+
NaT,
10+
Series,
11+
date_range,
12+
isnull,
13+
period_range,
14+
)
715

816
from .pandas_vb_common import tm
917

asv_bench/benchmarks/gil.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,12 @@
11
import numpy as np
22

3-
from pandas import DataFrame, Series, date_range, factorize, read_csv
3+
from pandas import (
4+
DataFrame,
5+
Series,
6+
date_range,
7+
factorize,
8+
read_csv,
9+
)
410
from pandas.core.algorithms import take_nd
511

612
from .pandas_vb_common import tm

asv_bench/benchmarks/groupby.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -461,6 +461,29 @@ def time_dtype_as_field(self, dtype, method, application):
461461
self.as_field_method()
462462

463463

464+
class GroupByCythonAgg:
465+
"""
466+
Benchmarks specifically targeting our cython aggregation algorithms
467+
(using a big enough dataframe with simple key, so a large part of the
468+
time is actually spent in the grouped aggregation).
469+
"""
470+
471+
param_names = ["dtype", "method"]
472+
params = [
473+
["float64"],
474+
["sum", "prod", "min", "max", "mean", "median", "var", "first", "last"],
475+
]
476+
477+
def setup(self, dtype, method):
478+
N = 1_000_000
479+
df = DataFrame(np.random.randn(N, 10), columns=list("abcdefghij"))
480+
df["key"] = np.random.randint(0, 100, size=N)
481+
self.df = df
482+
483+
def time_frame_agg(self, dtype, method):
484+
self.df.groupby("key").agg(method)
485+
486+
464487
class RankWithTies:
465488
# GH 21237
466489
param_names = ["dtype", "tie_method"]

asv_bench/benchmarks/inference.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,14 @@
11
import numpy as np
22

3-
from pandas import Series, to_numeric
4-
5-
from .pandas_vb_common import lib, tm
3+
from pandas import (
4+
Series,
5+
to_numeric,
6+
)
7+
8+
from .pandas_vb_common import (
9+
lib,
10+
tm,
11+
)
612

713

814
class ToNumeric:

asv_bench/benchmarks/io/csv.py

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,24 @@
1-
from io import BytesIO, StringIO
1+
from io import (
2+
BytesIO,
3+
StringIO,
4+
)
25
import random
36
import string
47

58
import numpy as np
69

7-
from pandas import Categorical, DataFrame, date_range, read_csv, to_datetime
8-
9-
from ..pandas_vb_common import BaseIO, tm
10+
from pandas import (
11+
Categorical,
12+
DataFrame,
13+
date_range,
14+
read_csv,
15+
to_datetime,
16+
)
17+
18+
from ..pandas_vb_common import (
19+
BaseIO,
20+
tm,
21+
)
1022

1123

1224
class ToCSV(BaseIO):

asv_bench/benchmarks/io/excel.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,19 @@
22

33
import numpy as np
44
from odf.opendocument import OpenDocumentSpreadsheet
5-
from odf.table import Table, TableCell, TableRow
5+
from odf.table import (
6+
Table,
7+
TableCell,
8+
TableRow,
9+
)
610
from odf.text import P
711

8-
from pandas import DataFrame, ExcelWriter, date_range, read_excel
12+
from pandas import (
13+
DataFrame,
14+
ExcelWriter,
15+
date_range,
16+
read_excel,
17+
)
918

1019
from ..pandas_vb_common import tm
1120

asv_bench/benchmarks/io/hdf.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,16 @@
11
import numpy as np
22

3-
from pandas import DataFrame, HDFStore, date_range, read_hdf
4-
5-
from ..pandas_vb_common import BaseIO, tm
3+
from pandas import (
4+
DataFrame,
5+
HDFStore,
6+
date_range,
7+
read_hdf,
8+
)
9+
10+
from ..pandas_vb_common import (
11+
BaseIO,
12+
tm,
13+
)
614

715

816
class HDFStoreDataFrame(BaseIO):

asv_bench/benchmarks/io/json.py

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,18 @@
22

33
import numpy as np
44

5-
from pandas import DataFrame, concat, date_range, read_json, timedelta_range
6-
7-
from ..pandas_vb_common import BaseIO, tm
5+
from pandas import (
6+
DataFrame,
7+
concat,
8+
date_range,
9+
read_json,
10+
timedelta_range,
11+
)
12+
13+
from ..pandas_vb_common import (
14+
BaseIO,
15+
tm,
16+
)
817

918

1019
class ReadJSON(BaseIO):

asv_bench/benchmarks/io/pickle.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,15 @@
11
import numpy as np
22

3-
from pandas import DataFrame, date_range, read_pickle
4-
5-
from ..pandas_vb_common import BaseIO, tm
3+
from pandas import (
4+
DataFrame,
5+
date_range,
6+
read_pickle,
7+
)
8+
9+
from ..pandas_vb_common import (
10+
BaseIO,
11+
tm,
12+
)
613

714

815
class Pickle(BaseIO):

asv_bench/benchmarks/io/sql.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,12 @@
33
import numpy as np
44
from sqlalchemy import create_engine
55

6-
from pandas import DataFrame, date_range, read_sql_query, read_sql_table
6+
from pandas import (
7+
DataFrame,
8+
date_range,
9+
read_sql_query,
10+
read_sql_table,
11+
)
712

813
from ..pandas_vb_common import tm
914

0 commit comments

Comments
 (0)