Skip to content

Commit 74b3471

Browse files
committed
Merge remote-tracking branch 'upstream/master' into docfix-multiindex-set_levels
2 parents a1f80ee + c414848 commit 74b3471

File tree

299 files changed

+4846
-5654
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

299 files changed

+4846
-5654
lines changed

.github/workflows/ci.yml

Lines changed: 12 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -15,12 +15,12 @@ jobs:
1515
runs-on: ubuntu-latest
1616
steps:
1717

18+
- name: Setting conda path
19+
run: echo "::add-path::${HOME}/miniconda3/bin"
20+
1821
- name: Checkout
1922
uses: actions/checkout@v1
2023

21-
- name: Setting conda path
22-
run: echo "::set-env name=PATH::${HOME}/miniconda3/bin:${PATH}"
23-
2424
- name: Looking for unwanted patterns
2525
run: ci/code_checks.sh patterns
2626
if: true
@@ -80,15 +80,18 @@ jobs:
8080
git fetch upstream
8181
if git diff upstream/master --name-only | grep -q "^asv_bench/"; then
8282
asv machine --yes
83-
ASV_OUTPUT="$(asv dev)"
84-
if [[ $(echo "$ASV_OUTPUT" | grep "failed") ]]; then
85-
echo "##vso[task.logissue type=error]Benchmarks run with errors"
86-
echo "$ASV_OUTPUT"
83+
asv dev | sed "/failed$/ s/^/##[error]/" | tee benchmarks.log
84+
if grep "failed" benchmarks.log > /dev/null ; then
8785
exit 1
88-
else
89-
echo "Benchmarks run without errors"
9086
fi
9187
else
9288
echo "Benchmarks did not run, no changes detected"
9389
fi
9490
if: true
91+
92+
- name: Publish benchmarks artifact
93+
uses: actions/upload-artifact@master
94+
with:
95+
name: Benchmarks log
96+
path: asv_bench/benchmarks.log
97+
if: failure()

asv_bench/benchmarks/frame_methods.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -565,7 +565,7 @@ def setup(self):
565565

566566
def time_frame_get_dtype_counts(self):
567567
with warnings.catch_warnings(record=True):
568-
self.df.get_dtype_counts()
568+
self.df._data.get_dtype_counts()
569569

570570
def time_info(self):
571571
self.df.info()

asv_bench/benchmarks/index_object.py

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,7 @@
77
Float64Index,
88
Index,
99
IntervalIndex,
10+
MultiIndex,
1011
RangeIndex,
1112
Series,
1213
date_range,
@@ -111,6 +112,18 @@ def time_get_loc_dec(self):
111112
self.idx_dec.get_loc(100000)
112113

113114

115+
class IndexEquals:
116+
def setup(self):
117+
idx_large_fast = RangeIndex(100000)
118+
idx_small_slow = date_range(start="1/1/2012", periods=1)
119+
self.mi_large_slow = MultiIndex.from_product([idx_large_fast, idx_small_slow])
120+
121+
self.idx_non_object = RangeIndex(1)
122+
123+
def time_non_object_equals_multiindex(self):
124+
self.idx_non_object.equals(self.mi_large_slow)
125+
126+
114127
class IndexAppend:
115128
def setup(self):
116129

asv_bench/benchmarks/multiindex_object.py

Lines changed: 13 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@
22

33
import numpy as np
44

5-
from pandas import DataFrame, MultiIndex, date_range
5+
from pandas import DataFrame, MultiIndex, RangeIndex, date_range
66
import pandas.util.testing as tm
77

88

@@ -147,4 +147,16 @@ def time_categorical_level(self):
147147
self.df.set_index(["a", "b"])
148148

149149

150+
class Equals:
151+
def setup(self):
152+
idx_large_fast = RangeIndex(100000)
153+
idx_small_slow = date_range(start="1/1/2012", periods=1)
154+
self.mi_large_slow = MultiIndex.from_product([idx_large_fast, idx_small_slow])
155+
156+
self.idx_non_object = RangeIndex(1)
157+
158+
def time_equals_non_object_index(self):
159+
self.mi_large_slow.equals(self.idx_non_object)
160+
161+
150162
from .pandas_vb_common import setup # noqa: F401 isort:skip

asv_bench/benchmarks/stat_ops.py

Lines changed: 28 additions & 52 deletions
Original file line number | Diff line number | Diff line change
@@ -7,20 +7,14 @@
77

88
class FrameOps:
99

10-
params = [ops, ["float", "int"], [0, 1], [True, False]]
11-
param_names = ["op", "dtype", "axis", "use_bottleneck"]
10+
params = [ops, ["float", "int"], [0, 1]]
11+
param_names = ["op", "dtype", "axis"]
1212

13-
def setup(self, op, dtype, axis, use_bottleneck):
13+
def setup(self, op, dtype, axis):
1414
df = pd.DataFrame(np.random.randn(100000, 4)).astype(dtype)
15-
try:
16-
pd.options.compute.use_bottleneck = use_bottleneck
17-
except TypeError:
18-
from pandas.core import nanops
19-
20-
nanops._USE_BOTTLENECK = use_bottleneck
2115
self.df_func = getattr(df, op)
2216

23-
def time_op(self, op, dtype, axis, use_bottleneck):
17+
def time_op(self, op, dtype, axis):
2418
self.df_func(axis=axis)
2519

2620

@@ -46,20 +40,14 @@ def time_op(self, level, op):
4640

4741
class SeriesOps:
4842

49-
params = [ops, ["float", "int"], [True, False]]
50-
param_names = ["op", "dtype", "use_bottleneck"]
43+
params = [ops, ["float", "int"]]
44+
param_names = ["op", "dtype"]
5145

52-
def setup(self, op, dtype, use_bottleneck):
46+
def setup(self, op, dtype):
5347
s = pd.Series(np.random.randn(100000)).astype(dtype)
54-
try:
55-
pd.options.compute.use_bottleneck = use_bottleneck
56-
except TypeError:
57-
from pandas.core import nanops
58-
59-
nanops._USE_BOTTLENECK = use_bottleneck
6048
self.s_func = getattr(s, op)
6149

62-
def time_op(self, op, dtype, use_bottleneck):
50+
def time_op(self, op, dtype):
6351
self.s_func()
6452

6553

@@ -101,61 +89,49 @@ def time_average_old(self, constructor, pct):
10189

10290
class Correlation:
10391

104-
params = [["spearman", "kendall", "pearson"], [True, False]]
105-
param_names = ["method", "use_bottleneck"]
92+
params = [["spearman", "kendall", "pearson"]]
93+
param_names = ["method"]
10694

107-
def setup(self, method, use_bottleneck):
108-
try:
109-
pd.options.compute.use_bottleneck = use_bottleneck
110-
except TypeError:
111-
from pandas.core import nanops
95+
def setup(self, method):
96+
self.df = pd.DataFrame(np.random.randn(500, 15))
97+
self.df2 = pd.DataFrame(np.random.randn(500, 15))
98+
self.df_wide = pd.DataFrame(np.random.randn(500, 100))
99+
self.df_wide_nans = self.df_wide.where(np.random.random((500, 100)) < 0.9)
100+
self.s = pd.Series(np.random.randn(500))
101+
self.s2 = pd.Series(np.random.randn(500))
112102

113-
nanops._USE_BOTTLENECK = use_bottleneck
114-
self.df = pd.DataFrame(np.random.randn(1000, 30))
115-
self.df2 = pd.DataFrame(np.random.randn(1000, 30))
116-
self.df_wide = pd.DataFrame(np.random.randn(1000, 200))
117-
self.df_wide_nans = self.df_wide.where(np.random.random((1000, 200)) < 0.9)
118-
self.s = pd.Series(np.random.randn(1000))
119-
self.s2 = pd.Series(np.random.randn(1000))
120-
121-
def time_corr(self, method, use_bottleneck):
103+
def time_corr(self, method):
122104
self.df.corr(method=method)
123105

124-
def time_corr_wide(self, method, use_bottleneck):
106+
def time_corr_wide(self, method):
125107
self.df_wide.corr(method=method)
126108

127-
def time_corr_wide_nans(self, method, use_bottleneck):
109+
def time_corr_wide_nans(self, method):
128110
self.df_wide_nans.corr(method=method)
129111

130-
def peakmem_corr_wide(self, method, use_bottleneck):
112+
def peakmem_corr_wide(self, method):
131113
self.df_wide.corr(method=method)
132114

133-
def time_corr_series(self, method, use_bottleneck):
115+
def time_corr_series(self, method):
134116
self.s.corr(self.s2, method=method)
135117

136-
def time_corrwith_cols(self, method, use_bottleneck):
118+
def time_corrwith_cols(self, method):
137119
self.df.corrwith(self.df2, method=method)
138120

139-
def time_corrwith_rows(self, method, use_bottleneck):
121+
def time_corrwith_rows(self, method):
140122
self.df.corrwith(self.df2, axis=1, method=method)
141123

142124

143125
class Covariance:
144126

145-
params = [[True, False]]
146-
param_names = ["use_bottleneck"]
147-
148-
def setup(self, use_bottleneck):
149-
try:
150-
pd.options.compute.use_bottleneck = use_bottleneck
151-
except TypeError:
152-
from pandas.core import nanops
127+
params = []
128+
param_names = []
153129

154-
nanops._USE_BOTTLENECK = use_bottleneck
130+
def setup(self):
155131
self.s = pd.Series(np.random.randn(100000))
156132
self.s2 = pd.Series(np.random.randn(100000))
157133

158-
def time_cov_series(self, use_bottleneck):
134+
def time_cov_series(self):
159135
self.s.cov(self.s2)
160136

161137

ci/azure/posix.yml

Lines changed: 12 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -20,14 +20,14 @@ jobs:
2020
CONDA_PY: "36"
2121
PATTERN: "not slow and not network"
2222
py36_locale_slow_old_np:
23-
ENV_FILE: ci/deps/azure-36-locale.yaml
23+
ENV_FILE: ci/deps/azure-36-locale_slow.yaml
2424
CONDA_PY: "36"
2525
PATTERN: "slow"
2626
LOCALE_OVERRIDE: "zh_CN.UTF-8"
2727
EXTRA_APT: "language-pack-zh-hans"
2828

29-
py36_locale_slow:
30-
ENV_FILE: ci/deps/azure-36-locale_slow.yaml
29+
py36_locale:
30+
ENV_FILE: ci/deps/azure-36-locale.yaml
3131
CONDA_PY: "36"
3232
PATTERN: "not slow and not network"
3333
LOCALE_OVERRIDE: "it_IT.UTF-8"
@@ -44,13 +44,15 @@ jobs:
4444
PATTERN: "not slow and not network"
4545
LOCALE_OVERRIDE: "zh_CN.UTF-8"
4646

47-
py37_np_dev:
48-
ENV_FILE: ci/deps/azure-37-numpydev.yaml
49-
CONDA_PY: "37"
50-
PATTERN: "not slow and not network"
51-
TEST_ARGS: "-W error"
52-
PANDAS_TESTING_MODE: "deprecate"
53-
EXTRA_APT: "xsel"
47+
# Disabled for NumPy object-dtype warning.
48+
# https://github.com/pandas-dev/pandas/issues/30043
49+
# py37_np_dev:
50+
# ENV_FILE: ci/deps/azure-37-numpydev.yaml
51+
# CONDA_PY: "37"
52+
# PATTERN: "not slow and not network"
53+
# TEST_ARGS: "-W error"
54+
# PANDAS_TESTING_MODE: "deprecate"
55+
# EXTRA_APT: "xsel"
5456

5557
steps:
5658
- script: |

ci/code_checks.sh

Lines changed: 18 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -34,17 +34,13 @@ function invgrep {
3434
#
3535
# This is useful for the CI, as we want to fail if one of the patterns
3636
# that we want to avoid is found by grep.
37-
if [[ "$AZURE" == "true" ]]; then
38-
set -o pipefail
39-
grep -n "$@" | awk -F ":" '{print "##vso[task.logissue type=error;sourcepath=" $1 ";linenumber=" $2 ";] Found unwanted pattern: " $3}'
40-
else
41-
grep "$@"
42-
fi
43-
return $((! $?))
37+
grep -n "$@" | sed "s/^/$INVGREP_PREPEND/" | sed "s/$/$INVGREP_APPEND/" ; EXIT_STATUS=${PIPESTATUS[0]}
38+
return $((! $EXIT_STATUS))
4439
}
4540

46-
if [[ "$AZURE" == "true" ]]; then
47-
FLAKE8_FORMAT="##vso[task.logissue type=error;sourcepath=%(path)s;linenumber=%(row)s;columnnumber=%(col)s;code=%(code)s;]%(text)s"
41+
if [[ "$GITHUB_ACTIONS" == "true" ]]; then
42+
FLAKE8_FORMAT="##[error]%(path)s:%(row)s:%(col)s:%(code):%(text)s"
43+
INVGREP_PREPEND="##[error]"
4844
else
4945
FLAKE8_FORMAT="default"
5046
fi
@@ -109,7 +105,12 @@ if [[ -z "$CHECK" || "$CHECK" == "lint" ]]; then
109105

110106
# Imports - Check formatting using isort see setup.cfg for settings
111107
MSG='Check import format using isort ' ; echo $MSG
112-
isort --recursive --check-only pandas asv_bench
108+
ISORT_CMD="isort --recursive --check-only pandas asv_bench"
109+
if [[ "$GITHUB_ACTIONS" == "true" ]]; then
110+
eval $ISORT_CMD | awk '{print "##[error]" $0}'; RET=$(($RET + ${PIPESTATUS[0]}))
111+
else
112+
eval $ISORT_CMD
113+
fi
113114
RET=$(($RET + $?)) ; echo $MSG "DONE"
114115

115116
fi
@@ -198,15 +199,15 @@ if [[ -z "$CHECK" || "$CHECK" == "patterns" ]]; then
198199
invgrep -R --include="*.py" -P '# type: (?!ignore)' pandas
199200
RET=$(($RET + $?)) ; echo $MSG "DONE"
200201

202+
MSG='Check for use of foo.__class__ instead of type(foo)' ; echo $MSG
203+
invgrep -R --include=*.{py,pyx} '\.__class__' pandas
204+
RET=$(($RET + $?)) ; echo $MSG "DONE"
205+
201206
MSG='Check that no file in the repo contains trailing whitespaces' ; echo $MSG
202-
set -o pipefail
203-
if [[ "$AZURE" == "true" ]]; then
204-
# we exclude all c/cpp files as the c/cpp files of pandas code base are tested when Linting .c and .h files
205-
! grep -n '--exclude=*.'{svg,c,cpp,html,js} --exclude-dir=env -RI "\s$" * | awk -F ":" '{print "##vso[task.logissue type=error;sourcepath=" $1 ";linenumber=" $2 ";] Tailing whitespaces found: " $3}'
206-
else
207-
! grep -n '--exclude=*.'{svg,c,cpp,html,js} --exclude-dir=env -RI "\s$" * | awk -F ":" '{print $1 ":" $2 ":Tailing whitespaces found: " $3}'
208-
fi
207+
INVGREP_APPEND=" <- trailing whitespaces found"
208+
invgrep -RI --exclude=\*.{svg,c,cpp,html,js} --exclude-dir=env "\s$" *
209209
RET=$(($RET + $?)) ; echo $MSG "DONE"
210+
unset INVGREP_APPEND
210211
fi
211212

212213
### CODE ###

ci/deps/azure-36-locale.yaml

Lines changed: 21 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -13,20 +13,27 @@ dependencies:
1313
- pytest-azurepipelines
1414

1515
# pandas dependencies
16-
- beautifulsoup4==4.6.0
17-
- bottleneck=1.2.*
16+
- beautifulsoup4
17+
- gcsfs
18+
- html5lib
19+
- ipython
20+
- jinja2
1821
- lxml
19-
- matplotlib=2.2.2
20-
- numpy=1.14.*
21-
- openpyxl=2.4.8
22+
- matplotlib=3.0.*
23+
- nomkl
24+
- numexpr
25+
- numpy=1.15.*
26+
- openpyxl
27+
# lowest supported version of pyarrow (putting it here instead of in
28+
# azure-36-minimum_versions because it needs numpy >= 1.14)
29+
- pyarrow=0.12
30+
- pytables
2231
- python-dateutil
23-
- python-blosc
24-
- pytz=2017.2
32+
- pytz
33+
- s3fs
2534
- scipy
26-
- sqlalchemy=1.1.4
27-
- xlrd=1.1.0
28-
- xlsxwriter=0.9.8
29-
- xlwt=1.2.0
30-
- pip
31-
- pip:
32-
- html5lib==1.0b2
35+
- xarray
36+
- xlrd
37+
- xlsxwriter
38+
- xlwt
39+
- moto

0 commit comments

Comments
 (0)