Skip to content

Commit 6e3aac8

Browse files
Merge remote-tracking branch 'upstream/master' into factorize
2 parents c545970 + 9f792cd commit 6e3aac8

File tree

286 files changed

+7181
-5178
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

286 files changed

+7181
-5178
lines changed
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
name: Build pandas
2+
description: Rebuilds the C extensions and installs pandas
3+
runs:
4+
using: composite
5+
steps:
6+
7+
- name: Environment Detail
8+
run: |
9+
conda info
10+
conda list
11+
shell: bash -l {0}
12+
13+
- name: Build Pandas
14+
run: |
15+
python setup.py build_ext -j 2
16+
python -m pip install -e . --no-build-isolation --no-use-pep517
17+
shell: bash -l {0}

.github/actions/setup/action.yml

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
name: Set up pandas
2+
description: Runs all the setup steps required to have a built pandas ready to use
3+
runs:
4+
using: composite
5+
steps:
6+
- name: Setting conda path
7+
run: echo "${HOME}/miniconda3/bin" >> $GITHUB_PATH
8+
shell: bash -l {0}
9+
10+
- name: Setup environment and build pandas
11+
run: ci/setup_env.sh
12+
shell: bash -l {0}

.github/workflows/ci.yml

Lines changed: 14 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -41,15 +41,8 @@ jobs:
4141
environment-file: ${{ env.ENV_FILE }}
4242
use-only-tar-bz2: true
4343

44-
- name: Environment Detail
45-
run: |
46-
conda info
47-
conda list
48-
4944
- name: Build Pandas
50-
run: |
51-
python setup.py build_ext -j 2
52-
python -m pip install -e . --no-build-isolation --no-use-pep517
45+
uses: ./.github/actions/build_pandas
5346

5447
- name: Linting
5548
run: ci/code_checks.sh lint
@@ -100,14 +93,11 @@ jobs:
10093
runs-on: ubuntu-latest
10194
steps:
10295

103-
- name: Setting conda path
104-
run: echo "${HOME}/miniconda3/bin" >> $GITHUB_PATH
105-
10696
- name: Checkout
10797
uses: actions/checkout@v1
10898

109-
- name: Setup environment and build pandas
110-
run: ci/setup_env.sh
99+
- name: Set up pandas
100+
uses: ./.github/actions/setup
111101

112102
- name: Build website
113103
run: |
@@ -139,19 +129,25 @@ jobs:
139129
run: rsync -az --delete doc/build/html/ docs@${{ secrets.server_ip }}:/usr/share/nginx/pandas/pandas-docs/dev
140130
if: github.event_name == 'push'
141131

132+
- name: Move docs into site directory
133+
run: mv doc/build/html web/build/docs
134+
- name: Save website as an artifact
135+
uses: actions/upload-artifact@v2
136+
with:
137+
name: website
138+
path: web/build
139+
retention-days: 14
140+
142141
data_manager:
143142
name: Test experimental data manager
144143
runs-on: ubuntu-latest
145144
steps:
146145

147-
- name: Setting conda path
148-
run: echo "${HOME}/miniconda3/bin" >> $GITHUB_PATH
149-
150146
- name: Checkout
151147
uses: actions/checkout@v1
152148

153-
- name: Setup environment and build pandas
154-
run: ci/setup_env.sh
149+
- name: Set up pandas
150+
uses: ./.github/actions/setup
155151

156152
- name: Run tests
157153
run: |

.github/workflows/comment_bot.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ jobs:
2929
- name: Install-pre-commit
3030
run: python -m pip install --upgrade pre-commit
3131
- name: Run pre-commit
32-
run: pre-commit run --all-files || (exit 0)
32+
run: pre-commit run --from-ref=origin/master --to-ref=HEAD --all-files || (exit 0)
3333
- name: Commit results
3434
run: |
3535
git config user.name "$(git log -1 --pretty=format:%an)"

.github/workflows/database.yml

Lines changed: 2 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -72,15 +72,8 @@ jobs:
7272
environment-file: ${{ env.ENV_FILE }}
7373
use-only-tar-bz2: true
7474

75-
- name: Environment Detail
76-
run: |
77-
conda info
78-
conda list
79-
8075
- name: Build Pandas
81-
run: |
82-
python setup.py build_ext -j 2
83-
python -m pip install -e . --no-build-isolation --no-use-pep517
76+
uses: ./.github/actions/build_pandas
8477

8578
- name: Test
8679
run: ci/run_tests.sh
@@ -158,15 +151,8 @@ jobs:
158151
environment-file: ${{ env.ENV_FILE }}
159152
use-only-tar-bz2: true
160153

161-
- name: Environment Detail
162-
run: |
163-
conda info
164-
conda list
165-
166154
- name: Build Pandas
167-
run: |
168-
python setup.py build_ext -j 2
169-
python -m pip install -e . --no-build-isolation --no-use-pep517
155+
uses: ./.github/actions/build_pandas
170156

171157
- name: Test
172158
run: ci/run_tests.sh

.pre-commit-config.yaml

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -24,12 +24,12 @@ repos:
2424
hooks:
2525
- id: isort
2626
- repo: https://github.com/asottile/pyupgrade
27-
rev: v2.7.4
27+
rev: v2.10.0
2828
hooks:
2929
- id: pyupgrade
30-
args: [--py37-plus]
30+
args: [--py37-plus, --keep-runtime-typing]
3131
- repo: https://github.com/pre-commit/pygrep-hooks
32-
rev: v1.7.0
32+
rev: v1.7.1
3333
hooks:
3434
- id: rst-backticks
3535
- id: rst-directive-colons
@@ -127,6 +127,12 @@ repos:
127127
types: [python]
128128
files: ^pandas/tests/
129129
exclude: ^pandas/tests/extension/
130+
- id: unwanted-patters-pytest-xfail
131+
name: Check for use of pytest.xfail
132+
entry: pytest\.xfail
133+
language: pygrep
134+
types: [python]
135+
files: ^pandas/tests/
130136
- id: inconsistent-namespace-usage
131137
name: 'Check for inconsistent use of pandas namespace in tests'
132138
entry: python scripts/check_for_inconsistent_pandas_namespace.py
@@ -192,6 +198,11 @@ repos:
192198
files: ^pandas/
193199
exclude: ^pandas/tests/
194200
- repo: https://github.com/MarcoGorelli/no-string-hints
195-
rev: v0.1.6
201+
rev: v0.1.7
196202
hooks:
197203
- id: no-string-hints
204+
- repo: https://github.com/MarcoGorelli/abs-imports
205+
rev: v0.1.2
206+
hooks:
207+
- id: abs-imports
208+
files: ^pandas/

asv_bench/benchmarks/categoricals.py

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -118,12 +118,29 @@ def setup(self):
118118
self.a = pd.Categorical(list("aabbcd") * N)
119119
self.b = pd.Categorical(list("bbcdjk") * N)
120120

121+
self.idx_a = pd.CategoricalIndex(range(N), range(N))
122+
self.idx_b = pd.CategoricalIndex(range(N + 1), range(N + 1))
123+
self.df_a = pd.DataFrame(range(N), columns=["a"], index=self.idx_a)
124+
self.df_b = pd.DataFrame(range(N + 1), columns=["a"], index=self.idx_b)
125+
121126
def time_concat(self):
122127
pd.concat([self.s, self.s])
123128

124129
def time_union(self):
125130
union_categoricals([self.a, self.b])
126131

132+
def time_append_overlapping_index(self):
133+
self.idx_a.append(self.idx_a)
134+
135+
def time_append_non_overlapping_index(self):
136+
self.idx_a.append(self.idx_b)
137+
138+
def time_concat_overlapping_index(self):
139+
pd.concat([self.df_a, self.df_a])
140+
141+
def time_concat_non_overlapping_index(self):
142+
pd.concat([self.df_a, self.df_b])
143+
127144

128145
class ValueCounts:
129146

@@ -306,7 +323,7 @@ def time_get_loc(self):
306323
self.index.get_loc(self.category)
307324

308325
def time_shallow_copy(self):
309-
self.index._shallow_copy()
326+
self.index._view()
310327

311328
def time_align(self):
312329
pd.DataFrame({"a": self.series, "b": self.series[:500]})

asv_bench/benchmarks/dtypes.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,10 @@
22

33
import numpy as np
44

5+
import pandas as pd
56
from pandas import DataFrame
67
import pandas._testing as tm
7-
from pandas.api.types import pandas_dtype
8+
from pandas.api.types import is_extension_array_dtype, pandas_dtype
89

910
from .pandas_vb_common import (
1011
datetime_dtypes,
@@ -119,4 +120,16 @@ def time_select_dtype_string_exclude(self, dtype):
119120
self.df_string.select_dtypes(exclude=dtype)
120121

121122

123+
class CheckDtypes:
124+
def setup(self):
125+
self.ext_dtype = pd.Int64Dtype()
126+
self.np_dtype = np.dtype("int64")
127+
128+
def time_is_extension_array_dtype_true(self):
129+
is_extension_array_dtype(self.ext_dtype)
130+
131+
def time_is_extension_array_dtype_false(self):
132+
is_extension_array_dtype(self.np_dtype)
133+
134+
122135
from .pandas_vb_common import setup # noqa: F401 isort:skip

asv_bench/benchmarks/gil.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import numpy as np
22

33
from pandas import DataFrame, Series, date_range, factorize, read_csv
4-
from pandas.core.algorithms import take_1d
4+
from pandas.core.algorithms import take_nd
55

66
from .pandas_vb_common import tm
77

@@ -110,7 +110,7 @@ def setup(self, dtype):
110110

111111
@test_parallel(num_threads=2)
112112
def parallel_take1d():
113-
take_1d(df["col"].values, indexer)
113+
take_nd(df["col"].values, indexer)
114114

115115
self.parallel_take1d = parallel_take1d
116116

asv_bench/benchmarks/hash_functions.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,15 @@ def time_isin_outside(self, dtype, exponent):
2525
self.s.isin(self.values_outside)
2626

2727

28+
class UniqueForLargePyObjectInts:
29+
def setup(self):
30+
lst = [x << 32 for x in range(5000)]
31+
self.arr = np.array(lst, dtype=np.object_)
32+
33+
def time_unique(self):
34+
pd.unique(self.arr)
35+
36+
2837
class IsinWithRandomFloat:
2938
params = [
3039
[np.float64, np.object],

asv_bench/benchmarks/io/csv.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -84,8 +84,8 @@ class ToCSVIndexes(BaseIO):
8484
def _create_df(rows, cols):
8585
index_cols = {
8686
"index1": np.random.randint(0, rows, rows),
87-
"index2": np.full(rows, 1, dtype=np.int),
88-
"index3": np.full(rows, 1, dtype=np.int),
87+
"index2": np.full(rows, 1, dtype=int),
88+
"index3": np.full(rows, 1, dtype=int),
8989
}
9090
data_cols = {
9191
f"col{i}": np.random.uniform(0, 100000.0, rows) for i in range(cols)

asv_bench/benchmarks/period.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ def time_get_loc(self):
8686
self.index.get_loc(self.period)
8787

8888
def time_shallow_copy(self):
89-
self.index._shallow_copy()
89+
self.index._view()
9090

9191
def time_series_loc(self):
9292
self.series.loc[self.period]

asv_bench/benchmarks/rolling.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,8 +140,11 @@ class Pairwise:
140140

141141
def setup(self, window, method, pairwise):
142142
N = 10 ** 4
143+
n_groups = 20
144+
groups = [i for _ in range(N // n_groups) for i in range(n_groups)]
143145
arr = np.random.random(N)
144146
self.df = pd.DataFrame(arr)
147+
self.df_group = pd.DataFrame({"A": groups, "B": arr}).groupby("A")
145148

146149
def time_pairwise(self, window, method, pairwise):
147150
if window is None:
@@ -150,6 +153,13 @@ def time_pairwise(self, window, method, pairwise):
150153
r = self.df.rolling(window=window)
151154
getattr(r, method)(self.df, pairwise=pairwise)
152155

156+
def time_groupby(self, window, method, pairwise):
157+
if window is None:
158+
r = self.df_group.expanding()
159+
else:
160+
r = self.df_group.rolling(window=window)
161+
getattr(r, method)(self.df, pairwise=pairwise)
162+
153163

154164
class Quantile:
155165
params = (
@@ -245,6 +255,19 @@ def time_rolling_multiindex_creation(self):
245255

246256
class GroupbyEWM:
247257

258+
params = ["var", "std", "cov", "corr"]
259+
param_names = ["method"]
260+
261+
def setup(self, method):
262+
df = pd.DataFrame({"A": range(50), "B": range(50)})
263+
self.gb_ewm = df.groupby("A").ewm(com=1.0)
264+
265+
def time_groupby_method(self, method):
266+
getattr(self.gb_ewm, method)()
267+
268+
269+
class GroupbyEWMEngine:
270+
248271
params = ["cython", "numba"]
249272
param_names = ["engine"]
250273

asv_bench/benchmarks/series_methods.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -108,8 +108,8 @@ def setup(self):
108108
self.vals_short = np.arange(2).astype(object)
109109
self.vals_long = np.arange(10 ** 5).astype(object)
110110
# because of nans floats are special:
111-
self.s_long_floats = Series(np.arange(10 ** 5, dtype=np.float)).astype(object)
112-
self.vals_long_floats = np.arange(10 ** 5, dtype=np.float).astype(object)
111+
self.s_long_floats = Series(np.arange(10 ** 5, dtype=np.float_)).astype(object)
112+
self.vals_long_floats = np.arange(10 ** 5, dtype=np.float_).astype(object)
113113

114114
def time_isin_nans(self):
115115
# if nan-objects are different objects,

asv_bench/benchmarks/timedelta.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ def time_get_loc(self):
7474
self.index.get_loc(self.timedelta)
7575

7676
def time_shallow_copy(self):
77-
self.index._shallow_copy()
77+
self.index._view()
7878

7979
def time_series_loc(self):
8080
self.series.loc[self.timedelta]

ci/deps/actions-37-locale.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ dependencies:
1111
- hypothesis>=3.58.0
1212

1313
# required
14-
- numpy
14+
- numpy<1.20 # GH#39541 compat for pyarrow<3
1515
- python-dateutil
1616
- pytz
1717

ci/deps/azure-37.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ dependencies:
1818
- numpy
1919
- python-dateutil
2020
- nomkl
21-
- pyarrow
21+
- pyarrow=0.15.1
2222
- pytz
2323
- s3fs>=0.4.0
2424
- moto>=1.3.14

ci/deps/azure-38-locale.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ dependencies:
2424
- moto
2525
- nomkl
2626
- numexpr
27-
- numpy
27+
- numpy<1.20 # GH#39541 compat with pyarrow<3
2828
- openpyxl
2929
- pytables
3030
- python-dateutil

0 commit comments

Comments
 (0)