Skip to content

Commit 2e02d76

Browse files
committed
Merge remote-tracking branch 'pandas-dev/master' into bigquery-udf-resources
2 parents ec590af + e27b296 commit 2e02d76

File tree

185 files changed

+13011
-9660
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

185 files changed

+13011
-9660
lines changed

.travis.yml

Lines changed: 57 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -66,19 +66,6 @@ matrix:
6666
apt:
6767
packages:
6868
- python-gtk2
69-
- python: 3.4
70-
env:
71-
- PYTHON_VERSION=3.4
72-
- JOB_NAME: "34_nslow"
73-
- NOSE_ARGS="not slow and not disabled"
74-
- FULL_DEPS=true
75-
- CLIPBOARD=xsel
76-
- CACHE_NAME="34_nslow"
77-
- USE_CACHE=true
78-
addons:
79-
apt:
80-
packages:
81-
- xsel
8269
- python: 3.5
8370
env:
8471
- PYTHON_VERSION=3.5
@@ -93,6 +80,32 @@ matrix:
9380
apt:
9481
packages:
9582
- xsel
83+
- python: 3.6
84+
env:
85+
- PYTHON_VERSION=3.6
86+
- JOB_NAME: "36"
87+
- NOSE_ARGS="not slow and not network and not disabled"
88+
- PANDAS_TESTING_MODE="deprecate"
89+
addons:
90+
apt:
91+
packages:
92+
- libatlas-base-dev
93+
- gfortran
94+
# In allow_failures
95+
- python: 2.7
96+
env:
97+
- PYTHON_VERSION=2.7
98+
- JOB_NAME: "27_nslow_nnet_COMPAT"
99+
- NOSE_ARGS="not slow and not network and not disabled"
100+
- LOCALE_OVERRIDE="it_IT.UTF-8"
101+
- INSTALL_TEST=true
102+
- JOB_TAG=_COMPAT
103+
- CACHE_NAME="27_nslow_nnet_COMPAT"
104+
- USE_CACHE=true
105+
addons:
106+
apt:
107+
packages:
108+
- language-pack-it
96109
# In allow_failures
97110
- python: 2.7
98111
env:
@@ -103,45 +116,46 @@ matrix:
103116
- FULL_DEPS=true
104117
- CACHE_NAME="27_slow"
105118
- USE_CACHE=true
119+
# In allow_failures
120+
- python: 2.7
121+
env:
122+
- PYTHON_VERSION=2.7
123+
- JOB_NAME: "27_build_test_conda"
124+
- JOB_TAG=_BUILD_TEST
125+
- NOSE_ARGS="not slow and not disabled"
126+
- FULL_DEPS=true
127+
- BUILD_TEST=true
128+
- CACHE_NAME="27_build_test_conda"
129+
- USE_CACHE=true
106130
# In allow_failures
107131
- python: 3.4
108132
env:
109133
- PYTHON_VERSION=3.4
110-
- JOB_NAME: "34_slow"
111-
- JOB_TAG=_SLOW
112-
- NOSE_ARGS="slow and not network and not disabled"
134+
- JOB_NAME: "34_nslow"
135+
- NOSE_ARGS="not slow and not disabled"
113136
- FULL_DEPS=true
114137
- CLIPBOARD=xsel
115-
- CACHE_NAME="34_slow"
138+
- CACHE_NAME="34_nslow"
116139
- USE_CACHE=true
117140
addons:
118141
apt:
119142
packages:
120143
- xsel
121144
# In allow_failures
122-
- python: 2.7
145+
- python: 3.4
123146
env:
124-
- PYTHON_VERSION=2.7
125-
- JOB_NAME: "27_build_test_conda"
126-
- JOB_TAG=_BUILD_TEST
127-
- NOSE_ARGS="not slow and not disabled"
147+
- PYTHON_VERSION=3.4
148+
- JOB_NAME: "34_slow"
149+
- JOB_TAG=_SLOW
150+
- NOSE_ARGS="slow and not network and not disabled"
128151
- FULL_DEPS=true
129-
- BUILD_TEST=true
130-
- CACHE_NAME="27_build_test_conda"
152+
- CLIPBOARD=xsel
153+
- CACHE_NAME="34_slow"
131154
- USE_CACHE=true
132-
# In allow_failures
133-
- python: 3.6-dev
134-
env:
135-
- PYTHON_VERSION=3.6
136-
- JOB_NAME: "36_dev"
137-
- JOB_TAG=_DEV
138-
- NOSE_ARGS="not slow and not network and not disabled"
139-
- PANDAS_TESTING_MODE="deprecate"
140155
addons:
141156
apt:
142157
packages:
143-
- libatlas-base-dev
144-
- gfortran
158+
- xsel
145159
# In allow_failures
146160
- python: 3.5
147161
env:
@@ -157,21 +171,6 @@ matrix:
157171
packages:
158172
- libatlas-base-dev
159173
- gfortran
160-
# In allow_failures
161-
- python: 2.7
162-
env:
163-
- PYTHON_VERSION=2.7
164-
- JOB_NAME: "27_nslow_nnet_COMPAT"
165-
- NOSE_ARGS="not slow and not network and not disabled"
166-
- LOCALE_OVERRIDE="it_IT.UTF-8"
167-
- INSTALL_TEST=true
168-
- JOB_TAG=_COMPAT
169-
- CACHE_NAME="27_nslow_nnet_COMPAT"
170-
- USE_CACHE=true
171-
addons:
172-
apt:
173-
packages:
174-
- language-pack-it
175174
# In allow_failures
176175
- python: 3.5
177176
env:
@@ -226,18 +225,19 @@ matrix:
226225
- BUILD_TEST=true
227226
- CACHE_NAME="27_build_test_conda"
228227
- USE_CACHE=true
229-
- python: 3.6-dev
228+
- python: 3.4
230229
env:
231-
- PYTHON_VERSION=3.6
232-
- JOB_NAME: "36_dev"
233-
- JOB_TAG=_DEV
234-
- NOSE_ARGS="not slow and not network and not disabled"
235-
- PANDAS_TESTING_MODE="deprecate"
230+
- PYTHON_VERSION=3.4
231+
- JOB_NAME: "34_nslow"
232+
- NOSE_ARGS="not slow and not disabled"
233+
- FULL_DEPS=true
234+
- CLIPBOARD=xsel
235+
- CACHE_NAME="34_nslow"
236+
- USE_CACHE=true
236237
addons:
237238
apt:
238239
packages:
239-
- libatlas-base-dev
240-
- gfortran
240+
- xsel
241241
- python: 3.5
242242
env:
243243
- PYTHON_VERSION=3.5

appveyor.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,7 @@ install:
8080
- cmd: conda config --set ssl_verify false
8181

8282
# add the conda-forge and pandas channels *before* defaults to have defaults take priority
83+
- cmd: conda config --add channels conda-forge
8384
- cmd: conda config --add channels pandas
8485
- cmd: conda config --remove channels defaults
8586
- cmd: conda config --add channels defaults

asv_bench/benchmarks/algorithms.py

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ class Algorithms(object):
88

99
def setup(self):
1010
N = 100000
11+
np.random.seed(1234)
1112

1213
self.int_unique = pd.Int64Index(np.arange(N * 5))
1314
# cache is_unique
@@ -17,17 +18,24 @@ def setup(self):
1718
self.float = pd.Float64Index(np.random.randn(N).repeat(5))
1819

1920
# Convenience naming.
20-
self.checked_add = pd.core.nanops._checked_add_with_arr
21+
self.checked_add = pd.core.algorithms.checked_add_with_arr
2122

2223
self.arr = np.arange(1000000)
2324
self.arrpos = np.arange(1000000)
2425
self.arrneg = np.arange(-1000000, 0)
2526
self.arrmixed = np.array([1, -1]).repeat(500000)
27+
self.strings = tm.makeStringIndex(100000)
28+
29+
self.arr_nan = np.random.choice([True, False], size=1000000)
30+
self.arrmixed_nan = np.random.choice([True, False], size=1000000)
2631

2732
# match
2833
self.uniques = tm.makeStringIndex(1000).values
2934
self.all = self.uniques.repeat(10)
3035

36+
def time_factorize_string(self):
37+
self.strings.factorize()
38+
3139
def time_factorize_int(self):
3240
self.int.factorize()
3341

@@ -64,6 +72,16 @@ def time_add_overflow_neg_arr(self):
6472
def time_add_overflow_mixed_arr(self):
6573
self.checked_add(self.arr, self.arrmixed)
6674

75+
def time_add_overflow_first_arg_nan(self):
76+
self.checked_add(self.arr, self.arrmixed, arr_mask=self.arr_nan)
77+
78+
def time_add_overflow_second_arg_nan(self):
79+
self.checked_add(self.arr, self.arrmixed, b_mask=self.arrmixed_nan)
80+
81+
def time_add_overflow_both_arg_nan(self):
82+
self.checked_add(self.arr, self.arrmixed, arr_mask=self.arr_nan,
83+
b_mask=self.arrmixed_nan)
84+
6785

6886
class Hashing(object):
6987
goal_time = 0.2

asv_bench/benchmarks/frame_methods.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,8 @@ class Iteration(object):
6868
def setup(self):
6969
self.df = DataFrame(randn(10000, 1000))
7070
self.df2 = DataFrame(np.random.randn(50000, 10))
71+
self.df3 = pd.DataFrame(np.random.randn(1000,5000),
72+
columns=['C'+str(c) for c in range(5000)])
7173

7274
def f(self):
7375
if hasattr(self.df, '_item_cache'):
@@ -85,6 +87,11 @@ def time_iteritems(self):
8587
def time_iteritems_cached(self):
8688
self.g()
8789

90+
def time_iteritems_indexing(self):
91+
df = self.df3
92+
for col in df:
93+
df[col]
94+
8895
def time_itertuples(self):
8996
for row in self.df2.itertuples():
9097
pass

asv_bench/benchmarks/gil.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -379,3 +379,38 @@ def pg_read_csv_datetime(self):
379379

380380
def time_read_csv_datetime(self):
381381
self.pg_read_csv_datetime()
382+
383+
384+
class nogil_factorize(object):
385+
number = 1
386+
repeat = 5
387+
388+
def setup(self):
389+
if (not have_real_test_parallel):
390+
raise NotImplementedError
391+
392+
np.random.seed(1234)
393+
self.strings = tm.makeStringIndex(100000)
394+
395+
def factorize_strings(self):
396+
pd.factorize(self.strings)
397+
398+
@test_parallel(num_threads=4)
399+
def _pg_factorize_strings_4(self):
400+
self.factorize_strings()
401+
402+
def time_factorize_strings_4(self):
403+
for i in range(2):
404+
self._pg_factorize_strings_4()
405+
406+
@test_parallel(num_threads=2)
407+
def _pg_factorize_strings_2(self):
408+
self.factorize_strings()
409+
410+
def time_factorize_strings_2(self):
411+
for i in range(4):
412+
self._pg_factorize_strings_2()
413+
414+
def time_factorize_strings(self):
415+
for i in range(8):
416+
self.factorize_strings()

asv_bench/benchmarks/io_bench.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -153,7 +153,7 @@ def setup(self, compression, engine):
153153
# The Python 2 C parser can't read bz2 from open files.
154154
raise NotImplementedError
155155
try:
156-
import boto
156+
import s3fs
157157
except ImportError:
158158
# Skip these benchmarks if `s3fs` is not installed.
159159
raise NotImplementedError

asv_bench/benchmarks/join_merge.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -302,12 +302,19 @@ def setup(self):
302302
self.df1 = self.df1.sort_values('time')
303303
self.df2 = self.df2.sort_values('time')
304304

305+
self.df1['time32'] = np.int32(self.df1.time)
306+
self.df2['time32'] = np.int32(self.df2.time)
307+
305308
self.df1a = self.df1[['time', 'value1']]
306309
self.df2a = self.df2[['time', 'value2']]
307310
self.df1b = self.df1[['time', 'key', 'value1']]
308311
self.df2b = self.df2[['time', 'key', 'value2']]
309312
self.df1c = self.df1[['time', 'key2', 'value1']]
310313
self.df2c = self.df2[['time', 'key2', 'value2']]
314+
self.df1d = self.df1[['time32', 'value1']]
315+
self.df2d = self.df2[['time32', 'value2']]
316+
self.df1e = self.df1[['time', 'key', 'key2', 'value1']]
317+
self.df2e = self.df2[['time', 'key', 'key2', 'value2']]
311318

312319
def time_noby(self):
313320
merge_asof(self.df1a, self.df2a, on='time')
@@ -318,6 +325,12 @@ def time_by_object(self):
318325
def time_by_int(self):
319326
merge_asof(self.df1c, self.df2c, on='time', by='key2')
320327

328+
def time_on_int32(self):
329+
merge_asof(self.df1d, self.df2d, on='time32')
330+
331+
def time_multiby(self):
332+
merge_asof(self.df1e, self.df2e, on='time', by=['key', 'key2'])
333+
321334

322335
#----------------------------------------------------------------------
323336
# data alignment

asv_bench/benchmarks/period.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,3 +49,28 @@ def time_value_counts_pindex(self):
4949
self.i.value_counts()
5050

5151

52+
class period_standard_indexing(object):
53+
goal_time = 0.2
54+
55+
def setup(self):
56+
self.index = PeriodIndex(start='1985', periods=1000, freq='D')
57+
self.series = Series(range(1000), index=self.index)
58+
self.period = self.index[500]
59+
60+
def time_get_loc(self):
61+
self.index.get_loc(self.period)
62+
63+
def time_shape(self):
64+
self.index.shape
65+
66+
def time_shallow_copy(self):
67+
self.index._shallow_copy()
68+
69+
def time_series_loc(self):
70+
self.series.loc[self.period]
71+
72+
def time_align(self):
73+
pd.DataFrame({'a': self.series, 'b': self.series[:500]})
74+
75+
def time_intersection(self):
76+
self.index[:750].intersection(self.index[250:])

asv_bench/benchmarks/reshape.py

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
from .pandas_vb_common import *
2-
from pandas.core.reshape import melt
2+
from pandas.core.reshape import melt, wide_to_long
33

44

55
class melt_dataframe(object):
@@ -74,3 +74,25 @@ def setup(self):
7474

7575
def time_unstack_sparse_keyspace(self):
7676
self.idf.unstack()
77+
78+
79+
class wide_to_long_big(object):
80+
goal_time = 0.2
81+
82+
def setup(self):
83+
vars = 'ABCD'
84+
nyrs = 20
85+
nidvars = 20
86+
N = 5000
87+
yrvars = []
88+
for var in vars:
89+
for yr in range(1, nyrs + 1):
90+
yrvars.append(var + str(yr))
91+
92+
self.df = pd.DataFrame(np.random.randn(N, nidvars + len(yrvars)),
93+
columns=list(range(nidvars)) + yrvars)
94+
self.vars = vars
95+
96+
def time_wide_to_long_big(self):
97+
self.df['id'] = self.df.index
98+
wide_to_long(self.df, list(self.vars), i='id', j='year')

0 commit comments

Comments
 (0)