Skip to content

Commit c21588a

Browse files
committed
Merge remote-tracking branch 'pandas-dev/master' into bigquery-udf-resources
2 parents 146f0f3 + 0c82abe commit c21588a

File tree

135 files changed

+5365
-27339
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

135 files changed

+5365
-27339
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -27,6 +27,7 @@
2727
*.class
2828
*.dll
2929
*.exe
30+
*.pxi
3031
*.o
3132
*.py[ocd]
3233
*.so

.travis.yml

Lines changed: 3 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -14,13 +14,9 @@ cache:
1414

1515
env:
1616
global:
17-
# scatterci API key
18-
#- secure: "Bx5umgo6WjuGY+5XFa004xjCiX/vq0CyMZ/ETzcs7EIBI1BE/0fIDXOoWhoxbY9HPfdPGlDnDgB9nGqr5wArO2s+BavyKBWg6osZ3dmkfuJPMOWeyCa92EeP+sfKw8e5HSU5MizW9e319wHWOF/xkzdHR7T67Qd5erhv91x4DnQ="
19-
# ironcache API key
20-
#- secure: "e4eEFn9nDQc3Xa5BWYkzfX37jaWVq89XidVX+rcCNEr5OlOImvveeXnF1IzbRXznH4Sv0YsLwUd8RGUWOmyCvkONq/VJeqCHWtTMyfaCIdqSyhIP9Odz8r9ahch+Y0XFepBey92AJHmlnTh+2GjCDgIiqq4fzglojnp56Vg1ojA="
21-
#- secure: "CjmYmY5qEu3KrvMtel6zWFEtMq8ORBeS1S1odJHnjQpbwT1KY2YFZRVlLphfyDQXSz6svKUdeRrCNp65baBzs3DQNA8lIuXGIBYFeJxqVGtYAZZs6+TzBPfJJK798sGOj5RshrOJkFG2rdlWNuTq/XphI0JOrN3nPUkRrdQRpAw="
22-
# pandas-docs-bot GH
23-
- secure: "PCzUFR8CHmw9lH84p4ygnojdF7Z8U5h7YfY0RyT+5K/aiQ1ZTU3ZkDTPI0/rR5FVMxsEEKEQKMcc5fvqW0PeD7Q2wRmluloKgT9w4EVEJ1ppKf7lITPcvZR2QgVOvjv4AfDtibLHFNiaSjzoqyJVjM4igjOu8WTlF3JfZcmOQjQ="
17+
18+
# pandas-docs-travis GH
19+
- secure: "YvvTc+FrSYHgdxqoxn9s8VOaCWjvZzlkaf6k55kkmQqCYR9dPiLMsot1F96/N7o3YlD1s0znPQCak93Du8HHi/8809zAXloTaMSZrWz4R4qn96xlZFRE88O/w/Z1t3VVYpKX3MHlCggBc8MtXrqmvWKJMAqXyysZ4TTzoiJDPvE="
2420

2521
git:
2622
# for cloning

asv_bench/benchmarks/algorithms.py

Lines changed: 46 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,9 @@
11
import numpy as np
22
import pandas as pd
3+
from pandas.util import testing as tm
34

45

5-
class algorithm(object):
6+
class Algorithms(object):
67
goal_time = 0.2
78

89
def setup(self):
@@ -23,21 +24,28 @@ def setup(self):
2324
self.arrneg = np.arange(-1000000, 0)
2425
self.arrmixed = np.array([1, -1]).repeat(500000)
2526

26-
def time_int_factorize(self):
27+
# match
28+
self.uniques = tm.makeStringIndex(1000).values
29+
self.all = self.uniques.repeat(10)
30+
31+
def time_factorize_int(self):
2732
self.int.factorize()
2833

29-
def time_float_factorize(self):
34+
def time_factorize_float(self):
3035
self.int.factorize()
3136

32-
def time_int_unique_duplicated(self):
37+
def time_duplicated_int_unique(self):
3338
self.int_unique.duplicated()
3439

35-
def time_int_duplicated(self):
40+
def time_duplicated_int(self):
3641
self.int.duplicated()
3742

38-
def time_float_duplicated(self):
43+
def time_duplicated_float(self):
3944
self.float.duplicated()
4045

46+
def time_match_strings(self):
47+
pd.match(self.all, self.uniques)
48+
4149
def time_add_overflow_pos_scalar(self):
4250
self.checked_add(self.arr, 1)
4351

@@ -55,3 +63,35 @@ def time_add_overflow_neg_arr(self):
5563

5664
def time_add_overflow_mixed_arr(self):
5765
self.checked_add(self.arr, self.arrmixed)
66+
67+
68+
class Hashing(object):
69+
goal_time = 0.2
70+
71+
def setup(self):
72+
N = 100000
73+
74+
self.df = pd.DataFrame(
75+
{'A': pd.Series(tm.makeStringIndex(100).take(
76+
np.random.randint(0, 100, size=N))),
77+
'B': pd.Series(tm.makeStringIndex(10000).take(
78+
np.random.randint(0, 10000, size=N))),
79+
'D': np.random.randn(N),
80+
'E': np.arange(N),
81+
'F': pd.date_range('20110101', freq='s', periods=N),
82+
'G': pd.timedelta_range('1 day', freq='s', periods=N),
83+
})
84+
self.df['C'] = self.df['B'].astype('category')
85+
self.df.iloc[10:20] = np.nan
86+
87+
def time_frame(self):
88+
self.df.hash()
89+
90+
def time_series_int(self):
91+
self.df.E.hash()
92+
93+
def time_series_string(self):
94+
self.df.B.hash()
95+
96+
def time_series_categorical(self):
97+
self.df.C.hash()

asv_bench/benchmarks/attrs_caching.py

Lines changed: 16 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -1,23 +1,32 @@
11
from .pandas_vb_common import *
2+
from pandas.util.decorators import cache_readonly
23

34

4-
class getattr_dataframe_index(object):
5+
class DataFrameAttributes(object):
56
goal_time = 0.2
67

78
def setup(self):
89
self.df = DataFrame(np.random.randn(10, 6))
910
self.cur_index = self.df.index
1011

11-
def time_getattr_dataframe_index(self):
12+
def time_get_index(self):
1213
self.foo = self.df.index
1314

15+
def time_set_index(self):
16+
self.df.index = self.cur_index
17+
1418

15-
class setattr_dataframe_index(object):
19+
class CacheReadonly(object):
1620
goal_time = 0.2
1721

1822
def setup(self):
19-
self.df = DataFrame(np.random.randn(10, 6))
20-
self.cur_index = self.df.index
2123

22-
def time_setattr_dataframe_index(self):
23-
self.df.index = self.cur_index
24+
class Foo:
25+
26+
@cache_readonly
27+
def prop(self):
28+
return 5
29+
self.obj = Foo()
30+
31+
def time_cache_readonly(self):
32+
self.obj.prop

0 commit comments

Comments
 (0)