Skip to content

Commit f457cd4

Browse files
authored
Merge branch 'master' into master
2 parents 6eb2e5a + 5ce06b5 commit f457cd4

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

75 files changed

+1169
-503
lines changed

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -56,8 +56,8 @@
5656
<tr>
5757
<td></td>
5858
<td>
59-
<a href="https://ci.appveyor.com/project/pandas-dev/pandas">
60-
<img src="https://ci.appveyor.com/api/projects/status/86vn83mxgnl4xf1s/branch/master?svg=true" alt="appveyor build status" />
59+
<a href="https://dev.azure.com/pandas-dev/pandas/_build/latest?definitionId=1&branch=master">
60+
<img src="https://dev.azure.com/pandas-dev/pandas/_apis/build/status/pandas-dev.pandas?branch=master" alt="Azure Pipelines build status" />
6161
</a>
6262
</td>
6363
</tr>

asv_bench/benchmarks/algorithms.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
try:
1010
hashing = import_module(imp)
1111
break
12-
except:
12+
except (ImportError, TypeError, ValueError):
1313
pass
1414

1515
from .pandas_vb_common import setup # noqa

asv_bench/benchmarks/frame_methods.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -505,14 +505,21 @@ class NSort(object):
505505
param_names = ['keep']
506506

507507
def setup(self, keep):
508-
self.df = DataFrame(np.random.randn(1000, 3), columns=list('ABC'))
508+
self.df = DataFrame(np.random.randn(100000, 3),
509+
columns=list('ABC'))
509510

510-
def time_nlargest(self, keep):
511+
def time_nlargest_one_column(self, keep):
511512
self.df.nlargest(100, 'A', keep=keep)
512513

513-
def time_nsmallest(self, keep):
514+
def time_nlargest_two_columns(self, keep):
515+
self.df.nlargest(100, ['A', 'B'], keep=keep)
516+
517+
def time_nsmallest_one_column(self, keep):
514518
self.df.nsmallest(100, 'A', keep=keep)
515519

520+
def time_nsmallest_two_columns(self, keep):
521+
self.df.nsmallest(100, ['A', 'B'], keep=keep)
522+
516523

517524
class Describe(object):
518525

asv_bench/benchmarks/indexing.py

Lines changed: 45 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -11,95 +11,110 @@
1111
class NumericSeriesIndexing(object):
1212

1313
goal_time = 0.2
14-
params = [Int64Index, Float64Index]
15-
param = ['index']
14+
params = [
15+
(Int64Index, Float64Index),
16+
('unique_monotonic_inc', 'nonunique_monotonic_inc'),
17+
]
18+
param_names = ['index_dtype', 'index_structure']
1619

17-
def setup(self, index):
20+
def setup(self, index, index_structure):
1821
N = 10**6
19-
idx = index(range(N))
20-
self.data = Series(np.random.rand(N), index=idx)
22+
indices = {
23+
'unique_monotonic_inc': index(range(N)),
24+
'nonunique_monotonic_inc': index(
25+
list(range(55)) + [54] + list(range(55, N - 1))),
26+
}
27+
self.data = Series(np.random.rand(N), index=indices[index_structure])
2128
self.array = np.arange(10000)
2229
self.array_list = self.array.tolist()
2330

24-
def time_getitem_scalar(self, index):
31+
def time_getitem_scalar(self, index, index_structure):
2532
self.data[800000]
2633

27-
def time_getitem_slice(self, index):
34+
def time_getitem_slice(self, index, index_structure):
2835
self.data[:800000]
2936

30-
def time_getitem_list_like(self, index):
37+
def time_getitem_list_like(self, index, index_structure):
3138
self.data[[800000]]
3239

33-
def time_getitem_array(self, index):
40+
def time_getitem_array(self, index, index_structure):
3441
self.data[self.array]
3542

36-
def time_getitem_lists(self, index):
43+
def time_getitem_lists(self, index, index_structure):
3744
self.data[self.array_list]
3845

39-
def time_iloc_array(self, index):
46+
def time_iloc_array(self, index, index_structure):
4047
self.data.iloc[self.array]
4148

42-
def time_iloc_list_like(self, index):
49+
def time_iloc_list_like(self, index, index_structure):
4350
self.data.iloc[[800000]]
4451

45-
def time_iloc_scalar(self, index):
52+
def time_iloc_scalar(self, index, index_structure):
4653
self.data.iloc[800000]
4754

48-
def time_iloc_slice(self, index):
55+
def time_iloc_slice(self, index, index_structure):
4956
self.data.iloc[:800000]
5057

51-
def time_ix_array(self, index):
58+
def time_ix_array(self, index, index_structure):
5259
self.data.ix[self.array]
5360

54-
def time_ix_list_like(self, index):
61+
def time_ix_list_like(self, index, index_structure):
5562
self.data.ix[[800000]]
5663

57-
def time_ix_scalar(self, index):
64+
def time_ix_scalar(self, index, index_structure):
5865
self.data.ix[800000]
5966

60-
def time_ix_slice(self, index):
67+
def time_ix_slice(self, index, index_structure):
6168
self.data.ix[:800000]
6269

63-
def time_loc_array(self, index):
70+
def time_loc_array(self, index, index_structure):
6471
self.data.loc[self.array]
6572

66-
def time_loc_list_like(self, index):
73+
def time_loc_list_like(self, index, index_structure):
6774
self.data.loc[[800000]]
6875

69-
def time_loc_scalar(self, index):
76+
def time_loc_scalar(self, index, index_structure):
7077
self.data.loc[800000]
7178

72-
def time_loc_slice(self, index):
79+
def time_loc_slice(self, index, index_structure):
7380
self.data.loc[:800000]
7481

7582

7683
class NonNumericSeriesIndexing(object):
7784

7885
goal_time = 0.2
79-
params = ['string', 'datetime']
80-
param_names = ['index']
86+
params = [
87+
('string', 'datetime'),
88+
('unique_monotonic_inc', 'nonunique_monotonic_inc'),
89+
]
90+
param_names = ['index_dtype', 'index_structure']
8191

82-
def setup(self, index):
83-
N = 10**5
92+
def setup(self, index, index_structure):
93+
N = 10**6
8494
indexes = {'string': tm.makeStringIndex(N),
8595
'datetime': date_range('1900', periods=N, freq='s')}
8696
index = indexes[index]
97+
if index_structure == 'nonunique_monotonic_inc':
98+
index = index.insert(item=index[2], loc=2)[:-1]
8799
self.s = Series(np.random.rand(N), index=index)
88100
self.lbl = index[80000]
89101

90-
def time_getitem_label_slice(self, index):
102+
def time_getitem_label_slice(self, index, index_structure):
91103
self.s[:self.lbl]
92104

93-
def time_getitem_pos_slice(self, index):
105+
def time_getitem_pos_slice(self, index, index_structure):
94106
self.s[:80000]
95107

96-
def time_get_value(self, index):
108+
def time_get_value(self, index, index_structure):
97109
with warnings.catch_warnings(record=True):
98110
self.s.get_value(self.lbl)
99111

100-
def time_getitem_scalar(self, index):
112+
def time_getitem_scalar(self, index, index_structure):
101113
self.s[self.lbl]
102114

115+
def time_getitem_list_like(self, index, index_structure):
116+
self.s[[self.lbl]]
117+
103118

104119
class DataFrameStringIndexing(object):
105120

asv_bench/benchmarks/io/csv.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,9 @@
11
import random
2-
import timeit
32
import string
43

54
import numpy as np
65
import pandas.util.testing as tm
76
from pandas import DataFrame, Categorical, date_range, read_csv
8-
from pandas.compat import PY2
97
from pandas.compat import cStringIO as StringIO
108

119
from ..pandas_vb_common import setup, BaseIO # noqa
@@ -181,8 +179,8 @@ def time_read_csv(self, sep, decimal, float_precision):
181179
names=list('abc'), float_precision=float_precision)
182180

183181
def time_read_csv_python_engine(self, sep, decimal, float_precision):
184-
read_csv(self.data(self.StringIO_input), sep=sep, header=None, engine='python',
185-
float_precision=None, names=list('abc'))
182+
read_csv(self.data(self.StringIO_input), sep=sep, header=None,
183+
engine='python', float_precision=None, names=list('abc'))
186184

187185

188186
class ReadCSVCategorical(BaseIO):

asv_bench/benchmarks/join_merge.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ def setup(self):
2929
try:
3030
with warnings.catch_warnings(record=True):
3131
self.mdf1.consolidate(inplace=True)
32-
except:
32+
except (AttributeError, TypeError):
3333
pass
3434
self.mdf2 = self.mdf1.copy()
3535
self.mdf2.index = self.df2.index

asv_bench/benchmarks/pandas_vb_common.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,13 @@
22
from importlib import import_module
33

44
import numpy as np
5-
from pandas import Panel
65

76
# Compatibility import for lib
87
for imp in ['pandas._libs.lib', 'pandas.lib']:
98
try:
109
lib = import_module(imp)
1110
break
12-
except:
11+
except (ImportError, TypeError, ValueError):
1312
pass
1413

1514
numeric_dtypes = [np.int64, np.int32, np.uint32, np.uint64, np.float32,
@@ -34,7 +33,7 @@ def remove(self, f):
3433
"""Remove created files"""
3534
try:
3635
os.remove(f)
37-
except:
36+
except OSError:
3837
# On Windows, attempting to remove a file that is in use
3938
# causes an exception to be raised
4039
pass

asv_bench/benchmarks/stat_ops.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ def setup(self, op, dtype, axis, use_bottleneck):
1818
df = pd.DataFrame(np.random.randn(100000, 4)).astype(dtype)
1919
try:
2020
pd.options.compute.use_bottleneck = use_bottleneck
21-
except:
21+
except TypeError:
2222
from pandas.core import nanops
2323
nanops._USE_BOTTLENECK = use_bottleneck
2424
self.df_func = getattr(df, op)
@@ -56,7 +56,7 @@ def setup(self, op, dtype, use_bottleneck):
5656
s = pd.Series(np.random.randn(100000)).astype(dtype)
5757
try:
5858
pd.options.compute.use_bottleneck = use_bottleneck
59-
except:
59+
except TypeError:
6060
from pandas.core import nanops
6161
nanops._USE_BOTTLENECK = use_bottleneck
6262
self.s_func = getattr(s, op)

asv_bench/benchmarks/timeseries.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
import warnings
21
from datetime import timedelta
32

43
import numpy as np

ci/doctests.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ if [ "$DOCTEST" ]; then
2121

2222
# DataFrame / Series docstrings
2323
pytest --doctest-modules -v pandas/core/frame.py \
24-
-k"-axes -combine -isin -itertuples -join -nlargest -nsmallest -nunique -pivot_table -quantile -query -reindex -reindex_axis -replace -round -set_index -stack -to_dict -to_stata"
24+
-k"-axes -combine -itertuples -join -nlargest -nsmallest -nunique -pivot_table -quantile -query -reindex -reindex_axis -replace -round -set_index -stack -to_dict -to_stata"
2525

2626
if [ $? -ne "0" ]; then
2727
RET=1

ci/requirements-optional-pip.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ lxml
1414
matplotlib
1515
nbsphinx
1616
numexpr
17-
openpyxl=2.5.5
17+
openpyxl==2.5.5
1818
pyarrow
1919
pymysql
2020
tables
@@ -28,4 +28,4 @@ statsmodels
2828
xarray
2929
xlrd
3030
xlsxwriter
31-
xlwt
31+
xlwt
201 KB
Binary file not shown.
74.7 KB
Binary file not shown.
-201 KB
Binary file not shown.
-103 KB
Binary file not shown.

doc/make.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -233,10 +233,10 @@ def _sphinx_build(self, kind):
233233
'-b{}'.format(kind),
234234
'-{}'.format(
235235
'v' * self.verbosity) if self.verbosity else '',
236-
'-d{}'.format(os.path.join(BUILD_PATH, 'doctrees')),
236+
'-d"{}"'.format(os.path.join(BUILD_PATH, 'doctrees')),
237237
'-Dexclude_patterns={}'.format(self.exclude_patterns),
238-
SOURCE_PATH,
239-
os.path.join(BUILD_PATH, kind))
238+
'"{}"'.format(SOURCE_PATH),
239+
'"{}"'.format(os.path.join(BUILD_PATH, kind)))
240240

241241
def _open_browser(self):
242242
base_url = os.path.join('file://', DOC_PATH, 'build', 'html')

doc/source/api.rst

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2603,3 +2603,12 @@ objects.
26032603
generated/pandas.Series.ix
26042604
generated/pandas.Series.imag
26052605
generated/pandas.Series.real
2606+
2607+
2608+
.. Can't convince sphinx to generate toctree for this class attribute.
2609+
.. So we do it manually to avoid a warning
2610+
2611+
.. toctree::
2612+
:hidden:
2613+
2614+
generated/pandas.api.extensions.ExtensionDtype.na_value

doc/source/basics.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1935,7 +1935,7 @@ NumPy's type-system for a few cases.
19351935
* :ref:`Categorical <categorical>`
19361936
* :ref:`Datetime with Timezone <timeseries.timezone_series>`
19371937
* :ref:`Period <timeseries.periods>`
1938-
* :ref:`Interval <advanced.indexing.intervallindex>`
1938+
* :ref:`Interval <indexing.intervallindex>`
19391939

19401940
Pandas uses the ``object`` dtype for storing strings.
19411941

doc/source/computation.rst

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -153,6 +153,21 @@ Like ``cov``, ``corr`` also supports the optional ``min_periods`` keyword:
153153
frame.corr(min_periods=12)
154154
155155
156+
.. versionadded:: 0.24.0
157+
158+
The ``method`` argument can also be a callable for a generic correlation
159+
calculation. In this case, it should be a single function
160+
that produces a single value from two ndarray inputs. Suppose we wanted to
161+
compute the correlation based on histogram intersection:
162+
163+
.. ipython:: python
164+
165+
# histogram intersection
166+
histogram_intersection = lambda a, b: np.minimum(
167+
np.true_divide(a, a.sum()), np.true_divide(b, b.sum())
168+
).sum()
169+
frame.corr(method=histogram_intersection)
170+
156171
A related method :meth:`~DataFrame.corrwith` is implemented on DataFrame to
157172
compute the correlation between like-labeled Series contained in different
158173
DataFrame objects.

doc/source/cookbook.rst

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -505,13 +505,11 @@ Unlike agg, apply's callable is passed a sub-DataFrame which gives you access to
505505
.. ipython:: python
506506
507507
df = pd.DataFrame({'A' : [1, 1, 2, 2], 'B' : [1, -1, 1, 2]})
508-
509508
gb = df.groupby('A')
510509
511510
def replace(g):
512-
mask = g < 0
513-
g.loc[mask] = g[~mask].mean()
514-
return g
511+
mask = g < 0
512+
return g.where(~mask, g[~mask].mean())
515513
516514
gb.transform(replace)
517515

doc/source/ecosystem.rst

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -73,8 +73,8 @@ large data to thin clients.
7373
`seaborn <https://seaborn.pydata.org>`__
7474
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
7575

76-
Seaborn is a Python visualization library based on `matplotlib
77-
<http://matplotlib.org>`__. It provides a high-level, dataset-oriented
76+
Seaborn is a Python visualization library based on
77+
`matplotlib <http://matplotlib.org>`__. It provides a high-level, dataset-oriented
7878
interface for creating attractive statistical graphics. The plotting functions
7979
in seaborn understand pandas objects and leverage pandas grouping operations
8080
internally to support concise specification of complex visualizations. Seaborn
@@ -140,7 +140,7 @@ which are utilized by Jupyter Notebook for displaying
140140
(Note: HTML tables may or may not be
141141
compatible with non-HTML Jupyter output formats.)
142142

143-
See :ref:`Options and Settings <options>` and :ref:`<options.available>`
143+
See :ref:`Options and Settings <options>` and :ref:`Available Options <options.available>`
144144
for pandas ``display.`` settings.
145145

146146
`quantopian/qgrid <https://github.com/quantopian/qgrid>`__
@@ -169,7 +169,7 @@ or the clipboard into a new pandas DataFrame via a sophisticated import wizard.
169169
Most pandas classes, methods and data attributes can be autocompleted in
170170
Spyder's `Editor <https://docs.spyder-ide.org/editor.html>`__ and
171171
`IPython Console <https://docs.spyder-ide.org/ipythonconsole.html>`__,
172-
and Spyder's `Help pane<https://docs.spyder-ide.org/help.html>`__ can retrieve
172+
and Spyder's `Help pane <https://docs.spyder-ide.org/help.html>`__ can retrieve
173173
and render Numpydoc documentation on pandas objects in rich text with Sphinx
174174
both automatically and on-demand.
175175

0 commit comments

Comments
 (0)