pandas-dev
diff --git a/‎README.md
Lines changed: 2 additions & 2 deletions b/‎README.md
Lines changed: 2 additions & 2 deletions
diff --git a/‎asv_bench/benchmarks/algorithms.py
Lines changed: 1 addition & 1 deletion b/‎asv_bench/benchmarks/algorithms.py
Lines changed: 1 addition & 1 deletion
diff --git a/‎asv_bench/benchmarks/frame_methods.py
Lines changed: 10 additions & 3 deletions b/‎asv_bench/benchmarks/frame_methods.py
Lines changed: 10 additions & 3 deletions
diff --git a/‎asv_bench/benchmarks/indexing.py
Lines changed: 45 additions & 30 deletions b/‎asv_bench/benchmarks/indexing.py
Lines changed: 45 additions & 30 deletions
diff --git a/‎asv_bench/benchmarks/io/csv.py
Lines changed: 2 additions & 4 deletions b/‎asv_bench/benchmarks/io/csv.py
Lines changed: 2 additions & 4 deletions
diff --git a/‎asv_bench/benchmarks/join_merge.py
Lines changed: 1 addition & 1 deletion b/‎asv_bench/benchmarks/join_merge.py
Lines changed: 1 addition & 1 deletion
diff --git a/‎asv_bench/benchmarks/pandas_vb_common.py
Lines changed: 2 additions & 3 deletions b/‎asv_bench/benchmarks/pandas_vb_common.py
Lines changed: 2 additions & 3 deletions
diff --git a/‎asv_bench/benchmarks/stat_ops.py
Lines changed: 2 additions & 2 deletions b/‎asv_bench/benchmarks/stat_ops.py
Lines changed: 2 additions & 2 deletions
diff --git a/‎asv_bench/benchmarks/timeseries.py
Lines changed: 0 additions & 1 deletion b/‎asv_bench/benchmarks/timeseries.py
Lines changed: 0 additions & 1 deletion
diff --git a/‎ci/doctests.sh
Lines changed: 1 addition & 1 deletion b/‎ci/doctests.sh
Lines changed: 1 addition & 1 deletion
diff --git a/‎ci/requirements-optional-pip.txt
Lines changed: 2 additions & 2 deletions b/‎ci/requirements-optional-pip.txt
Lines changed: 2 additions & 2 deletions
diff --git a/‎doc/cheatsheet/Pandas_Cheat_Sheet_JA.pdf
201 KB b/‎doc/cheatsheet/Pandas_Cheat_Sheet_JA.pdf
201 KB
diff --git a/‎doc/cheatsheet/Pandas_Cheat_Sheet_JA.pptx
74.7 KB b/‎doc/cheatsheet/Pandas_Cheat_Sheet_JA.pptx
74.7 KB
diff --git a/‎doc/cheatsheet/Pandas_Cheat_Sheet_JP.pdf
-201 KB b/‎doc/cheatsheet/Pandas_Cheat_Sheet_JP.pdf
-201 KB
diff --git a/‎doc/cheatsheet/Pandas_Cheat_Sheet_JP.pptx
-103 KB b/‎doc/cheatsheet/Pandas_Cheat_Sheet_JP.pptx
-103 KB
diff --git a/‎doc/make.py
Lines changed: 3 additions & 3 deletions b/‎doc/make.py
Lines changed: 3 additions & 3 deletions
diff --git a/‎doc/source/api.rst
Lines changed: 9 additions & 0 deletions b/‎doc/source/api.rst
Lines changed: 9 additions & 0 deletions
diff --git a/‎doc/source/basics.rst
Lines changed: 1 addition & 1 deletion b/‎doc/source/basics.rst
Lines changed: 1 addition & 1 deletion
diff --git a/‎doc/source/computation.rst
Lines changed: 15 additions & 0 deletions b/‎doc/source/computation.rst
Lines changed: 15 additions & 0 deletions
diff --git a/‎doc/source/cookbook.rst
Lines changed: 2 additions & 4 deletions b/‎doc/source/cookbook.rst
Lines changed: 2 additions & 4 deletions
diff --git a/‎doc/source/ecosystem.rst
Lines changed: 4 additions & 4 deletions b/‎doc/source/ecosystem.rst
Lines changed: 4 additions & 4 deletions
@@ -56,8 +56,8 @@
 <tr>
   <td></td>
   <td>
-    <a href="https://ci.appveyor.com/project/pandas-dev/pandas">
-    <img src="https://ci.appveyor.com/api/projects/status/86vn83mxgnl4xf1s/branch/master?svg=true" alt="appveyor build status" />
+    <a href="https://dev.azure.com/pandas-dev/pandas/_build/latest?definitionId=1&branch=master">
+      <img src="https://dev.azure.com/pandas-dev/pandas/_apis/build/status/pandas-dev.pandas?branch=master" alt="Azure Pipelines build status" />
     </a>
   </td>
 </tr>
 
@@ -9,7 +9,7 @@
     try:
         hashing = import_module(imp)
         break
-    except:
+    except (ImportError, TypeError, ValueError):
         pass
 
 from .pandas_vb_common import setup # noqa
 
@@ -505,14 +505,21 @@ class NSort(object):
     param_names = ['keep']
 
     def setup(self, keep):
-        self.df = DataFrame(np.random.randn(1000, 3), columns=list('ABC'))
+        self.df = DataFrame(np.random.randn(100000, 3),
+                            columns=list('ABC'))
 
-    def time_nlargest(self, keep):
+    def time_nlargest_one_column(self, keep):
         self.df.nlargest(100, 'A', keep=keep)
 
-    def time_nsmallest(self, keep):
+    def time_nlargest_two_columns(self, keep):
+        self.df.nlargest(100, ['A', 'B'], keep=keep)
+
+    def time_nsmallest_one_column(self, keep):
         self.df.nsmallest(100, 'A', keep=keep)
 
+    def time_nsmallest_two_columns(self, keep):
+        self.df.nsmallest(100, ['A', 'B'], keep=keep)
+
 
 class Describe(object):
 
 
@@ -11,95 +11,110 @@
 class NumericSeriesIndexing(object):
 
     goal_time = 0.2
-    params = [Int64Index, Float64Index]
-    param = ['index']
+    params = [
+        (Int64Index, Float64Index),
+        ('unique_monotonic_inc', 'nonunique_monotonic_inc'),
+    ]
+    param_names = ['index_dtype', 'index_structure']
 
-    def setup(self, index):
+    def setup(self, index, index_structure):
         N = 10**6
-        idx = index(range(N))
-        self.data = Series(np.random.rand(N), index=idx)
+        indices = {
+            'unique_monotonic_inc': index(range(N)),
+            'nonunique_monotonic_inc': index(
+                list(range(55)) + [54] + list(range(55, N - 1))),
+        }
+        self.data = Series(np.random.rand(N), index=indices[index_structure])
         self.array = np.arange(10000)
         self.array_list = self.array.tolist()
 
-    def time_getitem_scalar(self, index):
+    def time_getitem_scalar(self, index, index_structure):
         self.data[800000]
 
-    def time_getitem_slice(self, index):
+    def time_getitem_slice(self, index, index_structure):
         self.data[:800000]
 
-    def time_getitem_list_like(self, index):
+    def time_getitem_list_like(self, index, index_structure):
         self.data[[800000]]
 
-    def time_getitem_array(self, index):
+    def time_getitem_array(self, index, index_structure):
         self.data[self.array]
 
-    def time_getitem_lists(self, index):
+    def time_getitem_lists(self, index, index_structure):
         self.data[self.array_list]
 
-    def time_iloc_array(self, index):
+    def time_iloc_array(self, index, index_structure):
         self.data.iloc[self.array]
 
-    def time_iloc_list_like(self, index):
+    def time_iloc_list_like(self, index, index_structure):
         self.data.iloc[[800000]]
 
-    def time_iloc_scalar(self, index):
+    def time_iloc_scalar(self, index, index_structure):
         self.data.iloc[800000]
 
-    def time_iloc_slice(self, index):
+    def time_iloc_slice(self, index, index_structure):
         self.data.iloc[:800000]
 
-    def time_ix_array(self, index):
+    def time_ix_array(self, index, index_structure):
         self.data.ix[self.array]
 
-    def time_ix_list_like(self, index):
+    def time_ix_list_like(self, index, index_structure):
         self.data.ix[[800000]]
 
-    def time_ix_scalar(self, index):
+    def time_ix_scalar(self, index, index_structure):
         self.data.ix[800000]
 
-    def time_ix_slice(self, index):
+    def time_ix_slice(self, index, index_structure):
         self.data.ix[:800000]
 
-    def time_loc_array(self, index):
+    def time_loc_array(self, index, index_structure):
         self.data.loc[self.array]
 
-    def time_loc_list_like(self, index):
+    def time_loc_list_like(self, index, index_structure):
         self.data.loc[[800000]]
 
-    def time_loc_scalar(self, index):
+    def time_loc_scalar(self, index, index_structure):
         self.data.loc[800000]
 
-    def time_loc_slice(self, index):
+    def time_loc_slice(self, index, index_structure):
         self.data.loc[:800000]
 
 
 class NonNumericSeriesIndexing(object):
 
     goal_time = 0.2
-    params = ['string', 'datetime']
-    param_names = ['index']
+    params = [
+        ('string', 'datetime'),
+        ('unique_monotonic_inc', 'nonunique_monotonic_inc'),
+    ]
+    param_names = ['index_dtype', 'index_structure']
 
-    def setup(self, index):
-        N = 10**5
+    def setup(self, index, index_structure):
+        N = 10**6
         indexes = {'string': tm.makeStringIndex(N),
                    'datetime': date_range('1900', periods=N, freq='s')}
         index = indexes[index]
+        if index_structure == 'nonunique_monotonic_inc':
+            index = index.insert(item=index[2], loc=2)[:-1]
         self.s = Series(np.random.rand(N), index=index)
         self.lbl = index[80000]
 
-    def time_getitem_label_slice(self, index):
+    def time_getitem_label_slice(self, index, index_structure):
         self.s[:self.lbl]
 
-    def time_getitem_pos_slice(self, index):
+    def time_getitem_pos_slice(self, index, index_structure):
         self.s[:80000]
 
-    def time_get_value(self, index):
+    def time_get_value(self, index, index_structure):
         with warnings.catch_warnings(record=True):
             self.s.get_value(self.lbl)
 
-    def time_getitem_scalar(self, index):
+    def time_getitem_scalar(self, index, index_structure):
         self.s[self.lbl]
 
+    def time_getitem_list_like(self, index, index_structure):
+        self.s[[self.lbl]]
+
 
 class DataFrameStringIndexing(object):
 
 
@@ -1,11 +1,9 @@
 import random
-import timeit
 import string
 
 import numpy as np
 import pandas.util.testing as tm
 from pandas import DataFrame, Categorical, date_range, read_csv
-from pandas.compat import PY2
 from pandas.compat import cStringIO as StringIO
 
 from ..pandas_vb_common import setup, BaseIO  # noqa
@@ -181,8 +179,8 @@ def time_read_csv(self, sep, decimal, float_precision):
                  names=list('abc'), float_precision=float_precision)
 
     def time_read_csv_python_engine(self, sep, decimal, float_precision):
-        read_csv(self.data(self.StringIO_input), sep=sep, header=None, engine='python',
-                 float_precision=None, names=list('abc'))
+        read_csv(self.data(self.StringIO_input), sep=sep, header=None,
+                 engine='python', float_precision=None, names=list('abc'))
 
 
 class ReadCSVCategorical(BaseIO):
 
@@ -29,7 +29,7 @@ def setup(self):
         try:
             with warnings.catch_warnings(record=True):
                 self.mdf1.consolidate(inplace=True)
-        except:
+        except (AttributeError, TypeError):
             pass
         self.mdf2 = self.mdf1.copy()
         self.mdf2.index = self.df2.index
 
@@ -2,14 +2,13 @@
 from importlib import import_module
 
 import numpy as np
-from pandas import Panel
 
 # Compatibility import for lib
 for imp in ['pandas._libs.lib', 'pandas.lib']:
     try:
         lib = import_module(imp)
         break
-    except:
+    except (ImportError, TypeError, ValueError):
         pass
 
 numeric_dtypes = [np.int64, np.int32, np.uint32, np.uint64, np.float32,
@@ -34,7 +33,7 @@ def remove(self, f):
         """Remove created files"""
         try:
             os.remove(f)
-        except:
+        except OSError:
             # On Windows, attempting to remove a file that is in use
             # causes an exception to be raised
             pass
 
@@ -18,7 +18,7 @@ def setup(self, op, dtype, axis, use_bottleneck):
         df = pd.DataFrame(np.random.randn(100000, 4)).astype(dtype)
         try:
             pd.options.compute.use_bottleneck = use_bottleneck
-        except:
+        except TypeError:
             from pandas.core import nanops
             nanops._USE_BOTTLENECK = use_bottleneck
         self.df_func = getattr(df, op)
@@ -56,7 +56,7 @@ def setup(self, op, dtype, use_bottleneck):
         s = pd.Series(np.random.randn(100000)).astype(dtype)
         try:
             pd.options.compute.use_bottleneck = use_bottleneck
-        except:
+        except TypeError:
             from pandas.core import nanops
             nanops._USE_BOTTLENECK = use_bottleneck
         self.s_func = getattr(s, op)
 
@@ -1,4 +1,3 @@
-import warnings
 from datetime import timedelta
 
 import numpy as np
 
@@ -21,7 +21,7 @@ if [ "$DOCTEST" ]; then
 
     # DataFrame / Series docstrings
     pytest --doctest-modules -v pandas/core/frame.py \
-        -k"-axes -combine -isin -itertuples -join -nlargest -nsmallest -nunique -pivot_table -quantile -query -reindex -reindex_axis -replace -round -set_index -stack -to_dict -to_stata"
+        -k"-axes -combine -itertuples -join -nlargest -nsmallest -nunique -pivot_table -quantile -query -reindex -reindex_axis -replace -round -set_index -stack -to_dict -to_stata"
 
     if [ $? -ne "0" ]; then
         RET=1
 
@@ -14,7 +14,7 @@ lxml
 matplotlib
 nbsphinx
 numexpr
-openpyxl=2.5.5
+openpyxl==2.5.5
 pyarrow
 pymysql
 tables
@@ -28,4 +28,4 @@ statsmodels
 xarray
 xlrd
 xlsxwriter
-xlwt
+xlwt
@@ -233,10 +233,10 @@ def _sphinx_build(self, kind):
                      '-b{}'.format(kind),
                      '-{}'.format(
                          'v' * self.verbosity) if self.verbosity else '',
-                     '-d{}'.format(os.path.join(BUILD_PATH, 'doctrees')),
+                     '-d"{}"'.format(os.path.join(BUILD_PATH, 'doctrees')),
                      '-Dexclude_patterns={}'.format(self.exclude_patterns),
-                     SOURCE_PATH,
-                     os.path.join(BUILD_PATH, kind))
+                     '"{}"'.format(SOURCE_PATH),
+                     '"{}"'.format(os.path.join(BUILD_PATH, kind)))
 
     def _open_browser(self):
         base_url = os.path.join('file://', DOC_PATH, 'build', 'html')
 
@@ -2603,3 +2603,12 @@ objects.
    generated/pandas.Series.ix
    generated/pandas.Series.imag
    generated/pandas.Series.real
+
+
+.. Can't convince sphinx to generate toctree for this class attribute.
+.. So we do it manually to avoid a warning
+
+.. toctree::
+   :hidden:
+
+   generated/pandas.api.extensions.ExtensionDtype.na_value
@@ -1935,7 +1935,7 @@ NumPy's type-system for a few cases.
 * :ref:`Categorical <categorical>`
 * :ref:`Datetime with Timezone <timeseries.timezone_series>`
 * :ref:`Period <timeseries.periods>`
-* :ref:`Interval <advanced.indexing.intervallindex>`
+* :ref:`Interval <indexing.intervallindex>`
 
 Pandas uses the ``object`` dtype for storing strings.
 
 
@@ -153,6 +153,21 @@ Like ``cov``, ``corr`` also supports the optional ``min_periods`` keyword:
    frame.corr(min_periods=12)
 
 
+.. versionadded:: 0.24.0
+
+The ``method`` argument can also be a callable for a generic correlation
+calculation. In this case, it should be a single function
+that produces a single value from two ndarray inputs. Suppose we wanted to
+compute the correlation based on histogram intersection:
+
+.. ipython:: python
+
+   # histogram intersection
+   histogram_intersection = lambda a, b: np.minimum(
+       np.true_divide(a, a.sum()), np.true_divide(b, b.sum())
+   ).sum()
+   frame.corr(method=histogram_intersection)
+
 A related method :meth:`~DataFrame.corrwith` is implemented on DataFrame to 
 compute the correlation between like-labeled Series contained in different 
 DataFrame objects.
 
@@ -505,13 +505,11 @@ Unlike agg, apply's callable is passed a sub-DataFrame which gives you access to
 .. ipython:: python
 
    df = pd.DataFrame({'A' : [1, 1, 2, 2], 'B' : [1, -1, 1, 2]})
-
    gb = df.groupby('A')
 
    def replace(g):
-      mask = g < 0
-      g.loc[mask] = g[~mask].mean()
-      return g
+       mask = g < 0
+       return g.where(~mask, g[~mask].mean())
 
    gb.transform(replace)
 
 
@@ -73,8 +73,8 @@ large data to thin clients.
 `seaborn <https://seaborn.pydata.org>`__
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-Seaborn is a Python visualization library based on `matplotlib
-<http://matplotlib.org>`__.  It provides a high-level, dataset-oriented
+Seaborn is a Python visualization library based on
+`matplotlib <http://matplotlib.org>`__. It provides a high-level, dataset-oriented
 interface for creating attractive statistical graphics. The plotting functions
 in seaborn understand pandas objects and leverage pandas grouping operations
 internally to support concise specification of complex visualizations. Seaborn
@@ -140,7 +140,7 @@ which are utilized by Jupyter Notebook for displaying
 (Note: HTML tables may or may not be
 compatible with non-HTML Jupyter output formats.)
 
-See :ref:`Options and Settings <options>` and :ref:`<options.available>`
+See :ref:`Options and Settings <options>` and :ref:`Available Options <options.available>`
 for pandas ``display.`` settings.
 
 `quantopian/qgrid <https://github.com/quantopian/qgrid>`__
@@ -169,7 +169,7 @@ or the clipboard into a new pandas DataFrame via a sophisticated import wizard.
 Most pandas classes, methods and data attributes can be autocompleted in
 Spyder's `Editor <https://docs.spyder-ide.org/editor.html>`__ and
 `IPython Console <https://docs.spyder-ide.org/ipythonconsole.html>`__,
-and Spyder's `Help pane<https://docs.spyder-ide.org/help.html>`__ can retrieve
+and Spyder's `Help pane <https://docs.spyder-ide.org/help.html>`__ can retrieve
 and render Numpydoc documentation on pandas objects in rich text with Sphinx
 both automatically and on-demand.
Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,3 @@`
`1`		`-import warnings`
`2`	`1`	`from datetime import timedelta`
`3`	`2`
`4`	`3`	`import numpy as np`