pandas-dev
diff --git a/‎.github/workflows/ci.yml
Lines changed: 12 additions & 9 deletions b/‎.github/workflows/ci.yml
Lines changed: 12 additions & 9 deletions
diff --git a/‎asv_bench/benchmarks/frame_methods.py
Lines changed: 1 addition & 1 deletion b/‎asv_bench/benchmarks/frame_methods.py
Lines changed: 1 addition & 1 deletion
diff --git a/‎asv_bench/benchmarks/index_object.py
Lines changed: 13 additions & 0 deletions b/‎asv_bench/benchmarks/index_object.py
Lines changed: 13 additions & 0 deletions
diff --git a/‎asv_bench/benchmarks/multiindex_object.py
Lines changed: 13 additions & 1 deletion b/‎asv_bench/benchmarks/multiindex_object.py
Lines changed: 13 additions & 1 deletion
diff --git a/‎asv_bench/benchmarks/stat_ops.py
Lines changed: 28 additions & 52 deletions b/‎asv_bench/benchmarks/stat_ops.py
Lines changed: 28 additions & 52 deletions
diff --git a/‎ci/azure/posix.yml
Lines changed: 12 additions & 10 deletions b/‎ci/azure/posix.yml
Lines changed: 12 additions & 10 deletions
diff --git a/‎ci/code_checks.sh
Lines changed: 18 additions & 17 deletions b/‎ci/code_checks.sh
Lines changed: 18 additions & 17 deletions
diff --git a/‎ci/deps/azure-36-locale.yaml
Lines changed: 21 additions & 14 deletions b/‎ci/deps/azure-36-locale.yaml
Lines changed: 21 additions & 14 deletions
@@ -15,12 +15,12 @@ jobs:
     runs-on: ubuntu-latest
     steps:
 
+    - name: Setting conda path
+      run: echo "::add-path::${HOME}/miniconda3/bin"
+
     - name: Checkout
       uses: actions/checkout@v1
 
-    - name: Setting conda path
-      run: echo "::set-env name=PATH::${HOME}/miniconda3/bin:${PATH}"
-
     - name: Looking for unwanted patterns
       run: ci/code_checks.sh patterns
       if: true
@@ -80,15 +80,18 @@ jobs:
         git fetch upstream
         if git diff upstream/master --name-only | grep -q "^asv_bench/"; then
             asv machine --yes
-            ASV_OUTPUT="$(asv dev)"
-            if [[ $(echo "$ASV_OUTPUT" | grep "failed") ]]; then
-                echo "##vso[task.logissue type=error]Benchmarks run with errors"
-                echo "$ASV_OUTPUT"
+            asv dev | sed "/failed$/ s/^/##[error]/" | tee benchmarks.log
+            if grep "failed" benchmarks.log > /dev/null ; then
                 exit 1
-            else
-                echo "Benchmarks run without errors"
             fi
         else
             echo "Benchmarks did not run, no changes detected"
         fi
       if: true
+
+    # Upload the log captured by `tee benchmarks.log` in the benchmark step so
+    # it can be inspected after the run. `if: failure()` restricts the upload
+    # to runs where an earlier step failed.
+    # NOTE(review): the path assumes the benchmark step runs inside asv_bench/ - confirm.
+    - name: Publish benchmarks artifact
+      uses: actions/upload-artifact@master
+      with:
+        name: Benchmarks log
+        path: asv_bench/benchmarks.log
+      if: failure()
@@ -565,7 +565,7 @@ def setup(self):
 
     def time_frame_get_dtype_counts(self):
         with warnings.catch_warnings(record=True):
-            self.df.get_dtype_counts()
+            self.df._data.get_dtype_counts()
 
     def time_info(self):
         self.df.info()
 
@@ -7,6 +7,7 @@
     Float64Index,
     Index,
     IntervalIndex,
+    MultiIndex,
     RangeIndex,
     Series,
     date_range,
@@ -111,6 +112,18 @@ def time_get_loc_dec(self):
         self.idx_dec.get_loc(100000)
 
 
+# Benchmark: Index.equals called on a small RangeIndex with a large MultiIndex
+# as the argument.
+class IndexEquals:
+    def setup(self):
+        # 100000 x 1 product -> a MultiIndex with 100000 entries whose second
+        # level is a single-element date_range.
+        idx_large_fast = RangeIndex(100000)
+        idx_small_slow = date_range(start="1/1/2012", periods=1)
+        self.mi_large_slow = MultiIndex.from_product([idx_large_fast, idx_small_slow])
+
+        # One-element RangeIndex used as the caller of .equals().
+        self.idx_non_object = RangeIndex(1)
+
+    def time_non_object_equals_multiindex(self):
+        # The result is discarded; only the call itself is exercised.
+        self.idx_non_object.equals(self.mi_large_slow)
+
+
 class IndexAppend:
     def setup(self):
 
 
@@ -2,7 +2,7 @@
 
 import numpy as np
 
-from pandas import DataFrame, MultiIndex, date_range
+from pandas import DataFrame, MultiIndex, RangeIndex, date_range
 import pandas.util.testing as tm
 
 
@@ -147,4 +147,16 @@ def time_categorical_level(self):
         self.df.set_index(["a", "b"])
 
 
+# Benchmark: MultiIndex.equals called on a large MultiIndex with a small
+# RangeIndex as the argument.
+class Equals:
+    def setup(self):
+        # 100000 x 1 product -> a MultiIndex with 100000 entries whose second
+        # level is a single-element date_range.
+        idx_large_fast = RangeIndex(100000)
+        idx_small_slow = date_range(start="1/1/2012", periods=1)
+        self.mi_large_slow = MultiIndex.from_product([idx_large_fast, idx_small_slow])
+
+        # One-element RangeIndex passed as the argument to .equals().
+        self.idx_non_object = RangeIndex(1)
+
+    def time_equals_non_object_index(self):
+        # The result is discarded; only the call itself is exercised.
+        self.mi_large_slow.equals(self.idx_non_object)
+
+
 from .pandas_vb_common import setup  # noqa: F401 isort:skip
@@ -7,20 +7,14 @@
 
 class FrameOps:
 
-    params = [ops, ["float", "int"], [0, 1], [True, False]]
-    param_names = ["op", "dtype", "axis", "use_bottleneck"]
+    params = [ops, ["float", "int"], [0, 1]]
+    param_names = ["op", "dtype", "axis"]
 
-    def setup(self, op, dtype, axis, use_bottleneck):
+    def setup(self, op, dtype, axis):
         df = pd.DataFrame(np.random.randn(100000, 4)).astype(dtype)
-        try:
-            pd.options.compute.use_bottleneck = use_bottleneck
-        except TypeError:
-            from pandas.core import nanops
-
-            nanops._USE_BOTTLENECK = use_bottleneck
         self.df_func = getattr(df, op)
 
-    def time_op(self, op, dtype, axis, use_bottleneck):
+    def time_op(self, op, dtype, axis):
         self.df_func(axis=axis)
 
 
@@ -46,20 +40,14 @@ def time_op(self, level, op):
 
 class SeriesOps:
 
-    params = [ops, ["float", "int"], [True, False]]
-    param_names = ["op", "dtype", "use_bottleneck"]
+    params = [ops, ["float", "int"]]
+    param_names = ["op", "dtype"]
 
-    def setup(self, op, dtype, use_bottleneck):
+    def setup(self, op, dtype):
         s = pd.Series(np.random.randn(100000)).astype(dtype)
-        try:
-            pd.options.compute.use_bottleneck = use_bottleneck
-        except TypeError:
-            from pandas.core import nanops
-
-            nanops._USE_BOTTLENECK = use_bottleneck
         self.s_func = getattr(s, op)
 
-    def time_op(self, op, dtype, use_bottleneck):
+    def time_op(self, op, dtype):
         self.s_func()
 
 
@@ -101,61 +89,49 @@ def time_average_old(self, constructor, pct):
 
 class Correlation:
 
-    params = [["spearman", "kendall", "pearson"], [True, False]]
-    param_names = ["method", "use_bottleneck"]
+    params = [["spearman", "kendall", "pearson"]]
+    param_names = ["method"]
 
-    def setup(self, method, use_bottleneck):
-        try:
-            pd.options.compute.use_bottleneck = use_bottleneck
-        except TypeError:
-            from pandas.core import nanops
+    def setup(self, method):
+        self.df = pd.DataFrame(np.random.randn(500, 15))
+        self.df2 = pd.DataFrame(np.random.randn(500, 15))
+        self.df_wide = pd.DataFrame(np.random.randn(500, 100))
+        self.df_wide_nans = self.df_wide.where(np.random.random((500, 100)) < 0.9)
+        self.s = pd.Series(np.random.randn(500))
+        self.s2 = pd.Series(np.random.randn(500))
 
-            nanops._USE_BOTTLENECK = use_bottleneck
-        self.df = pd.DataFrame(np.random.randn(1000, 30))
-        self.df2 = pd.DataFrame(np.random.randn(1000, 30))
-        self.df_wide = pd.DataFrame(np.random.randn(1000, 200))
-        self.df_wide_nans = self.df_wide.where(np.random.random((1000, 200)) < 0.9)
-        self.s = pd.Series(np.random.randn(1000))
-        self.s2 = pd.Series(np.random.randn(1000))
-
-    def time_corr(self, method, use_bottleneck):
+    def time_corr(self, method):
         self.df.corr(method=method)
 
-    def time_corr_wide(self, method, use_bottleneck):
+    def time_corr_wide(self, method):
         self.df_wide.corr(method=method)
 
-    def time_corr_wide_nans(self, method, use_bottleneck):
+    def time_corr_wide_nans(self, method):
         self.df_wide_nans.corr(method=method)
 
-    def peakmem_corr_wide(self, method, use_bottleneck):
+    def peakmem_corr_wide(self, method):
         self.df_wide.corr(method=method)
 
-    def time_corr_series(self, method, use_bottleneck):
+    def time_corr_series(self, method):
         self.s.corr(self.s2, method=method)
 
-    def time_corrwith_cols(self, method, use_bottleneck):
+    def time_corrwith_cols(self, method):
         self.df.corrwith(self.df2, method=method)
 
-    def time_corrwith_rows(self, method, use_bottleneck):
+    def time_corrwith_rows(self, method):
         self.df.corrwith(self.df2, axis=1, method=method)
 
 
 class Covariance:
 
-    params = [[True, False]]
-    param_names = ["use_bottleneck"]
-
-    def setup(self, use_bottleneck):
-        try:
-            pd.options.compute.use_bottleneck = use_bottleneck
-        except TypeError:
-            from pandas.core import nanops
+    params = []
+    param_names = []
 
-            nanops._USE_BOTTLENECK = use_bottleneck
+    def setup(self):
         self.s = pd.Series(np.random.randn(100000))
         self.s2 = pd.Series(np.random.randn(100000))
 
-    def time_cov_series(self, use_bottleneck):
+    def time_cov_series(self):
         self.s.cov(self.s2)
 
 
 
@@ -20,14 +20,14 @@ jobs:
           CONDA_PY: "36"
           PATTERN: "not slow and not network"
         py36_locale_slow_old_np:
-          ENV_FILE: ci/deps/azure-36-locale.yaml
+          ENV_FILE: ci/deps/azure-36-locale_slow.yaml
           CONDA_PY: "36"
           PATTERN: "slow"
           LOCALE_OVERRIDE: "zh_CN.UTF-8"
           EXTRA_APT: "language-pack-zh-hans"
 
-        py36_locale_slow:
-          ENV_FILE: ci/deps/azure-36-locale_slow.yaml
+        py36_locale:
+          ENV_FILE: ci/deps/azure-36-locale.yaml
           CONDA_PY: "36"
           PATTERN: "not slow and not network"
           LOCALE_OVERRIDE: "it_IT.UTF-8"
@@ -44,13 +44,15 @@ jobs:
           PATTERN: "not slow and not network"
           LOCALE_OVERRIDE: "zh_CN.UTF-8"
 
-        py37_np_dev:
-          ENV_FILE: ci/deps/azure-37-numpydev.yaml
-          CONDA_PY: "37"
-          PATTERN: "not slow and not network"
-          TEST_ARGS: "-W error"
-          PANDAS_TESTING_MODE: "deprecate"
-          EXTRA_APT: "xsel"
+        # Disabled for NumPy object-dtype warning.
+        # https://github.com/pandas-dev/pandas/issues/30043
+        # py37_np_dev:
+        #   ENV_FILE: ci/deps/azure-37-numpydev.yaml
+        #   CONDA_PY: "37"
+        #   PATTERN: "not slow and not network"
+        #   TEST_ARGS: "-W error"
+        #   PANDAS_TESTING_MODE: "deprecate"
+        #   EXTRA_APT: "xsel"
 
   steps:
     - script: |
 
@@ -34,17 +34,13 @@ function invgrep {
     #
     # This is useful for the CI, as we want to fail if one of the patterns
     # that we want to avoid is found by grep.
-    if [[ "$AZURE" == "true" ]]; then
-        set -o pipefail
-        grep -n "$@" | awk -F ":" '{print "##vso[task.logissue type=error;sourcepath=" $1 ";linenumber=" $2 ";] Found unwanted pattern: " $3}'
-    else
-        grep "$@"
-    fi
-    return $((! $?))
+    grep -n "$@" | sed "s/^/$INVGREP_PREPEND/" | sed "s/$/$INVGREP_APPEND/" ; EXIT_STATUS=${PIPESTATUS[0]}
+    return $((! $EXIT_STATUS))
 }
 
-if [[ "$AZURE" == "true" ]]; then
-    FLAKE8_FORMAT="##vso[task.logissue type=error;sourcepath=%(path)s;linenumber=%(row)s;columnnumber=%(col)s;code=%(code)s;]%(text)s"
+# On GitHub Actions, prefix lint output with "##[error]" so each finding is
+# reported as an error line. flake8 applies %-formatting to FLAKE8_FORMAT, so
+# every field needs its conversion character, including the "s" in %(code)s.
+if [[ "$GITHUB_ACTIONS" == "true" ]]; then
+    FLAKE8_FORMAT="##[error]%(path)s:%(row)s:%(col)s:%(code)s:%(text)s"
+    INVGREP_PREPEND="##[error]"
 else
     FLAKE8_FORMAT="default"
 fi
@@ -109,7 +105,12 @@ if [[ -z "$CHECK" || "$CHECK" == "lint" ]]; then
 
     # Imports - Check formatting using isort see setup.cfg for settings
     MSG='Check import format using isort ' ; echo $MSG
-    isort --recursive --check-only pandas asv_bench
+    # Keep the isort invocation in one variable so both branches run the same command.
+    ISORT_CMD="isort --recursive --check-only pandas asv_bench"
+    if [[ "$GITHUB_ACTIONS" == "true" ]]; then
+        # Prefix every output line with "##[error]". PIPESTATUS[0] is the exit
+        # status of the isort command (first in the pipeline), not awk's, and
+        # is added to RET right here on the same line.
+        eval $ISORT_CMD | awk '{print "##[error]" $0}'; RET=$(($RET + ${PIPESTATUS[0]}))
+    else
+        # Plain run; the exit status is picked up via $? by the statement after `fi`.
+        eval $ISORT_CMD
+    fi
     RET=$(($RET + $?)) ; echo $MSG "DONE"
 
 fi
@@ -198,15 +199,15 @@ if [[ -z "$CHECK" || "$CHECK" == "patterns" ]]; then
     invgrep -R --include="*.py" -P '# type: (?!ignore)' pandas
     RET=$(($RET + $?)) ; echo $MSG "DONE"
 
+    MSG='Check for use of foo.__class__ instead of type(foo)' ; echo $MSG
+    invgrep -R --include=*.{py,pyx} '\.__class__' pandas
+    RET=$(($RET + $?)) ; echo $MSG "DONE"
+
     MSG='Check that no file in the repo contains trailing whitespaces' ; echo $MSG
-    set -o pipefail
-    if [[ "$AZURE" == "true" ]]; then
-        # we exclude all c/cpp files as the c/cpp files of pandas code base are tested when Linting .c and .h files
-        ! grep -n '--exclude=*.'{svg,c,cpp,html,js} --exclude-dir=env -RI "\s$" * | awk -F ":" '{print "##vso[task.logissue type=error;sourcepath=" $1 ";linenumber=" $2 ";] Tailing whitespaces found: " $3}'
-    else
-        ! grep -n '--exclude=*.'{svg,c,cpp,html,js} --exclude-dir=env -RI "\s$" * | awk -F ":" '{print $1 ":" $2 ":Tailing whitespaces found: " $3}'
-    fi
+    # invgrep appends $INVGREP_APPEND to every matching line it prints, so set
+    # the suffix only for this check.
+    INVGREP_APPEND=" <- trailing whitespaces found"
+    invgrep -RI --exclude=\*.{svg,c,cpp,html,js} --exclude-dir=env "\s$" *
     RET=$(($RET + $?)) ; echo $MSG "DONE"
+    # Clear the suffix so it does not leak into any later invgrep call.
+    unset INVGREP_APPEND
 fi
 
 ### CODE ###
 
@@ -13,20 +13,27 @@ dependencies:
   - pytest-azurepipelines
 
   # pandas dependencies
-  - beautifulsoup4==4.6.0
-  - bottleneck=1.2.*
+  - beautifulsoup4
+  - gcsfs
+  - html5lib
+  - ipython
+  - jinja2
   - lxml
-  - matplotlib=2.2.2
-  - numpy=1.14.*
-  - openpyxl=2.4.8
+  - matplotlib=3.0.*
+  - nomkl
+  - numexpr
+  - numpy=1.15.*
+  - openpyxl
+  # lowest supported version of pyarrow (putting it here instead of in
+  # azure-36-minimum_versions because it needs numpy >= 1.14)
+  - pyarrow=0.12
+  - pytables
   - python-dateutil
-  - python-blosc
-  - pytz=2017.2
+  - pytz
+  - s3fs
   - scipy
-  - sqlalchemy=1.1.4
-  - xlrd=1.1.0
-  - xlsxwriter=0.9.8
-  - xlwt=1.2.0
-  - pip
-  - pip:
-    - html5lib==1.0b2
+  - xarray
+  - xlrd
+  - xlsxwriter
+  - xlwt
+  - moto