pandas-dev
diff --git a/‎.devcontainer.json
Lines changed: 3 additions & 1 deletion b/‎.devcontainer.json
Lines changed: 3 additions & 1 deletion
diff --git a/‎.pre-commit-config.yaml
Lines changed: 1 addition & 1 deletion b/‎.pre-commit-config.yaml
Lines changed: 1 addition & 1 deletion
diff --git a/‎.travis.yml
Lines changed: 20 additions & 8 deletions b/‎.travis.yml
Lines changed: 20 additions & 8 deletions
diff --git a/‎LICENSES/XARRAY_LICENSE
Lines changed: 4 additions & 0 deletions b/‎LICENSES/XARRAY_LICENSE
Lines changed: 4 additions & 0 deletions
diff --git a/‎README.md
Lines changed: 4 additions & 3 deletions b/‎README.md
Lines changed: 4 additions & 3 deletions
diff --git a/‎asv_bench/asv.conf.json
Lines changed: 1 addition & 0 deletions b/‎asv_bench/asv.conf.json
Lines changed: 1 addition & 0 deletions
diff --git a/‎asv_bench/benchmarks/algorithms.py
Lines changed: 14 additions & 3 deletions b/‎asv_bench/benchmarks/algorithms.py
Lines changed: 14 additions & 3 deletions
diff --git a/‎asv_bench/benchmarks/arithmetic.py
Lines changed: 86 additions & 4 deletions b/‎asv_bench/benchmarks/arithmetic.py
Lines changed: 86 additions & 4 deletions
diff --git a/‎asv_bench/benchmarks/categoricals.py
Lines changed: 4 additions & 0 deletions b/‎asv_bench/benchmarks/categoricals.py
Lines changed: 4 additions & 0 deletions
diff --git a/‎asv_bench/benchmarks/groupby.py
Lines changed: 60 additions & 2 deletions b/‎asv_bench/benchmarks/groupby.py
Lines changed: 60 additions & 2 deletions
@@ -17,7 +17,9 @@
 		"python.linting.pylintEnabled": false,
 		"python.linting.mypyEnabled": true,
 		"python.testing.pytestEnabled": true,
-		"python.testing.cwd": "pandas/tests"
+		"python.testing.pytestArgs": [
+			"pandas"
+		]
 	},
 
 	// Add the IDs of extensions you want installed when the container is created in the array below.
 
@@ -3,7 +3,7 @@ repos:
     rev: 19.10b0
     hooks:
     -   id: black
-        language_version: python3.7
+        language_version: python3
 -   repo: https://gitlab.com/pycqa/flake8
     rev: 3.7.7
     hooks:
 
@@ -14,6 +14,8 @@ cache:
 
 env:
   global:
+    # Variable for test workers
+    - PYTEST_WORKERS="auto"
     # create a github personal access token
     # cd pandas-dev/pandas
     # travis encrypt 'PANDAS_GH_TOKEN=personal_access_token' -r pandas-dev/pandas
@@ -27,12 +29,21 @@ matrix:
   fast_finish: true
 
   include:
+    # In allowed failures
+    - dist: bionic
+      python: 3.9-dev
+      env:
+        - JOB="3.9-dev" PATTERN="(not slow and not network and not clipboard)"
     - env:
         - JOB="3.8" ENV_FILE="ci/deps/travis-38.yaml" PATTERN="(not slow and not network and not clipboard)"
 
     - env:
         - JOB="3.7" ENV_FILE="ci/deps/travis-37.yaml" PATTERN="(not slow and not network and not clipboard)"
 
+    - arch: arm64
+      env:
+        - JOB="3.7, arm64" PYTEST_WORKERS=8 ENV_FILE="ci/deps/travis-37-arm64.yaml" PATTERN="(not slow and not network and not clipboard)"
+
     - env:
         - JOB="3.6, locale" ENV_FILE="ci/deps/travis-36-locale.yaml" PATTERN="((not slow and not network and not clipboard) or (single and db))" LOCALE_OVERRIDE="zh_CN.UTF-8" SQL="1"
       services:
@@ -47,17 +58,18 @@ matrix:
       services:
         - mysql
         - postgresql
+  allow_failures:
+    - arch: arm64
+      env:
+        - JOB="3.7, arm64" PYTEST_WORKERS=8 ENV_FILE="ci/deps/travis-37-arm64.yaml" PATTERN="(not slow and not network and not clipboard)"
+    - dist: bionic
+      env:
+        - JOB="3.9-dev" PATTERN="(not slow and not network and not clipboard)"
 
-    - env:
-        - JOB="3.6, slow" ENV_FILE="ci/deps/travis-36-slow.yaml" PATTERN="slow" SQL="1"
-      services:
-        - mysql
-        - postgresql
 
 before_install:
   - echo "before_install"
-  # set non-blocking IO on travis
-  # https://github.com/travis-ci/travis-ci/issues/8920#issuecomment-352661024
+  # Use blocking IO on travis.  Ref:  https://github.com/travis-ci/travis-ci/issues/8920#issuecomment-352661024
   - python -c 'import os,sys,fcntl; flags = fcntl.fcntl(sys.stdout, fcntl.F_GETFL); fcntl.fcntl(sys.stdout, fcntl.F_SETFL, flags&~os.O_NONBLOCK);'
   - source ci/travis_process_gbq_encryption.sh
   - export PATH="$HOME/miniconda3/bin:$PATH"
@@ -83,7 +95,7 @@ install:
 script:
   - echo "script start"
   - echo "$JOB"
-  - source activate pandas-dev
+  - if [ "$JOB" != "3.9-dev" ]; then source activate pandas-dev; fi
   - ci/run_tests.sh
 
 after_script:
 
@@ -1,3 +1,7 @@
+Copyright 2014-2019, xarray Developers
+
+--------------------------------------------------------------------------------
+
 Apache License
 Version 2.0, January 2004
 http://www.apache.org/licenses/
 
@@ -16,10 +16,11 @@
 [![Downloads](https://anaconda.org/conda-forge/pandas/badges/downloads.svg)](https://pandas.pydata.org)
 [![Gitter](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/pydata/pandas)
 [![Powered by NumFOCUS](https://img.shields.io/badge/powered%20by-NumFOCUS-orange.svg?style=flat&colorA=E1523D&colorB=007D8A)](https://numfocus.org)
+[![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
 
 ## What is it?
 
-**pandas** is a Python package providing fast, flexible, and expressive data
+**pandas** is a Python package that provides fast, flexible, and expressive data
 structures designed to make working with "relational" or "labeled" data both
 easy and intuitive. It aims to be the fundamental high-level building block for
 doing practical, **real world** data analysis in Python. Additionally, it has
@@ -153,11 +154,11 @@ For usage questions, the best place to go to is [StackOverflow](https://stackove
 Further, general questions and discussions can also take place on the [pydata mailing list](https://groups.google.com/forum/?fromgroups#!forum/pydata).
 
 ## Discussion and Development
-Most development discussion is taking place on github in this repo. Further, the [pandas-dev mailing list](https://mail.python.org/mailman/listinfo/pandas-dev) can also be used for specialized discussions or design issues, and a [Gitter channel](https://gitter.im/pydata/pandas) is available for quick development related questions.
+Most development discussions take place on GitHub in this repo. Further, the [pandas-dev mailing list](https://mail.python.org/mailman/listinfo/pandas-dev) can also be used for specialized discussions or design issues, and a [Gitter channel](https://gitter.im/pydata/pandas) is available for quick development-related questions.
 
 ## Contributing to pandas [![Open Source Helpers](https://www.codetriage.com/pandas-dev/pandas/badges/users.svg)](https://www.codetriage.com/pandas-dev/pandas)
 
-All contributions, bug reports, bug fixes, documentation improvements, enhancements and ideas are welcome.
+All contributions, bug reports, bug fixes, documentation improvements, enhancements, and ideas are welcome.
 
 A detailed overview on how to contribute can be found in the **[contributing guide](https://pandas.pydata.org/docs/dev/development/contributing.html)**. There is also an [overview](.github/CONTRIBUTING.md) on GitHub.
 
 
@@ -53,6 +53,7 @@
         "xlwt": [],
         "odfpy": [],
         "pytest": [],
+        "jinja2": [],
         // If using Windows with python 2.7 and want to build using the
         // mingw toolchain (rather than MSVC), uncomment the following line.
         // "libpython": [],
 
@@ -34,7 +34,16 @@ class Factorize:
     params = [
         [True, False],
         [True, False],
-        ["int", "uint", "float", "string", "datetime64[ns]", "datetime64[ns, tz]"],
+        [
+            "int",
+            "uint",
+            "float",
+            "string",
+            "datetime64[ns]",
+            "datetime64[ns, tz]",
+            "Int64",
+            "boolean",
+        ],
     ]
     param_names = ["unique", "sort", "dtype"]
 
@@ -49,13 +58,15 @@ def setup(self, unique, sort, dtype):
             "datetime64[ns, tz]": pd.date_range(
                 "2011-01-01", freq="H", periods=N, tz="Asia/Tokyo"
             ),
+            "Int64": pd.array(np.arange(N), dtype="Int64"),
+            "boolean": pd.array(np.random.randint(0, 2, N), dtype="boolean"),
         }[dtype]
         if not unique:
             data = data.repeat(5)
-        self.idx = data
+        self.data = data
 
     def time_factorize(self, unique, sort, dtype):
-        self.idx.factorize(sort=sort)
+        pd.factorize(self.data, sort=sort)
 
 
 class Duplicated:
 
@@ -67,7 +67,7 @@ def time_series_op_with_fill_value_no_nas(self):
         self.ser.add(self.ser, fill_value=4)
 
 
-class MixedFrameWithSeriesAxis0:
+class MixedFrameWithSeriesAxis:
     params = [
         [
             "eq",
@@ -78,7 +78,7 @@ class MixedFrameWithSeriesAxis0:
             "gt",
             "add",
             "sub",
-            "div",
+            "truediv",
             "floordiv",
             "mul",
             "pow",
@@ -87,15 +87,72 @@ class MixedFrameWithSeriesAxis0:
     param_names = ["opname"]
 
     def setup(self, opname):
-        arr = np.arange(10 ** 6).reshape(100, -1)
+        arr = np.arange(10 ** 6).reshape(1000, -1)
         df = DataFrame(arr)
         df["C"] = 1.0
         self.df = df
         self.ser = df[0]
+        self.row = df.iloc[0]
 
     def time_frame_op_with_series_axis0(self, opname):
         getattr(self.df, opname)(self.ser, axis=0)
 
+    def time_frame_op_with_series_axis1(self, opname):
+        getattr(operator, opname)(self.df, self.row)  # row labels match df.columns
+
+
+class FrameWithFrameWide:
+    # Many-columns, mixed dtypes
+
+    params = [
+        [
+            # GH#32779 has discussion of which operators are included here
+            operator.add,
+            operator.floordiv,
+            operator.gt,
+        ]
+    ]
+    param_names = ["op"]
+
+    def setup(self, op):
+        # we choose dtypes so as to make the blocks
+        #  a) not perfectly match between right and left
+        #  b) appreciably bigger than single columns
+        n_cols = 2000
+        n_rows = 500
+
+        # construct dataframe with 2 blocks
+        arr1 = np.random.randn(n_rows, int(n_cols / 2)).astype("f8")
+        arr2 = np.random.randn(n_rows, int(n_cols / 2)).astype("f4")
+        df = pd.concat(
+            [pd.DataFrame(arr1), pd.DataFrame(arr2)], axis=1, ignore_index=True,
+        )
+        # should already be the case, but just to be sure
+        df._consolidate_inplace()
+
+        # TODO: GH#33198 the setting here shouldn't need two steps
+        arr1 = np.random.randn(n_rows, int(n_cols / 4)).astype("f8")
+        arr2 = np.random.randn(n_rows, int(n_cols / 2)).astype("i8")
+        arr3 = np.random.randn(n_rows, int(n_cols / 4)).astype("f8")
+        df2 = pd.concat(
+            [pd.DataFrame(arr1), pd.DataFrame(arr2), pd.DataFrame(arr3)],
+            axis=1,
+            ignore_index=True,
+        )
+        # should already be the case, but just to be sure
+        df2._consolidate_inplace()
+
+        self.left = df
+        self.right = df2
+
+    def time_op_different_blocks(self, op):
+        # blocks (and dtypes) are not aligned
+        op(self.left, self.right)
+
+    def time_op_same_blocks(self, op):
+        # blocks (and dtypes) are aligned
+        op(self.left, self.left)
+
 
 class Ops:
 
@@ -409,7 +466,32 @@ def setup(self, offset):
         self.rng = rng
 
     def time_apply_index(self, offset):
-        offset.apply_index(self.rng)
+        self.rng + offset
+
+
+class BinaryOpsMultiIndex:
+    params = ["sub", "add", "mul", "div"]
+    param_names = ["func"]
+
+    def setup(self, func):
+        date_range = pd.date_range("20200101 00:00", "20200102 0:00", freq="S")
+        level_0_names = [str(i) for i in range(30)]
+
+        index = pd.MultiIndex.from_product([level_0_names, date_range])
+        column_names = ["col_1", "col_2"]
+
+        self.df = pd.DataFrame(
+            np.random.rand(len(index), 2), index=index, columns=column_names
+        )
+
+        self.arg_df = pd.DataFrame(
+            np.random.randint(1, 10, (len(level_0_names), 2)),
+            index=level_0_names,
+            columns=column_names,
+        )
+
+    def time_binary_op_multiindex(self, func):
+        getattr(self.df, func)(self.arg_df, level=0)
 
 
 from .pandas_vb_common import setup  # noqa: F401 isort:skip
@@ -34,6 +34,7 @@ def setup(self):
         self.values_all_int8 = np.ones(N, "int8")
         self.categorical = pd.Categorical(self.values, self.categories)
         self.series = pd.Series(self.categorical)
+        self.intervals = pd.interval_range(0, 1, periods=N // 10)
 
     def time_regular(self):
         pd.Categorical(self.values, self.categories)
@@ -44,6 +45,9 @@ def time_fastpath(self):
     def time_datetimes(self):
         pd.Categorical(self.datetimes)
 
+    def time_interval(self):
+        pd.Categorical(self.intervals, categories=self.intervals)  # IntervalIndex input
+
     def time_datetimes_with_nat(self):
         pd.Categorical(self.datetimes_with_nat)
 
 
@@ -16,7 +16,7 @@
 
 from .pandas_vb_common import tm
 
-method_blacklist = {
+method_blocklist = {
     "object": {
         "median",
         "prod",
@@ -403,7 +403,7 @@ class GroupByMethods:
     ]
 
     def setup(self, dtype, method, application):
-        if method in method_blacklist.get(dtype, {}):
+        if method in method_blocklist.get(dtype, {}):
             raise NotImplementedError  # skip benchmark
         ngroups = 1000
         size = ngroups * 2
@@ -660,4 +660,62 @@ def function(values):
         self.grouper.transform(function, engine="cython")
 
 
+class AggEngine:
+    def setup(self):
+        N = 10 ** 3
+        data = DataFrame(
+            {0: [str(i) for i in range(100)] * N, 1: list(range(100)) * N},
+            columns=[0, 1],
+        )
+        self.grouper = data.groupby(0)
+
+    def time_series_numba(self):
+        def function(values, index):
+            total = 0
+            for i, value in enumerate(values):
+                if i % 2:
+                    total += value + 5
+                else:
+                    total += value * 2
+            return total
+
+        self.grouper[1].agg(function, engine="numba")
+
+    def time_series_cython(self):
+        def function(values):
+            total = 0
+            for i, value in enumerate(values):
+                if i % 2:
+                    total += value + 5
+                else:
+                    total += value * 2
+            return total
+
+        self.grouper[1].agg(function, engine="cython")
+
+    def time_dataframe_numba(self):
+        def function(values, index):
+            total = 0
+            for i, value in enumerate(values):
+                if i % 2:
+                    total += value + 5
+                else:
+                    total += value * 2
+            return total
+
+        self.grouper.agg(function, engine="numba")
+
+    def time_dataframe_cython(self):
+        def function(values):
+            total = 0
+            for i, value in enumerate(values):
+                if i % 2:
+                    total += value + 5
+                else:
+                    total += value * 2
+            return total
+
+        self.grouper.agg(function, engine="cython")
+
+
 from .pandas_vb_common import setup  # noqa: F401 isort:skip