From 613d51b11234e5c426abf1326067d0a6aafd33d1 Mon Sep 17 00:00:00 2001
From: alimcmaster1 <alimcmaster1@gmail.com>
Date: Wed, 19 Aug 2020 23:13:51 +0100
Subject: [PATCH 1/2] Fix arrow tests

Fix tests for pyarrow 1.0.0

Revert "Add new core members"

This reverts commit 7ef7c12
---
 ci/deps/azure-windows-38.yaml   |  1 +
 pandas/tests/io/test_parquet.py | 19 ++++++++++++++-----
 2 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/ci/deps/azure-windows-38.yaml b/ci/deps/azure-windows-38.yaml
index 1f383164b5328..805a976c92bc0 100644
--- a/ci/deps/azure-windows-38.yaml
+++ b/ci/deps/azure-windows-38.yaml
@@ -26,6 +26,7 @@ dependencies:
   - pytables
   - python-dateutil
   - pytz
+  - s3fs>=0.4.0
   - scipy
   - xlrd
   - xlsxwriter
diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py
index 82157f3d722a9..9a3f68bfc09cb 100644
--- a/pandas/tests/io/test_parquet.py
+++ b/pandas/tests/io/test_parquet.py
@@ -557,13 +557,22 @@ def test_s3_roundtrip(self, df_compat, s3_resource, pa):
     @pytest.mark.parametrize("partition_col", [["A"], []])
     def test_s3_roundtrip_for_dir(self, df_compat, s3_resource, pa, partition_col):
         # GH #26388
-        # https://github.com/apache/arrow/blob/master/python/pyarrow/tests/test_parquet.py#L2716
-        # As per pyarrow partitioned columns become 'categorical' dtypes
+        expected_df = df_compat.copy()
+
+        # read_table uses the new Arrow Datasets API since pyarrow 1.0.0.
+        # With older pyarrow, partitioned columns are read as 'categorical' dtypes
         # and are added to back of dataframe on read
 
-        expected_df = df_compat.copy()
-        if partition_col:
-            expected_df[partition_col] = expected_df[partition_col].astype("category")
+        legacy_read_table = LooseVersion(pyarrow.__version__) < LooseVersion("1.0.0")
+        if partition_col and legacy_read_table:
+            partition_col_type = "category"
+        else:
+            partition_col_type = "int32"
+
+        expected_df[partition_col] = expected_df[partition_col].astype(
+            partition_col_type
+        )
+
         check_round_trip(
             df_compat,
             pa,

From 2484e197363eb7a44ca525367ac9dfb6d0429b16 Mon Sep 17 00:00:00 2001
From: alimcmaster1 <alimcmaster1@gmail.com>
Date: Sat, 22 Aug 2020 18:15:15 +0100
Subject: [PATCH 2/2] Remove duplicated line in test_timestamp_nanoseconds

---
 pandas/tests/io/test_parquet.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py
index 9d5a7ff98065d..4e0c16c71a6a8 100644
--- a/pandas/tests/io/test_parquet.py
+++ b/pandas/tests/io/test_parquet.py
@@ -702,7 +702,6 @@ def test_timestamp_nanoseconds(self, pa):
         # with version 2.0, pyarrow defaults to writing the nanoseconds, so
         # this should work without error
         df = pd.DataFrame({"a": pd.date_range("2017-01-01", freq="1n", periods=10)})
-        df = pd.DataFrame({"a": pd.date_range("2017-01-01", freq="1n", periods=10)})
         check_round_trip(df, pa, write_kwargs={"version": "2.0"})
 
     @td.skip_if_no("pyarrow", min_version="0.17")