From f3d45aa02719e5a93347b02fb6c90b64b14f58b9 Mon Sep 17 00:00:00 2001 From: Josiah Baker Date: Sun, 29 Sep 2019 15:41:05 -0400 Subject: [PATCH 01/22] DOC: fix PR09 doc string errors in Timestamp class This fixes Parameter {} description should finish with "." in the Timestamp class closes #28673 --- pandas/_libs/tslibs/timestamps.pyx | 209 ++++++++++++++++------------- 1 file changed, 113 insertions(+), 96 deletions(-) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 6ca39d83afd25..5fa18cdca0aa5 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -251,11 +251,11 @@ class Timestamp(_Timestamp): Parameters ---------- ordinal : int - date corresponding to a proleptic Gregorian ordinal + Date corresponding to a proleptic Gregorian ordinal. freq : str, DateOffset - Offset which Timestamp will have + Offset to apply to the Timestamp. tz : str, pytz.timezone, dateutil.tz.tzfile or None - Time zone for time which Timestamp will have. + Time zone for the Timestamp. """ return cls(datetime.fromordinal(ordinal), freq=freq, tz=tz) @@ -271,7 +271,7 @@ class Timestamp(_Timestamp): Parameters ---------- tz : str or timezone object, default None - Timezone to localize to + Timezone to localize to. """ if isinstance(tz, str): tz = maybe_get_tz(tz) @@ -289,7 +289,7 @@ class Timestamp(_Timestamp): Parameters ---------- tz : str or timezone object, default None - Timezone to localize to + Timezone to localize to. """ return cls.now(tz) @@ -445,29 +445,32 @@ class Timestamp(_Timestamp): Parameters ---------- - freq : a freq string indicating the rounding resolution - ambiguous : bool, 'NaT', default 'raise' - - bool contains flags to determine if time is dst or not (note - that this flag is only applicable for ambiguous fall dst dates) - - 'NaT' will return NaT for an ambiguous time - - 'raise' will raise an AmbiguousTimeError for an ambiguous time - - .. versionadded:: 0.24.0 - nonexistent : 'shift_forward', 'shift_backward, 'NaT', timedelta, \ -default 'raise' - A nonexistent time does not exist in a particular timezone - where clocks moved forward due to DST. - - - 'shift_forward' will shift the nonexistent time forward to the - closest existing time - - 'shift_backward' will shift the nonexistent time backward to the - closest existing time - - 'NaT' will return NaT where there are nonexistent times - - timedelta objects will shift nonexistent times by the timedelta - - 'raise' will raise an NonExistentTimeError if there are - nonexistent times - - .. versionadded:: 0.24.0 + freq : str + Frequency string indicating the ceiling resolution. + ambiguous : {'raise', 'NaT', bool}, default 'raise' + The behavior is as follows: + + * bool contains flags to determine if time is dst or not (note + that this flag is only applicable for ambiguous fall dst dates). + * 'NaT' will return NaT for an ambiguous time. + * 'raise' will raise an AmbiguousTimeError for an ambiguous time. + + .. versionadded:: 0.24.0. + nonexistent : {'raise', 'shift_forward', 'shift_backward, 'NaT', \ +timedelta}, default 'raise' + A nonexistent time does not exist in a particular timezone where + clocks moved forward due to DST. + + * 'shift_forward' will shift the nonexistent time forward to the + closest existing time. + * 'shift_backward' will shift the nonexistent time backward to the + closest existing time. + * 'NaT' will return NaT where there are nonexistent times. + * timedelta objects will shift nonexistent times by the timedelta. 
+ * 'raise' will raise an NonExistentTimeError if there are + nonexistent times. + + .. versionadded:: 0.24.0. Returns ------- @@ -487,33 +490,36 @@ default 'raise' Parameters ---------- - freq : a freq string indicating the flooring resolution - ambiguous : bool, 'NaT', default 'raise' - - bool contains flags to determine if time is dst or not (note - that this flag is only applicable for ambiguous fall dst dates) - - 'NaT' will return NaT for an ambiguous time - - 'raise' will raise an AmbiguousTimeError for an ambiguous time - - .. versionadded:: 0.24.0 - nonexistent : 'shift_forward', 'shift_backward, 'NaT', timedelta, \ -default 'raise' - A nonexistent time does not exist in a particular timezone - where clocks moved forward due to DST. - - - 'shift_forward' will shift the nonexistent time forward to the - closest existing time - - 'shift_backward' will shift the nonexistent time backward to the - closest existing time - - 'NaT' will return NaT where there are nonexistent times - - timedelta objects will shift nonexistent times by the timedelta - - 'raise' will raise an NonExistentTimeError if there are - nonexistent times - - .. versionadded:: 0.24.0 + freq : str + Frequency string indicating the ceiling resolution. + ambiguous : {'raise', 'NaT', bool}, default 'raise' + The behavior is as follows: + + * bool contains flags to determine if time is dst or not (note + that this flag is only applicable for ambiguous fall dst dates). + * 'NaT' will return NaT for an ambiguous time. + * 'raise' will raise an AmbiguousTimeError for an ambiguous time. + + .. versionadded:: 0.24.0. + nonexistent : {'raise', 'shift_forward', 'shift_backward, 'NaT', \ +timedelta}, default 'raise' + A nonexistent time does not exist in a particular timezone where + clocks moved forward due to DST. + + * 'shift_forward' will shift the nonexistent time forward to the + closest existing time. + * 'shift_backward' will shift the nonexistent time backward to the + closest existing time. + * 'NaT' will return NaT where there are nonexistent times. + * timedelta objects will shift nonexistent times by the timedelta. + * 'raise' will raise an NonExistentTimeError if there are + nonexistent times. + + .. versionadded:: 0.24.0. Raises ------ - ValueError if the freq cannot be converted + ValueError if the freq cannot be converted. """ return self._round(freq, RoundTo.MINUS_INFTY, ambiguous, nonexistent) @@ -523,33 +529,36 @@ default 'raise' Parameters ---------- - freq : a freq string indicating the ceiling resolution - ambiguous : bool, 'NaT', default 'raise' - - bool contains flags to determine if time is dst or not (note - that this flag is only applicable for ambiguous fall dst dates) - - 'NaT' will return NaT for an ambiguous time - - 'raise' will raise an AmbiguousTimeError for an ambiguous time - - .. versionadded:: 0.24.0 - nonexistent : 'shift_forward', 'shift_backward, 'NaT', timedelta, \ -default 'raise' - A nonexistent time does not exist in a particular timezone - where clocks moved forward due to DST. - - - 'shift_forward' will shift the nonexistent time forward to the - closest existing time - - 'shift_backward' will shift the nonexistent time backward to the - closest existing time - - 'NaT' will return NaT where there are nonexistent times - - timedelta objects will shift nonexistent times by the timedelta - - 'raise' will raise an NonExistentTimeError if there are - nonexistent times - - .. versionadded:: 0.24.0 + freq : str + Frequency string indicating the ceiling resolution. 
+ ambiguous : {'raise', 'NaT', bool}, default 'raise' + The behavior is as follows: + + * bool contains flags to determine if time is dst or not (note + that this flag is only applicable for ambiguous fall dst dates). + * 'NaT' will return NaT for an ambiguous time. + * 'raise' will raise an AmbiguousTimeError for an ambiguous time. + + .. versionadded:: 0.24.0. + nonexistent : {'raise', 'shift_forward', 'shift_backward, 'NaT', \ +timedelta}, default 'raise' + A nonexistent time does not exist in a particular timezone where + clocks moved forward due to DST. + + * 'shift_forward' will shift the nonexistent time forward to the + closest existing time. + * 'shift_backward' will shift the nonexistent time backward to the + closest existing time. + * 'NaT' will return NaT where there are nonexistent times. + * timedelta objects will shift nonexistent times by the timedelta. + * 'raise' will raise an NonExistentTimeError if there are + nonexistent times. + + .. versionadded:: 0.24.0. Raises ------ - ValueError if the freq cannot be converted + ValueError if the freq cannot be converted. """ return self._round(freq, RoundTo.PLUS_INFTY, ambiguous, nonexistent) @@ -606,7 +615,7 @@ default 'raise' Parameters ---------- locale : string, default None (English locale) - locale determining the language in which to return the day name + Locale determining the language in which to return the day name. Returns ------- @@ -623,7 +632,7 @@ default 'raise' Parameters ---------- locale : string, default None (English locale) - locale determining the language in which to return the month name + Locale determining the language in which to return the month name. Returns ------- @@ -779,35 +788,43 @@ default 'raise' `ambiguous` parameter dictates how ambiguous times should be handled. - - bool contains flags to determine if time is dst or not (note - that this flag is only applicable for ambiguous fall dst dates) - - 'NaT' will return NaT for an ambiguous time - - 'raise' will raise an AmbiguousTimeError for an ambiguous time + The behavior is as follows: + + * bool contains flags to determine if time is dst or not (note + that this flag is only applicable for ambiguous fall dst dates). + * 'NaT' will return NaT for an ambiguous time. + * 'raise' will raise an AmbiguousTimeError for an ambiguous time. nonexistent : 'shift_forward', 'shift_backward, 'NaT', timedelta, \ default 'raise' A nonexistent time does not exist in a particular timezone where clocks moved forward due to DST. - - 'shift_forward' will shift the nonexistent time forward to the - closest existing time - - 'shift_backward' will shift the nonexistent time backward to the - closest existing time - - 'NaT' will return NaT where there are nonexistent times - - timedelta objects will shift nonexistent times by the timedelta - - 'raise' will raise an NonExistentTimeError if there are - nonexistent times + The behavior is as follows: - .. versionadded:: 0.24.0 + * 'shift_forward' will shift the nonexistent time forward to the + closest existing time. + * 'shift_backward' will shift the nonexistent time backward to the + closest existing time. + * 'NaT' will return NaT where there are nonexistent times. + * timedelta objects will shift nonexistent times by the timedelta. + * 'raise' will raise an NonExistentTimeError if there are + nonexistent times. + + .. versionadded:: 0.24.0. errors : 'raise', 'coerce', default None - - 'raise' will raise a NonExistentTimeError if a timestamp is not - valid in the specified timezone (e.g. 
due to a transition from - or to DST time). Use ``nonexistent='raise'`` instead. - - 'coerce' will return NaT if the timestamp can not be converted + Determine how errors should be handled. + + The behavior is as follows: + + * 'raise' will raise a NonExistentTimeError if a timestamp is not + valid in the specified timezone (e.g. due to a transition from + or to DST time). Use ``nonexistent='raise'`` instead. + * 'coerce' will return NaT if the timestamp can not be converted into the specified timezone. Use ``nonexistent='NaT'`` instead. - .. deprecated:: 0.24.0 + .. deprecated:: 0.24.0. Returns ------- From a4ee42bb02707de49a578bfbe5d781182f7dd722 Mon Sep 17 00:00:00 2001 From: Josiah Baker Date: Sun, 29 Sep 2019 19:10:56 -0400 Subject: [PATCH 02/22] fix docstrings for Timestamp, NaT classes docstrings for Timestamp, NaT and Timedelta classes must match for overlapping methods. verified they now pass the test_nat.py test --- pandas/_libs/tslibs/nattype.pyx | 186 ++++++++++++++++------------- pandas/_libs/tslibs/timedeltas.pyx | 9 +- pandas/_libs/tslibs/timestamps.pyx | 19 ++- 3 files changed, 116 insertions(+), 98 deletions(-) diff --git a/pandas/_libs/tslibs/nattype.pyx b/pandas/_libs/tslibs/nattype.pyx index 328fc26e4fef6..c6ff48e755545 100644 --- a/pandas/_libs/tslibs/nattype.pyx +++ b/pandas/_libs/tslibs/nattype.pyx @@ -396,7 +396,7 @@ class NaTType(_NaT): Parameters ---------- locale : string, default None (English locale) - locale determining the language in which to return the month name + Locale determining the language in which to return the month name. Returns ------- @@ -411,7 +411,7 @@ class NaTType(_NaT): Parameters ---------- locale : string, default None (English locale) - locale determining the language in which to return the day name + Locale determining the language in which to return the day name. Returns ------- @@ -509,11 +509,11 @@ class NaTType(_NaT): Parameters ---------- ordinal : int - date corresponding to a proleptic Gregorian ordinal + Date corresponding to a proleptic Gregorian ordinal. freq : str, DateOffset - Offset which Timestamp will have + Offset to apply to the Timestamp. tz : str, pytz.timezone, dateutil.tz.tzfile or None - Time zone for time which Timestamp will have. + Time zone for the Timestamp. """) # _nat_methods @@ -534,7 +534,7 @@ class NaTType(_NaT): Parameters ---------- tz : str or timezone object, default None - Timezone to localize to + Timezone to localize to. """) today = _make_nat_func('today', # noqa:E128 """ @@ -547,7 +547,7 @@ class NaTType(_NaT): Parameters ---------- tz : str or timezone object, default None - Timezone to localize to + Timezone to localize to. """) round = _make_nat_func('round', # noqa:E128 """ @@ -555,29 +555,32 @@ class NaTType(_NaT): Parameters ---------- - freq : a freq string indicating the rounding resolution - ambiguous : bool, 'NaT', default 'raise' - - bool contains flags to determine if time is dst or not (note - that this flag is only applicable for ambiguous fall dst dates) - - 'NaT' will return NaT for an ambiguous time - - 'raise' will raise an AmbiguousTimeError for an ambiguous time - - .. versionadded:: 0.24.0 - nonexistent : 'shift_forward', 'shift_backward, 'NaT', timedelta, \ -default 'raise' + freq : str + Frequency string indicating the rounding resolution. + ambiguous : {'raise', 'NaT', bool}, default 'raise' + The behavior is as follows: + + * bool contains flags to determine if time is dst or not (note + that this flag is only applicable for ambiguous fall dst dates). 
+ * 'NaT' will return NaT for an ambiguous time. + * 'raise' will raise an AmbiguousTimeError for an ambiguous time. + + .. versionadded:: 0.24.0. + nonexistent : {'raise', 'shift_forward', 'shift_backward, 'NaT', \ +timedelta}, default 'raise' A nonexistent time does not exist in a particular timezone where clocks moved forward due to DST. - - 'shift_forward' will shift the nonexistent time forward to the - closest existing time - - 'shift_backward' will shift the nonexistent time backward to the - closest existing time - - 'NaT' will return NaT where there are nonexistent times - - timedelta objects will shift nonexistent times by the timedelta - - 'raise' will raise an NonExistentTimeError if there are - nonexistent times + * 'shift_forward' will shift the nonexistent time forward to the + closest existing time. + * 'shift_backward' will shift the nonexistent time backward to the + closest existing time. + * 'NaT' will return NaT where there are nonexistent times. + * timedelta objects will shift nonexistent times by the timedelta. + * 'raise' will raise an NonExistentTimeError if there are + nonexistent times. - .. versionadded:: 0.24.0 + .. versionadded:: 0.24.0. Returns ------- @@ -585,7 +588,7 @@ default 'raise' Raises ------ - ValueError if the freq cannot be converted + ValueError if the freq cannot be converted. """) floor = _make_nat_func('floor', # noqa:E128 """ @@ -593,33 +596,36 @@ default 'raise' Parameters ---------- - freq : a freq string indicating the flooring resolution - ambiguous : bool, 'NaT', default 'raise' - - bool contains flags to determine if time is dst or not (note - that this flag is only applicable for ambiguous fall dst dates) - - 'NaT' will return NaT for an ambiguous time - - 'raise' will raise an AmbiguousTimeError for an ambiguous time - - .. versionadded:: 0.24.0 - nonexistent : 'shift_forward', 'shift_backward, 'NaT', timedelta, \ -default 'raise' + freq : str + Frequency string indicating the flooring resolution. + ambiguous : {'raise', 'NaT', bool}, default 'raise' + The behavior is as follows: + + * bool contains flags to determine if time is dst or not (note + that this flag is only applicable for ambiguous fall dst dates). + * 'NaT' will return NaT for an ambiguous time. + * 'raise' will raise an AmbiguousTimeError for an ambiguous time. + + .. versionadded:: 0.24.0. + nonexistent : {'raise', 'shift_forward', 'shift_backward, 'NaT', \ +timedelta}, default 'raise' A nonexistent time does not exist in a particular timezone where clocks moved forward due to DST. - - 'shift_forward' will shift the nonexistent time forward to the - closest existing time - - 'shift_backward' will shift the nonexistent time backward to the - closest existing time - - 'NaT' will return NaT where there are nonexistent times - - timedelta objects will shift nonexistent times by the timedelta - - 'raise' will raise an NonExistentTimeError if there are - nonexistent times + * 'shift_forward' will shift the nonexistent time forward to the + closest existing time. + * 'shift_backward' will shift the nonexistent time backward to the + closest existing time. + * 'NaT' will return NaT where there are nonexistent times. + * timedelta objects will shift nonexistent times by the timedelta. + * 'raise' will raise an NonExistentTimeError if there are + nonexistent times. - .. versionadded:: 0.24.0 + .. versionadded:: 0.24.0. Raises ------ - ValueError if the freq cannot be converted + ValueError if the freq cannot be converted. 
""") ceil = _make_nat_func('ceil', # noqa:E128 """ @@ -627,33 +633,36 @@ default 'raise' Parameters ---------- - freq : a freq string indicating the ceiling resolution - ambiguous : bool, 'NaT', default 'raise' - - bool contains flags to determine if time is dst or not (note - that this flag is only applicable for ambiguous fall dst dates) - - 'NaT' will return NaT for an ambiguous time - - 'raise' will raise an AmbiguousTimeError for an ambiguous time - - .. versionadded:: 0.24.0 - nonexistent : 'shift_forward', 'shift_backward, 'NaT', timedelta, \ -default 'raise' + freq : str + Frequency string indicating the ceiling resolution. + ambiguous : {'raise', 'NaT', bool}, default 'raise' + The behavior is as follows: + + * bool contains flags to determine if time is dst or not (note + that this flag is only applicable for ambiguous fall dst dates). + * 'NaT' will return NaT for an ambiguous time. + * 'raise' will raise an AmbiguousTimeError for an ambiguous time. + + .. versionadded:: 0.24.0. + nonexistent : {'raise', 'shift_forward', 'shift_backward, 'NaT', \ +timedelta}, default 'raise' A nonexistent time does not exist in a particular timezone where clocks moved forward due to DST. - - 'shift_forward' will shift the nonexistent time forward to the - closest existing time - - 'shift_backward' will shift the nonexistent time backward to the - closest existing time - - 'NaT' will return NaT where there are nonexistent times - - timedelta objects will shift nonexistent times by the timedelta - - 'raise' will raise an NonExistentTimeError if there are - nonexistent times + * 'shift_forward' will shift the nonexistent time forward to the + closest existing time. + * 'shift_backward' will shift the nonexistent time backward to the + closest existing time. + * 'NaT' will return NaT where there are nonexistent times. + * timedelta objects will shift nonexistent times by the timedelta. + * 'raise' will raise an NonExistentTimeError if there are + nonexistent times. - .. versionadded:: 0.24.0 + .. versionadded:: 0.24.0. Raises ------ - ValueError if the freq cannot be converted + ValueError if the freq cannot be converted. """) tz_convert = _make_nat_func('tz_convert', # noqa:E128 @@ -694,35 +703,42 @@ default 'raise' `ambiguous` parameter dictates how ambiguous times should be handled. - - bool contains flags to determine if time is dst or not (note - that this flag is only applicable for ambiguous fall dst dates) - - 'NaT' will return NaT for an ambiguous time - - 'raise' will raise an AmbiguousTimeError for an ambiguous time + The behavior is as follows: + + * bool contains flags to determine if time is dst or not (note + that this flag is only applicable for ambiguous fall dst dates). + * 'NaT' will return NaT for an ambiguous time. + * 'raise' will raise an AmbiguousTimeError for an ambiguous time. nonexistent : 'shift_forward', 'shift_backward, 'NaT', timedelta, \ default 'raise' A nonexistent time does not exist in a particular timezone where clocks moved forward due to DST. - - 'shift_forward' will shift the nonexistent time forward to the - closest existing time - - 'shift_backward' will shift the nonexistent time backward to the - closest existing time - - 'NaT' will return NaT where there are nonexistent times - - timedelta objects will shift nonexistent times by the timedelta - - 'raise' will raise an NonExistentTimeError if there are - nonexistent times + The behavior is as follows: - .. 
versionadded:: 0.24.0 + * 'shift_forward' will shift the nonexistent time forward to the + closest existing time. + * 'shift_backward' will shift the nonexistent time backward to the + closest existing time. + * 'NaT' will return NaT where there are nonexistent times. + * timedelta objects will shift nonexistent times by the timedelta. + * 'raise' will raise an NonExistentTimeError if there are + nonexistent times. + .. versionadded:: 0.24.0. errors : 'raise', 'coerce', default None - - 'raise' will raise a NonExistentTimeError if a timestamp is not - valid in the specified timezone (e.g. due to a transition from - or to DST time). Use ``nonexistent='raise'`` instead. - - 'coerce' will return NaT if the timestamp can not be converted + Determine how errors should be handled. + + The behavior is as follows: + + * 'raise' will raise a NonExistentTimeError if a timestamp is not + valid in the specified timezone (e.g. due to a transition from + or to DST time). Use ``nonexistent='raise'`` instead. + * 'coerce' will return NaT if the timestamp can not be converted into the specified timezone. Use ``nonexistent='NaT'`` instead. - .. deprecated:: 0.24.0 + .. deprecated:: 0.24.0. Returns ------- diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index b232042c70eac..82fa37bd9448a 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -1323,7 +1323,8 @@ class Timedelta(_Timedelta): Parameters ---------- - freq : a freq string indicating the rounding resolution + freq : str + Frequency string indicating the rounding resolution. Returns ------- @@ -1341,7 +1342,8 @@ class Timedelta(_Timedelta): Parameters ---------- - freq : a freq string indicating the flooring resolution + freq : str + Frequency string indicating the flooring resolution. """ return self._round(freq, np.floor) @@ -1351,7 +1353,8 @@ class Timedelta(_Timedelta): Parameters ---------- - freq : a freq string indicating the ceiling resolution + freq : str + Frequency string indicating the ceiling resolution. """ return self._round(freq, np.ceil) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 5fa18cdca0aa5..197bd7ec6bb9b 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -446,7 +446,7 @@ class Timestamp(_Timestamp): Parameters ---------- freq : str - Frequency string indicating the ceiling resolution. + Frequency string indicating the rounding resolution. ambiguous : {'raise', 'NaT', bool}, default 'raise' The behavior is as follows: @@ -458,8 +458,8 @@ class Timestamp(_Timestamp): .. versionadded:: 0.24.0. nonexistent : {'raise', 'shift_forward', 'shift_backward, 'NaT', \ timedelta}, default 'raise' - A nonexistent time does not exist in a particular timezone where - clocks moved forward due to DST. + A nonexistent time does not exist in a particular timezone + where clocks moved forward due to DST. * 'shift_forward' will shift the nonexistent time forward to the closest existing time. @@ -478,7 +478,7 @@ timedelta}, default 'raise' Raises ------ - ValueError if the freq cannot be converted + ValueError if the freq cannot be converted. """ return self._round( freq, RoundTo.NEAREST_HALF_EVEN, ambiguous, nonexistent @@ -491,7 +491,7 @@ timedelta}, default 'raise' Parameters ---------- freq : str - Frequency string indicating the ceiling resolution. + Frequency string indicating the flooring resolution. 
    ambiguous : {'raise', 'NaT', bool}, default 'raise'
        The behavior is as follows:
@@ -503,8 +503,8 @@ timedelta}, default 'raise'

        .. versionadded:: 0.24.0.
    nonexistent : {'raise', 'shift_forward', 'shift_backward, 'NaT', \
 timedelta}, default 'raise'
-        A nonexistent time does not exist in a particular timezone where
-        clocks moved forward due to DST.
+        A nonexistent time does not exist in a particular timezone
+        where clocks moved forward due to DST.

        * 'shift_forward' will shift the nonexistent time forward to the
          closest existing time.
@@ -542,8 +542,8 @@ timedelta}, default 'raise'

        .. versionadded:: 0.24.0.
    nonexistent : {'raise', 'shift_forward', 'shift_backward, 'NaT', \
 timedelta}, default 'raise'
-        A nonexistent time does not exist in a particular timezone where
-        clocks moved forward due to DST.
+        A nonexistent time does not exist in a particular timezone
+        where clocks moved forward due to DST.

        * 'shift_forward' will shift the nonexistent time forward to the
          closest existing time.
@@ -812,7 +812,6 @@ default 'raise'
          nonexistent times.

        .. versionadded:: 0.24.0.
-
    errors : 'raise', 'coerce', default None
        Determine how errors should be handled.

From 70703e1c7343e6bd02d9d155d945928fabc11f56 Mon Sep 17 00:00:00 2001
From: Josiah Baker
Date: Sun, 29 Sep 2019 19:38:56 -0400
Subject: [PATCH 03/22] fix unit description in Timedelta

---
 pandas/_libs/tslibs/timedeltas.pyx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx
index 82fa37bd9448a..5181fff2e589f 100644
--- a/pandas/_libs/tslibs/timedeltas.pyx
+++ b/pandas/_libs/tslibs/timedeltas.pyx
@@ -1222,7 +1222,7 @@ class Timedelta(_Timedelta):
        'm', 'minute', 'min', 'minutes', 'T', 'S', 'seconds', 'sec', 'second',
        'ms', 'milliseconds', 'millisecond', 'milli', 'millis', 'L',
        'us', 'microseconds', 'microsecond', 'micro', 'micros', 'U',
-        'ns', 'nanoseconds', 'nano', 'nanos', 'nanosecond', 'N'}
+        'ns', 'nanoseconds', 'nano', 'nanos', 'nanosecond', 'N'}.
    **kwargs
        Available kwargs: {days, seconds, microseconds, milliseconds,
        minutes, hours, weeks}.
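The DST-handling semantics spelled out in the docstrings above are easiest to see with concrete values. A minimal sketch of the documented behavior (illustrative only, not part of the patch series; it assumes pandas >= 0.24, where the `ambiguous` and `nonexistent` keywords exist, and uses the 2018 US/Eastern transitions):

    import pandas as pd

    # Spring forward: 02:00-02:59 on 2018-03-11 does not exist in US/Eastern.
    ts = pd.Timestamp("2018-03-11 01:59:00", tz="US/Eastern")
    ts.ceil("H", nonexistent="shift_forward")  # moves to 03:00, the closest existing time
    ts.ceil("H", nonexistent="NaT")            # returns NaT instead of raising
    # ts.ceil("H")                             # default 'raise' -> NonExistentTimeError

    # Fall back: 01:00-01:59 on 2018-11-04 occurs twice in US/Eastern.
    ts = pd.Timestamp("2018-11-04 00:30:00", tz="US/Eastern")
    ts.ceil("H", ambiguous=True)               # True flags the result as the DST (first) 01:00
    ts.ceil("H", ambiguous="NaT")              # returns NaT for the ambiguous time
    # ts.ceil("H")                             # default 'raise' -> AmbiguousTimeError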
""" if isinstance(tz, str): tz = maybe_get_tz(tz) @@ -289,7 +289,7 @@ class Timestamp(_Timestamp): Parameters ---------- tz : str or timezone object, default None - Timezone to localize to + Timezone to localize to. """ return cls.now(tz) @@ -445,29 +445,32 @@ class Timestamp(_Timestamp): Parameters ---------- - freq : a freq string indicating the rounding resolution - ambiguous : bool, 'NaT', default 'raise' - - bool contains flags to determine if time is dst or not (note - that this flag is only applicable for ambiguous fall dst dates) - - 'NaT' will return NaT for an ambiguous time - - 'raise' will raise an AmbiguousTimeError for an ambiguous time - - .. versionadded:: 0.24.0 - nonexistent : 'shift_forward', 'shift_backward, 'NaT', timedelta, \ -default 'raise' - A nonexistent time does not exist in a particular timezone - where clocks moved forward due to DST. - - - 'shift_forward' will shift the nonexistent time forward to the - closest existing time - - 'shift_backward' will shift the nonexistent time backward to the - closest existing time - - 'NaT' will return NaT where there are nonexistent times - - timedelta objects will shift nonexistent times by the timedelta - - 'raise' will raise an NonExistentTimeError if there are - nonexistent times - - .. versionadded:: 0.24.0 + freq : str + Frequency string indicating the ceiling resolution. + ambiguous : {'raise', 'NaT', bool}, default 'raise' + The behavior is as follows: + + * bool contains flags to determine if time is dst or not (note + that this flag is only applicable for ambiguous fall dst dates). + * 'NaT' will return NaT for an ambiguous time. + * 'raise' will raise an AmbiguousTimeError for an ambiguous time. + + .. versionadded:: 0.24.0. + nonexistent : {'raise', 'shift_forward', 'shift_backward, 'NaT', \ +timedelta}, default 'raise' + A nonexistent time does not exist in a particular timezone where + clocks moved forward due to DST. + + * 'shift_forward' will shift the nonexistent time forward to the + closest existing time. + * 'shift_backward' will shift the nonexistent time backward to the + closest existing time. + * 'NaT' will return NaT where there are nonexistent times. + * timedelta objects will shift nonexistent times by the timedelta. + * 'raise' will raise an NonExistentTimeError if there are + nonexistent times. + + .. versionadded:: 0.24.0. Returns ------- @@ -487,33 +490,36 @@ default 'raise' Parameters ---------- - freq : a freq string indicating the flooring resolution - ambiguous : bool, 'NaT', default 'raise' - - bool contains flags to determine if time is dst or not (note - that this flag is only applicable for ambiguous fall dst dates) - - 'NaT' will return NaT for an ambiguous time - - 'raise' will raise an AmbiguousTimeError for an ambiguous time - - .. versionadded:: 0.24.0 - nonexistent : 'shift_forward', 'shift_backward, 'NaT', timedelta, \ -default 'raise' - A nonexistent time does not exist in a particular timezone - where clocks moved forward due to DST. - - - 'shift_forward' will shift the nonexistent time forward to the - closest existing time - - 'shift_backward' will shift the nonexistent time backward to the - closest existing time - - 'NaT' will return NaT where there are nonexistent times - - timedelta objects will shift nonexistent times by the timedelta - - 'raise' will raise an NonExistentTimeError if there are - nonexistent times - - .. versionadded:: 0.24.0 + freq : str + Frequency string indicating the ceiling resolution. 
+ ambiguous : {'raise', 'NaT', bool}, default 'raise' + The behavior is as follows: + + * bool contains flags to determine if time is dst or not (note + that this flag is only applicable for ambiguous fall dst dates). + * 'NaT' will return NaT for an ambiguous time. + * 'raise' will raise an AmbiguousTimeError for an ambiguous time. + + .. versionadded:: 0.24.0. + nonexistent : {'raise', 'shift_forward', 'shift_backward, 'NaT', \ +timedelta}, default 'raise' + A nonexistent time does not exist in a particular timezone where + clocks moved forward due to DST. + + * 'shift_forward' will shift the nonexistent time forward to the + closest existing time. + * 'shift_backward' will shift the nonexistent time backward to the + closest existing time. + * 'NaT' will return NaT where there are nonexistent times. + * timedelta objects will shift nonexistent times by the timedelta. + * 'raise' will raise an NonExistentTimeError if there are + nonexistent times. + + .. versionadded:: 0.24.0. Raises ------ - ValueError if the freq cannot be converted + ValueError if the freq cannot be converted. """ return self._round(freq, RoundTo.MINUS_INFTY, ambiguous, nonexistent) @@ -523,33 +529,36 @@ default 'raise' Parameters ---------- - freq : a freq string indicating the ceiling resolution - ambiguous : bool, 'NaT', default 'raise' - - bool contains flags to determine if time is dst or not (note - that this flag is only applicable for ambiguous fall dst dates) - - 'NaT' will return NaT for an ambiguous time - - 'raise' will raise an AmbiguousTimeError for an ambiguous time - - .. versionadded:: 0.24.0 - nonexistent : 'shift_forward', 'shift_backward, 'NaT', timedelta, \ -default 'raise' - A nonexistent time does not exist in a particular timezone - where clocks moved forward due to DST. - - - 'shift_forward' will shift the nonexistent time forward to the - closest existing time - - 'shift_backward' will shift the nonexistent time backward to the - closest existing time - - 'NaT' will return NaT where there are nonexistent times - - timedelta objects will shift nonexistent times by the timedelta - - 'raise' will raise an NonExistentTimeError if there are - nonexistent times - - .. versionadded:: 0.24.0 + freq : str + Frequency string indicating the ceiling resolution. + ambiguous : {'raise', 'NaT', bool}, default 'raise' + The behavior is as follows: + + * bool contains flags to determine if time is dst or not (note + that this flag is only applicable for ambiguous fall dst dates). + * 'NaT' will return NaT for an ambiguous time. + * 'raise' will raise an AmbiguousTimeError for an ambiguous time. + + .. versionadded:: 0.24.0. + nonexistent : {'raise', 'shift_forward', 'shift_backward, 'NaT', \ +timedelta}, default 'raise' + A nonexistent time does not exist in a particular timezone where + clocks moved forward due to DST. + + * 'shift_forward' will shift the nonexistent time forward to the + closest existing time. + * 'shift_backward' will shift the nonexistent time backward to the + closest existing time. + * 'NaT' will return NaT where there are nonexistent times. + * timedelta objects will shift nonexistent times by the timedelta. + * 'raise' will raise an NonExistentTimeError if there are + nonexistent times. + + .. versionadded:: 0.24.0. Raises ------ - ValueError if the freq cannot be converted + ValueError if the freq cannot be converted. 
""" return self._round(freq, RoundTo.PLUS_INFTY, ambiguous, nonexistent) @@ -606,7 +615,7 @@ default 'raise' Parameters ---------- locale : string, default None (English locale) - locale determining the language in which to return the day name + Locale determining the language in which to return the day name. Returns ------- @@ -623,7 +632,7 @@ default 'raise' Parameters ---------- locale : string, default None (English locale) - locale determining the language in which to return the month name + Locale determining the language in which to return the month name. Returns ------- @@ -779,35 +788,43 @@ default 'raise' `ambiguous` parameter dictates how ambiguous times should be handled. - - bool contains flags to determine if time is dst or not (note - that this flag is only applicable for ambiguous fall dst dates) - - 'NaT' will return NaT for an ambiguous time - - 'raise' will raise an AmbiguousTimeError for an ambiguous time + The behavior is as follows: + + * bool contains flags to determine if time is dst or not (note + that this flag is only applicable for ambiguous fall dst dates). + * 'NaT' will return NaT for an ambiguous time. + * 'raise' will raise an AmbiguousTimeError for an ambiguous time. nonexistent : 'shift_forward', 'shift_backward, 'NaT', timedelta, \ default 'raise' A nonexistent time does not exist in a particular timezone where clocks moved forward due to DST. - - 'shift_forward' will shift the nonexistent time forward to the - closest existing time - - 'shift_backward' will shift the nonexistent time backward to the - closest existing time - - 'NaT' will return NaT where there are nonexistent times - - timedelta objects will shift nonexistent times by the timedelta - - 'raise' will raise an NonExistentTimeError if there are - nonexistent times + The behavior is as follows: - .. versionadded:: 0.24.0 + * 'shift_forward' will shift the nonexistent time forward to the + closest existing time. + * 'shift_backward' will shift the nonexistent time backward to the + closest existing time. + * 'NaT' will return NaT where there are nonexistent times. + * timedelta objects will shift nonexistent times by the timedelta. + * 'raise' will raise an NonExistentTimeError if there are + nonexistent times. + + .. versionadded:: 0.24.0. errors : 'raise', 'coerce', default None - - 'raise' will raise a NonExistentTimeError if a timestamp is not - valid in the specified timezone (e.g. due to a transition from - or to DST time). Use ``nonexistent='raise'`` instead. - - 'coerce' will return NaT if the timestamp can not be converted + Determine how errors should be handled. + + The behavior is as follows: + + * 'raise' will raise a NonExistentTimeError if a timestamp is not + valid in the specified timezone (e.g. due to a transition from + or to DST time). Use ``nonexistent='raise'`` instead. + * 'coerce' will return NaT if the timestamp can not be converted into the specified timezone. Use ``nonexistent='NaT'`` instead. - .. deprecated:: 0.24.0 + .. deprecated:: 0.24.0. Returns ------- From b58af1ae979b6ec86dfc03e5ce6363774af02ca2 Mon Sep 17 00:00:00 2001 From: Josiah Baker Date: Sun, 29 Sep 2019 19:10:56 -0400 Subject: [PATCH 05/22] fix docstrings for Timestamp, NaT classes docstrings for Timestamp, NaT and Timedelta classes must match for overlapping methods. 
verified they now pass the test_nat.py test --- pandas/_libs/tslibs/nattype.pyx | 186 ++++++++++++++++------------- pandas/_libs/tslibs/timedeltas.pyx | 9 +- pandas/_libs/tslibs/timestamps.pyx | 19 ++- 3 files changed, 116 insertions(+), 98 deletions(-) diff --git a/pandas/_libs/tslibs/nattype.pyx b/pandas/_libs/tslibs/nattype.pyx index 328fc26e4fef6..c6ff48e755545 100644 --- a/pandas/_libs/tslibs/nattype.pyx +++ b/pandas/_libs/tslibs/nattype.pyx @@ -396,7 +396,7 @@ class NaTType(_NaT): Parameters ---------- locale : string, default None (English locale) - locale determining the language in which to return the month name + Locale determining the language in which to return the month name. Returns ------- @@ -411,7 +411,7 @@ class NaTType(_NaT): Parameters ---------- locale : string, default None (English locale) - locale determining the language in which to return the day name + Locale determining the language in which to return the day name. Returns ------- @@ -509,11 +509,11 @@ class NaTType(_NaT): Parameters ---------- ordinal : int - date corresponding to a proleptic Gregorian ordinal + Date corresponding to a proleptic Gregorian ordinal. freq : str, DateOffset - Offset which Timestamp will have + Offset to apply to the Timestamp. tz : str, pytz.timezone, dateutil.tz.tzfile or None - Time zone for time which Timestamp will have. + Time zone for the Timestamp. """) # _nat_methods @@ -534,7 +534,7 @@ class NaTType(_NaT): Parameters ---------- tz : str or timezone object, default None - Timezone to localize to + Timezone to localize to. """) today = _make_nat_func('today', # noqa:E128 """ @@ -547,7 +547,7 @@ class NaTType(_NaT): Parameters ---------- tz : str or timezone object, default None - Timezone to localize to + Timezone to localize to. """) round = _make_nat_func('round', # noqa:E128 """ @@ -555,29 +555,32 @@ class NaTType(_NaT): Parameters ---------- - freq : a freq string indicating the rounding resolution - ambiguous : bool, 'NaT', default 'raise' - - bool contains flags to determine if time is dst or not (note - that this flag is only applicable for ambiguous fall dst dates) - - 'NaT' will return NaT for an ambiguous time - - 'raise' will raise an AmbiguousTimeError for an ambiguous time - - .. versionadded:: 0.24.0 - nonexistent : 'shift_forward', 'shift_backward, 'NaT', timedelta, \ -default 'raise' + freq : str + Frequency string indicating the rounding resolution. + ambiguous : {'raise', 'NaT', bool}, default 'raise' + The behavior is as follows: + + * bool contains flags to determine if time is dst or not (note + that this flag is only applicable for ambiguous fall dst dates). + * 'NaT' will return NaT for an ambiguous time. + * 'raise' will raise an AmbiguousTimeError for an ambiguous time. + + .. versionadded:: 0.24.0. + nonexistent : {'raise', 'shift_forward', 'shift_backward, 'NaT', \ +timedelta}, default 'raise' A nonexistent time does not exist in a particular timezone where clocks moved forward due to DST. - - 'shift_forward' will shift the nonexistent time forward to the - closest existing time - - 'shift_backward' will shift the nonexistent time backward to the - closest existing time - - 'NaT' will return NaT where there are nonexistent times - - timedelta objects will shift nonexistent times by the timedelta - - 'raise' will raise an NonExistentTimeError if there are - nonexistent times + * 'shift_forward' will shift the nonexistent time forward to the + closest existing time. 
+ * 'shift_backward' will shift the nonexistent time backward to the + closest existing time. + * 'NaT' will return NaT where there are nonexistent times. + * timedelta objects will shift nonexistent times by the timedelta. + * 'raise' will raise an NonExistentTimeError if there are + nonexistent times. - .. versionadded:: 0.24.0 + .. versionadded:: 0.24.0. Returns ------- @@ -585,7 +588,7 @@ default 'raise' Raises ------ - ValueError if the freq cannot be converted + ValueError if the freq cannot be converted. """) floor = _make_nat_func('floor', # noqa:E128 """ @@ -593,33 +596,36 @@ default 'raise' Parameters ---------- - freq : a freq string indicating the flooring resolution - ambiguous : bool, 'NaT', default 'raise' - - bool contains flags to determine if time is dst or not (note - that this flag is only applicable for ambiguous fall dst dates) - - 'NaT' will return NaT for an ambiguous time - - 'raise' will raise an AmbiguousTimeError for an ambiguous time - - .. versionadded:: 0.24.0 - nonexistent : 'shift_forward', 'shift_backward, 'NaT', timedelta, \ -default 'raise' + freq : str + Frequency string indicating the flooring resolution. + ambiguous : {'raise', 'NaT', bool}, default 'raise' + The behavior is as follows: + + * bool contains flags to determine if time is dst or not (note + that this flag is only applicable for ambiguous fall dst dates). + * 'NaT' will return NaT for an ambiguous time. + * 'raise' will raise an AmbiguousTimeError for an ambiguous time. + + .. versionadded:: 0.24.0. + nonexistent : {'raise', 'shift_forward', 'shift_backward, 'NaT', \ +timedelta}, default 'raise' A nonexistent time does not exist in a particular timezone where clocks moved forward due to DST. - - 'shift_forward' will shift the nonexistent time forward to the - closest existing time - - 'shift_backward' will shift the nonexistent time backward to the - closest existing time - - 'NaT' will return NaT where there are nonexistent times - - timedelta objects will shift nonexistent times by the timedelta - - 'raise' will raise an NonExistentTimeError if there are - nonexistent times + * 'shift_forward' will shift the nonexistent time forward to the + closest existing time. + * 'shift_backward' will shift the nonexistent time backward to the + closest existing time. + * 'NaT' will return NaT where there are nonexistent times. + * timedelta objects will shift nonexistent times by the timedelta. + * 'raise' will raise an NonExistentTimeError if there are + nonexistent times. - .. versionadded:: 0.24.0 + .. versionadded:: 0.24.0. Raises ------ - ValueError if the freq cannot be converted + ValueError if the freq cannot be converted. """) ceil = _make_nat_func('ceil', # noqa:E128 """ @@ -627,33 +633,36 @@ default 'raise' Parameters ---------- - freq : a freq string indicating the ceiling resolution - ambiguous : bool, 'NaT', default 'raise' - - bool contains flags to determine if time is dst or not (note - that this flag is only applicable for ambiguous fall dst dates) - - 'NaT' will return NaT for an ambiguous time - - 'raise' will raise an AmbiguousTimeError for an ambiguous time - - .. versionadded:: 0.24.0 - nonexistent : 'shift_forward', 'shift_backward, 'NaT', timedelta, \ -default 'raise' + freq : str + Frequency string indicating the ceiling resolution. + ambiguous : {'raise', 'NaT', bool}, default 'raise' + The behavior is as follows: + + * bool contains flags to determine if time is dst or not (note + that this flag is only applicable for ambiguous fall dst dates). 
+ * 'NaT' will return NaT for an ambiguous time. + * 'raise' will raise an AmbiguousTimeError for an ambiguous time. + + .. versionadded:: 0.24.0. + nonexistent : {'raise', 'shift_forward', 'shift_backward, 'NaT', \ +timedelta}, default 'raise' A nonexistent time does not exist in a particular timezone where clocks moved forward due to DST. - - 'shift_forward' will shift the nonexistent time forward to the - closest existing time - - 'shift_backward' will shift the nonexistent time backward to the - closest existing time - - 'NaT' will return NaT where there are nonexistent times - - timedelta objects will shift nonexistent times by the timedelta - - 'raise' will raise an NonExistentTimeError if there are - nonexistent times + * 'shift_forward' will shift the nonexistent time forward to the + closest existing time. + * 'shift_backward' will shift the nonexistent time backward to the + closest existing time. + * 'NaT' will return NaT where there are nonexistent times. + * timedelta objects will shift nonexistent times by the timedelta. + * 'raise' will raise an NonExistentTimeError if there are + nonexistent times. - .. versionadded:: 0.24.0 + .. versionadded:: 0.24.0. Raises ------ - ValueError if the freq cannot be converted + ValueError if the freq cannot be converted. """) tz_convert = _make_nat_func('tz_convert', # noqa:E128 @@ -694,35 +703,42 @@ default 'raise' `ambiguous` parameter dictates how ambiguous times should be handled. - - bool contains flags to determine if time is dst or not (note - that this flag is only applicable for ambiguous fall dst dates) - - 'NaT' will return NaT for an ambiguous time - - 'raise' will raise an AmbiguousTimeError for an ambiguous time + The behavior is as follows: + + * bool contains flags to determine if time is dst or not (note + that this flag is only applicable for ambiguous fall dst dates). + * 'NaT' will return NaT for an ambiguous time. + * 'raise' will raise an AmbiguousTimeError for an ambiguous time. nonexistent : 'shift_forward', 'shift_backward, 'NaT', timedelta, \ default 'raise' A nonexistent time does not exist in a particular timezone where clocks moved forward due to DST. - - 'shift_forward' will shift the nonexistent time forward to the - closest existing time - - 'shift_backward' will shift the nonexistent time backward to the - closest existing time - - 'NaT' will return NaT where there are nonexistent times - - timedelta objects will shift nonexistent times by the timedelta - - 'raise' will raise an NonExistentTimeError if there are - nonexistent times + The behavior is as follows: - .. versionadded:: 0.24.0 + * 'shift_forward' will shift the nonexistent time forward to the + closest existing time. + * 'shift_backward' will shift the nonexistent time backward to the + closest existing time. + * 'NaT' will return NaT where there are nonexistent times. + * timedelta objects will shift nonexistent times by the timedelta. + * 'raise' will raise an NonExistentTimeError if there are + nonexistent times. + .. versionadded:: 0.24.0. errors : 'raise', 'coerce', default None - - 'raise' will raise a NonExistentTimeError if a timestamp is not - valid in the specified timezone (e.g. due to a transition from - or to DST time). Use ``nonexistent='raise'`` instead. - - 'coerce' will return NaT if the timestamp can not be converted + Determine how errors should be handled. + + The behavior is as follows: + + * 'raise' will raise a NonExistentTimeError if a timestamp is not + valid in the specified timezone (e.g. 
due to a transition from + or to DST time). Use ``nonexistent='raise'`` instead. + * 'coerce' will return NaT if the timestamp can not be converted into the specified timezone. Use ``nonexistent='NaT'`` instead. - .. deprecated:: 0.24.0 + .. deprecated:: 0.24.0. Returns ------- diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index b232042c70eac..82fa37bd9448a 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -1323,7 +1323,8 @@ class Timedelta(_Timedelta): Parameters ---------- - freq : a freq string indicating the rounding resolution + freq : str + Frequency string indicating the rounding resolution. Returns ------- @@ -1341,7 +1342,8 @@ class Timedelta(_Timedelta): Parameters ---------- - freq : a freq string indicating the flooring resolution + freq : str + Frequency string indicating the flooring resolution. """ return self._round(freq, np.floor) @@ -1351,7 +1353,8 @@ class Timedelta(_Timedelta): Parameters ---------- - freq : a freq string indicating the ceiling resolution + freq : str + Frequency string indicating the ceiling resolution. """ return self._round(freq, np.ceil) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 5fa18cdca0aa5..197bd7ec6bb9b 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -446,7 +446,7 @@ class Timestamp(_Timestamp): Parameters ---------- freq : str - Frequency string indicating the ceiling resolution. + Frequency string indicating the rounding resolution. ambiguous : {'raise', 'NaT', bool}, default 'raise' The behavior is as follows: @@ -458,8 +458,8 @@ class Timestamp(_Timestamp): .. versionadded:: 0.24.0. nonexistent : {'raise', 'shift_forward', 'shift_backward, 'NaT', \ timedelta}, default 'raise' - A nonexistent time does not exist in a particular timezone where - clocks moved forward due to DST. + A nonexistent time does not exist in a particular timezone + where clocks moved forward due to DST. * 'shift_forward' will shift the nonexistent time forward to the closest existing time. @@ -478,7 +478,7 @@ timedelta}, default 'raise' Raises ------ - ValueError if the freq cannot be converted + ValueError if the freq cannot be converted. """ return self._round( freq, RoundTo.NEAREST_HALF_EVEN, ambiguous, nonexistent @@ -491,7 +491,7 @@ timedelta}, default 'raise' Parameters ---------- freq : str - Frequency string indicating the ceiling resolution. + Frequency string indicating the flooring resolution. ambiguous : {'raise', 'NaT', bool}, default 'raise' The behavior is as follows: @@ -503,8 +503,8 @@ timedelta}, default 'raise' .. versionadded:: 0.24.0. nonexistent : {'raise', 'shift_forward', 'shift_backward, 'NaT', \ timedelta}, default 'raise' - A nonexistent time does not exist in a particular timezone where - clocks moved forward due to DST. + A nonexistent time does not exist in a particular timezone + where clocks moved forward due to DST. * 'shift_forward' will shift the nonexistent time forward to the closest existing time. @@ -542,8 +542,8 @@ timedelta}, default 'raise' .. versionadded:: 0.24.0. nonexistent : {'raise', 'shift_forward', 'shift_backward, 'NaT', \ timedelta}, default 'raise' - A nonexistent time does not exist in a particular timezone where - clocks moved forward due to DST. + A nonexistent time does not exist in a particular timezone + where clocks moved forward due to DST. * 'shift_forward' will shift the nonexistent time forward to the closest existing time. 
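Timedelta rounding, touched by the unit fix above, takes the same kind of frequency string but has no timezone concerns. A quick sketch under the same caveat (illustrative, not from the patches):

    import pandas as pd

    td = pd.Timedelta("2 hours 17 min")
    td.round("15T")              # Timedelta('0 days 02:15:00'); '15T' = 15-minute resolution
    td.floor("H")                # Timedelta('0 days 02:00:00')
    td.ceil("H")                 # Timedelta('0 days 03:00:00')
    pd.Timedelta(90, unit="sec") # Timedelta('0 days 00:01:30'), using a unit alias from the list above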
From e3eb09efd05b9c39228b0da68af43765ccfea43d Mon Sep 17 00:00:00 2001
From: Josiah Baker
Date: Mon, 30 Sep 2019 20:53:36 -0400
Subject: [PATCH 07/22] change ambiguous param args and remove period

change ambiguous parameter to be more readable
remove the period after versionadd

---
 pandas/_libs/tslibs/nattype.pyx    | 22 +++++++++++-----------
 pandas/_libs/tslibs/timestamps.pyx | 22 +++++++++++-----------
 2 files changed, 22 insertions(+), 22 deletions(-)

diff --git a/pandas/_libs/tslibs/nattype.pyx b/pandas/_libs/tslibs/nattype.pyx
index c6ff48e755545..75462b4b0a914 100644
--- a/pandas/_libs/tslibs/nattype.pyx
+++ b/pandas/_libs/tslibs/nattype.pyx
@@ -557,7 +557,7 @@ class NaTType(_NaT):
    ----------
    freq : str
        Frequency string indicating the rounding resolution.
-    ambiguous : {'raise', 'NaT', bool}, default 'raise'
+    ambiguous : bool or {'raise', 'NaT'}, default 'raise'
        The behavior is as follows:

        * bool contains flags to determine if time is dst or not (note
@@ -565,7 +565,7 @@ class NaTType(_NaT):
        * 'NaT' will return NaT for an ambiguous time.
        * 'raise' will raise an AmbiguousTimeError for an ambiguous time.

-        .. versionadded:: 0.24.0.
+        .. versionadded:: 0.24.0
    nonexistent : {'raise', 'shift_forward', 'shift_backward, 'NaT', \
 timedelta}, default 'raise'
        A nonexistent time does not exist in a particular timezone
@@ -580,7 +580,7 @@ timedelta}, default 'raise'
        * 'raise' will raise an NonExistentTimeError if there are
          nonexistent times.

-        .. versionadded:: 0.24.0.
+        .. versionadded:: 0.24.0

    Returns
    -------
@@ -598,7 +598,7 @@ timedelta}, default 'raise'
    ----------
    freq : str
        Frequency string indicating the flooring resolution.
-    ambiguous : {'raise', 'NaT', bool}, default 'raise'
+    ambiguous : bool or {'raise', 'NaT'}, default 'raise'
        The behavior is as follows:

        * bool contains flags to determine if time is dst or not (note
@@ -606,7 +606,7 @@ timedelta}, default 'raise'
        * 'NaT' will return NaT for an ambiguous time.
        * 'raise' will raise an AmbiguousTimeError for an ambiguous time.

-        .. versionadded:: 0.24.0.
+        .. versionadded:: 0.24.0
    nonexistent : {'raise', 'shift_forward', 'shift_backward, 'NaT', \
 timedelta}, default 'raise'
        A nonexistent time does not exist in a particular timezone
@@ -621,7 +621,7 @@ timedelta}, default 'raise'
        * 'raise' will raise an NonExistentTimeError if there are
          nonexistent times.

-        .. versionadded:: 0.24.0.
+        ..
versionadded:: 0.24.0 Raises ------ @@ -635,7 +635,7 @@ timedelta}, default 'raise' ---------- freq : str Frequency string indicating the ceiling resolution. - ambiguous : {'raise', 'NaT', bool}, default 'raise' + ambiguous : bool or {'raise', 'NaT'}, default 'raise' The behavior is as follows: * bool contains flags to determine if time is dst or not (note @@ -643,7 +643,7 @@ timedelta}, default 'raise' * 'NaT' will return NaT for an ambiguous time. * 'raise' will raise an AmbiguousTimeError for an ambiguous time. - .. versionadded:: 0.24.0. + .. versionadded:: 0.24.0 nonexistent : {'raise', 'shift_forward', 'shift_backward, 'NaT', \ timedelta}, default 'raise' A nonexistent time does not exist in a particular timezone @@ -658,7 +658,7 @@ timedelta}, default 'raise' * 'raise' will raise an NonExistentTimeError if there are nonexistent times. - .. versionadded:: 0.24.0. + .. versionadded:: 0.24.0 Raises ------ @@ -726,7 +726,7 @@ default 'raise' * 'raise' will raise an NonExistentTimeError if there are nonexistent times. - .. versionadded:: 0.24.0. + .. versionadded:: 0.24.0 errors : 'raise', 'coerce', default None Determine how errors should be handled. @@ -738,7 +738,7 @@ default 'raise' * 'coerce' will return NaT if the timestamp can not be converted into the specified timezone. Use ``nonexistent='NaT'`` instead. - .. deprecated:: 0.24.0. + .. deprecated:: 0.24.0 Returns ------- diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 197bd7ec6bb9b..d30ab7b854309 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -447,7 +447,7 @@ class Timestamp(_Timestamp): ---------- freq : str Frequency string indicating the rounding resolution. - ambiguous : {'raise', 'NaT', bool}, default 'raise' + ambiguous : bool or {'raise', 'NaT'}, default 'raise' The behavior is as follows: * bool contains flags to determine if time is dst or not (note @@ -455,7 +455,7 @@ class Timestamp(_Timestamp): * 'NaT' will return NaT for an ambiguous time. * 'raise' will raise an AmbiguousTimeError for an ambiguous time. - .. versionadded:: 0.24.0. + .. versionadded:: 0.24.0 nonexistent : {'raise', 'shift_forward', 'shift_backward, 'NaT', \ timedelta}, default 'raise' A nonexistent time does not exist in a particular timezone @@ -470,7 +470,7 @@ timedelta}, default 'raise' * 'raise' will raise an NonExistentTimeError if there are nonexistent times. - .. versionadded:: 0.24.0. + .. versionadded:: 0.24.0 Returns ------- @@ -492,7 +492,7 @@ timedelta}, default 'raise' ---------- freq : str Frequency string indicating the flooring resolution. - ambiguous : {'raise', 'NaT', bool}, default 'raise' + ambiguous : bool or {'raise', 'NaT'}, default 'raise' The behavior is as follows: * bool contains flags to determine if time is dst or not (note @@ -500,7 +500,7 @@ timedelta}, default 'raise' * 'NaT' will return NaT for an ambiguous time. * 'raise' will raise an AmbiguousTimeError for an ambiguous time. - .. versionadded:: 0.24.0. + .. versionadded:: 0.24.0 nonexistent : {'raise', 'shift_forward', 'shift_backward, 'NaT', \ timedelta}, default 'raise' A nonexistent time does not exist in a particular timezone @@ -515,7 +515,7 @@ timedelta}, default 'raise' * 'raise' will raise an NonExistentTimeError if there are nonexistent times. - .. versionadded:: 0.24.0. + .. versionadded:: 0.24.0 Raises ------ @@ -531,7 +531,7 @@ timedelta}, default 'raise' ---------- freq : str Frequency string indicating the ceiling resolution. 
- ambiguous : {'raise', 'NaT', bool}, default 'raise' + ambiguous : bool or {'raise', 'NaT'}, default 'raise' The behavior is as follows: * bool contains flags to determine if time is dst or not (note @@ -539,7 +539,7 @@ timedelta}, default 'raise' * 'NaT' will return NaT for an ambiguous time. * 'raise' will raise an AmbiguousTimeError for an ambiguous time. - .. versionadded:: 0.24.0. + .. versionadded:: 0.24.0 nonexistent : {'raise', 'shift_forward', 'shift_backward, 'NaT', \ timedelta}, default 'raise' A nonexistent time does not exist in a particular timezone @@ -554,7 +554,7 @@ timedelta}, default 'raise' * 'raise' will raise an NonExistentTimeError if there are nonexistent times. - .. versionadded:: 0.24.0. + .. versionadded:: 0.24.0 Raises ------ @@ -811,7 +811,7 @@ default 'raise' * 'raise' will raise an NonExistentTimeError if there are nonexistent times. - .. versionadded:: 0.24.0. + .. versionadded:: 0.24.0 errors : 'raise', 'coerce', default None Determine how errors should be handled. @@ -823,7 +823,7 @@ default 'raise' * 'coerce' will return NaT if the timestamp can not be converted into the specified timezone. Use ``nonexistent='NaT'`` instead. - .. deprecated:: 0.24.0. + .. deprecated:: 0.24.0 Returns ------- From 776451e8414bfd0c6f554f6f9b389a7f1aeb9b19 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 30 Sep 2019 21:00:18 -0700 Subject: [PATCH 08/22] CLN: Assorted typings (#28604) --- pandas/core/algorithms.py | 24 ++++++++++++------------ pandas/core/util/hashing.py | 21 ++++++++++++++------- 2 files changed, 26 insertions(+), 19 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 6e73e1636a75b..002bbcc63d04f 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -176,7 +176,6 @@ def _reconstruct_data(values, dtype, original): ------- Index for extension types, otherwise ndarray casted to dtype """ - from pandas import Index if is_extension_array_dtype(dtype): values = dtype.construct_array_type()._from_sequence(values) @@ -184,7 +183,7 @@ def _reconstruct_data(values, dtype, original): values = values.astype(dtype) # we only support object dtypes bool Index - if isinstance(original, Index): + if isinstance(original, ABCIndexClass): values = values.astype(object) elif dtype is not None: values = values.astype(dtype) @@ -833,7 +832,7 @@ def duplicated(values, keep="first"): return f(values, keep=keep) -def mode(values, dropna=True): +def mode(values, dropna: bool = True): """ Returns the mode(s) of an array. @@ -1888,7 +1887,7 @@ def searchsorted(arr, value, side="left", sorter=None): } -def diff(arr, n, axis=0): +def diff(arr, n: int, axis: int = 0): """ difference of n between self, analogous to s-s.shift(n) @@ -1904,7 +1903,6 @@ def diff(arr, n, axis=0): Returns ------- shifted - """ n = int(n) @@ -1935,13 +1933,15 @@ def diff(arr, n, axis=0): f = _diff_special[arr.dtype.name] f(arr, out_arr, n, axis) else: - res_indexer = [slice(None)] * arr.ndim - res_indexer[axis] = slice(n, None) if n >= 0 else slice(None, n) - res_indexer = tuple(res_indexer) - - lag_indexer = [slice(None)] * arr.ndim - lag_indexer[axis] = slice(None, -n) if n > 0 else slice(-n, None) - lag_indexer = tuple(lag_indexer) + # To keep mypy happy, _res_indexer is a list while res_indexer is + # a tuple, ditto for lag_indexer. 
+ _res_indexer = [slice(None)] * arr.ndim + _res_indexer[axis] = slice(n, None) if n >= 0 else slice(None, n) + res_indexer = tuple(_res_indexer) + + _lag_indexer = [slice(None)] * arr.ndim + _lag_indexer[axis] = slice(None, -n) if n > 0 else slice(-n, None) + lag_indexer = tuple(_lag_indexer) # need to make sure that we account for na for datelike/timedelta # we don't actually want to subtract these i8 numbers diff --git a/pandas/core/util/hashing.py b/pandas/core/util/hashing.py index bcdbf0855cbb4..4bcc53606aeca 100644 --- a/pandas/core/util/hashing.py +++ b/pandas/core/util/hashing.py @@ -26,7 +26,7 @@ _default_hash_key = "0123456789123456" -def _combine_hash_arrays(arrays, num_items): +def _combine_hash_arrays(arrays, num_items: int): """ Parameters ---------- @@ -55,7 +55,11 @@ def _combine_hash_arrays(arrays, num_items): def hash_pandas_object( - obj, index=True, encoding="utf8", hash_key=None, categorize=True + obj, + index: bool = True, + encoding: str = "utf8", + hash_key=None, + categorize: bool = True, ): """ Return a data hash of the Index/Series/DataFrame. @@ -125,7 +129,10 @@ def hash_pandas_object( for _ in [None] ) num_items += 1 - hashes = itertools.chain(hashes, index_hash_generator) + + # keep `hashes` specifically a generator to keep mypy happy + _hashes = itertools.chain(hashes, index_hash_generator) + hashes = (x for x in _hashes) h = _combine_hash_arrays(hashes, num_items) h = Series(h, index=obj.index, dtype="uint64", copy=False) @@ -179,7 +186,7 @@ def hash_tuples(vals, encoding="utf8", hash_key=None): return h -def hash_tuple(val, encoding="utf8", hash_key=None): +def hash_tuple(val, encoding: str = "utf8", hash_key=None): """ Hash a single tuple efficiently @@ -201,7 +208,7 @@ def hash_tuple(val, encoding="utf8", hash_key=None): return h -def _hash_categorical(c, encoding, hash_key): +def _hash_categorical(c, encoding: str, hash_key: str): """ Hash a Categorical by hashing its categories, and then mapping the codes to the hashes @@ -239,7 +246,7 @@ def _hash_categorical(c, encoding, hash_key): return result -def hash_array(vals, encoding="utf8", hash_key=None, categorize=True): +def hash_array(vals, encoding: str = "utf8", hash_key=None, categorize: bool = True): """ Given a 1d array, return an array of deterministic integers. @@ -317,7 +324,7 @@ def hash_array(vals, encoding="utf8", hash_key=None, categorize=True): return vals -def _hash_scalar(val, encoding="utf8", hash_key=None): +def _hash_scalar(val, encoding: str = "utf8", hash_key=None): """ Hash scalar value From 962140f39fea80f33ef2d22dd8566cd6045eb6cb Mon Sep 17 00:00:00 2001 From: Josiah Baker Date: Tue, 1 Oct 2019 00:40:56 -0400 Subject: [PATCH 09/22] remove periods after versionadd and fix deprecated directive missed a few periods from the last commit also incorrectly moved a sphinx directive earlier --- pandas/_libs/tslibs/nattype.pyx | 8 ++++---- pandas/_libs/tslibs/timestamps.pyx | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/pandas/_libs/tslibs/nattype.pyx b/pandas/_libs/tslibs/nattype.pyx index c00fac48d51f0..b17c6079d81fd 100644 --- a/pandas/_libs/tslibs/nattype.pyx +++ b/pandas/_libs/tslibs/nattype.pyx @@ -580,7 +580,7 @@ timedelta}, default 'raise' * 'raise' will raise an NonExistentTimeError if there are nonexistent times. - .. versionadded:: 0.24.0. + .. versionadded:: 0.24.0 Returns ------- @@ -621,7 +621,7 @@ timedelta}, default 'raise' * 'raise' will raise an NonExistentTimeError if there are nonexistent times. - .. versionadded:: 0.24.0. + .. 
versionadded:: 0.24.0 Raises ------ @@ -658,7 +658,7 @@ timedelta}, default 'raise' * 'raise' will raise an NonExistentTimeError if there are nonexistent times. - .. versionadded:: 0.24.0. + .. versionadded:: 0.24.0 Raises ------ @@ -738,7 +738,7 @@ default 'raise' * 'coerce' will return NaT if the timestamp can not be converted into the specified timezone. Use ``nonexistent='NaT'`` instead. - .. deprecated:: 0.24.0 + .. deprecated:: 0.24.0 Returns ------- diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 8bb8c543159e1..261fd7d8068aa 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -470,7 +470,7 @@ timedelta}, default 'raise' * 'raise' will raise an NonExistentTimeError if there are nonexistent times. - .. versionadded:: 0.24.0. + .. versionadded:: 0.24.0 Returns ------- @@ -515,7 +515,7 @@ timedelta}, default 'raise' * 'raise' will raise an NonExistentTimeError if there are nonexistent times. - .. versionadded:: 0.24.0. + .. versionadded:: 0.24.0 Raises ------ @@ -554,7 +554,7 @@ timedelta}, default 'raise' * 'raise' will raise an NonExistentTimeError if there are nonexistent times. - .. versionadded:: 0.24.0. + .. versionadded:: 0.24.0 Raises ------ @@ -823,7 +823,7 @@ default 'raise' * 'coerce' will return NaT if the timestamp can not be converted into the specified timezone. Use ``nonexistent='NaT'`` instead. - .. deprecated:: 0.24.0 + .. deprecated:: 0.24.0 Returns ------- From 225d5e2bb5ac2443bf34af1f57d1ad42982a4680 Mon Sep 17 00:00:00 2001 From: Jack Bicknell Date: Tue, 1 Oct 2019 05:08:12 +0100 Subject: [PATCH 10/22] DOC: Fixed PR08 docstring errors in pandas.tseries (#28571) --- pandas/tseries/offsets.py | 116 +++++++++++++++++++++++--------------- 1 file changed, 70 insertions(+), 46 deletions(-) diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index 82cbfa831bf32..4ebb4f353a8fd 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -1007,9 +1007,9 @@ class CustomBusinessDay(_CustomMixin, BusinessDay): normalize : bool, default False Normalize start/end dates to midnight before generating date range weekmask : str, Default 'Mon Tue Wed Thu Fri' - weekmask of valid business days, passed to ``numpy.busdaycalendar`` + Weekmask of valid business days, passed to ``numpy.busdaycalendar`` holidays : list - list/array of dates to exclude from the set of valid business days, + List/array of dates to exclude from the set of valid business days, passed to ``numpy.busdaycalendar`` calendar : pd.HolidayCalendar or np.busdaycalendar offset : timedelta, default timedelta(0) @@ -1671,16 +1671,19 @@ class WeekOfMonth(_WeekOfMonthMixin, DateOffset): Parameters ---------- n : int - week : {0, 1, 2, 3, ...}, default 0 - 0 is 1st week of month, 1 2nd week, etc. - weekday : {0, 1, ..., 6}, default 0 - 0: Mondays - 1: Tuesdays - 2: Wednesdays - 3: Thursdays - 4: Fridays - 5: Saturdays - 6: Sundays + week : int {0, 1, 2, 3, ...}, default 0 + A specific integer for the week of the month. + e.g. 0 is 1st week of month, 1 is the 2nd week, etc. + weekday : int {0, 1, ..., 6}, default 0 + A specific integer for the day of the week. 
+ + - 0 is Monday + - 1 is Tuesday + - 2 is Wednesday + - 3 is Thursday + - 4 is Friday + - 5 is Saturday + - 6 is Sunday """ _prefix = "WOM" @@ -1747,14 +1750,16 @@ class LastWeekOfMonth(_WeekOfMonthMixin, DateOffset): Parameters ---------- n : int, default 1 - weekday : {0, 1, ..., 6}, default 0 - 0: Mondays - 1: Tuesdays - 2: Wednesdays - 3: Thursdays - 4: Fridays - 5: Saturdays - 6: Sundays + weekday : int {0, 1, ..., 6}, default 0 + A specific integer for the day of the week. + + - 0 is Monday + - 1 is Tuesday + - 2 is Wednesday + - 3 is Thursday + - 4 is Friday + - 5 is Saturday + - 6 is Sunday """ _prefix = "LWOM" @@ -2055,6 +2060,7 @@ class FY5253(DateOffset): http://en.wikipedia.org/wiki/4-4-5_calendar The year may either: + - end on the last X day of the Y month. - end on the last X day closest to the last day of the Y month. @@ -2064,17 +2070,25 @@ class FY5253(DateOffset): Parameters ---------- n : int - weekday : {0, 1, ..., 6} - 0: Mondays - 1: Tuesdays - 2: Wednesdays - 3: Thursdays - 4: Fridays - 5: Saturdays - 6: Sundays - startingMonth : The month in which fiscal years end. {1, 2, ... 12} - variation : str - {"nearest", "last"} for "LastOfMonth" or "NearestEndMonth" + weekday : int {0, 1, ..., 6}, default 0 + A specific integer for the day of the week. + + - 0 is Monday + - 1 is Tuesday + - 2 is Wednesday + - 3 is Thursday + - 4 is Friday + - 5 is Saturday + - 6 is Sunday + + startingMonth : int {1, 2, ... 12}, default 1 + The month in which the fiscal year ends. + + variation : str, default "nearest" + Method of employing 4-4-5 calendar. There are two options: + + - "nearest" means year end is **weekday** closest to last day of month in year. + - "last" means year end is final **weekday** of the final month in fiscal year. """ _prefix = "RE" @@ -2258,6 +2272,7 @@ class FY5253Quarter(DateOffset): http://en.wikipedia.org/wiki/4-4-5_calendar The year may either: + - end on the last X day of the Y month. - end on the last X day closest to the last day of the Y month. @@ -2271,19 +2286,28 @@ class FY5253Quarter(DateOffset): Parameters ---------- n : int - weekday : {0, 1, ..., 6} - 0: Mondays - 1: Tuesdays - 2: Wednesdays - 3: Thursdays - 4: Fridays - 5: Saturdays - 6: Sundays - startingMonth : The month in which fiscal years end. {1, 2, ... 12} - qtr_with_extra_week : The quarter number that has the leap - or 14 week when needed. {1,2,3,4} - variation : str - {"nearest", "last"} for "LastOfMonth" or "NearestEndMonth" + weekday : int {0, 1, ..., 6}, default 0 + A specific integer for the day of the week. + + - 0 is Monday + - 1 is Tuesday + - 2 is Wednesday + - 3 is Thursday + - 4 is Friday + - 5 is Saturday + - 6 is Sunday + + startingMonth : int {1, 2, ..., 12}, default 1 + The month in which fiscal years end. + + qtr_with_extra_week : int {1, 2, 3, 4}, default 1 + The quarter number that has the leap or 14 week when needed. + + variation : str, default "nearest" + Method of employing 4-4-5 calendar. There are two options: + + - "nearest" means year end is **weekday** closest to last day of month in year. + - "last" means year end is final **weekday** of the final month in fiscal year. 
""" _prefix = "REQ" @@ -2707,8 +2731,8 @@ def generate_range(start=None, end=None, periods=None, offset=BDay()): Parameters ---------- - start : datetime (default None) - end : datetime (default None) + start : datetime, (default None) + end : datetime, (default None) periods : int, (default None) offset : DateOffset, (default BDay()) From 4661d77bbf3ae97d21be8a2a5bdfa4f900a20e2f Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 1 Oct 2019 06:59:03 -0500 Subject: [PATCH 11/22] DOC: Add scaling to large datasets section (#28577) * DOC: Add scaling to large datasets section Closes https://github.com/pandas-dev/pandas/issues/28315 --- doc/.gitignore | 4 + doc/source/index.rst.template | 1 + doc/source/user_guide/index.rst | 1 + doc/source/user_guide/scale.rst | 373 ++++++++++++++++++++++++ doc/source/whatsnew/v1.0.0.rst | 7 + environment.yml | 8 +- pandas/util/testing.py | 81 +++++ requirements-dev.txt | 7 +- scripts/generate_pip_deps_from_conda.py | 2 +- 9 files changed, 481 insertions(+), 3 deletions(-) create mode 100644 doc/.gitignore create mode 100644 doc/source/user_guide/scale.rst diff --git a/doc/.gitignore b/doc/.gitignore new file mode 100644 index 0000000000000..e23892d6100e8 --- /dev/null +++ b/doc/.gitignore @@ -0,0 +1,4 @@ +data/ +timeseries.csv +timeseries.parquet +timeseries_wide.parquet diff --git a/doc/source/index.rst.template b/doc/source/index.rst.template index f5669626aa2b3..6ff42eee9dad2 100644 --- a/doc/source/index.rst.template +++ b/doc/source/index.rst.template @@ -83,6 +83,7 @@ See the :ref:`overview` for more detail about what's in the library. * :doc:`user_guide/style` * :doc:`user_guide/options` * :doc:`user_guide/enhancingperf` + * :doc:`user_guide/scale` * :doc:`user_guide/sparse` * :doc:`user_guide/gotchas` * :doc:`user_guide/cookbook` diff --git a/doc/source/user_guide/index.rst b/doc/source/user_guide/index.rst index 05df83decbd7e..b86961a71433b 100644 --- a/doc/source/user_guide/index.rst +++ b/doc/source/user_guide/index.rst @@ -38,6 +38,7 @@ Further information on any specific method can be obtained in the style options enhancingperf + scale sparse gotchas cookbook diff --git a/doc/source/user_guide/scale.rst b/doc/source/user_guide/scale.rst new file mode 100644 index 0000000000000..7b590a3a1fcc8 --- /dev/null +++ b/doc/source/user_guide/scale.rst @@ -0,0 +1,373 @@ +.. _scale: + +************************* +Scaling to large datasets +************************* + +Pandas provides data structures for in-memory analytics, which makes using pandas +to analyze datasets that are larger than memory datasets somewhat tricky. Even datasets +that are a sizable fraction of memory become unwieldy, as some pandas operations need +to make intermediate copies. + +This document provides a few recommendations for scaling your analysis to larger datasets. +It's a complement to :ref:`enhancingperf`, which focuses on speeding up analysis +for datasets that fit in memory. + +But first, it's worth considering *not using pandas*. Pandas isn't the right +tool for all situations. If you're working with very large datasets and a tool +like PostgreSQL fits your needs, then you should probably be using that. +Assuming you want or need the expressiveness and power of pandas, let's carry on. + +.. ipython:: python + + import pandas as pd + import numpy as np + +.. 
ipython:: python
+    :suppress:
+
+    from pandas.util.testing import _make_timeseries
+
+    # Make a random in-memory dataset
+    ts = _make_timeseries(freq="30S", seed=0)
+    ts.to_csv("timeseries.csv")
+    ts.to_parquet("timeseries.parquet")
+
+
+Load less data
+--------------
+
+.. ipython:: python
+    :suppress:
+
+    # make a similar dataset with many columns
+    timeseries = [
+        _make_timeseries(freq="1T", seed=i).rename(columns=lambda x: f"{x}_{i}")
+        for i in range(10)
+    ]
+    ts_wide = pd.concat(timeseries, axis=1)
+    ts_wide.to_parquet("timeseries_wide.parquet")
+
+Suppose our raw dataset on disk has many columns::
+
+    id_0 name_0 x_0 y_0 id_1 name_1 x_1 ... name_8 x_8 y_8 id_9 name_9 x_9 y_9
+    timestamp ...
+    2000-01-01 00:00:00 1015 Michael -0.399453 0.095427 994 Frank -0.176842 ... Dan -0.315310 0.713892 1025 Victor -0.135779 0.346801
+    2000-01-01 00:01:00 969 Patricia 0.650773 -0.874275 1003 Laura 0.459153 ... Ursula 0.913244 -0.630308 1047 Wendy -0.886285 0.035852
+    2000-01-01 00:02:00 1016 Victor -0.721465 -0.584710 1046 Michael 0.524994 ... Ray -0.656593 0.692568 1064 Yvonne 0.070426 0.432047
+    2000-01-01 00:03:00 939 Alice -0.746004 -0.908008 996 Ingrid -0.414523 ... Jerry -0.958994 0.608210 978 Wendy 0.855949 -0.648988
+    2000-01-01 00:04:00 1017 Dan 0.919451 -0.803504 1048 Jerry -0.569235 ... Frank -0.577022 -0.409088 994 Bob -0.270132 0.335176
+    ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
+    2000-12-30 23:56:00 999 Tim 0.162578 0.512817 973 Kevin -0.403352 ... Tim -0.380415 0.008097 1041 Charlie 0.191477 -0.599519
+    2000-12-30 23:57:00 970 Laura -0.433586 -0.600289 958 Oliver -0.966577 ... Zelda 0.971274 0.402032 1038 Ursula 0.574016 -0.930992
+    2000-12-30 23:58:00 1065 Edith 0.232211 -0.454540 971 Tim 0.158484 ... Alice -0.222079 -0.919274 1022 Dan 0.031345 -0.657755
+    2000-12-30 23:59:00 1019 Ingrid 0.322208 -0.615974 981 Hannah 0.607517 ... Sarah -0.424440 -0.117274 990 George -0.375530 0.563312
+    2000-12-31 00:00:00 937 Ursula -0.906523 0.943178 1018 Alice -0.564513 ... Jerry 0.236837 0.807650 985 Oliver 0.777642 0.783392
+
+    [525601 rows x 40 columns]
+
+
+To load the columns we want, we have two options.
+Option 1 loads in all the data and then filters to what we need.
+
+.. ipython:: python
+
+    columns = ['id_0', 'name_0', 'x_0', 'y_0']
+
+    pd.read_parquet("timeseries_wide.parquet")[columns]
+
+Option 2 only loads the columns we request.
+
+.. ipython:: python
+
+    pd.read_parquet("timeseries_wide.parquet", columns=columns)
+
+If we were to measure the memory usage of the two calls, we'd see that specifying
+``columns`` uses about 1/10th the memory in this case.
+
+With :func:`pandas.read_csv`, you can specify ``usecols`` to limit the columns
+read into memory. Not all file formats that can be read by pandas provide an option
+to read a subset of columns.
+
+Use efficient datatypes
+-----------------------
+
+The default pandas data types are not the most memory efficient. This is
+especially true for low-cardinality text data (columns with relatively few
+unique values). By using more efficient data types you can store larger datasets
+in memory.
+
+.. ipython:: python
+
+    ts = pd.read_parquet("timeseries.parquet")
+    ts
+
+Now, let's inspect the data types and memory usage to see where we should focus our
+attention.
+
+.. ipython:: python
+
+    ts.dtypes
+
+.. ipython:: python
+
+    ts.memory_usage(deep=True)  # memory usage in bytes
+
+
+The ``name`` column is taking up much more memory than any other. 
It has just a +few unique values, so it's a good candidate for converting to a +:class:`Categorical`. With a Categorical, we store each unique name once and use +space-efficient integers to know which specific name is used in each row. + + +.. ipython:: python + + ts2 = ts.copy() + ts2['name'] = ts2['name'].astype('category') + ts2.memory_usage(deep=True) + +We can go a bit further and downcast the numeric columns to their smallest types +using :func:`pandas.to_numeric`. + +.. ipython:: python + + ts2['id'] = pd.to_numeric(ts2['id'], downcast='unsigned') + ts2[['x', 'y']] = ts2[['x', 'y']].apply(pd.to_numeric, downcast='float') + ts2.dtypes + +.. ipython:: python + + ts2.memory_usage(deep=True) + +.. ipython:: python + + reduction = (ts2.memory_usage(deep=True).sum() + / ts.memory_usage(deep=True).sum()) + print(f"{reduction:0.2f}") + +In all, we've reduced the in-memory footprint of this dataset to 1/5 of its +original size. + +See :ref:`categorical` for more on ``Categorical`` and :ref:`basics.dtypes` +for an overview of all of pandas' dtypes. + +Use chunking +------------ + +Some workloads can be achieved with chunking: splitting a large problem like "convert this +directory of CSVs to parquet" into a bunch of small problems ("convert this individual CSV +file into a Parquet file. Now repeat that for each file in this directory."). As long as each chunk +fits in memory, you can work with datasets that are much larger than memory. + +.. note:: + + Chunking works well when the operation you're performing requires zero or minimal + coordination between chunks. For more complicated workflows, you're better off + :ref:`using another library `. + +Suppose we have an even larger "logical dataset" on disk that's a directory of parquet +files. Each file in the directory represents a different year of the entire dataset. + +.. ipython:: python + :suppress: + + import pathlib + + N = 12 + starts = [f'20{i:>02d}-01-01' for i in range(N)] + ends = [f'20{i:>02d}-12-13' for i in range(N)] + + pathlib.Path("data/timeseries").mkdir(exist_ok=True) + + for i, (start, end) in enumerate(zip(starts, ends)): + ts = _make_timeseries(start=start, end=end, freq='1T', seed=i) + ts.to_parquet(f"data/timeseries/ts-{i:0>2d}.parquet") + + +:: + + data + └── timeseries + ├── ts-00.parquet + ├── ts-01.parquet + ├── ts-02.parquet + ├── ts-03.parquet + ├── ts-04.parquet + ├── ts-05.parquet + ├── ts-06.parquet + ├── ts-07.parquet + ├── ts-08.parquet + ├── ts-09.parquet + ├── ts-10.parquet + └── ts-11.parquet + +Now we'll implement an out-of-core ``value_counts``. The peak memory usage of this +workflow is the single largest chunk, plus a small series storing the unique value +counts up to this point. As long as each individual file fits in memory, this will +work for arbitrary-sized datasets. + +.. ipython:: python + + %%time + files = pathlib.Path("data/timeseries/").glob("ts*.parquet") + counts = pd.Series(dtype=int) + for path in files: + # Only one dataframe is in memory at a time... + df = pd.read_parquet(path) + # ... plus a small Series `counts`, which is updated. + counts = counts.add(df['name'].value_counts(), fill_value=0) + counts.astype(int) + +Some readers, like :meth:`pandas.read_csv`, offer parameters to control the +``chunksize`` when reading a single file. + +Manually chunking is an OK option for workflows that don't +require too sophisticated of operations. Some operations, like ``groupby``, are +much harder to do chunkwise. 
In these cases, you may be better off switching to a
+different library that implements these out-of-core algorithms for you.
+
+.. _scale.other_libraries:
+
+Use other libraries
+-------------------
+
+Pandas is just one library offering a DataFrame API. Because of its popularity,
+pandas' API has become something of a standard that other libraries implement.
+The pandas documentation maintains a list of libraries implementing a DataFrame API
+in :ref:`our ecosystem page `.
+
+For example, `Dask`_, a parallel computing library, has `dask.dataframe`_, a
+pandas-like API for working with larger-than-memory datasets in parallel. Dask
+can use multiple threads or processes on a single machine, or a cluster of
+machines to process data in parallel.
+
+
+We'll import ``dask.dataframe`` and notice that the API feels similar to pandas.
+We can use Dask's ``read_parquet`` function, but provide a globstring of files to read in.
+
+.. ipython:: python
+
+    import dask.dataframe as dd
+
+    ddf = dd.read_parquet("data/timeseries/ts*.parquet", engine="pyarrow")
+    ddf
+
+Inspecting the ``ddf`` object, we see a few things:
+
+* There are familiar attributes like ``.columns`` and ``.dtypes``
+* There are familiar methods like ``.groupby``, ``.sum``, etc.
+* There are new attributes like ``.npartitions`` and ``.divisions``
+
+The partitions and divisions are how Dask parallelizes computation. A **Dask**
+DataFrame is made up of many **Pandas** DataFrames. A single method call on a
+Dask DataFrame ends up making many pandas method calls, and Dask knows how to
+coordinate everything to get the result.
+
+.. ipython:: python
+
+    ddf.columns
+    ddf.dtypes
+    ddf.npartitions
+
+One major difference: the ``dask.dataframe`` API is *lazy*. If you look at the
+repr above, you'll notice that the values aren't actually printed out; just the
+column names and dtypes. That's because Dask hasn't actually read the data yet.
+Rather than executing immediately, operations build up a **task graph**.
+
+.. ipython:: python
+
+    ddf
+    ddf['name']
+    ddf['name'].value_counts()
+
+Each of these calls is instant because the result isn't being computed yet.
+We're just building up a list of computations to do when someone needs the
+result. Dask knows that the return type of ``pandas.Series.value_counts``
+is a pandas Series with a certain dtype and a certain name. So the Dask version
+returns a Dask Series with the same dtype and the same name.
+
+To get the actual result you can call ``.compute()``.
+
+.. ipython:: python
+
+    %time ddf['name'].value_counts().compute()
+
+At that point, you get back the same thing you'd get with pandas, in this case
+a concrete pandas Series with the count of each ``name``.
+
+Calling ``.compute`` causes the full task graph to be executed. This includes
+reading the data, selecting the columns, and doing the ``value_counts``. The
+execution is done *in parallel* where possible, and Dask tries to keep the
+overall memory footprint small. You can work with datasets that are much larger
+than memory, as long as each partition (a regular pandas DataFrame) fits in memory.
+
+By default, ``dask.dataframe`` operations use a threadpool to do operations in
+parallel. We can also connect to a cluster to distribute the work on many
+machines. In this case we'll connect to a local "cluster" made up of several
+processes on this single machine.
+
+.. code-block:: python
+
+    >>> from dask.distributed import Client, LocalCluster
+
+    >>> cluster = LocalCluster()
+    >>> client = Client(cluster)
+    >>> client
+
+
+Once this ``client`` is created, all of Dask's computation will take place on
+the cluster (which is just processes in this case).
+
+Dask implements the most used parts of the pandas API. For example, we can do
+a familiar groupby aggregation.
+
+.. ipython:: python
+
+    %time ddf.groupby('name')[['x', 'y']].mean().compute().head()
+
+The grouping and aggregation are done out-of-core and in parallel.
+
+When Dask knows the ``divisions`` of a dataset, certain optimizations are
+possible. When reading parquet datasets written by Dask, the divisions will be
+known automatically. In this case, since we created the parquet files manually,
+we need to supply the divisions manually.
+
+.. ipython:: python
+
+    N = 12
+    starts = [f'20{i:>02d}-01-01' for i in range(N)]
+    ends = [f'20{i:>02d}-12-13' for i in range(N)]
+
+    divisions = tuple(pd.to_datetime(starts)) + (pd.Timestamp(ends[-1]),)
+    ddf.divisions = divisions
+    ddf
+
+Now we can do things like fast random access with ``.loc``.
+
+.. ipython:: python
+
+    ddf.loc['2002-01-01 12:01':'2002-01-01 12:05'].compute()
+
+Dask knows to just look in the 3rd partition for selecting values in `2002`. It
+doesn't need to look at any other data.
+
+Many workflows involve a large amount of data and processing it in a way that
+reduces the size to something that fits in memory. In this case, we'll resample
+to daily frequency and take the mean. Once we've taken the mean, we know the
+results will fit in memory, so we can safely call ``compute`` without running
+out of memory. At that point it's just a regular pandas object.
+
+.. ipython:: python
+
+    @savefig dask_resample.png
+    ddf[['x', 'y']].resample("1D").mean().cumsum().compute().plot()
+
+These Dask examples have all been done using multiple processes on a single
+machine. Dask can be `deployed on a cluster
+`_ to scale up to even larger
+datasets.
+
+You can see more Dask examples at https://examples.dask.org.
+
+.. _Dask: https://dask.org
+.. _dask.dataframe: https://docs.dask.org/en/latest/dataframe.html
diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst
index eb4b72d01d59a..b075a9d8b5e8b 100644
--- a/doc/source/whatsnew/v1.0.0.rst
+++ b/doc/source/whatsnew/v1.0.0.rst
@@ -111,6 +111,13 @@ Other API changes
 - :meth:`MultiIndex.from_arrays` will no longer infer names from arrays if ``names=None`` is explicitly provided (:issue:`27292`)
 -
+.. _whatsnew_1000.api.documentation:
+
+Documentation Improvements
+^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+- Added new section on :ref:`scale` (:issue:`28315`).
+
 .. 
_whatsnew_1000.deprecations: Deprecations diff --git a/environment.yml b/environment.yml index 7629fa52e7829..7c3ec9064cba3 100644 --- a/environment.yml +++ b/environment.yml @@ -35,6 +35,12 @@ dependencies: - nbconvert>=5.4.1 - nbsphinx - pandoc + # Dask and its dependencies + - dask-core + - toolz>=0.7.3 + - fsspec>=0.5.1 + - partd>=0.3.10 + - cloudpickle>=0.2.1 # web (jinja2 is also needed, but it's also an optional pandas dependency) - markdown @@ -76,7 +82,7 @@ dependencies: - html5lib # pandas.read_html - lxml # pandas.read_html - openpyxl # pandas.read_excel, DataFrame.to_excel, pandas.ExcelWriter, pandas.ExcelFile - - pyarrow>=0.9.0 # pandas.read_paquet, DataFrame.to_parquet, pandas.read_feather, DataFrame.to_feather + - pyarrow>=0.13.1 # pandas.read_paquet, DataFrame.to_parquet, pandas.read_feather, DataFrame.to_feather - pyqt>=5.9.2 # pandas.read_clipboard - pytables>=3.4.2 # pandas.read_hdf, DataFrame.to_hdf - python-snappy # required by pyarrow diff --git a/pandas/util/testing.py b/pandas/util/testing.py index aee58f808d9e6..1c0a8dbc19ccd 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -1651,6 +1651,87 @@ def makeMultiIndex(k=10, names=None, **kwargs): return MultiIndex.from_product((("foo", "bar"), (1, 2)), names=names, **kwargs) +_names = [ + "Alice", + "Bob", + "Charlie", + "Dan", + "Edith", + "Frank", + "George", + "Hannah", + "Ingrid", + "Jerry", + "Kevin", + "Laura", + "Michael", + "Norbert", + "Oliver", + "Patricia", + "Quinn", + "Ray", + "Sarah", + "Tim", + "Ursula", + "Victor", + "Wendy", + "Xavier", + "Yvonne", + "Zelda", +] + + +def _make_timeseries(start="2000-01-01", end="2000-12-31", freq="1D", seed=None): + """ + Make a DataFrame with a DatetimeIndex + + Parameters + ---------- + start : str or Timestamp, default "2000-01-01" + The start of the index. Passed to date_range with `freq`. + end : str or Timestamp, default "2000-12-31" + The end of the index. Passed to date_range with `freq`. + freq : str or Freq + The frequency to use for the DatetimeIndex + seed : int, optional + The random state seed. + + * name : object dtype with string names + * id : int dtype with + * x, y : float dtype + + Examples + -------- + >>> _make_timeseries() + id name x y + timestamp + 2000-01-01 982 Frank 0.031261 0.986727 + 2000-01-02 1025 Edith -0.086358 -0.032920 + 2000-01-03 982 Edith 0.473177 0.298654 + 2000-01-04 1009 Sarah 0.534344 -0.750377 + 2000-01-05 963 Zelda -0.271573 0.054424 + ... ... ... ... ... + 2000-12-27 980 Ingrid -0.132333 -0.422195 + 2000-12-28 972 Frank -0.376007 -0.298687 + 2000-12-29 1009 Ursula -0.865047 -0.503133 + 2000-12-30 1000 Hannah -0.063757 -0.507336 + 2000-12-31 972 Tim -0.869120 0.531685 + """ + index = pd.date_range(start=start, end=end, freq=freq, name="timestamp") + n = len(index) + state = np.random.RandomState(seed) + columns = { + "name": state.choice(_names, size=n), + "id": state.poisson(1000, size=n), + "x": state.rand(n) * 2 - 1, + "y": state.rand(n) * 2 - 1, + } + df = pd.DataFrame(columns, index=index, columns=sorted(columns)) + if df.index[-1] == end: + df = df.iloc[:-1] + return df + + def all_index_generator(k=10): """Generator which can be iterated over to get instances of all the various index classes. 
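The ``_make_timeseries`` helper added above is the generator that the new ``scale.rst`` page uses to build its example datasets. A minimal sketch of calling it directly, assuming only the signature shown in this hunk (it is a private testing helper, so the import path is not stable API):

.. code-block:: python

    from pandas.util.testing import _make_timeseries

    # One month of minute-frequency data; a fixed seed makes the draw reproducible.
    ts = _make_timeseries(start="2000-01-01", end="2000-01-31", freq="1T", seed=1)

    ts.dtypes      # id: int64, name: object, x/y: float64
    ts.index.name  # "timestamp"
    ts.head()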
diff --git a/requirements-dev.txt b/requirements-dev.txt index fd8e6378240b4..698e4f3aea094 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -17,6 +17,11 @@ numpydoc>=0.9.0 nbconvert>=5.4.1 nbsphinx pandoc +dask-core +toolz>=0.7.3 +fsspec>=0.5.1 +partd>=0.3.10 +cloudpickle>=0.2.1 markdown feedparser pyyaml @@ -48,7 +53,7 @@ fastparquet>=0.2.1 html5lib lxml openpyxl -pyarrow>=0.9.0 +pyarrow>=0.13.1 pyqt5>=5.9.2 tables>=3.4.2 python-snappy diff --git a/scripts/generate_pip_deps_from_conda.py b/scripts/generate_pip_deps_from_conda.py index 29fe8bf84c12b..44fe50b99560a 100755 --- a/scripts/generate_pip_deps_from_conda.py +++ b/scripts/generate_pip_deps_from_conda.py @@ -20,7 +20,7 @@ import yaml EXCLUDE = {"python=3"} -RENAME = {"pytables": "tables", "pyqt": "pyqt5"} +RENAME = {"pytables": "tables", "pyqt": "pyqt5", "dask-core": "dask"} def conda_package_to_pip(package): From c9b6f83ce6ebca0f0208d38467cc68d69b471b8a Mon Sep 17 00:00:00 2001 From: Kaiqi Dong Date: Tue, 1 Oct 2019 14:00:37 +0200 Subject: [PATCH 12/22] BUG: restore limit in RangeIndex.get_indexer (#28671) --- doc/source/whatsnew/v0.25.2.rst | 2 +- pandas/core/indexes/range.py | 6 ++++-- pandas/tests/frame/test_indexing.py | 16 ++++++++++++++++ pandas/tests/indexes/test_range.py | 8 ++++++++ 4 files changed, 29 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v0.25.2.rst b/doc/source/whatsnew/v0.25.2.rst index 14682b706f924..f904d69d6421b 100644 --- a/doc/source/whatsnew/v0.25.2.rst +++ b/doc/source/whatsnew/v0.25.2.rst @@ -49,7 +49,7 @@ Interval Indexing ^^^^^^^^ -- +- Fix regression in :meth:`DataFrame.reindex` not following ``limit`` argument (:issue:`28631`). - - diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 8783351cc74d1..43445a0d5d5a2 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -380,8 +380,10 @@ def get_loc(self, key, method=None, tolerance=None): @Appender(_index_shared_docs["get_indexer"]) def get_indexer(self, target, method=None, limit=None, tolerance=None): - if not (method is None and tolerance is None and is_list_like(target)): - return super().get_indexer(target, method=method, tolerance=tolerance) + if com.any_not_none(method, tolerance, limit) or not is_list_like(target): + return super().get_indexer( + target, method=method, tolerance=tolerance, limit=limit + ) if self.step > 0: start, stop, step = self.start, self.stop, self.step diff --git a/pandas/tests/frame/test_indexing.py b/pandas/tests/frame/test_indexing.py index 6b073c460ea08..6d239e96cd167 100644 --- a/pandas/tests/frame/test_indexing.py +++ b/pandas/tests/frame/test_indexing.py @@ -2217,6 +2217,22 @@ def test_reindex_frame_add_nat(self): assert mask[-5:].all() assert not mask[:-5].any() + def test_reindex_limit(self): + # GH 28631 + data = [["A", "A", "A"], ["B", "B", "B"], ["C", "C", "C"], ["D", "D", "D"]] + exp_data = [ + ["A", "A", "A"], + ["B", "B", "B"], + ["C", "C", "C"], + ["D", "D", "D"], + ["D", "D", "D"], + [np.nan, np.nan, np.nan], + ] + df = DataFrame(data) + result = df.reindex([0, 1, 2, 3, 4, 5], method="ffill", limit=1) + expected = DataFrame(exp_data) + tm.assert_frame_equal(result, expected) + def test_set_dataframe_column_ns_dtype(self): x = DataFrame([datetime.now(), datetime.now()]) assert x[0].dtype == np.dtype("M8[ns]") diff --git a/pandas/tests/indexes/test_range.py b/pandas/tests/indexes/test_range.py index 58b98297f00f3..7e08a5deaff7a 100644 --- a/pandas/tests/indexes/test_range.py +++ b/pandas/tests/indexes/test_range.py @@ -416,6 
+416,14 @@ def test_get_indexer_backfill(self): expected = np.array([0, 1, 1, 2, 2, 3, 3, 4, 4, 5], dtype=np.intp) tm.assert_numpy_array_equal(indexer, expected) + def test_get_indexer_limit(self): + # GH 28631 + idx = RangeIndex(4) + target = RangeIndex(6) + result = idx.get_indexer(target, method="pad", limit=1) + expected = np.array([0, 1, 2, 3, 3, -1], dtype=np.intp) + tm.assert_numpy_array_equal(result, expected) + def test_join_outer(self): # join with Int64Index other = Int64Index(np.arange(25, 14, -1)) From d68e9fb4563b54cbdad135edb2b484b30e2d807d Mon Sep 17 00:00:00 2001 From: Victoria Zdanovskaya Date: Tue, 1 Oct 2019 13:12:04 +0100 Subject: [PATCH 13/22] DOC: Fixed PR09 docstring errors in pandas.tseries (#27977) (#28707) --- pandas/tseries/offsets.py | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index 4ebb4f353a8fd..81d8869dd7ba0 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -204,7 +204,8 @@ def __add__(date): normalize : bool, default False Whether to round the result of a DateOffset addition down to the previous midnight. - **kwds : Temporal parameter that add to or replace the offset value. + **kwds + Temporal parameter that add to or replace the offset value. Parameters that **add** to the offset (like Timedelta): @@ -1005,12 +1006,12 @@ class CustomBusinessDay(_CustomMixin, BusinessDay): ---------- n : int, default 1 normalize : bool, default False - Normalize start/end dates to midnight before generating date range + Normalize start/end dates to midnight before generating date range. weekmask : str, Default 'Mon Tue Wed Thu Fri' - Weekmask of valid business days, passed to ``numpy.busdaycalendar`` + Weekmask of valid business days, passed to ``numpy.busdaycalendar``. holidays : list List/array of dates to exclude from the set of valid business days, - passed to ``numpy.busdaycalendar`` + passed to ``numpy.busdaycalendar``. calendar : pd.HolidayCalendar or np.busdaycalendar offset : timedelta, default timedelta(0) """ @@ -1519,7 +1520,7 @@ class Week(DateOffset): Parameters ---------- weekday : int, default None - Always generate specific day of week. 0 for Monday + Always generate specific day of week. 0 for Monday. """ _adjust_dst = True @@ -2085,7 +2086,9 @@ class FY5253(DateOffset): The month in which the fiscal year ends. variation : str, default "nearest" - Method of employing 4-4-5 calendar. There are two options: + Method of employing 4-4-5 calendar. + + There are two options: - "nearest" means year end is **weekday** closest to last day of month in year. - "last" means year end is final **weekday** of the final month in fiscal year. @@ -2304,7 +2307,9 @@ class FY5253Quarter(DateOffset): The quarter number that has the leap or 14 week when needed. variation : str, default "nearest" - Method of employing 4-4-5 calendar. There are two options: + Method of employing 4-4-5 calendar. + + There are two options: - "nearest" means year end is **weekday** closest to last day of month in year. - "last" means year end is final **weekday** of the final month in fiscal year. 
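To make the two ``variation`` options concrete, a short sketch of the behavior the docstrings above describe; the offset comes from ``pandas.tseries.offsets``, and the comments restate the documented semantics rather than asserted output:

.. code-block:: python

    import pandas as pd
    from pandas.tseries.offsets import FY5253

    # Fiscal years ending on a Saturday (weekday=5) in August (startingMonth=8).
    nearest = FY5253(weekday=5, startingMonth=8, variation="nearest")
    last = FY5253(weekday=5, startingMonth=8, variation="last")

    ts = pd.Timestamp("2019-01-01")
    ts + nearest  # rolls forward to the Saturday closest to the last day of August
    ts + last     # rolls forward to the final Saturday in August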
From 2ad37043a83c1ef4afe821a0a4e324b463413361 Mon Sep 17 00:00:00 2001 From: Victoria Zdanovskaya Date: Tue, 1 Oct 2019 13:49:34 +0100 Subject: [PATCH 14/22] CI Failing: TestReadHtml.test_spam_url #28708 (#28710) --- pandas/tests/io/test_html.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/io/test_html.py b/pandas/tests/io/test_html.py index 183d217eb09d6..1045b72f0aa6e 100644 --- a/pandas/tests/io/test_html.py +++ b/pandas/tests/io/test_html.py @@ -135,8 +135,8 @@ def test_banklist_url(self): @network def test_spam_url(self): url = ( - "http://ndb.nal.usda.gov/ndb/foods/show/300772?fg=&man=&" - "lfacet=&format=&count=&max=25&offset=&sort=&qlookup=spam" + "https://raw.githubusercontent.com/pandas-dev/pandas/master/" + "pandas/tests/io/data/spam.html" ) df1 = self.read_html(url, ".*Water.*") df2 = self.read_html(url, "Unit") From 5ec718a880825d8900394fc09b5f0463ab10e430 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 1 Oct 2019 06:06:03 -0700 Subject: [PATCH 15/22] TST: un-xfail incorrectly xfailed tests for maybe_promote (#28564) --- pandas/core/dtypes/cast.py | 28 +++++++- pandas/tests/dtypes/cast/test_promote.py | 85 +++++++----------------- 2 files changed, 50 insertions(+), 63 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index b59660056aadb..a3ad84ff89a66 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -358,6 +358,7 @@ def maybe_promote(dtype, fill_value=np.nan): fill_value = NaT elif is_extension_array_dtype(dtype) and isna(fill_value): fill_value = dtype.na_value + elif is_float(fill_value): if issubclass(dtype.type, np.bool_): dtype = np.object_ @@ -366,6 +367,8 @@ def maybe_promote(dtype, fill_value=np.nan): elif is_bool(fill_value): if not issubclass(dtype.type, np.bool_): dtype = np.object_ + else: + fill_value = np.bool_(fill_value) elif is_integer(fill_value): if issubclass(dtype.type, np.bool_): dtype = np.object_ @@ -374,6 +377,10 @@ def maybe_promote(dtype, fill_value=np.nan): arr = np.asarray(fill_value) if arr != arr.astype(dtype): dtype = arr.dtype + elif issubclass(dtype.type, np.floating): + # check if we can cast + if _check_lossless_cast(fill_value, dtype): + fill_value = dtype.type(fill_value) elif is_complex(fill_value): if issubclass(dtype.type, np.bool_): dtype = np.object_ @@ -398,12 +405,31 @@ def maybe_promote(dtype, fill_value=np.nan): pass elif is_datetime64tz_dtype(dtype): pass - elif issubclass(np.dtype(dtype).type, str): + elif issubclass(np.dtype(dtype).type, (bytes, str)): dtype = np.object_ return dtype, fill_value +def _check_lossless_cast(value, dtype: np.dtype) -> bool: + """ + Check if we can cast the given value to the given dtype _losslesly_. + + Parameters + ---------- + value : object + dtype : np.dtype + + Returns + ------- + bool + """ + casted = dtype.type(value) + if casted == value: + return True + return False + + def infer_dtype_from(val, pandas_dtype=False): """ interpret the dtype from a scalar or array. 
This is a convenience diff --git a/pandas/tests/dtypes/cast/test_promote.py b/pandas/tests/dtypes/cast/test_promote.py index 44aebd4d277f2..211c550100018 100644 --- a/pandas/tests/dtypes/cast/test_promote.py +++ b/pandas/tests/dtypes/cast/test_promote.py @@ -23,6 +23,7 @@ is_timedelta64_dtype, ) from pandas.core.dtypes.dtypes import DatetimeTZDtype, PandasExtensionDtype +from pandas.core.dtypes.missing import isna import pandas as pd @@ -95,6 +96,7 @@ def _safe_dtype_assert(left_dtype, right_dtype): """ Compare two dtypes without raising TypeError. """ + __tracebackhide__ = True if isinstance(right_dtype, PandasExtensionDtype): # switch order of equality check because numpy dtypes (e.g. if # left_dtype is np.object_) do not know some expected dtypes (e.g. @@ -157,20 +159,17 @@ def _check_promote( _safe_dtype_assert(result_dtype, expected_dtype) - # for equal values, also check type (relevant e.g. for int vs float, resp. - # for different datetimes and timedeltas) - match_value = ( - result_fill_value - == expected_fill_value - # disabled type check due to too many xfails; GH 23982/25425 - # and type(result_fill_value) == type(expected_fill_value) - ) + # GH#23982/25425 require the same type in addition to equality/NA-ness + res_type = type(result_fill_value) + ex_type = type(expected_fill_value) + assert res_type == ex_type + + match_value = result_fill_value == expected_fill_value + # Note: type check above ensures that we have the _same_ NA value # for missing values, None == None and iNaT == iNaT (which is checked # through match_value above), but np.nan != np.nan and pd.NaT != pd.NaT - match_missing = (result_fill_value is np.nan and expected_fill_value is np.nan) or ( - result_fill_value is NaT and expected_fill_value is NaT - ) + match_missing = isna(result_fill_value) and isna(expected_fill_value) assert match_value or match_missing @@ -251,7 +250,9 @@ def test_maybe_promote_bool_with_any(any_numpy_dtype_reduced, box): if boxed and fill_dtype == bool: pytest.xfail("falsely upcasts to object") - if boxed and box_dtype is None and is_datetime_or_timedelta_dtype(fill_dtype): + if boxed and box_dtype is None and fill_dtype.kind == "M": + pytest.xfail("wrongly casts fill_value") + if boxed and box_dtype is None and fill_dtype.kind == "m": pytest.xfail("wrongly casts fill_value") # create array of given dtype; casts "1" to correct dtype @@ -282,7 +283,9 @@ def test_maybe_promote_any_with_bool(any_numpy_dtype_reduced, box): pytest.xfail("falsely upcasts to object") if boxed and dtype not in (str, object) and box_dtype is None: pytest.xfail("falsely upcasts to object") - if not boxed and is_datetime_or_timedelta_dtype(dtype): + if not boxed and dtype.kind == "M": + pytest.xfail("raises error") + if not boxed and dtype.kind == "m": pytest.xfail("raises error") # filling anything but bool with bool casts to object @@ -393,9 +396,6 @@ def test_maybe_promote_datetimetz_with_any_numpy_dtype( fill_dtype = np.dtype(any_numpy_dtype_reduced) boxed, box_dtype = box # read from parametrized fixture - if box_dtype != object: - pytest.xfail("does not upcast correctly") - # create array of given dtype; casts "1" to correct dtype fill_value = np.array([1], dtype=fill_dtype)[0] @@ -430,8 +430,6 @@ def test_maybe_promote_datetimetz_with_datetimetz( pytest.xfail("Cannot process fill_value with this dtype, see GH 24310") if dtype.tz == fill_dtype.tz and boxed: pytest.xfail("falsely upcasts") - if dtype.tz != fill_dtype.tz and not boxed: - pytest.xfail("falsely upcasts") # create array of given dtype; casts 
"1" to correct dtype fill_value = pd.Series([10 ** 9], dtype=fill_dtype)[0] @@ -466,14 +464,10 @@ def test_maybe_promote_datetimetz_with_na(tz_aware_fixture, fill_value, box): dtype = DatetimeTZDtype(tz=tz_aware_fixture) boxed, box_dtype = box # read from parametrized fixture - if boxed and ( - box_dtype == object - or (box_dtype is None and (fill_value is None or fill_value is NaT)) - ): - pytest.xfail("false upcasts to object") # takes the opinion that DatetimeTZ should have single na-marker # using iNaT would lead to errors elsewhere -> NaT if not boxed and fill_value == iNaT: + # TODO: are we sure iNaT _should_ be cast to NaT? pytest.xfail("wrong missing value marker") expected_dtype = dtype @@ -509,8 +503,10 @@ def test_maybe_promote_any_numpy_dtype_with_datetimetz( fill_dtype = DatetimeTZDtype(tz=tz_aware_fixture) boxed, box_dtype = box # read from parametrized fixture - if is_datetime_or_timedelta_dtype(dtype) and not boxed: + if dtype.kind == "m" and not boxed: pytest.xfail("raises error") + elif dtype.kind == "M" and not boxed: + pytest.xfail("Comes back as M8 instead of object") fill_value = pd.Series([fill_value], dtype=fill_dtype)[0] @@ -566,19 +562,6 @@ def test_maybe_promote_any_with_timedelta64( else: if boxed and box_dtype is None and is_timedelta64_dtype(type(fill_value)): pytest.xfail("does not upcast correctly") - if ( - not boxed - and is_timedelta64_dtype(type(fill_value)) - and ( - is_integer_dtype(dtype) - or is_float_dtype(dtype) - or is_complex_dtype(dtype) - or issubclass(dtype.type, np.bytes_) - ) - ): - pytest.xfail("does not upcast correctly") - if box_dtype == "td_dtype": - pytest.xfail("falsely upcasts") if not boxed and is_datetime64_dtype(dtype): pytest.xfail("raises error") @@ -612,7 +595,9 @@ def test_maybe_promote_string_with_any(string_dtype, any_numpy_dtype_reduced, bo fill_dtype = np.dtype(any_numpy_dtype_reduced) boxed, box_dtype = box # read from parametrized fixture - if boxed and box_dtype is None and is_datetime_or_timedelta_dtype(fill_dtype): + if boxed and box_dtype is None and fill_dtype.kind == "m": + pytest.xfail("wrong missing value marker") + if boxed and box_dtype is None and fill_dtype.kind == "M": pytest.xfail("wrong missing value marker") # create array of given dtype; casts "1" to correct dtype @@ -652,17 +637,6 @@ def test_maybe_promote_any_with_string(any_numpy_dtype_reduced, string_dtype, bo if is_datetime_or_timedelta_dtype(dtype) and box_dtype != object: pytest.xfail("does not upcast or raises") - if ( - boxed - and box_dtype in (None, "str") - and ( - is_integer_dtype(dtype) - or is_float_dtype(dtype) - or is_complex_dtype(dtype) - or issubclass(dtype.type, np.bytes_) - ) - ): - pytest.xfail("does not upcast correctly") # create array of given dtype fill_value = "abc" @@ -760,19 +734,6 @@ def test_maybe_promote_any_numpy_dtype_with_na( pytest.xfail("does not upcast to object") elif dtype == "uint64" and not boxed and fill_value == iNaT: pytest.xfail("does not upcast correctly") - elif is_datetime_or_timedelta_dtype(dtype) and boxed: - pytest.xfail("falsely upcasts to object") - elif ( - boxed - and ( - is_integer_dtype(dtype) or is_float_dtype(dtype) or is_complex_dtype(dtype) - ) - and fill_value is not NaT - and dtype != "uint64" - ): - pytest.xfail("falsely upcasts to object") - elif boxed and dtype == "uint64" and (fill_value is np.nan or fill_value is None): - pytest.xfail("falsely upcasts to object") # below: opinionated that iNaT should be interpreted as missing value elif ( not boxed From 
dc6780475dc3dd613e14a09920f77d68f5f650e0 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 1 Oct 2019 06:07:52 -0700 Subject: [PATCH 16/22] CLN: Define and pin GroupBy properties without exec (#28651) --- ci/code_checks.sh | 6 ++- pandas/core/groupby/generic.py | 84 +++++++++++++++++++--------------- pandas/core/groupby/groupby.py | 2 - 3 files changed, 51 insertions(+), 41 deletions(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index b03c4f2238445..e13738b98833a 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -125,6 +125,10 @@ if [[ -z "$CHECK" || "$CHECK" == "patterns" ]]; then # invgrep -R --include="*.py*" -E "from numpy import nan " pandas # GH#24822 not yet implemented since the offending imports have not all been removed RET=$(($RET + $?)) ; echo $MSG "DONE" + MSG='Check for use of exec' ; echo $MSG + invgrep -R --include="*.py*" -E "[^a-zA-Z0-9_]exec\(" pandas + RET=$(($RET + $?)) ; echo $MSG "DONE" + MSG='Check for pytest warns' ; echo $MSG invgrep -r -E --include '*.py' 'pytest\.warns' pandas/tests/ RET=$(($RET + $?)) ; echo $MSG "DONE" @@ -184,7 +188,7 @@ if [[ -z "$CHECK" || "$CHECK" == "patterns" ]]; then invgrep -R --include="*.rst" ".. ipython ::" doc/source RET=$(($RET + $?)) ; echo $MSG "DONE" - MSG='Check that no file in the repo contains tailing whitespaces' ; echo $MSG + MSG='Check that no file in the repo contains trailing whitespaces' ; echo $MSG set -o pipefail if [[ "$AZURE" == "true" ]]; then # we exclude all c/cpp files as the c/cpp files of pandas code base are tested when Linting .c and .h files diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index f8f1455561c03..0ab19448043f6 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -11,7 +11,7 @@ from functools import partial from textwrap import dedent import typing -from typing import Any, Callable, FrozenSet, Iterator, Sequence, Type, Union +from typing import Any, Callable, FrozenSet, Sequence, Type, Union import warnings import numpy as np @@ -70,47 +70,63 @@ ScalarResult = typing.TypeVar("ScalarResult") -def whitelist_method_generator( - base_class: Type[GroupBy], klass: Type[FrameOrSeries], whitelist: FrozenSet[str] -) -> Iterator[str]: +def generate_property(name: str, klass: Type[FrameOrSeries]): """ - Yields all GroupBy member defs for DataFrame/Series names in whitelist. + Create a property for a GroupBy subclass to dispatch to DataFrame/Series. + + Parameters + ---------- + name : str + klass : {DataFrame, Series} + + Returns + ------- + property + """ + + def prop(self): + return self._make_wrapper(name) + + parent_method = getattr(klass, name) + prop.__doc__ = parent_method.__doc__ or "" + prop.__name__ = name + return property(prop) + + +def pin_whitelisted_properties(klass: Type[FrameOrSeries], whitelist: FrozenSet[str]): + """ + Create GroupBy member defs for DataFrame/Series names in a whitelist. Parameters ---------- - base_class : Groupby class - base class klass : DataFrame or Series class class where members are defined. - whitelist : frozenset + whitelist : frozenset[str] Set of names of klass methods to be constructed Returns ------- - The generator yields a sequence of strings, each suitable for exec'ing, - that define implementations of the named methods for DataFrameGroupBy - or SeriesGroupBy. + class decorator + Notes + ----- Since we don't want to override methods explicitly defined in the base class, any such name is skipped. 
""" - property_wrapper_template = """@property -def %(name)s(self) : - \"""%(doc)s\""" - return self.__getattr__('%(name)s')""" - - for name in whitelist: - # don't override anything that was explicitly defined - # in the base class - if hasattr(base_class, name): - continue - # ugly, but we need the name string itself in the method. - f = getattr(klass, name) - doc = f.__doc__ - doc = doc if type(doc) == str else "" - wrapper_template = property_wrapper_template - params = {"name": name, "doc": doc} - yield wrapper_template % params + + def pinner(cls): + for name in whitelist: + if hasattr(cls, name): + # don't override anything that was explicitly defined + # in the base class + continue + + prop = generate_property(name, klass) + setattr(cls, name, prop) + + return cls + + return pinner class NDFrameGroupBy(GroupBy): @@ -747,13 +763,9 @@ def filter(self, func, dropna=True, *args, **kwargs): return self._apply_filter(indices, dropna) +@pin_whitelisted_properties(Series, base.series_apply_whitelist) class SeriesGroupBy(GroupBy): - # - # Make class defs of attributes on SeriesGroupBy whitelist - _apply_whitelist = base.series_apply_whitelist - for _def_str in whitelist_method_generator(GroupBy, Series, _apply_whitelist): - exec(_def_str) @property def _selection_name(self): @@ -1368,15 +1380,11 @@ def pct_change(self, periods=1, fill_method="pad", limit=None, freq=None): return (filled / shifted) - 1 +@pin_whitelisted_properties(DataFrame, base.dataframe_apply_whitelist) class DataFrameGroupBy(NDFrameGroupBy): _apply_whitelist = base.dataframe_apply_whitelist - # - # Make class defs of attributes on DataFrameGroupBy whitelist. - for _def_str in whitelist_method_generator(GroupBy, DataFrame, _apply_whitelist): - exec(_def_str) - _block_agg_axis = 1 _agg_see_also_doc = dedent( diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 6facbe7e01c57..984954fe14bb5 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -562,8 +562,6 @@ def __getattr__(self, attr): return object.__getattribute__(self, attr) if attr in self.obj: return self[attr] - if hasattr(self.obj, attr): - return self._make_wrapper(attr) raise AttributeError( "%r object has no attribute %r" % (type(self).__name__, attr) From c794bbfdd47e43adc06af5a6de7b3de211682b46 Mon Sep 17 00:00:00 2001 From: Oluokun Adedayo Date: Tue, 1 Oct 2019 15:55:26 +0100 Subject: [PATCH 17/22] DOC: Fixed PR08, PR09 doctring issues in pandas.core.groupby (#28709) --- pandas/core/groupby/generic.py | 2 +- pandas/core/groupby/groupby.py | 36 +++++++++++++++++----------------- 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 0ab19448043f6..b5aec189700ce 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -709,7 +709,7 @@ def filter(self, func, dropna=True, *args, **kwargs): f : function Function to apply to each subframe. Should return True or False. dropna : Drop groups that do not pass the filter. True by default; - if False, groups that evaluate False are filled with NaNs. + If False, groups that evaluate False are filled with NaNs. Returns ------- diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 984954fe14bb5..e93ce3ce93164 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -212,9 +212,9 @@ class providing the base-class of operations. string indicating the keyword of `callable` that expects the %(klass)s object. 
args : iterable, optional - positional arguments passed into `func`. + Positional arguments passed into `func`. kwargs : dict, optional - a dictionary of keyword arguments passed into `func`. + A dictionary of keyword arguments passed into `func`. Returns ------- @@ -664,11 +664,11 @@ def get_group(self, name, obj=None): Parameters ---------- name : object - the name of the group to get as a DataFrame + The name of the group to get as a DataFrame. obj : DataFrame, default None - the DataFrame to take the DataFrame out of. If + The DataFrame to take the DataFrame out of. If it is None, the object groupby was called on will - be used + be used. Returns ------- @@ -1114,7 +1114,7 @@ def any(self, skipna=True): Parameters ---------- skipna : bool, default True - Flag to ignore nan values during truth testing + Flag to ignore nan values during truth testing. Returns ------- @@ -1131,7 +1131,7 @@ def all(self, skipna=True): Parameters ---------- skipna : bool, default True - Flag to ignore nan values during truth testing + Flag to ignore nan values during truth testing. Returns ------- @@ -1252,7 +1252,7 @@ def std(self, ddof=1, *args, **kwargs): Parameters ---------- ddof : int, default 1 - degrees of freedom + Degrees of freedom. Returns ------- @@ -1275,7 +1275,7 @@ def var(self, ddof=1, *args, **kwargs): Parameters ---------- ddof : int, default 1 - degrees of freedom + Degrees of freedom. Returns ------- @@ -1310,7 +1310,7 @@ def sem(self, ddof=1): Parameters ---------- ddof : int, default 1 - degrees of freedom + Degrees of freedom. Returns ------- @@ -1622,7 +1622,7 @@ def pad(self, limit=None): Parameters ---------- limit : int, optional - limit of how many values to fill + Limit of how many values to fill. Returns ------- @@ -1648,7 +1648,7 @@ def backfill(self, limit=None): Parameters ---------- limit : int, optional - limit of how many values to fill + Limit of how many values to fill. Returns ------- @@ -1680,10 +1680,10 @@ def nth(self, n: Union[int, List[int]], dropna: Optional[str] = None) -> DataFra Parameters ---------- n : int or list of ints - a single nth value for the row or a list of nth values + A single nth value for the row or a list of nth values. dropna : None or str, optional - apply the specified dropna operation before counting which row is - the nth row. Needs to be None, 'any' or 'all' + Apply the specified dropna operation before counting which row is + the nth row. Needs to be None, 'any' or 'all'. Returns ------- @@ -2098,13 +2098,13 @@ def rank( * first: ranks assigned in order they appear in the array * dense: like 'min', but rank always increases by 1 between groups ascending : bool, default True - False for ranks by high (1) to low (N) + False for ranks by high (1) to low (N). na_option : {'keep', 'top', 'bottom'}, default 'keep' * keep: leave NA values where they are * top: smallest rank if ascending * bottom: smallest rank if descending pct : bool, default False - Compute percentage rank of data within each group + Compute percentage rank of data within each group. axis : int, default 0 The axis of the object over which to compute the rank. @@ -2312,7 +2312,7 @@ def shift(self, periods=1, freq=None, axis=0, fill_value=None): Parameters ---------- periods : int, default 1 - number of periods to shift + Number of periods to shift. 
freq : frequency string axis : axis to shift, default 0 fill_value : optional From 4e371c16ae1ff217a0f35a3527fdc02503e81881 Mon Sep 17 00:00:00 2001 From: "Laura Collard, PhD" <35954013+LauraCollard@users.noreply.github.com> Date: Tue, 1 Oct 2019 16:43:52 +0100 Subject: [PATCH 18/22] DOC: Fixed PR06 docstring errors in pandas.DataFrame (#28718) --- pandas/core/frame.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 16f34fee5e1ff..9467978f13d30 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -7948,7 +7948,7 @@ def idxmin(self, axis=0, skipna=True): ---------- axis : {0 or 'index', 1 or 'columns'}, default 0 The axis to use. 0 or 'index' for row-wise, 1 or 'columns' for column-wise - skipna : boolean, default True + skipna : bool, default True Exclude NA/null values. If an entire row/column is NA, the result will be NA. @@ -7985,7 +7985,7 @@ def idxmax(self, axis=0, skipna=True): ---------- axis : {0 or 'index', 1 or 'columns'}, default 0 The axis to use. 0 or 'index' for row-wise, 1 or 'columns' for column-wise - skipna : boolean, default True + skipna : bool, default True Exclude NA/null values. If an entire row/column is NA, the result will be NA. From 21f6505c931c715bfceee790458f5d6b1fceb5f0 Mon Sep 17 00:00:00 2001 From: "Laura Collard, PhD" <35954013+LauraCollard@users.noreply.github.com> Date: Tue, 1 Oct 2019 16:50:40 +0100 Subject: [PATCH 19/22] DOC: Fixed PR06 docstrings errors in pandas.arrays.IntervalArray (#28721) --- pandas/core/arrays/interval.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index 1f4b76a259f00..6dd0b116b3b0d 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -259,7 +259,7 @@ def _from_factorized(cls, values, original): closed : {'left', 'right', 'both', 'neither'}, default 'right' Whether the intervals are closed on the left-side, right-side, both or neither. - copy : boolean, default False + copy : bool, default False copy the data dtype : dtype or None, default None If None, dtype will be inferred @@ -315,7 +315,7 @@ def from_breaks(cls, breaks, closed="right", copy=False, dtype=None): closed : {'left', 'right', 'both', 'neither'}, default 'right' Whether the intervals are closed on the left-side, right-side, both or neither. - copy : boolean, default False + copy : bool, default False Copy the data. dtype : dtype, optional If None, dtype will be inferred. @@ -387,7 +387,7 @@ def from_arrays(cls, left, right, closed="right", copy=False, dtype=None): closed : {'left', 'right', 'both', 'neither'}, default 'right' Whether the intervals are closed on the left-side, right-side, both or neither. - copy : boolean, default False + copy : bool, default False by-default copy the data, this is compat only and ignored dtype : dtype or None, default None If None, dtype will be inferred @@ -811,7 +811,7 @@ def value_counts(self, dropna=True): Parameters ---------- - dropna : boolean, default True + dropna : bool, default True Don't include counts of NaN. 
Returns From 7e05957c79bcf782db0ee5ca09296e2a833499a3 Mon Sep 17 00:00:00 2001 From: Josiah Baker Date: Tue, 1 Oct 2019 11:50:51 -0400 Subject: [PATCH 20/22] DOC: fix PR09,PR08 docstring errors in pandas.plotting (#28689) --- pandas/plotting/_misc.py | 91 +++++++++++++++++++--------------------- 1 file changed, 44 insertions(+), 47 deletions(-) diff --git a/pandas/plotting/_misc.py b/pandas/plotting/_misc.py index a8e86d9dfa997..74ce60c6116a9 100644 --- a/pandas/plotting/_misc.py +++ b/pandas/plotting/_misc.py @@ -14,9 +14,9 @@ def table(ax, data, rowLabels=None, colLabels=None, **kwargs): ---------- ax : Matplotlib axes object data : DataFrame or Series - data for table contents - kwargs : keywords, optional - keyword arguments which passed to matplotlib.table.table. + Data for table contents. + **kwargs + Keyword arguments to be passed to matplotlib.table.table. If `rowLabels` or `colLabels` is not specified, data index or column name will be used. @@ -82,7 +82,7 @@ def scatter_matrix( density_kwds=None, hist_kwds=None, range_padding=0.05, - **kwds + **kwargs ): """ Draw a matrix of scatter plots. @@ -91,28 +91,26 @@ def scatter_matrix( ---------- frame : DataFrame alpha : float, optional - amount of transparency applied + Amount of transparency applied. figsize : (float,float), optional - a tuple (width, height) in inches + A tuple (width, height) in inches. ax : Matplotlib axis object, optional grid : bool, optional - setting this to True will show the grid + Setting this to True will show the grid. diagonal : {'hist', 'kde'} - pick between 'kde' and 'hist' for - either Kernel Density Estimation or Histogram - plot in the diagonal + Pick between 'kde' and 'hist' for either Kernel Density Estimation or + Histogram plot in the diagonal. marker : str, optional - Matplotlib marker type, default '.' - hist_kwds : other plotting keyword arguments - To be passed to hist function - density_kwds : other plotting keyword arguments - To be passed to kernel density estimate plot - range_padding : float, optional - relative extension of axis range in x and y - with respect to (x_max - x_min) or (y_max - y_min), - default 0.05 - kwds : other plotting keyword arguments - To be passed to scatter function + Matplotlib marker type, default '.'. + density_kwds : keywords + Keyword arguments to be passed to kernel density estimate plot. + hist_kwds : keywords + Keyword arguments to be passed to hist function. + range_padding : float, default 0.05 + Relative extension of axis range in x and y with respect to + (x_max - x_min) or (y_max - y_min). + **kwargs + Keyword arguments to be passed to scatter function. Returns ------- @@ -136,7 +134,7 @@ def scatter_matrix( density_kwds=density_kwds, hist_kwds=hist_kwds, range_padding=range_padding, - **kwds + **kwargs ) @@ -215,7 +213,7 @@ def radviz(frame, class_column, ax=None, color=None, colormap=None, **kwds): @deprecate_kwarg(old_arg_name="data", new_arg_name="frame") def andrews_curves( - frame, class_column, ax=None, samples=200, color=None, colormap=None, **kwds + frame, class_column, ax=None, samples=200, color=None, colormap=None, **kwargs ): """ Generate a matplotlib plot of Andrews curves, for visualising clusters of @@ -233,17 +231,17 @@ def andrews_curves( Parameters ---------- frame : DataFrame - Data to be plotted, preferably normalized to (0.0, 1.0) + Data to be plotted, preferably normalized to (0.0, 1.0). 
class_column : Name of the column containing class names ax : matplotlib axes object, default None samples : Number of points to plot in each curve color : list or tuple, optional - Colors to use for the different classes + Colors to use for the different classes. colormap : str or matplotlib colormap object, default None Colormap to select colors from. If string, load colormap with that name from matplotlib. - kwds : keywords - Options to pass to matplotlib plotting method + **kwargs + Options to pass to matplotlib plotting method. Returns ------- @@ -257,7 +255,7 @@ def andrews_curves( samples=samples, color=color, colormap=colormap, - **kwds + **kwargs ) @@ -327,7 +325,7 @@ def parallel_coordinates( axvlines=True, axvlines_kwds=None, sort_labels=False, - **kwds + **kwargs ): """ Parallel coordinates plotting. @@ -336,30 +334,29 @@ def parallel_coordinates( ---------- frame : DataFrame class_column : str - Column name containing class names + Column name containing class names. cols : list, optional - A list of column names to use + A list of column names to use. ax : matplotlib.axis, optional - matplotlib axis object + Matplotlib axis object. color : list or tuple, optional - Colors to use for the different classes + Colors to use for the different classes. use_columns : bool, optional - If true, columns will be used as xticks + If true, columns will be used as xticks. xticks : list or tuple, optional - A list of values to use for xticks + A list of values to use for xticks. colormap : str or matplotlib colormap, default None Colormap to use for line colors. axvlines : bool, optional - If true, vertical lines will be added at each xtick + If true, vertical lines will be added at each xtick. axvlines_kwds : keywords, optional - Options to be passed to axvline method for vertical lines - sort_labels : bool, False - Sort class_column labels, useful when assigning colors + Options to be passed to axvline method for vertical lines. + sort_labels : bool, default False + Sort class_column labels, useful when assigning colors. .. versionadded:: 0.20.0 - - kwds : keywords - Options to pass to matplotlib plotting method + **kwargs + Options to pass to matplotlib plotting method. Returns ------- @@ -388,7 +385,7 @@ def parallel_coordinates( axvlines=axvlines, axvlines_kwds=axvlines_kwds, sort_labels=sort_labels, - **kwds + **kwargs ) @@ -411,7 +408,7 @@ def lag_plot(series, lag=1, ax=None, **kwds): return plot_backend.lag_plot(series=series, lag=lag, ax=ax, **kwds) -def autocorrelation_plot(series, ax=None, **kwds): +def autocorrelation_plot(series, ax=None, **kwargs): """ Autocorrelation plot for time series. @@ -419,15 +416,15 @@ def autocorrelation_plot(series, ax=None, **kwds): ---------- series : Time series ax : Matplotlib axis object, optional - kwds : keywords - Options to pass to matplotlib plotting method + **kwargs + Options to pass to matplotlib plotting method. 
Returns ------- class:`matplotlib.axis.Axes` """ plot_backend = _get_plot_backend("matplotlib") - return plot_backend.autocorrelation_plot(series=series, ax=ax, **kwds) + return plot_backend.autocorrelation_plot(series=series, ax=ax, **kwargs) def tsplot(series, plotf, ax=None, **kwargs): From dd028fd106f1480a79f942033f6bb1bc4d9cfd35 Mon Sep 17 00:00:00 2001 From: Tola A <33249563+tolaa001@users.noreply.github.com> Date: Tue, 1 Oct 2019 16:51:04 +0100 Subject: [PATCH 21/22] precursor to Split out test_pytables.py to sub-module of tests (#28715) --- pandas/tests/io/pytables/test_pytables.py | 859 +++++++++++----------- 1 file changed, 436 insertions(+), 423 deletions(-) diff --git a/pandas/tests/io/pytables/test_pytables.py b/pandas/tests/io/pytables/test_pytables.py index ae604b1141204..46d8ef04dd8e5 100644 --- a/pandas/tests/io/pytables/test_pytables.py +++ b/pandas/tests/io/pytables/test_pytables.py @@ -51,6 +51,19 @@ tables = pytest.importorskip("tables") +@pytest.fixture +def setup_path(): + """Fixture for setup path""" + return "tmp.__{}__.h5".format(tm.rands(10)) + + +@pytest.fixture(scope="class", autouse=True) +def setup_mode(): + tm.reset_testing_mode() + yield + tm.set_testing_mode() + + # TODO: # remove when gh-24839 is fixed; this affects numpy 1.16 # and pytables 3.4.4 @@ -148,36 +161,16 @@ def _maybe_remove(store, key): pass -class Base: - @classmethod - def setup_class(cls): - - # Pytables 3.0.0 deprecates lots of things - tm.reset_testing_mode() - - @classmethod - def teardown_class(cls): - - # Pytables 3.0.0 deprecates lots of things - tm.set_testing_mode() - - def setup_method(self, method): - self.path = "tmp.__{}__.h5".format(tm.rands(10)) - - def teardown_method(self, method): - pass - - @pytest.mark.single -class TestHDFStore(Base): - def test_format_kwarg_in_constructor(self): +class TestHDFStore: + def test_format_kwarg_in_constructor(self, setup_path): # GH 13291 - with ensure_clean_path(self.path) as path: + with ensure_clean_path(setup_path) as path: with pytest.raises(ValueError): HDFStore(path, format="table") - def test_context(self): - path = create_tempfile(self.path) + def test_context(self, setup_path): + path = create_tempfile(setup_path) try: with HDFStore(path) as tbl: raise ValueError("blah") @@ -196,8 +189,8 @@ def test_context(self): finally: safe_remove(path) - def test_conv_read_write(self): - path = create_tempfile(self.path) + def test_conv_read_write(self, setup_path): + path = create_tempfile(setup_path) try: def roundtrip(key, obj, **kwargs): @@ -222,24 +215,24 @@ def roundtrip(key, obj, **kwargs): finally: safe_remove(path) - def test_long_strings(self): + def test_long_strings(self, setup_path): # GH6166 df = DataFrame( {"a": tm.rands_array(100, size=10)}, index=tm.rands_array(100, size=10) ) - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: store.append("df", df, data_columns=["a"]) result = store.select("df") assert_frame_equal(df, result) - def test_api(self): + def test_api(self, setup_path): # GH4584 # API issue when to_hdf doesn't accept append AND format args - with ensure_clean_path(self.path) as path: + with ensure_clean_path(setup_path) as path: df = tm.makeDataFrame() df.iloc[:10].to_hdf(path, "df", append=True, format="table") @@ -251,7 +244,7 @@ def test_api(self): df.iloc[10:].to_hdf(path, "df", append=True, format="table") assert_frame_equal(read_hdf(path, "df"), df) - with ensure_clean_path(self.path) as path: + with ensure_clean_path(setup_path) as path: df = tm.makeDataFrame() 
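            # [editor's note] The refactor in this patch replaces the
            # unittest-style ``Base`` class (a temp filename stashed on
            # ``self.path`` in ``setup_method``) with plain pytest fixtures
            # that each test requests by argument name.  A minimal sketch of
            # the same pattern, assuming a hypothetical ``tmp_h5`` fixture
            # name (not from the patch):
            #
            #   import pandas as pd
            #   import pandas.util.testing as tm
            #   import pytest
            #
            #   @pytest.fixture
            #   def tmp_h5():
            #       # every test that asks for tmp_h5 gets a fresh filename
            #       return "tmp.__{}__.h5".format(tm.rands(10))
            #
            #   def test_roundtrip(tmp_h5):
            #       # pytest injects the fixture's return value as the argument
            #       df = tm.makeDataFrame()
            #       df.to_hdf(tmp_h5, "df")
            #       tm.assert_frame_equal(pd.read_hdf(tmp_h5, "df"), df)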
df.iloc[:10].to_hdf(path, "df", append=True) @@ -263,7 +256,7 @@ def test_api(self): df.iloc[10:].to_hdf(path, "df", append=True) assert_frame_equal(read_hdf(path, "df"), df) - with ensure_clean_path(self.path) as path: + with ensure_clean_path(setup_path) as path: df = tm.makeDataFrame() df.to_hdf(path, "df", append=False, format="fixed") @@ -278,7 +271,7 @@ def test_api(self): df.to_hdf(path, "df") assert_frame_equal(read_hdf(path, "df"), df) - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: path = store._path df = tm.makeDataFrame() @@ -305,7 +298,7 @@ def test_api(self): store.append("df", df.iloc[10:], append=True, format=None) assert_frame_equal(store.select("df"), df) - with ensure_clean_path(self.path) as path: + with ensure_clean_path(setup_path) as path: # Invalid. df = tm.makeDataFrame() @@ -326,10 +319,10 @@ def test_api(self): with pytest.raises(FileNotFoundError): read_hdf(path, "df") - def test_api_default_format(self): + def test_api_default_format(self, setup_path): # default_format option - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: df = tm.makeDataFrame() pd.set_option("io.hdf.default_format", "fixed") @@ -349,7 +342,7 @@ def test_api_default_format(self): pd.set_option("io.hdf.default_format", None) - with ensure_clean_path(self.path) as path: + with ensure_clean_path(setup_path) as path: df = tm.makeDataFrame() @@ -370,9 +363,9 @@ def test_api_default_format(self): pd.set_option("io.hdf.default_format", None) - def test_keys(self): + def test_keys(self, setup_path): - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: store["a"] = tm.makeTimeSeries() store["b"] = tm.makeStringSeries() store["c"] = tm.makeDataFrame() @@ -382,12 +375,12 @@ def test_keys(self): assert set(store.keys()) == expected assert set(store) == expected - def test_keys_ignore_hdf_softlink(self): + def test_keys_ignore_hdf_softlink(self, setup_path): # GH 20523 # Puts a softlink into HDF file and rereads - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: df = DataFrame(dict(A=range(5), B=range(5))) store.put("df", df) @@ -399,15 +392,15 @@ def test_keys_ignore_hdf_softlink(self): # Should ignore the softlink assert store.keys() == ["/df"] - def test_iter_empty(self): + def test_iter_empty(self, setup_path): - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: # GH 12221 assert list(store) == [] - def test_repr(self): + def test_repr(self, setup_path): - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: repr(store) store.info() store["a"] = tm.makeTimeSeries() @@ -441,7 +434,7 @@ def test_repr(self): store.info() # storers - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: df = tm.makeDataFrame() store.append("df", df) @@ -451,9 +444,9 @@ def test_repr(self): str(s) @ignore_natural_naming_warning - def test_contains(self): + def test_contains(self, setup_path): - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: store["a"] = tm.makeTimeSeries() store["b"] = tm.makeDataFrame() store["foo/bar"] = tm.makeDataFrame() @@ -470,9 +463,9 @@ def test_contains(self): store["node())"] = tm.makeDataFrame() assert "node())" in store - def test_versioning(self): + def test_versioning(self, setup_path): - with ensure_clean_store(self.path) as store: + with 
ensure_clean_store(setup_path) as store: store["a"] = tm.makeTimeSeries() store["b"] = tm.makeDataFrame() df = tm.makeTimeDataFrame() @@ -493,13 +486,13 @@ def test_versioning(self): with pytest.raises(Exception): store.select("df2") - def test_mode(self): + def test_mode(self, setup_path): df = tm.makeTimeDataFrame() def check(mode): - with ensure_clean_path(self.path) as path: + with ensure_clean_path(setup_path) as path: # constructor if mode in ["r", "r+"]: @@ -511,7 +504,7 @@ def check(mode): assert store._handle.mode == mode store.close() - with ensure_clean_path(self.path) as path: + with ensure_clean_path(setup_path) as path: # context if mode in ["r", "r+"]: @@ -522,7 +515,7 @@ def check(mode): with HDFStore(path, mode=mode) as store: assert store._handle.mode == mode - with ensure_clean_path(self.path) as path: + with ensure_clean_path(setup_path) as path: # conv write if mode in ["r", "r+"]: @@ -543,7 +536,7 @@ def check(mode): def check_default_mode(): # read_hdf uses default mode - with ensure_clean_path(self.path) as path: + with ensure_clean_path(setup_path) as path: df.to_hdf(path, "df", mode="w") result = read_hdf(path, "df") assert_frame_equal(result, df) @@ -554,9 +547,9 @@ def check_default_mode(): check("w") check_default_mode() - def test_reopen_handle(self): + def test_reopen_handle(self, setup_path): - with ensure_clean_path(self.path) as path: + with ensure_clean_path(setup_path) as path: store = HDFStore(path, mode="a") store["a"] = tm.makeTimeSeries() @@ -602,9 +595,9 @@ def test_reopen_handle(self): store.close() assert not store.is_open - def test_open_args(self): + def test_open_args(self, setup_path): - with ensure_clean_path(self.path) as path: + with ensure_clean_path(setup_path) as path: df = tm.makeDataFrame() @@ -623,16 +616,16 @@ def test_open_args(self): # the file should not have actually been written assert not os.path.exists(path) - def test_flush(self): + def test_flush(self, setup_path): - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: store["a"] = tm.makeTimeSeries() store.flush() store.flush(fsync=True) - def test_get(self): + def test_get(self, setup_path): - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: store["a"] = tm.makeTimeSeries() left = store.get("a") right = store["a"] @@ -666,7 +659,7 @@ def test_get(self): ), ], ) - def test_walk(self, where, expected): + def test_walk(self, where, expected, setup_path): # GH10143 objs = { "df1": pd.DataFrame([1, 2, 3]), @@ -705,9 +698,9 @@ def test_walk(self, where, expected): else: tm.assert_series_equal(obj, objs[leaf]) - def test_getattr(self): + def test_getattr(self, setup_path): - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: s = tm.makeTimeSeries() store["a"] = s @@ -732,9 +725,9 @@ def test_getattr(self): for x in ["mode", "path", "handle", "complib"]: getattr(store, "_{x}".format(x=x)) - def test_put(self): + def test_put(self, setup_path): - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: ts = tm.makeTimeSeries() df = tm.makeTimeDataFrame() @@ -763,9 +756,9 @@ def test_put(self): store.put("c", df[:10], format="table", append=False) tm.assert_frame_equal(df[:10], store["c"]) - def test_put_string_index(self): + def test_put_string_index(self, setup_path): - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: index = Index( ["I am a very long string index: 
{i}".format(i=i) for i in range(20)] @@ -792,9 +785,9 @@ def test_put_string_index(self): store["b"] = df tm.assert_frame_equal(store["b"], df) - def test_put_compression(self): + def test_put_compression(self, setup_path): - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: df = tm.makeTimeDataFrame() store.put("c", df, format="table", complib="zlib") @@ -805,10 +798,10 @@ def test_put_compression(self): store.put("b", df, format="fixed", complib="zlib") @td.skip_if_windows_python_3 - def test_put_compression_blosc(self): + def test_put_compression_blosc(self, setup_path): df = tm.makeTimeDataFrame() - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: # can't compress if format='fixed' with pytest.raises(ValueError): @@ -817,13 +810,13 @@ def test_put_compression_blosc(self): store.put("c", df, format="table", complib="blosc") tm.assert_frame_equal(store["c"], df) - def test_complibs_default_settings(self): + def test_complibs_default_settings(self, setup_path): # GH15943 df = tm.makeDataFrame() # Set complevel and check if complib is automatically set to # default value - with ensure_clean_path(self.path) as tmpfile: + with ensure_clean_path(setup_path) as tmpfile: df.to_hdf(tmpfile, "df", complevel=9) result = pd.read_hdf(tmpfile, "df") tm.assert_frame_equal(result, df) @@ -834,7 +827,7 @@ def test_complibs_default_settings(self): assert node.filters.complib == "zlib" # Set complib and check to see if compression is disabled - with ensure_clean_path(self.path) as tmpfile: + with ensure_clean_path(setup_path) as tmpfile: df.to_hdf(tmpfile, "df", complib="zlib") result = pd.read_hdf(tmpfile, "df") tm.assert_frame_equal(result, df) @@ -845,7 +838,7 @@ def test_complibs_default_settings(self): assert node.filters.complib is None # Check if not setting complib or complevel results in no compression - with ensure_clean_path(self.path) as tmpfile: + with ensure_clean_path(setup_path) as tmpfile: df.to_hdf(tmpfile, "df") result = pd.read_hdf(tmpfile, "df") tm.assert_frame_equal(result, df) @@ -856,7 +849,7 @@ def test_complibs_default_settings(self): assert node.filters.complib is None # Check if file-defaults can be overridden on a per table basis - with ensure_clean_path(self.path) as tmpfile: + with ensure_clean_path(setup_path) as tmpfile: store = pd.HDFStore(tmpfile) store.append("dfc", df, complevel=9, complib="blosc") store.append("df", df) @@ -870,7 +863,7 @@ def test_complibs_default_settings(self): assert node.filters.complevel == 9 assert node.filters.complib == "blosc" - def test_complibs(self): + def test_complibs(self, setup_path): # GH14478 df = tm.makeDataFrame() @@ -887,7 +880,7 @@ def test_complibs(self): all_tests = [(lib, lvl) for lib in all_complibs for lvl in all_levels] for (lib, lvl) in all_tests: - with ensure_clean_path(self.path) as tmpfile: + with ensure_clean_path(setup_path) as tmpfile: gname = "foo" # Write and read file to see if data is consistent @@ -906,13 +899,13 @@ def test_complibs(self): assert node.filters.complib == lib h5table.close() - def test_put_integer(self): + def test_put_integer(self, setup_path): # non-date, non-string index df = DataFrame(np.random.randn(50, 100)) - self._check_roundtrip(df, tm.assert_frame_equal) + self._check_roundtrip(df, tm.assert_frame_equal, setup_path) @xfail_non_writeable - def test_put_mixed_type(self): + def test_put_mixed_type(self, setup_path): df = tm.makeTimeDataFrame() df["obj1"] = "foo" df["obj2"] = "bar" @@ -928,7 +921,7 @@ 
def test_put_mixed_type(self): df.loc[3:6, ["obj1"]] = np.nan df = df._consolidate()._convert(datetime=True) - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: _maybe_remove(store, "df") # PerformanceWarning @@ -942,9 +935,9 @@ def test_put_mixed_type(self): @pytest.mark.filterwarnings( "ignore:object name:tables.exceptions.NaturalNameWarning" ) - def test_append(self): + def test_append(self, setup_path): - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: # this is allowed by almost always don't want to do it # tables.NaturalNameWarning): @@ -1010,9 +1003,9 @@ def test_append(self): store.append("uints", uint_data, data_columns=["u08", "u16", "u32"]) tm.assert_frame_equal(store["uints"], uint_data) - def test_append_series(self): + def test_append_series(self, setup_path): - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: # basic ss = tm.makeStringSeries() @@ -1056,11 +1049,11 @@ def test_append_series(self): store.append("mi", s) tm.assert_series_equal(store["mi"], s) - def test_store_index_types(self): + def test_store_index_types(self, setup_path): # GH5386 # test storing various index types - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: def check(format, index): df = DataFrame(np.random.randn(10, 2), columns=list("AB")) @@ -1093,9 +1086,9 @@ def check(format, index): @pytest.mark.skipif( not is_platform_little_endian(), reason="reason platform is not little endian" ) - def test_encoding(self): + def test_encoding(self, setup_path): - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: df = DataFrame(dict(A="foo", B="bar"), index=range(5)) df.loc[2, "A"] = np.nan df.loc[3, "B"] = np.nan @@ -1122,7 +1115,7 @@ def test_encoding(self): ], ) @pytest.mark.parametrize("dtype", ["category", object]) - def test_latin_encoding(self, dtype, val): + def test_latin_encoding(self, setup_path, dtype, val): enc = "latin-1" nan_rep = "" key = "data" @@ -1130,7 +1123,7 @@ def test_latin_encoding(self, dtype, val): val = [x.decode(enc) if isinstance(x, bytes) else x for x in val] ser = pd.Series(val, dtype=dtype) - with ensure_clean_path(self.path) as store: + with ensure_clean_path(setup_path) as store: ser.to_hdf(store, key, format="table", encoding=enc, nan_rep=nan_rep) retr = read_hdf(store, key) @@ -1147,9 +1140,9 @@ def test_latin_encoding(self, dtype, val): # for x in examples: # roundtrip(s, nan_rep=b'\xf8\xfc') - def test_append_some_nans(self): + def test_append_some_nans(self, setup_path): - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: df = DataFrame( { "A": Series(np.random.randn(20)).astype("int32"), @@ -1193,9 +1186,9 @@ def test_append_some_nans(self): store.append("df3", df3[10:]) tm.assert_frame_equal(store["df3"], df3) - def test_append_all_nans(self): + def test_append_all_nans(self, setup_path): - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: df = DataFrame( {"A1": np.random.randn(20), "A2": np.random.randn(20)}, @@ -1283,14 +1276,14 @@ def test_append_all_nans(self): {"col1": [0, np.nan, 2], "col2": [1, np.nan, np.nan]} ) - with ensure_clean_path(self.path) as path: + with ensure_clean_path(setup_path) as path: df_with_missing.to_hdf(path, "df_with_missing", format="table") reloaded = read_hdf(path, "df_with_missing") tm.assert_frame_equal(df_with_missing, reloaded) - def 
test_read_missing_key_close_store(self): + def test_read_missing_key_close_store(self, setup_path): # GH 25766 - with ensure_clean_path(self.path) as path: + with ensure_clean_path(setup_path) as path: df = pd.DataFrame({"a": range(2), "b": range(2)}) df.to_hdf(path, "k1") @@ -1301,9 +1294,9 @@ def test_read_missing_key_close_store(self): # read with KeyError before another write df.to_hdf(path, "k2") - def test_append_frame_column_oriented(self): + def test_append_frame_column_oriented(self, setup_path): - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: # column oriented df = tm.makeTimeDataFrame() @@ -1325,10 +1318,10 @@ def test_append_frame_column_oriented(self): with pytest.raises(TypeError): store.select("df1", "columns=A and index>df.index[4]") - def test_append_with_different_block_ordering(self): + def test_append_with_different_block_ordering(self, setup_path): # GH 4096; using same frames, but different block orderings - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: for i in range(10): @@ -1351,7 +1344,7 @@ def test_append_with_different_block_ordering(self): # test a different ordering but with more fields (like invalid # combinate) - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: df = DataFrame(np.random.randn(10, 2), columns=list("AB"), dtype="float64") df["int64"] = Series([1] * len(df), dtype="int64") @@ -1368,9 +1361,9 @@ def test_append_with_different_block_ordering(self): with pytest.raises(ValueError): store.append("df", df) - def test_append_with_strings(self): + def test_append_with_strings(self, setup_path): - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: with catch_warnings(record=True): def check_col(key, name, size): @@ -1444,7 +1437,7 @@ def check_col(key, name, size): result = store.select("df") tm.assert_frame_equal(result, df) - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: def check_col(key, name, size): assert getattr( @@ -1484,9 +1477,9 @@ def check_col(key, name, size): with pytest.raises(ValueError): store.append("df", df, min_itemsize={"foo": 20, "foobar": 20}) - def test_append_with_empty_string(self): + def test_append_with_empty_string(self, setup_path): - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: # with all empty strings (GH 12242) df = DataFrame({"x": ["a", "b", "c", "d", "e", "f", ""]}) @@ -1494,9 +1487,9 @@ def test_append_with_empty_string(self): store.append("df", df[-1:], min_itemsize={"x": 1}) tm.assert_frame_equal(store.select("df"), df) - def test_to_hdf_with_min_itemsize(self): + def test_to_hdf_with_min_itemsize(self, setup_path): - with ensure_clean_path(self.path) as path: + with ensure_clean_path(setup_path) as path: # min_itemsize in index with to_hdf (GH 10381) df = tm.makeMixedDataFrame().set_index("C") @@ -1516,20 +1509,20 @@ def test_to_hdf_with_min_itemsize(self): @pytest.mark.parametrize( "format", [pytest.param("fixed", marks=xfail_non_writeable), "table"] ) - def test_to_hdf_errors(self, format): + def test_to_hdf_errors(self, format, setup_path): data = ["\ud800foo"] ser = pd.Series(data, index=pd.Index(data)) - with ensure_clean_path(self.path) as path: + with ensure_clean_path(setup_path) as path: # GH 20835 ser.to_hdf(path, "table", format=format, errors="surrogatepass") result = pd.read_hdf(path, "table", errors="surrogatepass") 
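            # "\ud800" is a lone surrogate, so a strict codec refuses it on
            # both the write and the read path; errors="surrogatepass" is
            # what lets the value survive the round trip.  The plain-codec
            # analogue (an aside, not part of the test) behaves the same way:
            #
            #   "\ud800foo".encode("utf-8")                   # UnicodeEncodeError
            #   "\ud800foo".encode("utf-8", "surrogatepass")  # b'\xed\xa0\x80foo'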
tm.assert_series_equal(result, ser) - def test_append_with_data_columns(self): + def test_append_with_data_columns(self, setup_path): - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: df = tm.makeTimeDataFrame() df.iloc[0, df.columns.get_loc("B")] = 1.0 _maybe_remove(store, "df") @@ -1570,7 +1563,7 @@ def check_col(key, name, size): == size ) - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: _maybe_remove(store, "df") store.append( "df", df_new, data_columns=["string"], min_itemsize={"string": 30} @@ -1585,7 +1578,7 @@ def check_col(key, name, size): ) check_col("df", "string", 30) - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: df_new["string2"] = "foobarbah" df_new["string_block1"] = "foobarbah1" df_new["string_block2"] = "foobarbah2" @@ -1600,7 +1593,7 @@ def check_col(key, name, size): check_col("df", "string2", 40) check_col("df", "values_block_1", 50) - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: # multiple data columns df_new = df.copy() df_new.iloc[0, df_new.columns.get_loc("A")] = 1.0 @@ -1633,7 +1626,7 @@ def check_col(key, name, size): expected = df_new[(df_new.string == "foo") & (df_new.string2 == "cool")] tm.assert_frame_equal(result, expected, check_index_type=False) - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: # doc example df_dc = df.copy() df_dc["string"] = "foo" @@ -1657,7 +1650,7 @@ def check_col(key, name, size): expected = df_dc[(df_dc.B > 0) & (df_dc.C > 0) & (df_dc.string == "foo")] tm.assert_frame_equal(result, expected, check_index_type=False) - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: # doc example part 2 np.random.seed(1234) index = date_range("1/1/2000", periods=8) @@ -1681,9 +1674,9 @@ def check_col(key, name, size): expected = df_dc[(df_dc.B > 0) & (df_dc.C > 0) & (df_dc.string == "foo")] tm.assert_frame_equal(result, expected) - def test_create_table_index(self): + def test_create_table_index(self, setup_path): - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: with catch_warnings(record=True): @@ -1713,7 +1706,7 @@ def col(t, column): with pytest.raises(TypeError): store.create_table_index("f2") - def test_append_hierarchical(self): + def test_append_hierarchical(self, setup_path): index = MultiIndex( levels=[["foo", "bar", "baz", "qux"], ["one", "two", "three"]], codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], @@ -1721,7 +1714,7 @@ def test_append_hierarchical(self): ) df = DataFrame(np.random.randn(10, 3), index=index, columns=["A", "B", "C"]) - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: store.append("mi", df) result = store.select("mi") tm.assert_frame_equal(result, df) @@ -1737,7 +1730,7 @@ def test_append_hierarchical(self): expected = df.reindex(columns=["A", "B"]) tm.assert_frame_equal(result, expected) - def test_column_multiindex(self): + def test_column_multiindex(self, setup_path): # GH 4710 # recreate multi-indexes properly @@ -1749,7 +1742,7 @@ def test_column_multiindex(self): if isinstance(expected.index, RangeIndex): expected.index = Int64Index(expected.index) - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: store.put("df", df) tm.assert_frame_equal( @@ -1767,7 +1760,7 @@ def 
test_column_multiindex(self): store.put("df3", df, format="table", data_columns=True) # appending multi-column on existing table (see GH 6167) - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: store.append("df2", df) store.append("df2", df) @@ -1781,18 +1774,18 @@ def test_column_multiindex(self): if isinstance(expected.index, RangeIndex): expected.index = Int64Index(expected.index) - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: store.put("df1", df, format="table") tm.assert_frame_equal( store["df1"], expected, check_index_type=True, check_column_type=True ) - def test_store_multiindex(self): + def test_store_multiindex(self, setup_path): # validate multi-index names # GH 5527 - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: def make_index(names=None): return MultiIndex.from_tuples( @@ -1858,7 +1851,7 @@ def make_index(names=None): store.append("df", df) tm.assert_frame_equal(store.select("df"), df) - def test_select_columns_in_where(self): + def test_select_columns_in_where(self, setup_path): # GH 6169 # recreate multi-indexes when columns is passed @@ -1872,7 +1865,7 @@ def test_select_columns_in_where(self): # With a DataFrame df = DataFrame(np.random.randn(10, 3), index=index, columns=["A", "B", "C"]) - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: store.put("df", df, format="table") expected = df[["A"]] @@ -1882,29 +1875,29 @@ def test_select_columns_in_where(self): # With a Series s = Series(np.random.randn(10), index=index, name="A") - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: store.put("s", s, format="table") tm.assert_series_equal(store.select("s", where="columns=['A']"), s) - def test_mi_data_columns(self): + def test_mi_data_columns(self, setup_path): # GH 14435 idx = pd.MultiIndex.from_arrays( [date_range("2000-01-01", periods=5), range(5)], names=["date", "id"] ) df = pd.DataFrame({"a": [1.1, 1.2, 1.3, 1.4, 1.5]}, index=idx) - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: store.append("df", df, data_columns=True) actual = store.select("df", where="id == 1") expected = df.iloc[[1], :] tm.assert_frame_equal(actual, expected) - def test_pass_spec_to_storer(self): + def test_pass_spec_to_storer(self, setup_path): df = tm.makeDataFrame() - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: store.put("df", df) with pytest.raises(TypeError): store.select("df", columns=["A"]) @@ -1912,9 +1905,9 @@ def test_pass_spec_to_storer(self): store.select("df", where=[("columns=A")]) @xfail_non_writeable - def test_append_misc(self): + def test_append_misc(self, setup_path): - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: df = tm.makeDataFrame() store.append("df", df, chunksize=1) result = store.select("df") @@ -1927,7 +1920,7 @@ def test_append_misc(self): # more chunksize in append tests def check(obj, comparator): for c in [10, 200, 1000]: - with ensure_clean_store(self.path, mode="w") as store: + with ensure_clean_store(setup_path, mode="w") as store: store.append("obj", obj, chunksize=c) result = store.select("obj") comparator(result, obj) @@ -1942,7 +1935,7 @@ def check(obj, comparator): check(df, tm.assert_frame_equal) # empty frame, GH4273 - with ensure_clean_store(self.path) as store: + with 
ensure_clean_store(setup_path) as store: # 0 len df_empty = DataFrame(columns=list("ABC")) @@ -1962,9 +1955,9 @@ def check(obj, comparator): store.put("df2", df) assert_frame_equal(store.select("df2"), df) - def test_append_raise(self): + def test_append_raise(self, setup_path): - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: # test append with invalid input to get good error messages @@ -2007,18 +2000,18 @@ def test_append_raise(self): with pytest.raises(ValueError): store.append("df", df) - def test_table_index_incompatible_dtypes(self): + def test_table_index_incompatible_dtypes(self, setup_path): df1 = DataFrame({"a": [1, 2, 3]}) df2 = DataFrame({"a": [4, 5, 6]}, index=date_range("1/1/2000", periods=3)) - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: store.put("frame", df1, format="table") with pytest.raises(TypeError): store.put("frame", df2, format="table", append=True) - def test_table_values_dtypes_roundtrip(self): + def test_table_values_dtypes_roundtrip(self, setup_path): - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: df1 = DataFrame({"a": [1, 2, 3]}, dtype="f8") store.append("df_f8", df1) assert_series_equal(df1.dtypes, store["df_f8"].dtypes) @@ -2072,7 +2065,7 @@ def test_table_values_dtypes_roundtrip(self): expected = expected.sort_index() tm.assert_series_equal(result, expected) - def test_table_mixed_dtypes(self): + def test_table_mixed_dtypes(self, setup_path): # frame df = tm.makeDataFrame() @@ -2090,13 +2083,13 @@ def test_table_mixed_dtypes(self): df.loc[3:6, ["obj1"]] = np.nan df = df._consolidate()._convert(datetime=True) - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: store.append("df1_mixed", df) tm.assert_frame_equal(store.select("df1_mixed"), df) - def test_unimplemented_dtypes_table_columns(self): + def test_unimplemented_dtypes_table_columns(self, setup_path): - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: dtypes = [("date", datetime.date(2001, 1, 2))] @@ -2114,7 +2107,7 @@ def test_unimplemented_dtypes_table_columns(self): df["datetime1"] = datetime.date(2001, 1, 2) df = df._consolidate()._convert(datetime=True) - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: # this fails because we have a date in the object block...... 
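            # (roughly: the table format maps every column onto a fixed
            # PyTables atom, and a datetime.date inside an object-dtype
            # column has no such mapping -- unlike datetime.datetime, which
            # is converted to i8 nanoseconds -- hence the TypeError below)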
with pytest.raises(TypeError): store.append("df_unimplemented", df) @@ -2127,7 +2120,7 @@ def test_unimplemented_dtypes_table_columns(self): "exactly equal to 1.15.0: gh-22098" ), ) - def test_calendar_roundtrip_issue(self): + def test_calendar_roundtrip_issue(self, setup_path): # 8591 # doc example from tseries holiday section @@ -2145,7 +2138,7 @@ def test_calendar_roundtrip_issue(self): s = Series(dts.weekday, dts).map(Series("Mon Tue Wed Thu Fri Sat Sun".split())) - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: store.put("fixed", s) result = store.select("fixed") @@ -2155,18 +2148,18 @@ def test_calendar_roundtrip_issue(self): result = store.select("table") assert_series_equal(result, s) - def test_roundtrip_tz_aware_index(self): + def test_roundtrip_tz_aware_index(self, setup_path): # GH 17618 time = pd.Timestamp("2000-01-01 01:00:00", tz="US/Eastern") df = pd.DataFrame(data=[0], index=[time]) - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: store.put("frame", df, format="fixed") recons = store["frame"] tm.assert_frame_equal(recons, df) assert recons.index[0].value == 946706400000000000 - def test_append_with_timedelta(self): + def test_append_with_timedelta(self, setup_path): # GH 3577 # append timedelta @@ -2182,7 +2175,7 @@ def test_append_with_timedelta(self): df["C"] = df["A"] - df["B"] df.loc[3:5, "C"] = np.nan - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: # table _maybe_remove(store, "df") @@ -2215,9 +2208,9 @@ def test_append_with_timedelta(self): result = store.select("df2") assert_frame_equal(result, df) - def test_remove(self): + def test_remove(self, setup_path): - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: ts = tm.makeTimeSeries() df = tm.makeDataFrame() @@ -2255,9 +2248,9 @@ def test_remove(self): del store["b"] assert len(store) == 0 - def test_invalid_terms(self): + def test_invalid_terms(self, setup_path): - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: with catch_warnings(record=True): @@ -2279,7 +2272,7 @@ def test_invalid_terms(self): store.select("df", "index>") # from the docs - with ensure_clean_path(self.path) as path: + with ensure_clean_path(setup_path) as path: dfq = DataFrame( np.random.randn(10, 4), columns=list("ABCD"), @@ -2294,7 +2287,7 @@ def test_invalid_terms(self): read_hdf(path, "dfq", where="A>0 or C>0") # catch the invalid reference - with ensure_clean_path(self.path) as path: + with ensure_clean_path(setup_path) as path: dfq = DataFrame( np.random.randn(10, 4), columns=list("ABCD"), @@ -2305,9 +2298,9 @@ def test_invalid_terms(self): with pytest.raises(ValueError): read_hdf(path, "dfq", where="A>0 or C>0") - def test_same_name_scoping(self): + def test_same_name_scoping(self, setup_path): - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: import pandas as pd @@ -2331,29 +2324,31 @@ def test_same_name_scoping(self): result = store.select("df", "index>datetime(2013,1,5)") assert_frame_equal(result, expected) - def test_series(self): + def test_series(self, setup_path): s = tm.makeStringSeries() - self._check_roundtrip(s, tm.assert_series_equal) + self._check_roundtrip(s, tm.assert_series_equal, path=setup_path) ts = tm.makeTimeSeries() - self._check_roundtrip(ts, tm.assert_series_equal) + self._check_roundtrip(ts, tm.assert_series_equal, path=setup_path) ts2 = 
Series(ts.index, Index(ts.index, dtype=object)) - self._check_roundtrip(ts2, tm.assert_series_equal) + self._check_roundtrip(ts2, tm.assert_series_equal, path=setup_path) ts3 = Series(ts.values, Index(np.asarray(ts.index, dtype=object), dtype=object)) - self._check_roundtrip(ts3, tm.assert_series_equal, check_index_type=False) + self._check_roundtrip( + ts3, tm.assert_series_equal, path=setup_path, check_index_type=False + ) - def test_float_index(self): + def test_float_index(self, setup_path): # GH #454 index = np.random.randn(10) s = Series(np.random.randn(10), index=index) - self._check_roundtrip(s, tm.assert_series_equal) + self._check_roundtrip(s, tm.assert_series_equal, path=setup_path) @xfail_non_writeable - def test_tuple_index(self): + def test_tuple_index(self, setup_path): # GH #492 col = np.arange(10) @@ -2363,11 +2358,11 @@ def test_tuple_index(self): with catch_warnings(record=True): simplefilter("ignore", pd.errors.PerformanceWarning) - self._check_roundtrip(DF, tm.assert_frame_equal) + self._check_roundtrip(DF, tm.assert_frame_equal, path=setup_path) @xfail_non_writeable @pytest.mark.filterwarnings("ignore::pandas.errors.PerformanceWarning") - def test_index_types(self): + def test_index_types(self, setup_path): with catch_warnings(record=True): values = np.random.randn(2) @@ -2378,54 +2373,54 @@ def test_index_types(self): with catch_warnings(record=True): ser = Series(values, [0, "y"]) - self._check_roundtrip(ser, func) + self._check_roundtrip(ser, func, path=setup_path) with catch_warnings(record=True): ser = Series(values, [datetime.datetime.today(), 0]) - self._check_roundtrip(ser, func) + self._check_roundtrip(ser, func, path=setup_path) with catch_warnings(record=True): ser = Series(values, ["y", 0]) - self._check_roundtrip(ser, func) + self._check_roundtrip(ser, func, path=setup_path) with catch_warnings(record=True): ser = Series(values, [datetime.date.today(), "a"]) - self._check_roundtrip(ser, func) + self._check_roundtrip(ser, func, path=setup_path) with catch_warnings(record=True): ser = Series(values, [0, "y"]) - self._check_roundtrip(ser, func) + self._check_roundtrip(ser, func, path=setup_path) ser = Series(values, [datetime.datetime.today(), 0]) - self._check_roundtrip(ser, func) + self._check_roundtrip(ser, func, path=setup_path) ser = Series(values, ["y", 0]) - self._check_roundtrip(ser, func) + self._check_roundtrip(ser, func, path=setup_path) ser = Series(values, [datetime.date.today(), "a"]) - self._check_roundtrip(ser, func) + self._check_roundtrip(ser, func, path=setup_path) ser = Series(values, [1.23, "b"]) - self._check_roundtrip(ser, func) + self._check_roundtrip(ser, func, path=setup_path) ser = Series(values, [1, 1.53]) - self._check_roundtrip(ser, func) + self._check_roundtrip(ser, func, path=setup_path) ser = Series(values, [1, 5]) - self._check_roundtrip(ser, func) + self._check_roundtrip(ser, func, path=setup_path) ser = Series( values, [datetime.datetime(2012, 1, 1), datetime.datetime(2012, 1, 2)] ) - self._check_roundtrip(ser, func) + self._check_roundtrip(ser, func, path=setup_path) - def test_timeseries_preepoch(self): + def test_timeseries_preepoch(self, setup_path): dr = bdate_range("1/1/1940", "1/1/1960") ts = Series(np.random.randn(len(dr)), index=dr) try: - self._check_roundtrip(ts, tm.assert_series_equal) + self._check_roundtrip(ts, tm.assert_series_equal, path=setup_path) except OverflowError: pytest.skip("known failer on some windows platforms") @@ -2433,7 +2428,7 @@ def test_timeseries_preepoch(self): @pytest.mark.parametrize( 
"compression", [False, pytest.param(True, marks=td.skip_if_windows_python_3)] ) - def test_frame(self, compression): + def test_frame(self, compression, setup_path): df = tm.makeDataFrame() @@ -2441,13 +2436,19 @@ def test_frame(self, compression): df.values[0, 0] = np.nan df.values[5, 3] = np.nan - self._check_roundtrip_table(df, tm.assert_frame_equal, compression=compression) - self._check_roundtrip(df, tm.assert_frame_equal, compression=compression) + self._check_roundtrip_table( + df, tm.assert_frame_equal, path=setup_path, compression=compression + ) + self._check_roundtrip( + df, tm.assert_frame_equal, path=setup_path, compression=compression + ) tdf = tm.makeTimeDataFrame() - self._check_roundtrip(tdf, tm.assert_frame_equal, compression=compression) + self._check_roundtrip( + tdf, tm.assert_frame_equal, path=setup_path, compression=compression + ) - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: # not consolidated df["foo"] = np.random.randn(len(df)) store["df"] = df @@ -2455,38 +2456,38 @@ def test_frame(self, compression): assert recons._data.is_consolidated() # empty - self._check_roundtrip(df[:0], tm.assert_frame_equal) + self._check_roundtrip(df[:0], tm.assert_frame_equal, path=setup_path) @xfail_non_writeable - def test_empty_series_frame(self): + def test_empty_series_frame(self, setup_path): s0 = Series() s1 = Series(name="myseries") df0 = DataFrame() df1 = DataFrame(index=["a", "b", "c"]) df2 = DataFrame(columns=["d", "e", "f"]) - self._check_roundtrip(s0, tm.assert_series_equal) - self._check_roundtrip(s1, tm.assert_series_equal) - self._check_roundtrip(df0, tm.assert_frame_equal) - self._check_roundtrip(df1, tm.assert_frame_equal) - self._check_roundtrip(df2, tm.assert_frame_equal) + self._check_roundtrip(s0, tm.assert_series_equal, path=setup_path) + self._check_roundtrip(s1, tm.assert_series_equal, path=setup_path) + self._check_roundtrip(df0, tm.assert_frame_equal, path=setup_path) + self._check_roundtrip(df1, tm.assert_frame_equal, path=setup_path) + self._check_roundtrip(df2, tm.assert_frame_equal, path=setup_path) @xfail_non_writeable @pytest.mark.parametrize( "dtype", [np.int64, np.float64, np.object, "m8[ns]", "M8[ns]"] ) - def test_empty_series(self, dtype): + def test_empty_series(self, dtype, setup_path): s = Series(dtype=dtype) - self._check_roundtrip(s, tm.assert_series_equal) + self._check_roundtrip(s, tm.assert_series_equal, path=setup_path) - def test_can_serialize_dates(self): + def test_can_serialize_dates(self, setup_path): rng = [x.date() for x in bdate_range("1/1/2000", "1/30/2000")] frame = DataFrame(np.random.randn(len(rng), 4), index=rng) - self._check_roundtrip(frame, tm.assert_frame_equal) + self._check_roundtrip(frame, tm.assert_frame_equal, path=setup_path) - def test_store_hierarchical(self): + def test_store_hierarchical(self, setup_path): index = MultiIndex( levels=[["foo", "bar", "baz", "qux"], ["one", "two", "three"]], codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], @@ -2494,39 +2495,39 @@ def test_store_hierarchical(self): ) frame = DataFrame(np.random.randn(10, 3), index=index, columns=["A", "B", "C"]) - self._check_roundtrip(frame, tm.assert_frame_equal) - self._check_roundtrip(frame.T, tm.assert_frame_equal) - self._check_roundtrip(frame["A"], tm.assert_series_equal) + self._check_roundtrip(frame, tm.assert_frame_equal, path=setup_path) + self._check_roundtrip(frame.T, tm.assert_frame_equal, path=setup_path) + self._check_roundtrip(frame["A"], tm.assert_series_equal, 
path=setup_path) # check that the names are stored - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: store["frame"] = frame recons = store["frame"] tm.assert_frame_equal(recons, frame) - def test_store_index_name(self): + def test_store_index_name(self, setup_path): df = tm.makeDataFrame() df.index.name = "foo" - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: store["frame"] = df recons = store["frame"] tm.assert_frame_equal(recons, df) - def test_store_index_name_with_tz(self): + def test_store_index_name_with_tz(self, setup_path): # GH 13884 df = pd.DataFrame({"A": [1, 2]}) df.index = pd.DatetimeIndex([1234567890123456787, 1234567890123456788]) df.index = df.index.tz_localize("UTC") df.index.name = "foo" - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: store.put("frame", df, format="table") recons = store["frame"] tm.assert_frame_equal(recons, df) @pytest.mark.parametrize("table_format", ["table", "fixed"]) - def test_store_index_name_numpy_str(self, table_format): + def test_store_index_name_numpy_str(self, table_format, setup_path): # GH #13492 idx = pd.Index( pd.to_datetime([datetime.date(2000, 1, 1), datetime.date(2000, 1, 2)]), @@ -2539,7 +2540,7 @@ def test_store_index_name_numpy_str(self, table_format): df = pd.DataFrame(np.arange(4).reshape(2, 2), columns=idx, index=idx1) # This used to fail, returning numpy strings instead of python strings. - with ensure_clean_path(self.path) as path: + with ensure_clean_path(setup_path) as path: df.to_hdf(path, "df", format=table_format) df2 = read_hdf(path, "df") @@ -2548,11 +2549,11 @@ def test_store_index_name_numpy_str(self, table_format): assert type(df2.index.name) == str assert type(df2.columns.name) == str - def test_store_series_name(self): + def test_store_series_name(self, setup_path): df = tm.makeDataFrame() series = df["A"] - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: store["series"] = series recons = store["series"] tm.assert_series_equal(recons, series) @@ -2561,7 +2562,7 @@ def test_store_series_name(self): @pytest.mark.parametrize( "compression", [False, pytest.param(True, marks=td.skip_if_windows_python_3)] ) - def test_store_mixed(self, compression): + def test_store_mixed(self, compression, setup_path): def _make_one(): df = tm.makeDataFrame() df["obj1"] = "foo" @@ -2575,10 +2576,10 @@ def _make_one(): df1 = _make_one() df2 = _make_one() - self._check_roundtrip(df1, tm.assert_frame_equal) - self._check_roundtrip(df2, tm.assert_frame_equal) + self._check_roundtrip(df1, tm.assert_frame_equal, path=setup_path) + self._check_roundtrip(df2, tm.assert_frame_equal, path=setup_path) - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: store["obj"] = df1 tm.assert_frame_equal(store["obj"], df1) store["obj"] = df2 @@ -2586,25 +2587,34 @@ def _make_one(): # check that can store Series of all of these types self._check_roundtrip( - df1["obj1"], tm.assert_series_equal, compression=compression + df1["obj1"], + tm.assert_series_equal, + path=setup_path, + compression=compression, ) self._check_roundtrip( - df1["bool1"], tm.assert_series_equal, compression=compression + df1["bool1"], + tm.assert_series_equal, + path=setup_path, + compression=compression, ) self._check_roundtrip( - df1["int1"], tm.assert_series_equal, compression=compression + df1["int1"], + tm.assert_series_equal, + path=setup_path, + 
compression=compression, ) @pytest.mark.filterwarnings( "ignore:\\nduplicate:pandas.io.pytables.DuplicateWarning" ) - def test_select_with_dups(self): + def test_select_with_dups(self, setup_path): # single dtypes df = DataFrame(np.random.randn(10, 4), columns=["A", "A", "B", "B"]) df.index = date_range("20130101 9:30", periods=10, freq="T") - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: store.append("df", df) result = store.select("df") @@ -2631,7 +2641,7 @@ def test_select_with_dups(self): ) df.index = date_range("20130101 9:30", periods=10, freq="T") - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: store.append("df", df) result = store.select("df") @@ -2651,7 +2661,7 @@ def test_select_with_dups(self): assert_frame_equal(result, expected, by_blocks=True) # duplicates on both index and columns - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: store.append("df", df) store.append("df", df) @@ -2660,18 +2670,18 @@ def test_select_with_dups(self): result = store.select("df", columns=["B", "A"]) assert_frame_equal(result, expected, by_blocks=True) - def test_overwrite_node(self): + def test_overwrite_node(self, setup_path): - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: store["a"] = tm.makeTimeDataFrame() ts = tm.makeTimeSeries() store["a"] = ts tm.assert_series_equal(store["a"], ts) - def test_select(self): + def test_select(self, setup_path): - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: with catch_warnings(record=True): @@ -2709,9 +2719,9 @@ def test_select(self): expected = df[df.A > 0].reindex(columns=["C", "D"]) tm.assert_frame_equal(expected, result) - def test_select_dtypes(self): + def test_select_dtypes(self, setup_path): - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: # with a Timestamp data column (GH #2637) df = DataFrame( dict(ts=bdate_range("2012-01-01", periods=300), A=np.random.randn(300)) @@ -2767,7 +2777,7 @@ def test_select_dtypes(self): expected = df.reindex(index=list(df.index)[0:10], columns=["A"]) tm.assert_frame_equal(expected, result) - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: # floats w/o NaN df = DataFrame(dict(cols=range(11), values=range(11)), dtype="float64") @@ -2806,7 +2816,7 @@ def test_select_dtypes(self): # test selection with comparison against numpy scalar # GH 11283 - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: df = tm.makeDataFrame() expected = df[df["A"] > 0] @@ -2816,9 +2826,9 @@ def test_select_dtypes(self): result = store.select("df", where=["A>np_zero"]) tm.assert_frame_equal(expected, result) - def test_select_with_many_inputs(self): + def test_select_with_many_inputs(self, setup_path): - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: df = DataFrame( dict( @@ -2869,10 +2879,10 @@ def test_select_with_many_inputs(self): tm.assert_frame_equal(expected, result) assert len(result) == 100 - def test_select_iterator(self): + def test_select_iterator(self, setup_path): # single table - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: df = tm.makeTimeDataFrame(500) _maybe_remove(store, "df") @@ -2893,7 +2903,7 @@ def test_select_iterator(self): result = concat(results) 
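            # select(..., iterator=True) and select(..., chunksize=n) both
            # return a TableIterator that yields DataFrames, so concat() can
            # rebuild the full selection -- or the chunks can be processed
            # out of core.  A hedged sketch of that pattern (``process`` is
            # a hypothetical callback, not part of this test):
            #
            #   for chunk in store.select("df", chunksize=50000, where="A > 0"):
            #       process(chunk)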
tm.assert_frame_equal(result, expected) - with ensure_clean_path(self.path) as path: + with ensure_clean_path(setup_path) as path: df = tm.makeTimeDataFrame(500) df.to_hdf(path, "df_non_table") @@ -2904,7 +2914,7 @@ def test_select_iterator(self): with pytest.raises(TypeError): read_hdf(path, "df_non_table", iterator=True) - with ensure_clean_path(self.path) as path: + with ensure_clean_path(setup_path) as path: df = tm.makeTimeDataFrame(500) df.to_hdf(path, "df", format="table") @@ -2918,7 +2928,7 @@ def test_select_iterator(self): # multiple - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: df1 = tm.makeTimeDataFrame(500) store.append("df1", df1, data_columns=True) @@ -2939,14 +2949,14 @@ def test_select_iterator(self): result = concat(results) tm.assert_frame_equal(expected, result) - def test_select_iterator_complete_8014(self): + def test_select_iterator_complete_8014(self, setup_path): # GH 8014 # using iterator and where clause chunksize = 1e4 # no iterator - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: expected = tm.makeTimeDataFrame(100064, "S") _maybe_remove(store, "df") @@ -2980,7 +2990,7 @@ def test_select_iterator_complete_8014(self): tm.assert_frame_equal(expected, result) # with iterator, full range - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: expected = tm.makeTimeDataFrame(100064, "S") _maybe_remove(store, "df") @@ -3014,14 +3024,14 @@ def test_select_iterator_complete_8014(self): result = concat(results) tm.assert_frame_equal(expected, result) - def test_select_iterator_non_complete_8014(self): + def test_select_iterator_non_complete_8014(self, setup_path): # GH 8014 # using iterator and where clause chunksize = 1e4 # with iterator, non complete range - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: expected = tm.makeTimeDataFrame(100064, "S") _maybe_remove(store, "df") @@ -3056,7 +3066,7 @@ def test_select_iterator_non_complete_8014(self): tm.assert_frame_equal(rexpected, result) # with iterator, empty where - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: expected = tm.makeTimeDataFrame(100064, "S") _maybe_remove(store, "df") @@ -3069,7 +3079,7 @@ def test_select_iterator_non_complete_8014(self): results = [s for s in store.select("df", where=where, chunksize=chunksize)] assert 0 == len(results) - def test_select_iterator_many_empty_frames(self): + def test_select_iterator_many_empty_frames(self, setup_path): # GH 8014 # using iterator and where clause can return many empty @@ -3077,7 +3087,7 @@ def test_select_iterator_many_empty_frames(self): chunksize = int(1e4) # with iterator, range limited to the first chunk - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: expected = tm.makeTimeDataFrame(100000, "S") _maybe_remove(store, "df") @@ -3134,14 +3144,14 @@ def test_select_iterator_many_empty_frames(self): @pytest.mark.filterwarnings( "ignore:\\nthe :pandas.io.pytables.AttributeConflictWarning" ) - def test_retain_index_attributes(self): + def test_retain_index_attributes(self, setup_path): # GH 3499, losing frequency info on index recreation df = DataFrame( dict(A=Series(range(3), index=date_range("2000-1-1", periods=3, freq="H"))) ) - with ensure_clean_store(self.path) as store: + with ensure_clean_store(setup_path) as store: _maybe_remove(store, "data") store.put("data", df, format="table") 
@@ -3194,8 +3204,8 @@ def test_retain_index_attributes(self):
     @pytest.mark.filterwarnings(
         "ignore:\\nthe :pandas.io.pytables.AttributeConflictWarning"
     )
-    def test_retain_index_attributes2(self):
-        with ensure_clean_path(self.path) as path:
+    def test_retain_index_attributes2(self, setup_path):
+        with ensure_clean_path(setup_path) as path:

             with catch_warnings(record=True):
@@ -3232,11 +3242,11 @@ def test_retain_index_attributes2(self):

             assert read_hdf(path, "data").index.name is None

-    def test_frame_select(self):
+    def test_frame_select(self, setup_path):

         df = tm.makeTimeDataFrame()

-        with ensure_clean_store(self.path) as store:
+        with ensure_clean_store(setup_path) as store:
             store.put("frame", df, format="table")

             date = df.index[len(df) // 2]
@@ -3265,14 +3275,14 @@ def test_frame_select(self):
         # with pytest.raises(ValueError):
         #     store.select('frame', [crit1, crit2])

-    def test_frame_select_complex(self):
+    def test_frame_select_complex(self, setup_path):
         # select via complex criteria

         df = tm.makeTimeDataFrame()
         df["string"] = "foo"
         df.loc[df.index[0:4], "string"] = "bar"

-        with ensure_clean_store(self.path) as store:
+        with ensure_clean_store(setup_path) as store:
             store.put("df", df, format="table", data_columns=["string"])

             # empty
@@ -3317,7 +3327,7 @@ def test_frame_select_complex(self):
             expected = df.loc[df.index > df.index[3]].reindex(columns=["A", "B"])
             tm.assert_frame_equal(result, expected)

-    def test_frame_select_complex2(self):
+    def test_frame_select_complex2(self, setup_path):

         with ensure_clean_path(["parms.hdf", "hist.hdf"]) as paths:
@@ -3381,13 +3391,13 @@ def test_frame_select_complex2(self):

         store.close()

-    def test_invalid_filtering(self):
+    def test_invalid_filtering(self, setup_path):

         # can't use more than one filter (atm)

         df = tm.makeTimeDataFrame()

-        with ensure_clean_store(self.path) as store:
+        with ensure_clean_store(setup_path) as store:
             store.put("df", df, format="table")

             # not implemented
@@ -3398,9 +3408,9 @@ def test_invalid_filtering(self):
             with pytest.raises(NotImplementedError):
                 store.select("df", "columns=['A','B'] & columns=['C']")

-    def test_string_select(self):
+    def test_string_select(self, setup_path):
         # GH 2973
-        with ensure_clean_store(self.path) as store:
+        with ensure_clean_store(setup_path) as store:

             df = tm.makeTimeDataFrame()
@@ -3440,11 +3450,11 @@ def test_string_select(self):
             expected = df[df.int != 2]
             assert_frame_equal(result, expected)

-    def test_read_column(self):
+    def test_read_column(self, setup_path):

         df = tm.makeTimeDataFrame()

-        with ensure_clean_store(self.path) as store:
+        with ensure_clean_store(setup_path) as store:
             _maybe_remove(store, "df")

             # GH 17912
@@ -3513,10 +3523,10 @@ def test_read_column(self):
             result = store.select_column("df4", "B")
             tm.assert_series_equal(result, expected)

-    def test_coordinates(self):
+    def test_coordinates(self, setup_path):
         df = tm.makeTimeDataFrame()

-        with ensure_clean_store(self.path) as store:
+        with ensure_clean_store(setup_path) as store:

             _maybe_remove(store, "df")
             store.append("df", df)
@@ -3561,7 +3571,7 @@ def test_coordinates(self):
             tm.assert_frame_equal(result, expected)

         # pass array/mask as the coordinates
-        with ensure_clean_store(self.path) as store:
+        with ensure_clean_store(setup_path) as store:

             df = DataFrame(
                 np.random.randn(1000, 2), index=date_range("20000101", periods=1000)
@@ -3617,13 +3627,13 @@ def test_coordinates(self):
             expected = df[5:10]
             tm.assert_frame_equal(result, expected)

-    def test_append_to_multiple(self):
+    def test_append_to_multiple(self, setup_path):
         df1 = tm.makeTimeDataFrame()
         df2 = tm.makeTimeDataFrame().rename(columns="{}_2".format)
         df2["foo"] = "bar"
         df = concat([df1, df2], axis=1)

-        with ensure_clean_store(self.path) as store:
+        with ensure_clean_store(setup_path) as store:

             # exceptions
             with pytest.raises(ValueError):
@@ -3647,13 +3657,13 @@ def test_append_to_multiple(self):
             expected = df[(df.A > 0) & (df.B > 0)]
             tm.assert_frame_equal(result, expected)

-    def test_append_to_multiple_dropna(self):
+    def test_append_to_multiple_dropna(self, setup_path):
         df1 = tm.makeTimeDataFrame()
         df2 = tm.makeTimeDataFrame().rename(columns="{}_2".format)
         df1.iloc[1, df1.columns.get_indexer(["A", "B"])] = np.nan
         df = concat([df1, df2], axis=1)

-        with ensure_clean_store(self.path) as store:
+        with ensure_clean_store(setup_path) as store:

             # dropna=True should guarantee rows are synchronized
             store.append_to_multiple(
@@ -3667,13 +3677,13 @@ def test_append_to_multiple_dropna(self):
     @pytest.mark.xfail(
         run=False, reason="append_to_multiple_dropna_false is not raising as failed"
    )
-    def test_append_to_multiple_dropna_false(self):
+    def test_append_to_multiple_dropna_false(self, setup_path):
         df1 = tm.makeTimeDataFrame()
         df2 = tm.makeTimeDataFrame().rename(columns="{}_2".format)
         df1.iloc[1, df1.columns.get_indexer(["A", "B"])] = np.nan
         df = concat([df1, df2], axis=1)

-        with ensure_clean_store(self.path) as store:
+        with ensure_clean_store(setup_path) as store:

             # dropna=False shouldn't synchronize row indexes
             store.append_to_multiple(
@@ -3685,13 +3695,13 @@ def test_append_to_multiple_dropna_false(self):

             assert not store.select("df1a").index.equals(store.select("df2a").index)

-    def test_select_as_multiple(self):
+    def test_select_as_multiple(self, setup_path):

         df1 = tm.makeTimeDataFrame()
         df2 = tm.makeTimeDataFrame().rename(columns="{}_2".format)
         df2["foo"] = "bar"

-        with ensure_clean_store(self.path) as store:
+        with ensure_clean_store(setup_path) as store:

             # no tables stored
             with pytest.raises(Exception):
@@ -3759,9 +3769,9 @@ def test_select_as_multiple(self):
         LooseVersion(tables.__version__) < LooseVersion("3.1.0"),
         reason=("tables version does not support fix for nan selection bug: GH 4858"),
     )
-    def test_nan_selection_bug_4858(self):
+    def test_nan_selection_bug_4858(self, setup_path):

-        with ensure_clean_store(self.path) as store:
+        with ensure_clean_store(setup_path) as store:

             df = DataFrame(dict(cols=range(6), values=range(6)), dtype="float64")
             df["cols"] = (df["cols"] + 10).apply(str)
@@ -3777,9 +3787,9 @@ def test_nan_selection_bug_4858(self):
             result = store.select("df", where="values>2.0")
             assert_frame_equal(result, expected)

-    def test_start_stop_table(self):
+    def test_start_stop_table(self, setup_path):

-        with ensure_clean_store(self.path) as store:
+        with ensure_clean_store(setup_path) as store:

             # table
             df = DataFrame(dict(A=np.random.rand(20), B=np.random.rand(20)))
@@ -3795,10 +3805,10 @@ def test_start_stop_table(self):
             expected = df.loc[30:40, ["A"]]
             tm.assert_frame_equal(result, expected)

-    def test_start_stop_multiple(self):
+    def test_start_stop_multiple(self, setup_path):

         # GH 16209
-        with ensure_clean_store(self.path) as store:
+        with ensure_clean_store(setup_path) as store:

             df = DataFrame({"foo": [1, 2], "bar": [1, 2]})
@@ -3811,9 +3821,9 @@ def test_start_stop_multiple(self):
             expected = df.loc[[0], ["foo", "bar"]]
             tm.assert_frame_equal(result, expected)

-    def test_start_stop_fixed(self):
+    def test_start_stop_fixed(self, setup_path):

-        with ensure_clean_store(self.path) as store:
+        with ensure_clean_store(setup_path) as store:

             # fixed, GH 8287
             df = DataFrame(
@@ -3851,13 +3861,13 @@ def test_start_stop_fixed(self):
         df.iloc[3:5, 1:3] = np.nan
         df.iloc[8:10, -2] = np.nan

-    def test_select_filter_corner(self):
+    def test_select_filter_corner(self, setup_path):

         df = DataFrame(np.random.randn(50, 100))
         df.index = ["{c:3d}".format(c=c) for c in df.index]
         df.columns = ["{c:3d}".format(c=c) for c in df.columns]

-        with ensure_clean_store(self.path) as store:
+        with ensure_clean_store(setup_path) as store:
             store.put("frame", df, format="table")

             crit = "columns=df.columns[:75]"
@@ -3868,7 +3878,7 @@ def test_select_filter_corner(self):
             result = store.select("frame", [crit])
             tm.assert_frame_equal(result, df.loc[:, df.columns[:75:2]])

-    def test_path_pathlib(self):
+    def test_path_pathlib(self, setup_path):
         df = tm.makeDataFrame()

         result = tm.round_trip_pathlib(
@@ -3877,7 +3887,7 @@ def test_path_pathlib(self):
         tm.assert_frame_equal(df, result)

     @pytest.mark.parametrize("start, stop", [(0, 2), (1, 2), (None, None)])
-    def test_contiguous_mixed_data_table(self, start, stop):
+    def test_contiguous_mixed_data_table(self, start, stop, setup_path):
         # GH 17021
         # ValueError when reading a contiguous mixed-data table ft. VLArray
         df = DataFrame(
@@ -3887,13 +3897,13 @@ def test_contiguous_mixed_data_table(self, start, stop):
             }
         )

-        with ensure_clean_store(self.path) as store:
+        with ensure_clean_store(setup_path) as store:
             store.append("test_dataset", df)

             result = store.select("test_dataset", start=start, stop=stop)
             assert_frame_equal(df[start:stop], result)

-    def test_path_pathlib_hdfstore(self):
+    def test_path_pathlib_hdfstore(self, setup_path):
         df = tm.makeDataFrame()

         def writer(path):
@@ -3907,14 +3917,14 @@ def reader(path):
         result = tm.round_trip_pathlib(writer, reader)
         tm.assert_frame_equal(df, result)

-    def test_pickle_path_localpath(self):
+    def test_pickle_path_localpath(self, setup_path):
         df = tm.makeDataFrame()
         result = tm.round_trip_pathlib(
             lambda p: df.to_hdf(p, "df"), lambda p: pd.read_hdf(p, "df")
         )
         tm.assert_frame_equal(df, result)

-    def test_path_localpath_hdfstore(self):
+    def test_path_localpath_hdfstore(self, setup_path):
         df = tm.makeDataFrame()

         def writer(path):
@@ -3928,23 +3938,25 @@ def reader(path):
         result = tm.round_trip_localpath(writer, reader)
         tm.assert_frame_equal(df, result)

-    def _check_roundtrip(self, obj, comparator, compression=False, **kwargs):
+    def _check_roundtrip(self, obj, comparator, path, compression=False, **kwargs):

         options = {}
         if compression:
             options["complib"] = _default_compressor

-        with ensure_clean_store(self.path, "w", **options) as store:
+        with ensure_clean_store(path, "w", **options) as store:
             store["obj"] = obj
             retrieved = store["obj"]
             comparator(retrieved, obj, **kwargs)

-    def _check_double_roundtrip(self, obj, comparator, compression=False, **kwargs):
+    def _check_double_roundtrip(
+        self, obj, comparator, path, compression=False, **kwargs
+    ):
         options = {}
         if compression:
             options["complib"] = compression or _default_compressor

-        with ensure_clean_store(self.path, "w", **options) as store:
+        with ensure_clean_store(path, "w", **options) as store:
             store["obj"] = obj
             retrieved = store["obj"]
             comparator(retrieved, obj, **kwargs)
@@ -3952,21 +3964,21 @@ def _check_double_roundtrip(self, obj, comparator, compression=False, **kwargs):
             again = store["obj"]
             comparator(again, obj, **kwargs)

-    def _check_roundtrip_table(self, obj, comparator, compression=False):
+    def _check_roundtrip_table(self, obj, comparator, path, compression=False):

         options = {}
         if compression:
             options["complib"] = _default_compressor

-        with ensure_clean_store(self.path, "w", **options) as store:
+        with ensure_clean_store(path, "w", **options) as store:
             store.put("obj", obj, format="table")
             retrieved = store["obj"]

             comparator(retrieved, obj)

-    def test_multiple_open_close(self):
+    def test_multiple_open_close(self, setup_path):
         # gh-4409: open & close multiple times

-        with ensure_clean_path(self.path) as path:
+        with ensure_clean_path(setup_path) as path:

             df = tm.makeDataFrame()
             df.to_hdf(path, "df", mode="w", format="table")
@@ -3980,7 +3992,7 @@ def test_multiple_open_close(self):
             assert "CLOSED" in store.info()
             assert not store.is_open

-        with ensure_clean_path(self.path) as path:
+        with ensure_clean_path(setup_path) as path:

             if pytables._table_file_open_policy_is_strict:
@@ -4042,7 +4054,7 @@ def test_multiple_open_close(self):
             assert not store2.is_open

         # ops on a closed store
-        with ensure_clean_path(self.path) as path:
+        with ensure_clean_path(setup_path) as path:

             df = tm.makeDataFrame()
             df.to_hdf(path, "df", mode="w", format="table")
@@ -4086,7 +4098,7 @@ def test_multiple_open_close(self):
             with pytest.raises(ClosedFileError, match="file is not open"):
                 store.select("df")

-    def test_pytables_native_read(self, datapath):
+    def test_pytables_native_read(self, datapath, setup_path):
         with ensure_clean_store(
             datapath("io", "data", "legacy_hdf/pytables_native.h5"), mode="r"
         ) as store:
@@ -4096,7 +4108,7 @@ def test_pytables_native_read(self, datapath):
     @pytest.mark.skipif(
         is_platform_windows(), reason="native2 read fails oddly on windows"
     )
-    def test_pytables_native2_read(self, datapath):
+    def test_pytables_native2_read(self, datapath, setup_path):
         with ensure_clean_store(
             datapath("io", "data", "legacy_hdf", "pytables_native2.h5"), mode="r"
         ) as store:
@@ -4105,7 +4117,7 @@ def test_pytables_native2_read(self, datapath):
             assert isinstance(d1, DataFrame)

     @xfail_non_writeable
-    def test_legacy_table_fixed_format_read_py2(self, datapath):
+    def test_legacy_table_fixed_format_read_py2(self, datapath, setup_path):
         # GH 24510
         # legacy table with fixed format written in Python 2
         with ensure_clean_store(
@@ -4119,7 +4131,7 @@ def test_legacy_table_fixed_format_read_py2(self, datapath):
         )
         assert_frame_equal(expected, result)

-    def test_legacy_table_read_py2(self, datapath):
+    def test_legacy_table_read_py2(self, datapath, setup_path):
         # issue: 24925
         # legacy table written in Python 2
         with ensure_clean_store(
@@ -4130,7 +4142,7 @@ def test_legacy_table_read_py2(self, datapath):
         expected = pd.DataFrame({"a": ["a", "b"], "b": [2, 3]})
         assert_frame_equal(expected, result)

-    def test_copy(self):
+    def test_copy(self, setup_path):

         with catch_warnings(record=True):
@@ -4179,7 +4191,7 @@ def do_copy(f, new_f=None, keys=None, propindexes=True, **kwargs):
             df = tm.makeDataFrame()

             try:
-                path = create_tempfile(self.path)
+                path = create_tempfile(setup_path)
                 st = HDFStore(path)
                 st.append("df", df, data_columns=["A"])
                 st.close()
@@ -4188,17 +4200,17 @@ def do_copy(f, new_f=None, keys=None, propindexes=True, **kwargs):
             finally:
                 safe_remove(path)

-    def test_store_datetime_fractional_secs(self):
+    def test_store_datetime_fractional_secs(self, setup_path):

-        with ensure_clean_store(self.path) as store:
+        with ensure_clean_store(setup_path) as store:
             dt = datetime.datetime(2012, 1, 2, 3, 4, 5, 123456)
             series = Series([0], [dt])
             store["a"] = series
             assert store["a"].index[0] == dt

-    def test_tseries_indices_series(self):
+    def test_tseries_indices_series(self, setup_path):

-        with ensure_clean_store(self.path) as store:
+        with ensure_clean_store(setup_path) as store:
             idx = tm.makeDateIndex(10)
             ser = Series(np.random.randn(len(idx)), idx)
             store["a"] = ser
@@ -4217,9 +4229,9 @@ def test_tseries_indices_series(self):
             assert result.index.freq == ser.index.freq
             tm.assert_class_equal(result.index, ser.index, obj="series index")

-    def test_tseries_indices_frame(self):
+    def test_tseries_indices_frame(self, setup_path):

-        with ensure_clean_store(self.path) as store:
+        with ensure_clean_store(setup_path) as store:
             idx = tm.makeDateIndex(10)
             df = DataFrame(np.random.randn(len(idx), 3), index=idx)
             store["a"] = df
@@ -4238,7 +4250,7 @@ def test_tseries_indices_frame(self):
             assert result.index.freq == df.index.freq
             tm.assert_class_equal(result.index, df.index, obj="dataframe index")

-    def test_unicode_index(self):
+    def test_unicode_index(self, setup_path):

         unicode_values = ["\u03c3", "\u03c3\u03c3"]

@@ -4246,30 +4258,30 @@ def test_unicode_index(self):
         with catch_warnings(record=True):
             simplefilter("ignore", pd.errors.PerformanceWarning)
             s = Series(np.random.randn(len(unicode_values)), unicode_values)
-            self._check_roundtrip(s, tm.assert_series_equal)
+            self._check_roundtrip(s, tm.assert_series_equal, path=setup_path)

-    def test_unicode_longer_encoded(self):
+    def test_unicode_longer_encoded(self, setup_path):
         # GH 11234
         char = "\u0394"
         df = pd.DataFrame({"A": [char]})
-        with ensure_clean_store(self.path) as store:
+        with ensure_clean_store(setup_path) as store:
             store.put("df", df, format="table", encoding="utf-8")
             result = store.get("df")
             tm.assert_frame_equal(result, df)

         df = pd.DataFrame({"A": ["a", char], "B": ["b", "b"]})
-        with ensure_clean_store(self.path) as store:
+        with ensure_clean_store(setup_path) as store:
             store.put("df", df, format="table", encoding="utf-8")
             result = store.get("df")
             tm.assert_frame_equal(result, df)

     @xfail_non_writeable
-    def test_store_datetime_mixed(self):
+    def test_store_datetime_mixed(self, setup_path):

         df = DataFrame({"a": [1, 2, 3], "b": [1.0, 2.0, 3.0], "c": ["a", "b", "c"]})
         ts = tm.makeTimeSeries()
         df["d"] = ts.index[:3]
-        self._check_roundtrip(df, tm.assert_frame_equal)
+        self._check_roundtrip(df, tm.assert_frame_equal, path=setup_path)

     # FIXME: don't leave commented-out code
     # def test_cant_write_multiindex_table(self):
@@ -4281,14 +4293,14 @@ def test_store_datetime_mixed(self):
     #     with pytest.raises(Exception):
     #         store.put('foo', df, format='table')

-    def test_append_with_diff_col_name_types_raises_value_error(self):
+    def test_append_with_diff_col_name_types_raises_value_error(self, setup_path):
         df = DataFrame(np.random.randn(10, 1))
         df2 = DataFrame({"a": np.random.randn(10)})
         df3 = DataFrame({(1, 2): np.random.randn(10)})
         df4 = DataFrame({("1", 2): np.random.randn(10)})
         df5 = DataFrame({("1", 2, object): np.random.randn(10)})

-        with ensure_clean_store(self.path) as store:
+        with ensure_clean_store(setup_path) as store:
             name = "df_{}".format(tm.rands(10))
             store.append(name, df)
@@ -4296,7 +4308,7 @@ def test_append_with_diff_col_name_types_raises_value_error(self):
             with pytest.raises(ValueError):
                 store.append(name, d)

-    def test_query_with_nested_special_character(self):
+    def test_query_with_nested_special_character(self, setup_path):
         df = DataFrame(
             {
                 "a": ["a", "a", "c", "b", "test & test", "c", "b", "e"],
@@ -4304,14 +4316,14 @@ def test_query_with_nested_special_character(self):
             }
         )
         expected = df[df.a == "test & test"]
-        with ensure_clean_store(self.path) as store:
+        with ensure_clean_store(setup_path) as store:
             store.append("test", df, format="table", data_columns=True)
             result = store.select("test", 'a = "test & test"')
             tm.assert_frame_equal(expected, result)

-    def test_categorical(self):
+    def test_categorical(self, setup_path):

-        with ensure_clean_store(self.path) as store:
+        with ensure_clean_store(setup_path) as store:

             # Basic
             _maybe_remove(store, "s")
@@ -4429,7 +4441,7 @@ def test_categorical(self):
             ):
                 store.select("df3/meta/s/meta")

-    def test_categorical_conversion(self):
+    def test_categorical_conversion(self, setup_path):

         # GH13322
         # Check that read_hdf with categorical columns doesn't return rows if
@@ -4443,7 +4455,7 @@ def test_categorical_conversion(self):

         # We are expecting an empty DataFrame matching types of df
         expected = df.iloc[[], :]
-        with ensure_clean_path(self.path) as path:
+        with ensure_clean_path(setup_path) as path:
             df.to_hdf(path, "df", format="table", data_columns=True)
             result = read_hdf(path, "df", where="obsids=B")
             tm.assert_frame_equal(result, expected)
@@ -4454,12 +4466,12 @@ def test_categorical_conversion(self):

         # We are expecting an empty DataFrame matching types of df
         expected = df.iloc[[], :]
-        with ensure_clean_path(self.path) as path:
+        with ensure_clean_path(setup_path) as path:
             df.to_hdf(path, "df", format="table", data_columns=True)
             result = read_hdf(path, "df", where="obsids=B")
             tm.assert_frame_equal(result, expected)

-    def test_categorical_nan_only_columns(self):
+    def test_categorical_nan_only_columns(self, setup_path):
         # GH18413
         # Check that read_hdf with categorical columns with NaN-only values can
         # be read back.
@@ -4475,15 +4487,15 @@ def test_categorical_nan_only_columns(self):
         df["b"] = df.b.astype("category")
         df["d"] = df.b.astype("category")
         expected = df
-        with ensure_clean_path(self.path) as path:
+        with ensure_clean_path(setup_path) as path:
             df.to_hdf(path, "df", format="table", data_columns=True)
             result = read_hdf(path, "df")
             tm.assert_frame_equal(result, expected)

-    def test_duplicate_column_name(self):
+    def test_duplicate_column_name(self, setup_path):
         df = DataFrame(columns=["a", "a"], data=[[0, 0]])

-        with ensure_clean_path(self.path) as path:
+        with ensure_clean_path(setup_path) as path:
             with pytest.raises(ValueError):
                 df.to_hdf(path, "df", format="fixed")
@@ -4494,30 +4506,30 @@ def test_duplicate_column_name(self):
             assert df.equals(other)
             assert other.equals(df)

-    def test_round_trip_equals(self):
+    def test_round_trip_equals(self, setup_path):
         # GH 9330
         df = DataFrame({"B": [1, 2], "A": ["x", "y"]})

-        with ensure_clean_path(self.path) as path:
+        with ensure_clean_path(setup_path) as path:
             df.to_hdf(path, "df", format="table")
             other = read_hdf(path, "df")
             tm.assert_frame_equal(df, other)
             assert df.equals(other)
             assert other.equals(df)

-    def test_preserve_timedeltaindex_type(self):
+    def test_preserve_timedeltaindex_type(self, setup_path):
         # GH9635
         # Storing TimedeltaIndexed DataFrames in fixed stores did not preserve
         # the type of the index.
         df = DataFrame(np.random.normal(size=(10, 5)))
         df.index = timedelta_range(start="0s", periods=10, freq="1s", name="example")

-        with ensure_clean_store(self.path) as store:
+        with ensure_clean_store(setup_path) as store:

             store["df"] = df
             assert_frame_equal(store["df"], df)

-    def test_columns_multiindex_modified(self):
+    def test_columns_multiindex_modified(self, setup_path):
         # BUG: 7212
         # read_hdf store.select modified the passed columns parameters
         # when multi-indexed.
@@ -4527,7 +4539,7 @@ def test_columns_multiindex_modified(self):
         df = df.set_index(keys="E", append=True)

         data_columns = df.index.names + df.columns.tolist()
-        with ensure_clean_path(self.path) as path:
+        with ensure_clean_path(setup_path) as path:
             df.to_hdf(
                 path,
                 "df",
@@ -4542,7 +4554,7 @@ def test_columns_multiindex_modified(self):
         assert cols2load_original == cols2load

     @ignore_natural_naming_warning
-    def test_to_hdf_with_object_column_names(self):
+    def test_to_hdf_with_object_column_names(self, setup_path):
         # GH9057
         # Writing HDF5 table format should only work for string-like
         # column types
@@ -4562,7 +4574,7 @@ def test_to_hdf_with_object_column_names(self):

         for index in types_should_fail:
             df = DataFrame(np.random.randn(10, 2), columns=index(2))
-            with ensure_clean_path(self.path) as path:
+            with ensure_clean_path(setup_path) as path:
                 with catch_warnings(record=True):
                     msg = "cannot have non-object label DataIndexableCol"
                     with pytest.raises(ValueError, match=msg):
@@ -4570,7 +4582,7 @@ def test_to_hdf_with_object_column_names(self):

         for index in types_should_run:
             df = DataFrame(np.random.randn(10, 2), columns=index(2))
-            with ensure_clean_path(self.path) as path:
+            with ensure_clean_path(setup_path) as path:
                 with catch_warnings(record=True):
                     df.to_hdf(path, "df", format="table", data_columns=True)
                     result = pd.read_hdf(
@@ -4578,14 +4590,14 @@ def test_to_hdf_with_object_column_names(self):
                     )
                     assert len(result)

-    def test_read_hdf_open_store(self):
+    def test_read_hdf_open_store(self, setup_path):
         # GH10330
         # No check for non-string path_or-buf, and no test of open store
         df = DataFrame(np.random.rand(4, 5), index=list("abcd"), columns=list("ABCDE"))
         df.index.name = "letters"
         df = df.set_index(keys="E", append=True)

-        with ensure_clean_path(self.path) as path:
+        with ensure_clean_path(setup_path) as path:
             df.to_hdf(path, "df", mode="w")
             direct = read_hdf(path, "df")
             store = HDFStore(path, mode="r")
@@ -4594,12 +4606,12 @@ def test_read_hdf_open_store(self):
             assert store.is_open
             store.close()

-    def test_read_hdf_iterator(self):
+    def test_read_hdf_iterator(self, setup_path):
         df = DataFrame(np.random.rand(4, 5), index=list("abcd"), columns=list("ABCDE"))
         df.index.name = "letters"
         df = df.set_index(keys="E", append=True)

-        with ensure_clean_path(self.path) as path:
+        with ensure_clean_path(setup_path) as path:
             df.to_hdf(path, "df", mode="w", format="t")
             direct = read_hdf(path, "df")
             iterator = read_hdf(path, "df", iterator=True)
@@ -4608,10 +4620,10 @@ def test_read_hdf_iterator(self):
             tm.assert_frame_equal(direct, indirect)
             iterator.store.close()

-    def test_read_hdf_errors(self):
+    def test_read_hdf_errors(self, setup_path):
         df = DataFrame(np.random.rand(4, 5), index=list("abcd"), columns=list("ABCDE"))

-        with ensure_clean_path(self.path) as path:
+        with ensure_clean_path(setup_path) as path:
             with pytest.raises(IOError):
                 read_hdf(path, "key")
@@ -4626,20 +4638,20 @@ def test_read_hdf_generic_buffer_errors(self):
         with pytest.raises(NotImplementedError):
             read_hdf(BytesIO(b""), "df")

-    def test_invalid_complib(self):
+    def test_invalid_complib(self, setup_path):
         df = DataFrame(np.random.rand(4, 5), index=list("abcd"), columns=list("ABCDE"))
-        with ensure_clean_path(self.path) as path:
+        with ensure_clean_path(setup_path) as path:
             with pytest.raises(ValueError):
                 df.to_hdf(path, "df", complib="foolib")

     # GH10443
-    def test_read_nokey(self):
+    def test_read_nokey(self, setup_path):
         df = DataFrame(np.random.rand(4, 5), index=list("abcd"), columns=list("ABCDE"))

         # Categorical dtype not supported for "fixed" format. So no need
         # to test with that dtype in the dataframe here.
-        with ensure_clean_path(self.path) as path:
+        with ensure_clean_path(setup_path) as path:
             df.to_hdf(path, "df", mode="a")
             reread = read_hdf(path)
             assert_frame_equal(df, reread)
@@ -4648,11 +4660,11 @@ def test_read_nokey(self):
             with pytest.raises(ValueError):
                 read_hdf(path)

-    def test_read_nokey_table(self):
+    def test_read_nokey_table(self, setup_path):
         # GH13231
         df = DataFrame({"i": range(5), "c": Series(list("abacd"), dtype="category")})

-        with ensure_clean_path(self.path) as path:
+        with ensure_clean_path(setup_path) as path:
             df.to_hdf(path, "df", mode="a", format="table")
             reread = read_hdf(path)
             assert_frame_equal(df, reread)
@@ -4661,8 +4673,8 @@ def test_read_nokey_table(self):
             with pytest.raises(ValueError):
                 read_hdf(path)

-    def test_read_nokey_empty(self):
-        with ensure_clean_path(self.path) as path:
+    def test_read_nokey_empty(self, setup_path):
+        with ensure_clean_path(setup_path) as path:
             store = HDFStore(path)
             store.close()

@@ -4670,7 +4682,7 @@ def test_read_nokey_empty(self):
                 read_hdf(path)

     @td.skip_if_no("pathlib")
-    def test_read_from_pathlib_path(self):
+    def test_read_from_pathlib_path(self, setup_path):

         # GH11773
         from pathlib import Path

@@ -4678,7 +4690,7 @@ def test_read_from_pathlib_path(self):
         expected = DataFrame(
             np.random.rand(4, 5), index=list("abcd"), columns=list("ABCDE")
         )
-        with ensure_clean_path(self.path) as filename:
+        with ensure_clean_path(setup_path) as filename:
             path_obj = Path(filename)

             expected.to_hdf(path_obj, "df", mode="a")
@@ -4687,7 +4699,7 @@ def test_read_from_pathlib_path(self):
         tm.assert_frame_equal(expected, actual)

     @td.skip_if_no("py.path")
-    def test_read_from_py_localpath(self):
+    def test_read_from_py_localpath(self, setup_path):

         # GH11773
         from py.path import local as LocalPath

@@ -4695,7 +4707,7 @@ def test_read_from_py_localpath(self):
         expected = DataFrame(
             np.random.rand(4, 5), index=list("abcd"), columns=list("ABCDE")
         )
-        with ensure_clean_path(self.path) as filename:
+        with ensure_clean_path(setup_path) as filename:
             path_obj = LocalPath(filename)

             expected.to_hdf(path_obj, "df", mode="a")
@@ -4703,11 +4715,11 @@ def test_read_from_py_localpath(self):

         tm.assert_frame_equal(expected, actual)

-    def test_query_long_float_literal(self):
+    def test_query_long_float_literal(self, setup_path):
         # GH 14241
         df = pd.DataFrame({"A": [1000000000.0009, 1000000000.0011, 1000000000.0015]})

-        with ensure_clean_store(self.path) as store:
+        with ensure_clean_store(setup_path) as store:
             store.append("test", df, format="table", data_columns=True)

             cutoff = 1000000000.0006
@@ -4724,7 +4736,7 @@ def test_query_long_float_literal(self):
             expected = df.loc[[1], :]
             tm.assert_frame_equal(expected, result)

-    def test_query_compare_column_type(self):
+    def test_query_compare_column_type(self, setup_path):
         # GH 15492
         df = pd.DataFrame(
             {
@@ -4736,7 +4748,7 @@ def test_query_compare_column_type(self):
             columns=["date", "real_date", "float", "int"],
         )

-        with ensure_clean_store(self.path) as store:
+        with ensure_clean_store(setup_path) as store:
             store.append("test", df, format="table", data_columns=True)

             ts = pd.Timestamp("2014-01-01")  # noqa
@@ -4773,12 +4785,12 @@ def test_query_compare_column_type(self):
                 tm.assert_frame_equal(expected, result)

     @pytest.mark.parametrize("format", ["fixed", "table"])
-    def test_read_hdf_series_mode_r(self, format):
+    def test_read_hdf_series_mode_r(self, format, setup_path):
         # GH 16583
         # Tests that reading a Series saved to an HDF file
         # still works if a mode='r' argument is supplied
         series = tm.makeFloatSeries()

-        with ensure_clean_path(self.path) as path:
+        with ensure_clean_path(setup_path) as path:
             series.to_hdf(path, key="data", format=format)
             result = pd.read_hdf(path, key="data", mode="r")
         tm.assert_series_equal(result, series)
@@ -4836,26 +4848,26 @@ def test_select_empty_where(self, where):
             CategoricalIndex(list("abc")),
         ],
     )
-    def test_to_hdf_multiindex_extension_dtype(self, idx):
+    def test_to_hdf_multiindex_extension_dtype(self, idx, setup_path):
         # GH 7775
         mi = MultiIndex.from_arrays([idx, idx])
         df = pd.DataFrame(0, index=mi, columns=["a"])
-        with ensure_clean_path(self.path) as path:
+        with ensure_clean_path(setup_path) as path:
             with pytest.raises(NotImplementedError, match="Saving a MultiIndex"):
                 df.to_hdf(path, "df")


-class TestHDFComplexValues(Base):
+class TestHDFComplexValues:
     # GH10447

-    def test_complex_fixed(self):
+    def test_complex_fixed(self, setup_path):
         df = DataFrame(
             np.random.rand(4, 5).astype(np.complex64),
             index=list("abcd"),
             columns=list("ABCDE"),
         )

-        with ensure_clean_path(self.path) as path:
+        with ensure_clean_path(setup_path) as path:
             df.to_hdf(path, "df")
             reread = read_hdf(path, "df")
             assert_frame_equal(df, reread)
@@ -4865,19 +4877,19 @@ def test_complex_fixed(self):
             index=list("abcd"),
             columns=list("ABCDE"),
         )
-        with ensure_clean_path(self.path) as path:
+        with ensure_clean_path(setup_path) as path:
             df.to_hdf(path, "df")
             reread = read_hdf(path, "df")
             assert_frame_equal(df, reread)

-    def test_complex_table(self):
+    def test_complex_table(self, setup_path):
         df = DataFrame(
             np.random.rand(4, 5).astype(np.complex64),
             index=list("abcd"),
             columns=list("ABCDE"),
         )

-        with ensure_clean_path(self.path) as path:
+        with ensure_clean_path(setup_path) as path:
             df.to_hdf(path, "df", format="table")
             reread = read_hdf(path, "df")
             assert_frame_equal(df, reread)
@@ -4888,13 +4900,13 @@ def test_complex_table(self):
             columns=list("ABCDE"),
         )

-        with ensure_clean_path(self.path) as path:
+        with ensure_clean_path(setup_path) as path:
             df.to_hdf(path, "df", format="table", mode="w")
             reread = read_hdf(path, "df")
             assert_frame_equal(df, reread)

     @xfail_non_writeable
-    def test_complex_mixed_fixed(self):
+    def test_complex_mixed_fixed(self, setup_path):
         complex64 = np.array(
             [1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j], dtype=np.complex64
         )
@@ -4911,12 +4923,12 @@ def test_complex_mixed_fixed(self):
             },
             index=list("abcd"),
         )
-        with ensure_clean_path(self.path) as path:
+        with ensure_clean_path(setup_path) as path:
             df.to_hdf(path, "df")
             reread = read_hdf(path, "df")
             assert_frame_equal(df, reread)

-    def test_complex_mixed_table(self):
+    def test_complex_mixed_table(self, setup_path):
         complex64 = np.array(
             [1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j], dtype=np.complex64
         )
@@ -4934,17 +4946,17 @@ def test_complex_mixed_table(self):
             index=list("abcd"),
         )

-        with ensure_clean_store(self.path) as store:
+        with ensure_clean_store(setup_path) as store:
             store.append("df", df, data_columns=["A", "B"])
             result = store.select("df", where="A>2")
             assert_frame_equal(df.loc[df.A > 2], result)

-        with ensure_clean_path(self.path) as path:
+        with ensure_clean_path(setup_path) as path:
             df.to_hdf(path, "df", format="table")
             reread = read_hdf(path, "df")
             assert_frame_equal(df, reread)

-    def test_complex_across_dimensions_fixed(self):
+    def test_complex_across_dimensions_fixed(self, setup_path):
         with catch_warnings(record=True):
             complex128 = np.array([1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j])
             s = Series(complex128, index=list("abcd"))
@@ -4953,12 +4965,12 @@ def test_complex_across_dimensions_fixed(self, setup_path):
             objs = [s, df]
             comps = [tm.assert_series_equal, tm.assert_frame_equal]
             for obj, comp in zip(objs, comps):
-                with ensure_clean_path(self.path) as path:
+                with ensure_clean_path(setup_path) as path:
                     obj.to_hdf(path, "obj", format="fixed")
                     reread = read_hdf(path, "obj")
                     comp(obj, reread)

-    def test_complex_across_dimensions(self):
+    def test_complex_across_dimensions(self, setup_path):
         complex128 = np.array([1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j])
         s = Series(complex128, index=list("abcd"))
         df = DataFrame({"A": s, "B": s})
@@ -4968,12 +4980,12 @@ def test_complex_across_dimensions(self, setup_path):
         objs = [df]
         comps = [tm.assert_frame_equal]
         for obj, comp in zip(objs, comps):
-            with ensure_clean_path(self.path) as path:
+            with ensure_clean_path(setup_path) as path:
                 obj.to_hdf(path, "obj", format="table")
                 reread = read_hdf(path, "obj")
                 comp(obj, reread)

-    def test_complex_indexing_error(self):
+    def test_complex_indexing_error(self, setup_path):
         complex128 = np.array(
             [1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j], dtype=np.complex128
         )
@@ -4981,36 +4993,36 @@ def test_complex_indexing_error(self, setup_path):
             {"A": [1, 2, 3, 4], "B": ["a", "b", "c", "d"], "C": complex128},
             index=list("abcd"),
         )
-        with ensure_clean_store(self.path) as store:
+        with ensure_clean_store(setup_path) as store:
             with pytest.raises(TypeError):
                 store.append("df", df, data_columns=["C"])

-    def test_complex_series_error(self):
+    def test_complex_series_error(self, setup_path):
         complex128 = np.array([1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j])
         s = Series(complex128, index=list("abcd"))

-        with ensure_clean_path(self.path) as path:
+        with ensure_clean_path(setup_path) as path:
             with pytest.raises(TypeError):
                 s.to_hdf(path, "obj", format="t")

-        with ensure_clean_path(self.path) as path:
+        with ensure_clean_path(setup_path) as path:
             s.to_hdf(path, "obj", format="t", index=False)
             reread = read_hdf(path, "obj")
             tm.assert_series_equal(s, reread)

-    def test_complex_append(self):
+    def test_complex_append(self, setup_path):
         df = DataFrame(
             {"a": np.random.randn(100).astype(np.complex128), "b": np.random.randn(100)}
         )

-        with ensure_clean_store(self.path) as store:
+        with ensure_clean_store(setup_path) as store:
             store.append("df", df, data_columns=["b"])
             store.append("df", df)
             result = store.select("df")
             assert_frame_equal(pd.concat([df, df], 0), result)


-class TestTimezones(Base):
+class TestTimezones:
     def _compare_with_tz(self, a, b):
         tm.assert_frame_equal(a, b)

@@ -5024,7 +5037,7 @@ def _compare_with_tz(self, a, b):
                 "invalid tz comparison [{a_e}] [{b_e}]".format(a_e=a_e, b_e=b_e)
             )

-    def test_append_with_timezones_dateutil(self):
+    def test_append_with_timezones_dateutil(self, setup_path):

         from datetime import timedelta

@@ -5035,7 +5048,7 @@ def test_append_with_timezones_dateutil(self):
         gettz = lambda x: maybe_get_tz("dateutil/" + x)

         # as columns
-        with ensure_clean_store(self.path) as store:
+        with ensure_clean_store(setup_path) as store:
             _maybe_remove(store, "df_tz")

             df = DataFrame(
@@ -5101,7 +5114,7 @@ def test_append_with_timezones_dateutil(self):
                 store.append("df_tz", df)

         # as index
-        with ensure_clean_store(self.path) as store:
+        with ensure_clean_store(setup_path) as store:

             # GH 4098 example
             df = DataFrame(
@@ -5125,12 +5138,12 @@ def test_append_with_timezones_dateutil(self):
             result = store.select("df")
             assert_frame_equal(result, df)

-    def test_append_with_timezones_pytz(self):
+    def test_append_with_timezones_pytz(self, setup_path):

         from datetime import timedelta

         # as columns
-        with ensure_clean_store(self.path) as store:
+        with ensure_clean_store(setup_path) as store:

             _maybe_remove(store, "df_tz")
             df = DataFrame(
@@ -5195,7 +5208,7 @@ def test_append_with_timezones_pytz(self):
                 store.append("df_tz", df)

         # as index
-        with ensure_clean_store(self.path) as store:
+        with ensure_clean_store(setup_path) as store:

             # GH 4098 example
             df = DataFrame(
@@ -5219,7 +5232,7 @@ def test_append_with_timezones_pytz(self):
             result = store.select("df")
             assert_frame_equal(result, df)

-    def test_tseries_select_index_column(self):
+    def test_tseries_select_index_column(self, setup_path):
         # GH7777
         # selecting a UTC datetimeindex column did
         # not preserve UTC tzinfo set before storing

         # check that no tz still works
         rng = date_range("1/1/2000", "1/30/2000")
         frame = DataFrame(np.random.randn(len(rng), 4), index=rng)

-        with ensure_clean_store(self.path) as store:
+        with ensure_clean_store(setup_path) as store:
             store.append("frame", frame)
             result = store.select_column("frame", "index")
             assert rng.tz == DatetimeIndex(result.values).tz
@@ -5237,7 +5250,7 @@ def test_tseries_select_index_column(self):
         rng = date_range("1/1/2000", "1/30/2000", tz="UTC")
         frame = DataFrame(np.random.randn(len(rng), 4), index=rng)

-        with ensure_clean_store(self.path) as store:
+        with ensure_clean_store(setup_path) as store:
             store.append("frame", frame)
             result = store.select_column("frame", "index")
             assert rng.tz == result.dt.tz
@@ -5246,13 +5259,13 @@ def test_tseries_select_index_column(self):
         rng = date_range("1/1/2000", "1/30/2000", tz="US/Eastern")
         frame = DataFrame(np.random.randn(len(rng), 4), index=rng)

-        with ensure_clean_store(self.path) as store:
+        with ensure_clean_store(setup_path) as store:
             store.append("frame", frame)
             result = store.select_column("frame", "index")
             assert rng.tz == result.dt.tz

-    def test_timezones_fixed(self):
-        with ensure_clean_store(self.path) as store:
+    def test_timezones_fixed(self, setup_path):
+        with ensure_clean_store(setup_path) as store:

             # index
             rng = date_range("1/1/2000", "1/30/2000", tz="US/Eastern")
@@ -5277,24 +5290,24 @@ def test_timezones_fixed(self):
             result = store["df"]
             assert_frame_equal(result, df)

-    def test_fixed_offset_tz(self):
+    def test_fixed_offset_tz(self, setup_path):
         rng = date_range("1/1/2000 00:00:00-07:00", "1/30/2000 00:00:00-07:00")
         frame = DataFrame(np.random.randn(len(rng), 4), index=rng)

-        with ensure_clean_store(self.path) as store:
+        with ensure_clean_store(setup_path) as store:
             store["frame"] = frame
             recons = store["frame"]
             tm.assert_index_equal(recons.index, rng)
             assert rng.tz == recons.index.tz

     @td.skip_if_windows
-    def test_store_timezone(self):
+    def test_store_timezone(self, setup_path):
         # GH2852
         # issue storing datetime.date with a timezone as it resets when read
         # back in a new timezone

         # original method
-        with ensure_clean_store(self.path) as store:
+        with ensure_clean_store(setup_path) as store:

             today = datetime.date(2013, 9, 10)
             df = DataFrame([1, 2, 3], index=[today, today, today])
@@ -5303,7 +5316,7 @@ def test_store_timezone(self):
             assert_frame_equal(result, df)

         # with tz setting
-        with ensure_clean_store(self.path) as store:
+        with ensure_clean_store(setup_path) as store:

             with set_timezone("EST5EDT"):
                 today = datetime.date(2013, 9, 10)
@@ -5315,7 +5328,7 @@ def test_store_timezone(self):

             assert_frame_equal(result, df)

-    def test_legacy_datetimetz_object(self, datapath):
+    def test_legacy_datetimetz_object(self, datapath, setup_path):
         # legacy from < 0.17.0
         # 8260
         expected = DataFrame(
@@ -5331,9 +5344,9 @@ def test_legacy_datetimetz_object(self, datapath):
             result = store["df"]
             assert_frame_equal(result, expected)

-    def test_dst_transitions(self):
+    def test_dst_transitions(self, setup_path):
         # make sure we are not failing on transitions
-        with ensure_clean_store(self.path) as store:
+        with ensure_clean_store(setup_path) as store:
             times = pd.date_range(
                 "2013-10-26 23:00",
                 "2013-10-27 01:00",
@@ -5349,7 +5362,7 @@ def test_dst_transitions(self):
             result = store.select("df")
             assert_frame_equal(result, df)

-    def test_read_with_where_tz_aware_index(self):
+    def test_read_with_where_tz_aware_index(self, setup_path):
         # GH 11926
         periods = 10
         dts = pd.date_range("20151201", periods=periods, freq="D", tz="UTC")
@@ -5357,13 +5370,13 @@ def test_read_with_where_tz_aware_index(self):
         expected = pd.DataFrame({"MYCOL": 0}, index=mi)

         key = "mykey"
-        with ensure_clean_path(self.path) as path:
+        with ensure_clean_path(setup_path) as path:
             with pd.HDFStore(path) as store:
                 store.append(key, expected, format="table", append=True)
             result = pd.read_hdf(path, key, where="DATE > 20151130")
             assert_frame_equal(result, expected)

-    def test_py2_created_with_datetimez(self, datapath):
+    def test_py2_created_with_datetimez(self, datapath, setup_path):
         # The test HDF5 file was created in Python 2, but could not be read in
         # Python 3.
         #

From c5bc9285293e7d5075ea9d14d728bced03924e30 Mon Sep 17 00:00:00 2001
From: Josiah Baker
Date: Tue, 1 Oct 2019 12:05:53 -0400
Subject: [PATCH 22/22] fix merge conflict
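
For reference: the hunks above drop the Base test class (with its self.path
attribute) from TestHDFComplexValues and TestTimezones and instead take the
temporary file name from a setup_path argument injected by pytest. The
fixture definition itself is not included in this excerpt, so the following
is only a minimal sketch of what the converted tests assume; the fixture
name matches the tests, but the uuid-based file-name scheme is an
illustrative assumption, not the PR's actual definition:

    import uuid

    import pytest


    @pytest.fixture
    def setup_path():
        """Unique HDF5 file name handed to each test that requests it."""
        # The ensure_clean_store / ensure_clean_path helpers used in the
        # tests open a store (or file) at the given path and delete it again
        # when the context manager exits, so all the fixture has to supply
        # is a collision-free name per test invocation.
        return "tmp.__{}__.h5".format(uuid.uuid4())

Any test that declares a setup_path parameter then receives a fresh path
automatically, which is what allows every self.path reference in the old
Base-derived classes to be removed.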