
BENCH: collect low-dependency asvs #39917


Merged: 2 commits, Mar 1, 2021
15 changes: 0 additions & 15 deletions asv_bench/benchmarks/algorithms.py
@@ -2,8 +2,6 @@

import numpy as np

from pandas._libs import lib

import pandas as pd

from .pandas_vb_common import tm
@@ -16,19 +14,6 @@
pass


class MaybeConvertObjects:
def setup(self):
N = 10 ** 5

data = list(range(N))
data[0] = pd.NaT
data = np.array(data)
self.data = data

def time_maybe_convert_objects(self):
lib.maybe_convert_objects(self.data)


class Factorize:

params = [
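For orientation, every class in this diff follows asv's discovery conventions: methods prefixed with `time_` are collected and timed, and `setup` runs before each measurement. A minimal sketch of the pattern (not code from this PR):

```python
import numpy as np


class ExampleBenchmark:
    # asv calls setup() before timing each method in this class.
    def setup(self):
        self.arr = np.arange(10**5)

    # Any method whose name starts with "time_" is collected and timed.
    def time_sum(self):
        self.arr.sum()
```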
18 changes: 0 additions & 18 deletions asv_bench/benchmarks/attrs_caching.py
@@ -3,11 +3,6 @@
import pandas as pd
from pandas import DataFrame

try:
from pandas.util import cache_readonly
except ImportError:
from pandas.util.decorators import cache_readonly

try:
from pandas.core.construction import extract_array
except ImportError:
@@ -53,17 +48,4 @@ def time_extract_array_numpy(self, dtype):
extract_array(self.series, extract_numpy=True)


class CacheReadonly:
def setup(self):
class Foo:
@cache_readonly
def prop(self):
return 5

self.obj = Foo()

def time_cache_readonly(self):
self.obj.prop


from .pandas_vb_common import setup # noqa: F401 isort:skip
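The try/except guards in this file exist because asv runs the suite against historical commits, where modules may live in older locations. The removed `cache_readonly` block above shows the pattern; here it is again, annotated for clarity:

```python
try:
    # Location in newer pandas versions.
    from pandas.util import cache_readonly
except ImportError:
    # Fallback location on older commits that asv may still benchmark.
    from pandas.util.decorators import cache_readonly
```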
22 changes: 0 additions & 22 deletions asv_bench/benchmarks/dtypes.py
@@ -13,7 +13,6 @@
from .pandas_vb_common import (
datetime_dtypes,
extension_dtypes,
lib,
numeric_dtypes,
string_dtypes,
)
@@ -49,27 +48,6 @@ def time_pandas_dtype_invalid(self, dtype):
pass


class InferDtypes:
param_names = ["dtype"]
data_dict = {
"np-object": np.array([1] * 100000, dtype="O"),
"py-object": [1] * 100000,
"np-null": np.array([1] * 50000 + [np.nan] * 50000),
"py-null": [1] * 50000 + [None] * 50000,
"np-int": np.array([1] * 100000, dtype=int),
"np-floating": np.array([1.0] * 100000, dtype=float),
"empty": [],
"bytes": [b"a"] * 100000,
}
params = list(data_dict.keys())

def time_infer_skipna(self, dtype):
lib.infer_dtype(self.data_dict[dtype], skipna=True)

def time_infer(self, dtype):
lib.infer_dtype(self.data_dict[dtype], skipna=False)


class SelectDtypes:

params = [
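Classes such as InferDtypes and SelectDtypes rely on asv's parametrization: each entry in `params` is passed positionally to `setup` and to every timed method, and `param_names` labels that axis in the results. A minimal illustrative sketch (not from this PR):

```python
import numpy as np


class ParametrizedExample:
    # asv runs each time_* method once per entry in params.
    params = ["int64", "float64"]
    param_names = ["dtype"]

    def setup(self, dtype):
        self.arr = np.ones(10**5, dtype=dtype)

    def time_astype_object(self, dtype):
        self.arr.astype(object)
```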
1 change: 1 addition & 0 deletions asv_bench/benchmarks/gil.py
@@ -125,6 +125,7 @@ def time_take1d(self, dtype):


class ParallelKth:
# This depends exclusively on code in _libs/, could go in libs.py

number = 1
repeat = 5
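The `number` and `repeat` attributes on ParallelKth are asv timing controls: `number` is how many times the benchmark body runs per sample, and `repeat` is how many samples are collected. Setting `number = 1` keeps asv from autotuning the call count, which matters for expensive operations. A brief sketch:

```python
class ExpensiveExample:
    # One call per sample, five samples, instead of asv's autotuned defaults.
    number = 1
    repeat = 5

    def time_slow_operation(self):
        sum(range(10**7))
```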
7 changes: 7 additions & 0 deletions asv_bench/benchmarks/indexing_engines.py
@@ -1,3 +1,10 @@
"""
Benchmarks in this file depend exclusively on code in _libs/.

If a PR does not edit anything in _libs, it is very unlikely that benchmarks
in this file will be affected.
"""

import numpy as np

from pandas._libs import index as libindex
216 changes: 216 additions & 0 deletions asv_bench/benchmarks/inference.py
@@ -1,8 +1,20 @@
"""
The functions benchmarked in this file depend _almost_ exclusively on
_libs, but not in a way that is easy to formalize.

If a PR does not change anything in pandas/_libs/ or pandas/core/tools/, then
it is likely that these benchmarks will be unaffected.
"""

import numpy as np

from pandas import (
NaT,
Series,
date_range,
to_datetime,
to_numeric,
to_timedelta,
)

from .pandas_vb_common import (
@@ -69,6 +81,9 @@ def time_downcast(self, dtype, downcast):


class MaybeConvertNumeric:
# maybe_convert_numeric depends _exclusively_ on _libs, could
# go in benchmarks/libs.py

def setup_cache(self):
N = 10 ** 6
arr = np.repeat([2 ** 63], N) + np.arange(N).astype("uint64")
@@ -81,4 +96,205 @@ def time_convert(self, data):
lib.maybe_convert_numeric(data, set(), coerce_numeric=False)


class MaybeConvertObjects:
# maybe_convert_objects depends _almost_ exclusively on _libs, but
# does have some run-time imports from outside of _libs

def setup(self):
N = 10 ** 5

data = list(range(N))
data[0] = NaT
data = np.array(data)
self.data = data

def time_maybe_convert_objects(self):
lib.maybe_convert_objects(self.data)


class ToDatetimeFromIntsFloats:
def setup(self):
self.ts_sec = Series(range(1521080307, 1521685107), dtype="int64")
self.ts_sec_float = self.ts_sec.astype("float64")

self.ts_nanosec = 1_000_000 * self.ts_sec
self.ts_nanosec_float = self.ts_nanosec.astype("float64")

# speed of int64 and float64 paths should be comparable

def time_nanosec_int64(self):
to_datetime(self.ts_nanosec, unit="ns")

def time_nanosec_float64(self):
to_datetime(self.ts_nanosec_float, unit="ns")

def time_sec_int64(self):
to_datetime(self.ts_sec, unit="s")

def time_sec_float64(self):
to_datetime(self.ts_sec_float, unit="s")


class ToDatetimeYYYYMMDD:
def setup(self):
rng = date_range(start="1/1/2000", periods=10000, freq="D")
self.stringsD = Series(rng.strftime("%Y%m%d"))

def time_format_YYYYMMDD(self):
to_datetime(self.stringsD, format="%Y%m%d")


class ToDatetimeCacheSmallCount:

params = ([True, False], [50, 500, 5000, 100000])
param_names = ["cache", "count"]

def setup(self, cache, count):
rng = date_range(start="1/1/1971", periods=count)
self.unique_date_strings = rng.strftime("%Y-%m-%d").tolist()

def time_unique_date_strings(self, cache, count):
to_datetime(self.unique_date_strings, cache=cache)


class ToDatetimeISO8601:
def setup(self):
rng = date_range(start="1/1/2000", periods=20000, freq="H")
self.strings = rng.strftime("%Y-%m-%d %H:%M:%S").tolist()
self.strings_nosep = rng.strftime("%Y%m%d %H:%M:%S").tolist()
self.strings_tz_space = [
x.strftime("%Y-%m-%d %H:%M:%S") + " -0800" for x in rng
]

def time_iso8601(self):
to_datetime(self.strings)

def time_iso8601_nosep(self):
to_datetime(self.strings_nosep)

def time_iso8601_format(self):
to_datetime(self.strings, format="%Y-%m-%d %H:%M:%S")

def time_iso8601_format_no_sep(self):
to_datetime(self.strings_nosep, format="%Y%m%d %H:%M:%S")

def time_iso8601_tz_spaceformat(self):
to_datetime(self.strings_tz_space)


class ToDatetimeNONISO8601:
def setup(self):
N = 10000
half = N // 2
ts_string_1 = "March 1, 2018 12:00:00+0400"
ts_string_2 = "March 1, 2018 12:00:00+0500"
self.same_offset = [ts_string_1] * N
self.diff_offset = [ts_string_1] * half + [ts_string_2] * half

def time_same_offset(self):
to_datetime(self.same_offset)

def time_different_offset(self):
to_datetime(self.diff_offset)


class ToDatetimeFormatQuarters:
def setup(self):
self.s = Series(["2Q2005", "2Q05", "2005Q1", "05Q1"] * 10000)

def time_infer_quarter(self):
to_datetime(self.s)


class ToDatetimeFormat:
def setup(self):
N = 100000
self.s = Series(["19MAY11", "19MAY11:00:00:00"] * N)
self.s2 = self.s.str.replace(":\\S+$", "")

self.same_offset = ["10/11/2018 00:00:00.045-07:00"] * N
self.diff_offset = [
f"10/11/2018 00:00:00.045-0{offset}:00" for offset in range(10)
] * (N // 10)

def time_exact(self):
to_datetime(self.s2, format="%d%b%y")

def time_no_exact(self):
to_datetime(self.s, format="%d%b%y", exact=False)

def time_same_offset(self):
to_datetime(self.same_offset, format="%m/%d/%Y %H:%M:%S.%f%z")

def time_different_offset(self):
to_datetime(self.diff_offset, format="%m/%d/%Y %H:%M:%S.%f%z")

def time_same_offset_to_utc(self):
to_datetime(self.same_offset, format="%m/%d/%Y %H:%M:%S.%f%z", utc=True)

def time_different_offset_to_utc(self):
to_datetime(self.diff_offset, format="%m/%d/%Y %H:%M:%S.%f%z", utc=True)


class ToDatetimeCache:

params = [True, False]
param_names = ["cache"]

def setup(self, cache):
N = 10000
self.unique_numeric_seconds = list(range(N))
self.dup_numeric_seconds = [1000] * N
self.dup_string_dates = ["2000-02-11"] * N
self.dup_string_with_tz = ["2000-02-11 15:00:00-0800"] * N

def time_unique_seconds_and_unit(self, cache):
to_datetime(self.unique_numeric_seconds, unit="s", cache=cache)

def time_dup_seconds_and_unit(self, cache):
to_datetime(self.dup_numeric_seconds, unit="s", cache=cache)

def time_dup_string_dates(self, cache):
to_datetime(self.dup_string_dates, cache=cache)

def time_dup_string_dates_and_format(self, cache):
to_datetime(self.dup_string_dates, format="%Y-%m-%d", cache=cache)

def time_dup_string_tzoffset_dates(self, cache):
to_datetime(self.dup_string_with_tz, cache=cache)


class ToTimedelta:
def setup(self):
self.ints = np.random.randint(0, 60, size=10000)
self.str_days = []
self.str_seconds = []
for i in self.ints:
self.str_days.append(f"{i} days")
self.str_seconds.append(f"00:00:{i:02d}")

def time_convert_int(self):
to_timedelta(self.ints, unit="s")

def time_convert_string_days(self):
to_timedelta(self.str_days)

def time_convert_string_seconds(self):
to_timedelta(self.str_seconds)


class ToTimedeltaErrors:

params = ["coerce", "ignore"]
param_names = ["errors"]

def setup(self, errors):
ints = np.random.randint(0, 60, size=10000)
self.arr = [f"{i} days" for i in ints]
self.arr[-1] = "apple"

def time_convert(self, errors):
to_timedelta(self.arr, errors=errors)


from .pandas_vb_common import setup # noqa: F401 isort:skip
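One nuance worth noting in MaybeConvertNumeric above: `setup_cache` is not `setup`. asv runs `setup_cache` once per environment and passes its return value as the first argument to each timed method, which is why `time_convert` receives `data` instead of reading from `self`. A minimal sketch of the mechanism (not from this PR):

```python
class CachedSetupExample:
    def setup_cache(self):
        # Runs once; the return value is cached by asv and passed to
        # the methods below as their first argument.
        return list(range(10**6))

    def time_sum(self, data):
        sum(data)
```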