From 837eb71031ec9041aa94c073d3ffb51a511a11bd Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 19 Feb 2020 10:22:54 -0800 Subject: [PATCH 1/4] Rename binary_ops->arithmetic --- asv_bench/benchmarks/{binary_ops.py => arithmetic.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename asv_bench/benchmarks/{binary_ops.py => arithmetic.py} (100%) diff --git a/asv_bench/benchmarks/binary_ops.py b/asv_bench/benchmarks/arithmetic.py similarity index 100% rename from asv_bench/benchmarks/binary_ops.py rename to asv_bench/benchmarks/arithmetic.py From aba30f1f06bccfe797c448729689a31e9060ae59 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 19 Feb 2020 10:30:02 -0800 Subject: [PATCH 2/4] REF: collect arithmetic benchmarks --- asv_bench/benchmarks/arithmetic.py | 146 +++++++++++++++++++++++++++ asv_bench/benchmarks/categoricals.py | 12 --- asv_bench/benchmarks/index_object.py | 26 ----- asv_bench/benchmarks/inference.py | 49 +-------- asv_bench/benchmarks/offset.py | 80 --------------- 5 files changed, 148 insertions(+), 165 deletions(-) delete mode 100644 asv_bench/benchmarks/offset.py diff --git a/asv_bench/benchmarks/arithmetic.py b/asv_bench/benchmarks/arithmetic.py index 64e067d25a454..c807d3fc70af9 100644 --- a/asv_bench/benchmarks/arithmetic.py +++ b/asv_bench/benchmarks/arithmetic.py @@ -1,14 +1,23 @@ import operator +import warnings import numpy as np +import pandas as pd from pandas import DataFrame, Series, date_range +import pandas._testing as tm from pandas.core.algorithms import checked_add_with_arr +from .pandas_vb_common import numeric_dtypes + try: import pandas.core.computation.expressions as expr except ImportError: import pandas.computation.expressions as expr +try: + import pandas.tseries.holiday +except ImportError: + pass class IntFrameWithScalar: @@ -151,6 +160,89 @@ def time_timestamp_ops_diff_with_shift(self, tz): self.s - self.s.shift() +class CategoricalComparisons: + params = ["__lt__", "__le__", "__eq__", "__ne__", "__ge__", 
"__gt__"] + param_names = ["op"] + + def setup(self, op): + N = 10 ** 5 + self.cat = pd.Categorical(list("aabbcd") * N, ordered=True) + + def time_categorical_op(self, op): + getattr(self.cat, op)("b") + + +class IndexArithmetic: + + params = ["float", "int"] + param_names = ["dtype"] + + def setup(self, dtype): + N = 10 ** 6 + indexes = {"int": "makeIntIndex", "float": "makeFloatIndex"} + self.index = getattr(tm, indexes[dtype])(N) + + def time_add(self, dtype): + self.index + 2 + + def time_subtract(self, dtype): + self.index - 2 + + def time_multiply(self, dtype): + self.index * 2 + + def time_divide(self, dtype): + self.index / 2 + + def time_modulo(self, dtype): + self.index % 2 + + +class NumericInferOps: + # from GH 7332 + params = numeric_dtypes + param_names = ["dtype"] + + def setup(self, dtype): + N = 5 * 10 ** 5 + self.df = DataFrame( + {"A": np.arange(N).astype(dtype), "B": np.arange(N).astype(dtype)} + ) + + def time_add(self, dtype): + self.df["A"] + self.df["B"] + + def time_subtract(self, dtype): + self.df["A"] - self.df["B"] + + def time_multiply(self, dtype): + self.df["A"] * self.df["B"] + + def time_divide(self, dtype): + self.df["A"] / self.df["B"] + + def time_modulo(self, dtype): + self.df["A"] % self.df["B"] + + +class DateInferOps: + # from GH 7332 + def setup_cache(self): + N = 5 * 10 ** 5 + df = DataFrame({"datetime64": np.arange(N).astype("datetime64[ms]")}) + df["timedelta"] = df["datetime64"] - df["datetime64"] + return df + + def time_subtract_datetimes(self, df): + df["datetime64"] - df["datetime64"] + + def time_timedelta_plus_datetime(self, df): + df["timedelta"] + df["datetime64"] + + def time_add_timedeltas(self, df): + df["timedelta"] + df["timedelta"] + + class AddOverflowScalar: params = [1, -1, 0] @@ -188,4 +280,58 @@ def time_add_overflow_both_arg_nan(self): ) +hcal = pd.tseries.holiday.USFederalHolidayCalendar() +# These offsets currently raise a NotImplimentedError with .apply_index() +non_apply = [ + pd.offsets.Day(), + 
pd.offsets.BYearEnd(), + pd.offsets.BYearBegin(), + pd.offsets.BQuarterEnd(), + pd.offsets.BQuarterBegin(), + pd.offsets.BMonthEnd(), + pd.offsets.BMonthBegin(), + pd.offsets.CustomBusinessDay(), + pd.offsets.CustomBusinessDay(calendar=hcal), + pd.offsets.CustomBusinessMonthBegin(calendar=hcal), + pd.offsets.CustomBusinessMonthEnd(calendar=hcal), + pd.offsets.CustomBusinessMonthEnd(calendar=hcal), +] +other_offsets = [ + pd.offsets.YearEnd(), + pd.offsets.YearBegin(), + pd.offsets.QuarterEnd(), + pd.offsets.QuarterBegin(), + pd.offsets.MonthEnd(), + pd.offsets.MonthBegin(), + pd.offsets.DateOffset(months=2, days=2), + pd.offsets.BusinessDay(), + pd.offsets.SemiMonthEnd(), + pd.offsets.SemiMonthBegin(), +] +offsets = non_apply + other_offsets + + +class OffsetArrayArithmetic: + + params = offsets + param_names = ["offset"] + + def setup(self, offset): + N = 10000 + rng = pd.date_range(start="1/1/2000", periods=N, freq="T") + self.rng = rng + self.ser = pd.Series(rng) + + def time_add_series_offset(self, offset): + with warnings.catch_warnings(record=True): + self.ser + offset + + def time_add_dti_offset(self, offset): + with warnings.catch_warnings(record=True): + self.rng + offset + + def time_apply_index(self, offset): + offset.apply_index(self.rng) + + from .pandas_vb_common import setup # noqa: F401 isort:skip diff --git a/asv_bench/benchmarks/categoricals.py b/asv_bench/benchmarks/categoricals.py index 1dcd52ac074a6..6f43a6fd3fc9b 100644 --- a/asv_bench/benchmarks/categoricals.py +++ b/asv_bench/benchmarks/categoricals.py @@ -63,18 +63,6 @@ def time_existing_series(self): pd.Categorical(self.series) -class CategoricalOps: - params = ["__lt__", "__le__", "__eq__", "__ne__", "__ge__", "__gt__"] - param_names = ["op"] - - def setup(self, op): - N = 10 ** 5 - self.cat = pd.Categorical(list("aabbcd") * N, ordered=True) - - def time_categorical_op(self, op): - getattr(self.cat, op)("b") - - class Concat: def setup(self): N = 10 ** 5 diff --git 
a/asv_bench/benchmarks/index_object.py b/asv_bench/benchmarks/index_object.py index 103141545504b..cf51a4d35f805 100644 --- a/asv_bench/benchmarks/index_object.py +++ b/asv_bench/benchmarks/index_object.py @@ -63,32 +63,6 @@ def time_is_dates_only(self): self.dr._is_dates_only -class Ops: - - params = ["float", "int"] - param_names = ["dtype"] - - def setup(self, dtype): - N = 10 ** 6 - indexes = {"int": "makeIntIndex", "float": "makeFloatIndex"} - self.index = getattr(tm, indexes[dtype])(N) - - def time_add(self, dtype): - self.index + 2 - - def time_subtract(self, dtype): - self.index - 2 - - def time_multiply(self, dtype): - self.index * 2 - - def time_divide(self, dtype): - self.index / 2 - - def time_modulo(self, dtype): - self.index % 2 - - class Range: def setup(self): self.idx_inc = RangeIndex(start=0, stop=10 ** 7, step=3) diff --git a/asv_bench/benchmarks/inference.py b/asv_bench/benchmarks/inference.py index 1a8d5ede52512..40b064229ae49 100644 --- a/asv_bench/benchmarks/inference.py +++ b/asv_bench/benchmarks/inference.py @@ -1,53 +1,8 @@ import numpy as np -from pandas import DataFrame, Series, to_numeric +from pandas import Series, to_numeric -from .pandas_vb_common import lib, numeric_dtypes, tm - - -class NumericInferOps: - # from GH 7332 - params = numeric_dtypes - param_names = ["dtype"] - - def setup(self, dtype): - N = 5 * 10 ** 5 - self.df = DataFrame( - {"A": np.arange(N).astype(dtype), "B": np.arange(N).astype(dtype)} - ) - - def time_add(self, dtype): - self.df["A"] + self.df["B"] - - def time_subtract(self, dtype): - self.df["A"] - self.df["B"] - - def time_multiply(self, dtype): - self.df["A"] * self.df["B"] - - def time_divide(self, dtype): - self.df["A"] / self.df["B"] - - def time_modulo(self, dtype): - self.df["A"] % self.df["B"] - - -class DateInferOps: - # from GH 7332 - def setup_cache(self): - N = 5 * 10 ** 5 - df = DataFrame({"datetime64": np.arange(N).astype("datetime64[ms]")}) - df["timedelta"] = df["datetime64"] - 
df["datetime64"] - return df - - def time_subtract_datetimes(self, df): - df["datetime64"] - df["datetime64"] - - def time_timedelta_plus_datetime(self, df): - df["timedelta"] + df["datetime64"] - - def time_add_timedeltas(self, df): - df["timedelta"] + df["timedelta"] +from .pandas_vb_common import lib, tm class ToNumeric: diff --git a/asv_bench/benchmarks/offset.py b/asv_bench/benchmarks/offset.py deleted file mode 100644 index 77ce1b2763bce..0000000000000 --- a/asv_bench/benchmarks/offset.py +++ /dev/null @@ -1,80 +0,0 @@ -import warnings - -import pandas as pd - -try: - import pandas.tseries.holiday -except ImportError: - pass - -hcal = pd.tseries.holiday.USFederalHolidayCalendar() -# These offsets currently raise a NotImplimentedError with .apply_index() -non_apply = [ - pd.offsets.Day(), - pd.offsets.BYearEnd(), - pd.offsets.BYearBegin(), - pd.offsets.BQuarterEnd(), - pd.offsets.BQuarterBegin(), - pd.offsets.BMonthEnd(), - pd.offsets.BMonthBegin(), - pd.offsets.CustomBusinessDay(), - pd.offsets.CustomBusinessDay(calendar=hcal), - pd.offsets.CustomBusinessMonthBegin(calendar=hcal), - pd.offsets.CustomBusinessMonthEnd(calendar=hcal), - pd.offsets.CustomBusinessMonthEnd(calendar=hcal), -] -other_offsets = [ - pd.offsets.YearEnd(), - pd.offsets.YearBegin(), - pd.offsets.QuarterEnd(), - pd.offsets.QuarterBegin(), - pd.offsets.MonthEnd(), - pd.offsets.MonthBegin(), - pd.offsets.DateOffset(months=2, days=2), - pd.offsets.BusinessDay(), - pd.offsets.SemiMonthEnd(), - pd.offsets.SemiMonthBegin(), -] -offsets = non_apply + other_offsets - - -class ApplyIndex: - - params = other_offsets - param_names = ["offset"] - - def setup(self, offset): - N = 10000 - self.rng = pd.date_range(start="1/1/2000", periods=N, freq="T") - - def time_apply_index(self, offset): - offset.apply_index(self.rng) - - -class OffsetSeriesArithmetic: - - params = offsets - param_names = ["offset"] - - def setup(self, offset): - N = 1000 - rng = pd.date_range(start="1/1/2000", periods=N, freq="T") - 
self.data = pd.Series(rng) - - def time_add_offset(self, offset): - with warnings.catch_warnings(record=True): - self.data + offset - - -class OffsetDatetimeIndexArithmetic: - - params = offsets - param_names = ["offset"] - - def setup(self, offset): - N = 1000 - self.data = pd.date_range(start="1/1/2000", periods=N, freq="T") - - def time_add_offset(self, offset): - with warnings.catch_warnings(record=True): - self.data + offset From d13ed041463977e2863d4cbb8060456d17537129 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 19 Feb 2020 10:33:18 -0800 Subject: [PATCH 3/4] REF: collect arithmetic asvs --- asv_bench/benchmarks/arithmetic.py | 23 ++++++++++++++++++++++- asv_bench/benchmarks/timedelta.py | 11 +---------- asv_bench/benchmarks/timeseries.py | 12 ------------ 3 files changed, 23 insertions(+), 23 deletions(-) diff --git a/asv_bench/benchmarks/arithmetic.py b/asv_bench/benchmarks/arithmetic.py index c807d3fc70af9..978d5d5700d10 100644 --- a/asv_bench/benchmarks/arithmetic.py +++ b/asv_bench/benchmarks/arithmetic.py @@ -4,7 +4,7 @@ import numpy as np import pandas as pd -from pandas import DataFrame, Series, date_range +from pandas import DataFrame, Series, Timestamp, date_range, to_timedelta import pandas._testing as tm from pandas.core.algorithms import checked_add_with_arr @@ -160,6 +160,27 @@ def time_timestamp_ops_diff_with_shift(self, tz): self.s - self.s.shift() +class IrregularOps: + def setup(self): + N = 10 ** 5 + idx = date_range(start="1/1/2000", periods=N, freq="s") + s = Series(np.random.randn(N), index=idx) + self.left = s.sample(frac=1) + self.right = s.sample(frac=1) + + def time_add(self): + self.left + self.right + + +class TimedeltaOps: + def setup(self): + self.td = to_timedelta(np.arange(1000000)) + self.ts = Timestamp("2000") + + def time_add_td_ts(self): + self.td + self.ts + + class CategoricalComparisons: params = ["__lt__", "__le__", "__eq__", "__ne__", "__ge__", "__gt__"] param_names = ["op"] diff --git 
a/asv_bench/benchmarks/timedelta.py b/asv_bench/benchmarks/timedelta.py index 37418d752f833..208c8f9d14a5e 100644 --- a/asv_bench/benchmarks/timedelta.py +++ b/asv_bench/benchmarks/timedelta.py @@ -5,7 +5,7 @@ import numpy as np -from pandas import DataFrame, Series, Timestamp, timedelta_range, to_timedelta +from pandas import DataFrame, Series, timedelta_range, to_timedelta class ToTimedelta: @@ -41,15 +41,6 @@ def time_convert(self, errors): to_timedelta(self.arr, errors=errors) -class TimedeltaOps: - def setup(self): - self.td = to_timedelta(np.arange(1000000)) - self.ts = Timestamp("2000") - - def time_add_td_ts(self): - self.td + self.ts - - class DatetimeAccessor: def setup_cache(self): N = 100000 diff --git a/asv_bench/benchmarks/timeseries.py b/asv_bench/benchmarks/timeseries.py index ba0b51922fd31..2f7ea8b9c0873 100644 --- a/asv_bench/benchmarks/timeseries.py +++ b/asv_bench/benchmarks/timeseries.py @@ -262,18 +262,6 @@ def time_get_slice(self, monotonic): self.s[:10000] -class IrregularOps: - def setup(self): - N = 10 ** 5 - idx = date_range(start="1/1/2000", periods=N, freq="s") - s = Series(np.random.randn(N), index=idx) - self.left = s.sample(frac=1) - self.right = s.sample(frac=1) - - def time_add(self): - self.left + self.right - - class Lookup: def setup(self): N = 1500000 From 84a44af9bf8ea572861b04721dd94e41838ea6a3 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 19 Feb 2020 12:43:20 -0800 Subject: [PATCH 4/4] don't call apply_index for all offsets --- asv_bench/benchmarks/arithmetic.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/asv_bench/benchmarks/arithmetic.py b/asv_bench/benchmarks/arithmetic.py index 978d5d5700d10..d1e94f62967f4 100644 --- a/asv_bench/benchmarks/arithmetic.py +++ b/asv_bench/benchmarks/arithmetic.py @@ -351,6 +351,16 @@ def time_add_dti_offset(self, offset): with warnings.catch_warnings(record=True): self.rng + offset + +class ApplyIndex: + params = other_offsets + param_names = ["offset"] + + def 
setup(self, offset): + N = 10000 + rng = pd.date_range(start="1/1/2000", periods=N, freq="T") + self.rng = rng + def time_apply_index(self, offset): offset.apply_index(self.rng)