From c55e348f9b297dbe2b0e216cba60a81a85c55b49 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Mon, 27 Nov 2017 10:17:12 -0800 Subject: [PATCH 1/3] CLN: ASV FromDictwithTimestamp --- asv_bench/benchmarks/frame_ctor.py | 111 +++++++---------------------- 1 file changed, 26 insertions(+), 85 deletions(-) diff --git a/asv_bench/benchmarks/frame_ctor.py b/asv_bench/benchmarks/frame_ctor.py index 5fad7b682c2ed..9da8a483d3a09 100644 --- a/asv_bench/benchmarks/frame_ctor.py +++ b/asv_bench/benchmarks/frame_ctor.py @@ -2,14 +2,12 @@ import pandas.util.testing as tm from pandas import DataFrame, Series, MultiIndex, Timestamp, date_range try: - from pandas.tseries import offsets + from pandas.tseries.offset import Nano, Hour except: + # For compatability with older versions(?) from pandas.core.datetools import * -# ---------------------------------------------------------------------- -# Creation from nested dict - class FromDicts(object): goal_time = 0.2 @@ -17,110 +15,53 @@ class FromDicts(object): def setup(self): np.random.seed(1234) N, K = 5000, 50 - self.index = tm.makeStringIndex(N) - self.columns = tm.makeStringIndex(K) - self.frame = DataFrame(np.random.randn(N, K), - index=self.index, - columns=self.columns) - self.data = self.frame.to_dict() + index = tm.makeStringIndex(N) + columns = tm.makeStringIndex(K) + frame = DataFrame(np.random.randn(N, K), index=index, columns=columns) + self.data = frame.to_dict() self.some_dict = list(self.data.values())[0] - self.dict_list = self.frame.to_dict(orient='records') + self.dict_list = frame.to_dict(orient='records') self.data2 = {i: {j: float(j) for j in range(100)} for i in range(2000)} - def time_frame_ctor_list_of_dict(self): + def time_list_of_dict(self): DataFrame(self.dict_list) - def time_frame_ctor_nested_dict(self): + def time_nested_dict(self): DataFrame(self.data) - def time_series_ctor_from_dict(self): + def time_dict(self): Series(self.some_dict) - def time_frame_ctor_nested_dict_int64(self): + def time_nested_dict_int64(self): # nested dict, integer indexes, regression described in #621 DataFrame(self.data2) -# from a mi-series - class FromSeries(object): + goal_time = 0.2 def setup(self): - self.mi = MultiIndex.from_product([range(100), range(100)]) - self.s = Series(np.random.randn(10000), index=self.mi) + mi = MultiIndex.from_product([range(100), range(100)]) + self.s = Series(np.random.randn(10000), index=mi) - def time_frame_from_mi_series(self): + def time_mi_series(self): DataFrame(self.s) -# ---------------------------------------------------------------------- -# From dict with DatetimeIndex with all offsets - -# dynamically generate benchmarks for every offset -# -# get_period_count & get_index_for_offset are there because blindly taking each -# offset times 1000 can easily go out of Timestamp bounds and raise errors. - - -def get_period_count(start_date, off): - ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days - if (ten_offsets_in_days == 0): - return 1000 - else: - periods = 9 * (Timestamp.max - start_date).days // ten_offsets_in_days - return min(periods, 1000) - - -def get_index_for_offset(off): - start_date = Timestamp('1/1/1900') - return date_range(start_date, - periods=get_period_count(start_date, off), - freq=off) +class FromDictwithTimestamp(object): -all_offsets = offsets.__all__ -# extra cases -for off in ['FY5253', 'FY5253Quarter']: - all_offsets.pop(all_offsets.index(off)) - all_offsets.extend([off + '_1', off + '_2']) - - -class FromDictwithTimestampOffsets(object): - - params = [all_offsets, [1, 2]] - param_names = ['offset', 'n_steps'] - - offset_kwargs = {'WeekOfMonth': {'weekday': 1, 'week': 1}, - 'LastWeekOfMonth': {'weekday': 1, 'week': 1}, - 'FY5253': {'startingMonth': 1, 'weekday': 1}, - 'FY5253Quarter': {'qtr_with_extra_week': 1, - 'startingMonth': 1, - 'weekday': 1}} - - offset_extra_cases = {'FY5253': {'variation': ['nearest', 'last']}, - 'FY5253Quarter': {'variation': ['nearest', 'last']}} + goal_time = 0.2 + params = [Nano(1), Hour(1)] + param_names = ['offset'] - def setup(self, offset, n_steps): + def setup(self, offset): + N = 10**3 np.random.seed(1234) - extra = False - if offset.endswith("_", None, -1): - extra = int(offset[-1]) - offset = offset[:-2] - - kwargs = {} - if offset in self.offset_kwargs: - kwargs = self.offset_kwargs[offset] - - if extra: - extras = self.offset_extra_cases[offset] - for extra_arg in extras: - kwargs[extra_arg] = extras[extra_arg][extra - 1] - - offset = getattr(offsets, offset) - self.idx = get_index_for_offset(offset(n_steps, **kwargs)) - self.df = DataFrame(np.random.randn(len(self.idx), 10), index=self.idx) - self.d = self.df.to_dict() - - def time_frame_ctor(self, offset, n_steps): + idx = date_range(Timestamp('1/1/1900'), freq=offset, periods=N) + df = DataFrame(np.random.randn(N, 10), index=idx) + self.d = df.to_dict() + + def time_dict_with_timestamp_offsets(self, offset): DataFrame(self.d) From 47197282b4cb2856dc4c1133cf1a063607fe105e Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Mon, 27 Nov 2017 10:25:45 -0800 Subject: [PATCH 2/3] Import error --- asv_bench/benchmarks/frame_ctor.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/asv_bench/benchmarks/frame_ctor.py b/asv_bench/benchmarks/frame_ctor.py index 9da8a483d3a09..20e3a5043ae82 100644 --- a/asv_bench/benchmarks/frame_ctor.py +++ b/asv_bench/benchmarks/frame_ctor.py @@ -2,8 +2,8 @@ import pandas.util.testing as tm from pandas import DataFrame, Series, MultiIndex, Timestamp, date_range try: - from pandas.tseries.offset import Nano, Hour -except: + from pandas.tseries.offsets import Nano, Hour +except ImportError: # For compatability with older versions(?) from pandas.core.datetools import * From 4de542f6955ada2f63ade3de81db2627397ee630 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Sun, 10 Dec 2017 16:02:20 +0100 Subject: [PATCH 3/3] Update frame_ctor.py --- asv_bench/benchmarks/frame_ctor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/asv_bench/benchmarks/frame_ctor.py b/asv_bench/benchmarks/frame_ctor.py index c82c5d4959c56..6761d48d25919 100644 --- a/asv_bench/benchmarks/frame_ctor.py +++ b/asv_bench/benchmarks/frame_ctor.py @@ -4,7 +4,7 @@ try: from pandas.tseries.offsets import Nano, Hour except ImportError: - # For compatability with older versions(?) + # For compatability with older versions from pandas.core.datetools import * # noqa from .pandas_vb_common import setup # noqa