From 30c43482e0c91e90d8a6ecb02ddc84c246854a4b Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 2 Jul 2020 14:18:33 -0700 Subject: [PATCH 01/24] REF: standardize tz_convert_single usage --- pandas/_libs/tslibs/timestamps.pyx | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index e104b722ea119..426e6e0104f89 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -1379,7 +1379,7 @@ default 'raise' cdef: npy_datetimestruct dts - int64_t value, value_tz + int64_t value object k, v datetime ts_input tzinfo_type tzobj @@ -1388,8 +1388,7 @@ default 'raise' tzobj = self.tzinfo value = self.value if tzobj is not None: - value_tz = tz_convert_single(value, tzobj, UTC) - value += value - value_tz + value = tz_convert_single(value, UTC, tzobj) # setup components dt64_to_dtstruct(value, &dts) From 02517fc8362879b107d985199981e5377eb30d4e Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 2 Jul 2020 15:42:00 -0700 Subject: [PATCH 02/24] CLN: tz_convert is always from UTC --- pandas/_libs/tslibs/tzconversion.pyx | 5 ++-- pandas/tests/tslibs/test_conversion.py | 33 +++++++++++++++++--------- 2 files changed, 25 insertions(+), 13 deletions(-) diff --git a/pandas/_libs/tslibs/tzconversion.pyx b/pandas/_libs/tslibs/tzconversion.pyx index d1d6bc40ef288..dc01210f2789f 100644 --- a/pandas/_libs/tslibs/tzconversion.pyx +++ b/pandas/_libs/tslibs/tzconversion.pyx @@ -388,8 +388,9 @@ def tz_convert(int64_t[:] vals, tzinfo tz1, tzinfo tz2): bint to_utc = is_utc(tz2) tzinfo tz - # See GH#17734 We should always be converting either from UTC or to UTC - assert is_utc(tz1) or to_utc + # See GH#17734 We should always be converting from UTC; otherwise + # should use tz_localize_to_utc. + assert is_utc(tz1) if len(vals) == 0: return np.array([], dtype=np.int64) diff --git a/pandas/tests/tslibs/test_conversion.py b/pandas/tests/tslibs/test_conversion.py index 3a7e06fb14a5f..5a16fea47e90d 100644 --- a/pandas/tests/tslibs/test_conversion.py +++ b/pandas/tests/tslibs/test_conversion.py @@ -20,33 +20,47 @@ def f(x): tm.assert_numpy_array_equal(result, expected) -def _compare_local_to_utc(tz_didx, utc_didx): +def _compare_local_to_utc(tz_didx, naive_didx): + # Check that tz_localize behaves the same vectorized and pointwise. 
def f(x): return tzconversion.tz_convert_single(x, tz_didx.tz, UTC) - result = tzconversion.tz_convert(utc_didx.asi8, tz_didx.tz, UTC) - expected = np.vectorize(f)(utc_didx.asi8) + err1 = err2 = None + try: + result = tzconversion.tz_localize_to_utc(naive_didx.asi8, tz_didx.tz) + err1 = None + except Exception as err: + err1 = err - tm.assert_numpy_array_equal(result, expected) + try: + expected = naive_didx.map(lambda x: x.tz_localize(tz_didx.tz)).asi8 + except Exception as err: + err2 = err + + if err1 is not None: + assert type(err1) == type(err2) + else: + assert err2 is None + tm.assert_numpy_array_equal(result, expected) def test_tz_convert_single_matches_tz_convert_hourly(tz_aware_fixture): tz = tz_aware_fixture tz_didx = date_range("2014-03-01", "2015-01-10", freq="H", tz=tz) - utc_didx = date_range("2014-03-01", "2015-01-10", freq="H") + naive_didx = date_range("2014-03-01", "2015-01-10", freq="H") _compare_utc_to_local(tz_didx) - _compare_local_to_utc(tz_didx, utc_didx) + _compare_local_to_utc(tz_didx, naive_didx) @pytest.mark.parametrize("freq", ["D", "A"]) def test_tz_convert_single_matches_tz_convert(tz_aware_fixture, freq): tz = tz_aware_fixture tz_didx = date_range("2000-01-01", "2020-01-01", freq=freq, tz=tz) - utc_didx = date_range("2000-01-01", "2020-01-01", freq=freq) + naive_didx = date_range("2000-01-01", "2020-01-01", freq=freq) _compare_utc_to_local(tz_didx) - _compare_local_to_utc(tz_didx, utc_didx) + _compare_local_to_utc(tz_didx, naive_didx) @pytest.mark.parametrize( @@ -57,9 +71,6 @@ def test_tz_convert_single_matches_tz_convert(tz_aware_fixture, freq): ], ) def test_tz_convert_corner(arr): - result = tzconversion.tz_convert(arr, timezones.maybe_get_tz("US/Eastern"), UTC) - tm.assert_numpy_array_equal(result, arr) - result = tzconversion.tz_convert(arr, UTC, timezones.maybe_get_tz("Asia/Tokyo")) tm.assert_numpy_array_equal(result, arr) From 830c4bdf8e7dbe42c8d58d98d0dfa26b431c7ac6 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 2 Jul 2020 14:17:43 -0700 Subject: [PATCH 03/24] PERF: avoid copy in tz_convert dst cases --- pandas/_libs/tslibs/tzconversion.pyx | 55 +++++++++++++++++++--------- 1 file changed, 37 insertions(+), 18 deletions(-) diff --git a/pandas/_libs/tslibs/tzconversion.pyx b/pandas/_libs/tslibs/tzconversion.pyx index dc01210f2789f..273781ee34f0a 100644 --- a/pandas/_libs/tslibs/tzconversion.pyx +++ b/pandas/_libs/tslibs/tzconversion.pyx @@ -552,29 +552,48 @@ cdef int64_t[:] _tz_convert_dst( int64_t[:] result = np.empty(n, dtype=np.int64) ndarray[int64_t] trans int64_t[:] deltas - int64_t v + int64_t v, delta + str typ # tz is assumed _not_ to be tzlocal; that should go # through _tz_convert_tzlocal_utc - trans, deltas, _ = get_dst_info(tz) - if not to_utc: - # We add `offset` below instead of subtracting it - deltas = -1 * np.array(deltas, dtype='i8') + trans, deltas, typ = get_dst_info(tz) - # Previously, this search was done pointwise to try and benefit - # from getting to skip searches for iNaTs. 
However, it seems call - # overhead dominates the search time so doing it once in bulk - # is substantially faster (GH#24603) - pos = trans.searchsorted(values, side='right') - 1 + if typ not in ["pytz", "dateutil"]: + # FixedOffset, we know len(deltas) == 1 + delta = deltas[0] - for i in range(n): - v = values[i] - if v == NPY_NAT: - result[i] = v - else: - if pos[i] < 0: - raise ValueError('First time before start of DST info') - result[i] = v - deltas[pos[i]] + for i in range(n): + v = values[i] + if v == NPY_NAT: + result[i] = v + else: + if to_utc: + result[i] = v - delta + else: + result[i] = v + delta + + else: + # Previously, this search was done pointwise to try and benefit + # from getting to skip searches for iNaTs. However, it seems call + # overhead dominates the search time so doing it once in bulk + # is substantially faster (GH#24603) + pos = trans.searchsorted(values, side="right") - 1 + + for i in range(n): + v = values[i] + if v == NPY_NAT: + result[i] = v + else: + if pos[i] < 0: + # TODO: How is this reached? Should we be checking for + # it elsewhere? + raise ValueError("First time before start of DST info") + + if to_utc: + result[i] = v - deltas[pos[i]] + else: + result[i] = v + deltas[pos[i]] return result From 6bc65703e3435dbc9bd8c7283900d483763bee46 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 2 Jul 2020 17:08:31 -0700 Subject: [PATCH 04/24] ASVs --- asv_bench/benchmarks/tslibs/tz_convert.py | 30 +++++++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 asv_bench/benchmarks/tslibs/tz_convert.py diff --git a/asv_bench/benchmarks/tslibs/tz_convert.py b/asv_bench/benchmarks/tslibs/tz_convert.py new file mode 100644 index 0000000000000..823a739b52e7f --- /dev/null +++ b/asv_bench/benchmarks/tslibs/tz_convert.py @@ -0,0 +1,30 @@ +import numpy as np +from pytz import UTC + +from pandas._libs.tslibs.tzconversion import tz_convert, tz_localize_to_utc + +from .tslib import _sizes, _tzs + + +class TimeTZConvert: + params = ( + _sizes, + _tzs, + ) + param_names = ["size", "tz"] + + def setup(self, size, tz): + arr = np.random.randint(0, 10, size=size, dtype="i8") + self.i8data = arr + + def time_tz_convert_from_utc(self, size, tz): + # effectively: + # dti = DatetimeIndex(self.i8data, tz=tz) + # dti.tz_localize(None) + tz_convert(self.i8data, UTC, tz) + + def time_tz_localize_to_utc(self, size, tz): + # effectively: + # dti = DatetimeIndex(self.i8data) + # dti.tz_localize(tz, ambiguous="NaT", nonexistent="NaT") + tz_localize_to_utc(self.i8data, tz, ambiguous="NaT", nonexistent="NaT") From 7f7350b9d0bb4522daef6269bea29131d843522f Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 2 Jul 2020 17:52:44 -0700 Subject: [PATCH 05/24] asv fixup --- asv_bench/benchmarks/tslibs/tz_convert.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/asv_bench/benchmarks/tslibs/tz_convert.py b/asv_bench/benchmarks/tslibs/tz_convert.py index 823a739b52e7f..2a1f559bdf6d4 100644 --- a/asv_bench/benchmarks/tslibs/tz_convert.py +++ b/asv_bench/benchmarks/tslibs/tz_convert.py @@ -9,7 +9,7 @@ class TimeTZConvert: params = ( _sizes, - _tzs, + [x for x in _tzs if x is not None], ) param_names = ["size", "tz"] From 92cada7afddbd4db39494ede0ec0014044fe2fe8 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 2 Jul 2020 20:55:35 -0700 Subject: [PATCH 06/24] REF: implement tz_localize_to_utc_single --- pandas/_libs/tslib.pyx | 7 ++--- pandas/_libs/tslibs/conversion.pyx | 9 +++--- pandas/_libs/tslibs/timestamps.pyx | 12 ++++---- pandas/_libs/tslibs/tzconversion.pxd | 3 ++ 
pandas/_libs/tslibs/tzconversion.pyx | 41 ++++++++++++++++++++++++++-- 5 files changed, 56 insertions(+), 16 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index f494e74bde55f..e02ad6017efff 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -46,7 +46,6 @@ from pandas._libs.tslibs.timezones cimport ( get_dst_info, is_utc, is_tzlocal, - utc_pytz as UTC, ) from pandas._libs.tslibs.conversion cimport ( _TSObject, @@ -67,8 +66,8 @@ from pandas._libs.tslibs.timestamps cimport create_timestamp_from_ts, _Timestamp from pandas._libs.tslibs.timestamps import Timestamp from pandas._libs.tslibs.tzconversion cimport ( - tz_convert_single, tz_convert_utc_to_tzlocal, + tz_localize_to_utc_single, ) # Note: this is the only non-tslibs intra-pandas dependency here @@ -269,7 +268,7 @@ def _test_parse_iso8601(ts: str): check_dts_bounds(&obj.dts) if out_local == 1: obj.tzinfo = pytz.FixedOffset(out_tzoffset) - obj.value = tz_convert_single(obj.value, obj.tzinfo, UTC) + obj.value = tz_localize_to_utc_single(obj.value, obj.tzinfo) return Timestamp(obj.value, tz=obj.tzinfo) else: return Timestamp(obj.value) @@ -727,7 +726,7 @@ cpdef array_to_datetime( # dateutil.tz.tzoffset objects out_tzoffset_vals.add(out_tzoffset * 60.) tz = pytz.FixedOffset(out_tzoffset) - value = tz_convert_single(value, tz, UTC) + value = tz_localize_to_utc_single(value, tz) out_local = 0 out_tzoffset = 0 else: diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 95500f66db156..67931010ca873 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -39,10 +39,9 @@ from pandas._libs.tslibs.nattype cimport ( c_nat_strings as nat_strings, ) -from pandas._libs.tslibs.tzconversion import tz_localize_to_utc from pandas._libs.tslibs.tzconversion cimport ( tz_convert_utc_to_tzlocal, - tz_convert_single, + tz_localize_to_utc_single, ) # ---------------------------------------------------------------------- @@ -470,7 +469,7 @@ cdef _TSObject _create_tsobject_tz_using_offset(npy_datetimestruct dts, value = dtstruct_to_dt64(&dts) obj.dts = dts obj.tzinfo = pytz.FixedOffset(tzoffset) - obj.value = tz_convert_single(value, obj.tzinfo, UTC) + obj.value = tz_localize_to_utc_single(value, obj.tzinfo) if tz is None: check_overflows(obj) return obj @@ -555,8 +554,8 @@ cdef _TSObject _convert_str_to_tsobject(object ts, tzinfo tz, object unit, ts = dtstruct_to_dt64(&dts) if tz is not None: # shift for _localize_tso - ts = tz_localize_to_utc(np.array([ts], dtype='i8'), tz, - ambiguous='raise')[0] + ts = tz_localize_to_utc_single(ts, tz, + ambiguous="raise") except OutOfBoundsDatetime: # GH#19382 for just-barely-OutOfBounds falling back to dateutil diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 426e6e0104f89..5a52af7d2a2a8 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -58,8 +58,10 @@ from pandas._libs.tslibs.timezones cimport ( is_utc, maybe_get_tz, treat_tz_as_pytz, utc_pytz as UTC, get_timezone, tz_compare, ) -from pandas._libs.tslibs.tzconversion cimport tz_convert_single -from pandas._libs.tslibs.tzconversion import tz_localize_to_utc +from pandas._libs.tslibs.tzconversion cimport ( + tz_convert_single, + tz_localize_to_utc_single, +) # ---------------------------------------------------------------------- # Constants @@ -1299,9 +1301,9 @@ default 'raise' tz = maybe_get_tz(tz) if not isinstance(ambiguous, str): ambiguous = [ambiguous] - value = 
tz_localize_to_utc(np.array([self.value], dtype='i8'), tz, - ambiguous=ambiguous, - nonexistent=nonexistent)[0] + value = tz_localize_to_utc_single(self.value, tz, + ambiguous=ambiguous, + nonexistent=nonexistent) return Timestamp(value, tz=tz, freq=self.freq) else: if tz is None: diff --git a/pandas/_libs/tslibs/tzconversion.pxd b/pandas/_libs/tslibs/tzconversion.pxd index 7f445d7549f45..7d102868256de 100644 --- a/pandas/_libs/tslibs/tzconversion.pxd +++ b/pandas/_libs/tslibs/tzconversion.pxd @@ -4,3 +4,6 @@ from numpy cimport int64_t cdef int64_t tz_convert_utc_to_tzlocal(int64_t utc_val, tzinfo tz, bint* fold=*) cpdef int64_t tz_convert_single(int64_t val, tzinfo tz1, tzinfo tz2) +cdef int64_t tz_localize_to_utc_single( + int64_t val, tzinfo tz, object ambiguous=*, object nonexistent=* +) except? -1 diff --git a/pandas/_libs/tslibs/tzconversion.pyx b/pandas/_libs/tslibs/tzconversion.pyx index 273781ee34f0a..d778bada8cf71 100644 --- a/pandas/_libs/tslibs/tzconversion.pyx +++ b/pandas/_libs/tslibs/tzconversion.pyx @@ -20,10 +20,47 @@ from pandas._libs.tslibs.ccalendar cimport DAY_NANOS, HOUR_NANOS from pandas._libs.tslibs.nattype cimport NPY_NAT from pandas._libs.tslibs.np_datetime cimport ( npy_datetimestruct, dt64_to_dtstruct) -from pandas._libs.tslibs.timezones cimport get_dst_info, is_tzlocal, is_utc +from pandas._libs.tslibs.timezones cimport ( + get_dst_info, + get_utcoffset, + is_fixed_offset, + is_tzlocal, + is_utc, +) + + +cdef int64_t tz_localize_to_utc_single( + int64_t val, tzinfo tz, object ambiguous=None, object nonexistent=None, +) except? -1: + cdef: + int64_t delta + int64_t[:] deltas + + if val == NPY_NAT: + return val + + elif is_utc(tz) or tz is None: + return val + + elif is_tzlocal(tz): + return _tz_convert_tzlocal_utc(val, tz, to_utc=True) + + elif is_fixed_offset(tz): + # TODO: in this case we should be able to use get_utcoffset, + # that returns None for 'dateutil//usr/share/zoneinfo/Etc/GMT-9' + _, deltas, _ = get_dst_info(tz) + delta = deltas[0] + return val - delta + + else: + return tz_localize_to_utc( + np.array([val], dtype="i8"), + tz, + ambiguous=ambiguous, + nonexistent=nonexistent, + )[0] -# TODO: cdef scalar version to call from convert_str_to_tsobject @cython.boundscheck(False) @cython.wraparound(False) def tz_localize_to_utc(ndarray[int64_t] vals, tzinfo tz, object ambiguous=None, From 6e9afd7f47f5e9acb76cd8dd8b715d0322e3ae77 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 2 Jul 2020 20:59:18 -0700 Subject: [PATCH 07/24] docstring --- pandas/_libs/tslibs/tzconversion.pyx | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/tzconversion.pyx b/pandas/_libs/tslibs/tzconversion.pyx index d778bada8cf71..98c40e109dbab 100644 --- a/pandas/_libs/tslibs/tzconversion.pyx +++ b/pandas/_libs/tslibs/tzconversion.pyx @@ -32,6 +32,7 @@ from pandas._libs.tslibs.timezones cimport ( cdef int64_t tz_localize_to_utc_single( int64_t val, tzinfo tz, object ambiguous=None, object nonexistent=None, ) except? -1: + """See tz_localize_to_utc.__doc__""" cdef: int64_t delta int64_t[:] deltas @@ -47,7 +48,7 @@ cdef int64_t tz_localize_to_utc_single( elif is_fixed_offset(tz): # TODO: in this case we should be able to use get_utcoffset, - # that returns None for 'dateutil//usr/share/zoneinfo/Etc/GMT-9' + # that returns None for e.g. 
'dateutil//usr/share/zoneinfo/Etc/GMT-9' _, deltas, _ = get_dst_info(tz) delta = deltas[0] return val - delta From 617ab79cf818ce41a997b3e12f3060c1a5d75a47 Mon Sep 17 00:00:00 2001 From: Steffen Rehberg Date: Fri, 3 Jul 2020 16:14:18 +0200 Subject: [PATCH 08/24] DOC: Fix code formatting and typos in Series.tz_localize (#35110) --- pandas/core/generic.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index a66cade3b81b0..d892e2487b31c 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -9580,8 +9580,9 @@ def tz_localize( dtype: int64 If the DST transition causes nonexistent times, you can shift these - dates forward or backwards with a timedelta object or `'shift_forward'` - or `'shift_backwards'`. + dates forward or backward with a timedelta object or `'shift_forward'` + or `'shift_backward'`. + >>> s = pd.Series(range(2), ... index=pd.DatetimeIndex(['2015-03-29 02:30:00', ... '2015-03-29 03:30:00'])) From 9b39dbf226c00955ecd9e0fc2164006408416cf1 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 3 Jul 2020 10:07:31 -0500 Subject: [PATCH 09/24] PERF: Fix quantile perf regression (#35101) --- pandas/util/_validators.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/pandas/util/_validators.py b/pandas/util/_validators.py index bb6c6de441558..fa7201a5188a5 100644 --- a/pandas/util/_validators.py +++ b/pandas/util/_validators.py @@ -371,14 +371,13 @@ def validate_percentile(q: Union[float, Iterable[float]]) -> np.ndarray: ValueError if percentiles are not in given interval([0, 1]). """ q_arr = np.asarray(q) - msg = ( - "percentiles should all be in the interval [0, 1]." - f"Try {q_arr / 100.0} instead." - ) + # Don't change this to an f-string. The string formatting + # is too expensive for cases where we don't need it. + msg = "percentiles should all be in the interval [0, 1]. Try {} instead." 
if q_arr.ndim == 0: if not 0 <= q_arr <= 1: - raise ValueError(msg) + raise ValueError(msg.format(q_arr / 100.0)) else: if not all(0 <= qs <= 1 for qs in q_arr): - raise ValueError(msg) + raise ValueError(msg.format(q_arr / 100.0)) return q_arr From 7a9d5cca2218475b96b24c9fc0de76b0e2d86c01 Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Sat, 4 Jul 2020 16:30:15 +0100 Subject: [PATCH 10/24] CLN: convert lambda to function (#35117) --- pandas/io/formats/printing.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/pandas/io/formats/printing.py b/pandas/io/formats/printing.py index 36e774305b577..1cf79dc105901 100644 --- a/pandas/io/formats/printing.py +++ b/pandas/io/formats/printing.py @@ -276,9 +276,13 @@ class TableSchemaFormatter(BaseFormatter): formatters[mimetype].enabled = False -default_pprint = lambda x, max_seq_items=None: pprint_thing( - x, escape_chars=("\t", "\r", "\n"), quote_strings=True, max_seq_items=max_seq_items -) +def default_pprint(thing: Any, max_seq_items: Optional[int] = None) -> str: + return pprint_thing( + thing, + escape_chars=("\t", "\r", "\n"), + quote_strings=True, + max_seq_items=max_seq_items, + ) def format_object_summary( From 55be9e9773782309110950cbcfca34c4af6b2a70 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 5 Jul 2020 17:12:46 -0700 Subject: [PATCH 11/24] REF: dont consolidate in BlockManager.equals (#34962) * REF: dont consolidate in BlockManager.equals * doctest fixup * Remove Block.equals * simplify, comments --- pandas/core/internals/blocks.py | 27 +------------ pandas/core/internals/managers.py | 48 +++++++++++++++--------- pandas/tests/internals/test_internals.py | 4 +- 3 files changed, 33 insertions(+), 46 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 6207785fb2975..d8779dae7c384 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -56,12 +56,7 @@ ABCPandasArray, ABCSeries, ) -from pandas.core.dtypes.missing import ( - _isna_compat, - array_equivalent, - is_valid_nat_for_dtype, - isna, -) +from pandas.core.dtypes.missing import _isna_compat, is_valid_nat_for_dtype, isna import pandas.core.algorithms as algos from pandas.core.array_algos.transforms import shift @@ -1383,11 +1378,6 @@ def where_func(cond, values, other): return result_blocks - def equals(self, other) -> bool: - if self.dtype != other.dtype or self.shape != other.shape: - return False - return array_equivalent(self.values, other.values) - def _unstack(self, unstacker, fill_value, new_placement): """ Return a list of unstacked blocks of self @@ -1881,9 +1871,6 @@ def where( return [self.make_block_same_class(result, placement=self.mgr_locs)] - def equals(self, other) -> bool: - return self.values.equals(other.values) - def _unstack(self, unstacker, fill_value, new_placement): # ExtensionArray-safe unstack. 
# We override ObjectBlock._unstack, which unstacks directly on the @@ -1929,12 +1916,6 @@ class NumericBlock(Block): class FloatOrComplexBlock(NumericBlock): __slots__ = () - def equals(self, other) -> bool: - if self.dtype != other.dtype or self.shape != other.shape: - return False - left, right = self.values, other.values - return ((left == right) | (np.isnan(left) & np.isnan(right))).all() - class FloatBlock(FloatOrComplexBlock): __slots__ = () @@ -2298,12 +2279,6 @@ def setitem(self, indexer, value): ) return newb.setitem(indexer, value) - def equals(self, other) -> bool: - # override for significant performance improvement - if self.dtype != other.dtype or self.shape != other.shape: - return False - return (self.values.view("i8") == other.values.view("i8")).all() - def quantile(self, qs, interpolation="linear", axis=0): naive = self.values.view("M8[ns]") diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index b2f2277d9a7dc..c82670106d3b6 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -19,6 +19,7 @@ from pandas.core.dtypes.common import ( DT64NS_DTYPE, is_datetimelike_v_numeric, + is_dtype_equal, is_extension_array_dtype, is_list_like, is_numeric_v_string_like, @@ -27,9 +28,10 @@ from pandas.core.dtypes.concat import concat_compat from pandas.core.dtypes.dtypes import ExtensionDtype from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries -from pandas.core.dtypes.missing import isna +from pandas.core.dtypes.missing import array_equivalent, isna import pandas.core.algorithms as algos +from pandas.core.arrays import ExtensionArray from pandas.core.arrays.sparse import SparseDtype from pandas.core.base import PandasObject import pandas.core.common as com @@ -1409,29 +1411,39 @@ def take(self, indexer, axis: int = 1, verify: bool = True, convert: bool = True new_axis=new_labels, indexer=indexer, axis=axis, allow_dups=True ) - def equals(self, other) -> bool: + def equals(self, other: "BlockManager") -> bool: self_axes, other_axes = self.axes, other.axes if len(self_axes) != len(other_axes): return False if not all(ax1.equals(ax2) for ax1, ax2 in zip(self_axes, other_axes)): return False - self._consolidate_inplace() - other._consolidate_inplace() - if len(self.blocks) != len(other.blocks): - return False - # canonicalize block order, using a tuple combining the mgr_locs - # then type name because there might be unconsolidated - # blocks (say, Categorical) which can only be distinguished by - # the iteration order - def canonicalize(block): - return (block.mgr_locs.as_array.tolist(), block.dtype.name) - - self_blocks = sorted(self.blocks, key=canonicalize) - other_blocks = sorted(other.blocks, key=canonicalize) - return all( - block.equals(oblock) for block, oblock in zip(self_blocks, other_blocks) - ) + if self.ndim == 1: + # For SingleBlockManager (i.e.Series) + if other.ndim != 1: + return False + left = self.blocks[0].values + right = other.blocks[0].values + if not is_dtype_equal(left.dtype, right.dtype): + return False + elif isinstance(left, ExtensionArray): + return left.equals(right) + else: + return array_equivalent(left, right) + + for i in range(len(self.items)): + # Check column-wise, return False if any column doesnt match + left = self.iget_values(i) + right = other.iget_values(i) + if not is_dtype_equal(left.dtype, right.dtype): + return False + elif isinstance(left, ExtensionArray): + if not left.equals(right): + return False + else: + if not array_equivalent(left, right): + return False + 
return True def unstack(self, unstacker, fill_value) -> "BlockManager": """ diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py index 5fd44d7cd74a9..06ccdd2484a2a 100644 --- a/pandas/tests/internals/test_internals.py +++ b/pandas/tests/internals/test_internals.py @@ -377,7 +377,7 @@ def test_copy(self, mgr): for blk, cp_blk in zip(mgr.blocks, cp.blocks): # view assertion - assert cp_blk.equals(blk) + tm.assert_equal(cp_blk.values, blk.values) if isinstance(blk.values, np.ndarray): assert cp_blk.values.base is blk.values.base else: @@ -389,7 +389,7 @@ def test_copy(self, mgr): # copy assertion we either have a None for a base or in case of # some blocks it is an array (e.g. datetimetz), but was copied - assert cp_blk.equals(blk) + tm.assert_equal(cp_blk.values, blk.values) if not isinstance(cp_blk.values, np.ndarray): assert cp_blk.values._data.base is not blk.values._data.base else: From b389653ef0c93458134d47c43bbe507395f5d5aa Mon Sep 17 00:00:00 2001 From: Alex Kirko Date: Mon, 6 Jul 2020 17:19:43 +0300 Subject: [PATCH 12/24] CI: pin isort version (#35136) --- environment.yml | 2 +- requirements-dev.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/environment.yml b/environment.yml index 2429f4ab3d699..24c0832b6fb4c 100644 --- a/environment.yml +++ b/environment.yml @@ -20,7 +20,7 @@ dependencies: - flake8<3.8.0 # temporary pin, GH#34150 - flake8-comprehensions>=3.1.0 # used by flake8, linting of unnecessary comprehensions - flake8-rst>=0.6.0,<=0.7.0 # linting of code blocks in rst files - - isort # check that imports are in the right order + - isort=4.3.21 # check that imports are in the right order - mypy=0.730 - pycodestyle # used by flake8 diff --git a/requirements-dev.txt b/requirements-dev.txt index 44c975a3b3cfb..eda0fa8f32b19 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -11,7 +11,7 @@ cpplint flake8<3.8.0 flake8-comprehensions>=3.1.0 flake8-rst>=0.6.0,<=0.7.0 -isort +isort==4.3.21 mypy==0.730 pycodestyle gitpython From c4309b69068ef27a8e989c9ee52a0fff327c00c0 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 6 Jul 2020 16:30:35 +0200 Subject: [PATCH 13/24] CI: pin sphinx <= 3.1.1 for autodoc failure (#35139) --- environment.yml | 2 +- requirements-dev.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/environment.yml b/environment.yml index 24c0832b6fb4c..80dbffebf6b9d 100644 --- a/environment.yml +++ b/environment.yml @@ -27,7 +27,7 @@ dependencies: # documentation - gitpython # obtain contributors from git for whatsnew - gitdb2=2.0.6 # GH-32060 - - sphinx + - sphinx<=3.1.1 # documentation (jupyter notebooks) - nbconvert>=5.4.1 diff --git a/requirements-dev.txt b/requirements-dev.txt index eda0fa8f32b19..886f400caf44f 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -16,7 +16,7 @@ mypy==0.730 pycodestyle gitpython gitdb2==2.0.6 -sphinx +sphinx<=3.1.1 nbconvert>=5.4.1 nbsphinx pandoc From e1507dd668121c7d185866f26f43f3a708b4b9d3 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 6 Jul 2020 10:19:36 -0500 Subject: [PATCH 14/24] Fix numpy warning (#35085) --- pandas/conftest.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pandas/conftest.py b/pandas/conftest.py index d74c43069574f..5fe4cc45b0006 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -256,9 +256,7 @@ def nselect_method(request): # ---------------------------------------------------------------- # Missing values & co. 
# ---------------------------------------------------------------- -@pytest.fixture( - params=[None, np.nan, pd.NaT, float("nan"), np.float("NaN"), pd.NA], ids=str -) +@pytest.fixture(params=[None, np.nan, pd.NaT, float("nan"), pd.NA], ids=str) def nulls_fixture(request): """ Fixture for each null type in pandas. From 8b06a5082a30893b461410abe11765d7a66bdf19 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 6 Jul 2020 11:01:30 -0700 Subject: [PATCH 15/24] TYP: type unit as str (#35099) --- pandas/_libs/tslib.pyx | 6 +++--- pandas/_libs/tslibs/conversion.pxd | 2 +- pandas/_libs/tslibs/conversion.pyx | 20 ++++++++++++++++---- pandas/_libs/tslibs/timedeltas.pxd | 2 +- pandas/_libs/tslibs/timedeltas.pyx | 6 +++--- pandas/_libs/tslibs/timestamps.pyx | 1 + 6 files changed, 25 insertions(+), 12 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index e02ad6017efff..bef99280cfb9f 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -362,8 +362,8 @@ def format_array_from_datetime( def array_with_unit_to_datetime( ndarray values, - object unit, - str errors='coerce' + str unit, + str errors="coerce" ): """ Convert the ndarray to datetime according to the time unit. @@ -383,7 +383,7 @@ def array_with_unit_to_datetime( ---------- values : ndarray of object Date-like objects to convert. - unit : object + unit : str Time unit to use during conversion. errors : str, default 'raise' Error behavior when parsing. diff --git a/pandas/_libs/tslibs/conversion.pxd b/pandas/_libs/tslibs/conversion.pxd index 2cf75944a8196..0eb94fecf7d6b 100644 --- a/pandas/_libs/tslibs/conversion.pxd +++ b/pandas/_libs/tslibs/conversion.pxd @@ -13,7 +13,7 @@ cdef class _TSObject: bint fold -cdef convert_to_tsobject(object ts, tzinfo tz, object unit, +cdef convert_to_tsobject(object ts, tzinfo tz, str unit, bint dayfirst, bint yearfirst, int32_t nanos=*) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 67931010ca873..36a4a1f60d8b9 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -55,8 +55,19 @@ TD64NS_DTYPE = np.dtype('m8[ns]') # Unit Conversion Helpers cdef inline int64_t cast_from_unit(object ts, str unit) except? -1: - """ return a casting of the unit represented to nanoseconds - round the fractional part of a float to our precision, p """ + """ + Return a casting of the unit represented to nanoseconds + round the fractional part of a float to our precision, p. + + Parameters + ---------- + ts : int, float, or None + unit : str + + Returns + ------- + int64_t + """ cdef: int64_t m int p @@ -306,7 +317,7 @@ cdef class _TSObject: return self.value -cdef convert_to_tsobject(object ts, tzinfo tz, object unit, +cdef convert_to_tsobject(object ts, tzinfo tz, str unit, bint dayfirst, bint yearfirst, int32_t nanos=0): """ Extract datetime and int64 from any of: @@ -496,7 +507,7 @@ cdef _TSObject _create_tsobject_tz_using_offset(npy_datetimestruct dts, return obj -cdef _TSObject _convert_str_to_tsobject(object ts, tzinfo tz, object unit, +cdef _TSObject _convert_str_to_tsobject(object ts, tzinfo tz, str unit, bint dayfirst=False, bint yearfirst=False): """ @@ -512,6 +523,7 @@ cdef _TSObject _convert_str_to_tsobject(object ts, tzinfo tz, object unit, Value to be converted to _TSObject tz : tzinfo or None timezone for the timezone-aware output + unit : str or None dayfirst : bool, default False When parsing an ambiguous date string, interpret e.g. 
"3/4/1975" as April 3, as opposed to the standard US interpretation March 4. diff --git a/pandas/_libs/tslibs/timedeltas.pxd b/pandas/_libs/tslibs/timedeltas.pxd index 70a418d7803d1..4142861e9ad38 100644 --- a/pandas/_libs/tslibs/timedeltas.pxd +++ b/pandas/_libs/tslibs/timedeltas.pxd @@ -3,7 +3,7 @@ from numpy cimport int64_t # Exposed for tslib, not intended for outside use. cpdef int64_t delta_to_nanoseconds(delta) except? -1 -cdef convert_to_timedelta64(object ts, object unit) +cdef convert_to_timedelta64(object ts, str unit) cdef bint is_any_td_scalar(object obj) diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index 2862e62e3d522..8f3a599bf107c 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -160,7 +160,7 @@ cpdef int64_t delta_to_nanoseconds(delta) except? -1: raise TypeError(type(delta)) -cdef convert_to_timedelta64(object ts, object unit): +cdef convert_to_timedelta64(object ts, str unit): """ Convert an incoming object to a timedelta64 if possible. Before calling, unit must be standardized to avoid repeated unit conversion @@ -218,7 +218,7 @@ cdef convert_to_timedelta64(object ts, object unit): @cython.boundscheck(False) @cython.wraparound(False) -def array_to_timedelta64(object[:] values, unit=None, errors='raise'): +def array_to_timedelta64(object[:] values, str unit=None, str errors="raise"): """ Convert an ndarray to an array of timedeltas. If errors == 'coerce', coerce non-convertible objects to NaT. Otherwise, raise. @@ -470,7 +470,7 @@ cdef inline timedelta_from_spec(object number, object frac, object unit): return cast_from_unit(float(n), unit) -cpdef inline str parse_timedelta_unit(object unit): +cpdef inline str parse_timedelta_unit(str unit): """ Parameters ---------- diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 5a52af7d2a2a8..de9c8b131548a 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -1052,6 +1052,7 @@ class Timestamp(_Timestamp): nanosecond = hour tz = minute freq = None + unit = None if getattr(ts_input, 'tzinfo', None) is not None and tz is not None: raise ValueError("Cannot pass a datetime or Timestamp with tzinfo with " From 5fc5bc6c7393c178f113b06b9eaabc307ff1d3b9 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 6 Jul 2020 14:32:56 -0700 Subject: [PATCH 16/24] CLN: never to_utc --- pandas/_libs/tslibs/tzconversion.pyx | 44 ++++++++-------------------- 1 file changed, 12 insertions(+), 32 deletions(-) diff --git a/pandas/_libs/tslibs/tzconversion.pyx b/pandas/_libs/tslibs/tzconversion.pyx index 98c40e109dbab..4aee6b1a59a89 100644 --- a/pandas/_libs/tslibs/tzconversion.pyx +++ b/pandas/_libs/tslibs/tzconversion.pyx @@ -384,27 +384,23 @@ cpdef int64_t tz_convert_single(int64_t val, tzinfo tz1, tzinfo tz2): """ cdef: int64_t arr[1] - bint to_utc = is_utc(tz2) tzinfo tz # See GH#17734 We should always be converting either from UTC or to UTC - assert is_utc(tz1) or to_utc + assert is_utc(tz1) if val == NPY_NAT: return val - if to_utc: - tz = tz1 - else: - tz = tz2 + tz = tz2 if is_utc(tz): return val elif is_tzlocal(tz): - return _tz_convert_tzlocal_utc(val, tz, to_utc=to_utc) + return _tz_convert_tzlocal_utc(val, tz, to_utc=False) else: arr[0] = val - return _tz_convert_dst(arr, tz, to_utc=to_utc)[0] + return _tz_convert_dst(arr, tz)[0] def tz_convert(int64_t[:] vals, tzinfo tz1, tzinfo tz2): @@ -423,7 +419,6 @@ def tz_convert(int64_t[:] vals, tzinfo tz1, tzinfo tz2): """ cdef: int64_t[:] converted 
- bint to_utc = is_utc(tz2) tzinfo tz # See GH#17734 We should always be converting from UTC; otherwise @@ -433,18 +428,14 @@ def tz_convert(int64_t[:] vals, tzinfo tz1, tzinfo tz2): if len(vals) == 0: return np.array([], dtype=np.int64) - if to_utc: - tz = tz1 - else: - tz = tz2 - - converted = _tz_convert_one_way(vals, tz, to_utc=to_utc) + tz = tz2 + converted = _tz_convert_one_way(vals, tz) return np.array(converted, dtype=np.int64) @cython.boundscheck(False) @cython.wraparound(False) -cdef int64_t[:] _tz_convert_one_way(int64_t[:] vals, tzinfo tz, bint to_utc): +cdef int64_t[:] _tz_convert_one_way(int64_t[:] vals, tzinfo tz): """ Convert the given values (in i8) either to UTC or from UTC. @@ -452,7 +443,6 @@ cdef int64_t[:] _tz_convert_one_way(int64_t[:] vals, tzinfo tz, bint to_utc): ---------- vals : int64 ndarray tz1 : tzinfo - to_utc : bool Returns ------- @@ -472,9 +462,9 @@ cdef int64_t[:] _tz_convert_one_way(int64_t[:] vals, tzinfo tz, bint to_utc): if val == NPY_NAT: converted[i] = NPY_NAT else: - converted[i] = _tz_convert_tzlocal_utc(val, tz, to_utc) + converted[i] = _tz_convert_tzlocal_utc(val, tz, to_utc=False) else: - converted = _tz_convert_dst(vals, tz, to_utc) + converted = _tz_convert_dst(vals, tz) return converted @@ -565,9 +555,7 @@ cdef int64_t _tz_convert_tzlocal_utc(int64_t val, tzinfo tz, bint to_utc=True, @cython.boundscheck(False) @cython.wraparound(False) -cdef int64_t[:] _tz_convert_dst( - const int64_t[:] values, tzinfo tz, bint to_utc=True, -): +cdef int64_t[:] _tz_convert_dst(const int64_t[:] values, tzinfo tz): """ tz_convert for non-UTC non-tzlocal cases where we have to check DST transitions pointwise. @@ -576,8 +564,6 @@ cdef int64_t[:] _tz_convert_dst( ---------- values : ndarray[int64_t] tz : tzinfo - to_utc : bool - True if converting _to_ UTC, False if converting _from_ utc Returns ------- @@ -607,10 +593,7 @@ cdef int64_t[:] _tz_convert_dst( if v == NPY_NAT: result[i] = v else: - if to_utc: - result[i] = v - delta - else: - result[i] = v + delta + result[i] = v + delta else: # Previously, this search was done pointwise to try and benefit @@ -629,9 +612,6 @@ cdef int64_t[:] _tz_convert_dst( # it elsewhere? 
raise ValueError("First time before start of DST info") - if to_utc: - result[i] = v - deltas[pos[i]] - else: - result[i] = v + deltas[pos[i]] + result[i] = v + deltas[pos[i]] return result From 865344fd57c7da1c44751ad2b80317d2fff9da84 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 6 Jul 2020 18:52:40 -0700 Subject: [PATCH 17/24] CLN: remove always-UTC arg from tz_convert, tz_convert_single --- pandas/_libs/tslibs/offsets.pyx | 3 +-- pandas/_libs/tslibs/timestamps.pyx | 4 ++-- pandas/_libs/tslibs/tzconversion.pxd | 2 +- pandas/_libs/tslibs/tzconversion.pyx | 28 +++++++------------------- pandas/core/arrays/datetimes.py | 4 ++-- pandas/tests/tslibs/test_conversion.py | 9 +++------ pandas/tseries/frequencies.py | 3 +-- 7 files changed, 17 insertions(+), 36 deletions(-) diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index e4d05e0d70e2f..ff3b8c4c079be 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -47,7 +47,6 @@ from pandas._libs.tslibs.np_datetime cimport ( dt64_to_dtstruct, pydate_to_dtstruct, ) -from pandas._libs.tslibs.timezones cimport utc_pytz as UTC from pandas._libs.tslibs.tzconversion cimport tz_convert_single from .dtypes cimport PeriodDtypeCode @@ -263,7 +262,7 @@ cdef _to_dt64D(dt): # equiv `Timestamp(dt).value` or `dt.timestamp() * 10**9` nanos = getattr(dt, "nanosecond", 0) i8 = convert_datetime_to_tsobject(dt, tz=None, nanos=nanos).value - dt = tz_convert_single(i8, UTC, dt.tzinfo) + dt = tz_convert_single(i8, dt.tzinfo) dt = np.int64(dt).astype('datetime64[ns]') else: dt = np.datetime64(dt) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index a2dacd9d36b14..4fa851bc4baea 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -1309,7 +1309,7 @@ default 'raise' else: if tz is None: # reset tz - value = tz_convert_single(self.value, UTC, self.tz) + value = tz_convert_single(self.value, self.tz) return Timestamp(value, tz=tz, freq=self.freq) else: raise TypeError( @@ -1391,7 +1391,7 @@ default 'raise' tzobj = self.tzinfo value = self.value if tzobj is not None: - value = tz_convert_single(value, UTC, tzobj) + value = tz_convert_single(value, tzobj) # setup components dt64_to_dtstruct(value, &dts) diff --git a/pandas/_libs/tslibs/tzconversion.pxd b/pandas/_libs/tslibs/tzconversion.pxd index 7d102868256de..6934300457e88 100644 --- a/pandas/_libs/tslibs/tzconversion.pxd +++ b/pandas/_libs/tslibs/tzconversion.pxd @@ -3,7 +3,7 @@ from numpy cimport int64_t cdef int64_t tz_convert_utc_to_tzlocal(int64_t utc_val, tzinfo tz, bint* fold=*) -cpdef int64_t tz_convert_single(int64_t val, tzinfo tz1, tzinfo tz2) +cpdef int64_t tz_convert_single(int64_t val, tzinfo tz) cdef int64_t tz_localize_to_utc_single( int64_t val, tzinfo tz, object ambiguous=*, object nonexistent=* ) except? 
-1 diff --git a/pandas/_libs/tslibs/tzconversion.pyx b/pandas/_libs/tslibs/tzconversion.pyx index 4aee6b1a59a89..e7dff3f71ac3f 100644 --- a/pandas/_libs/tslibs/tzconversion.pyx +++ b/pandas/_libs/tslibs/tzconversion.pyx @@ -366,17 +366,16 @@ cdef int64_t tz_convert_utc_to_tzlocal(int64_t utc_val, tzinfo tz, bint* fold=NU return _tz_convert_tzlocal_utc(utc_val, tz, to_utc=False, fold=fold) -cpdef int64_t tz_convert_single(int64_t val, tzinfo tz1, tzinfo tz2): +cpdef int64_t tz_convert_single(int64_t val, tzinfo tz): """ - Convert the val (in i8) from timezone1 to timezone2 + Convert the val (in i8) from UTC to tz This is a single timezone version of tz_convert Parameters ---------- val : int64 - tz1 : tzinfo - tz2 : tzinfo + tz : tzinfo Returns ------- @@ -384,16 +383,10 @@ cpdef int64_t tz_convert_single(int64_t val, tzinfo tz1, tzinfo tz2): """ cdef: int64_t arr[1] - tzinfo tz - - # See GH#17734 We should always be converting either from UTC or to UTC - assert is_utc(tz1) if val == NPY_NAT: return val - tz = tz2 - if is_utc(tz): return val elif is_tzlocal(tz): @@ -403,15 +396,14 @@ cpdef int64_t tz_convert_single(int64_t val, tzinfo tz1, tzinfo tz2): return _tz_convert_dst(arr, tz)[0] -def tz_convert(int64_t[:] vals, tzinfo tz1, tzinfo tz2): +def tz_convert(int64_t[:] vals, tzinfo tz): """ - Convert the values (in i8) from timezone1 to timezone2 + Convert the values (in i8) from UTC to tz Parameters ---------- vals : int64 ndarray - tz1 : tzinfo - tz2 : tzinfo + tz : tzinfo Returns ------- @@ -419,16 +411,10 @@ def tz_convert(int64_t[:] vals, tzinfo tz1, tzinfo tz2): """ cdef: int64_t[:] converted - tzinfo tz - - # See GH#17734 We should always be converting from UTC; otherwise - # should use tz_localize_to_utc. - assert is_utc(tz1) if len(vals) == 0: return np.array([], dtype=np.int64) - tz = tz2 converted = _tz_convert_one_way(vals, tz) return np.array(converted, dtype=np.int64) @@ -442,7 +428,7 @@ cdef int64_t[:] _tz_convert_one_way(int64_t[:] vals, tzinfo tz): Parameters ---------- vals : int64 ndarray - tz1 : tzinfo + tz : tzinfo Returns ------- diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index fcfbaa4ac2a1c..8c8082cb8a6b8 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -724,7 +724,7 @@ def _local_timestamps(self): This is used to calculate time-of-day information as if the timestamps were timezone-naive. 
""" - return tzconversion.tz_convert(self.asi8, timezones.UTC, self.tz) + return tzconversion.tz_convert(self.asi8, self.tz) def tz_convert(self, tz): """ @@ -956,7 +956,7 @@ def tz_localize(self, tz, ambiguous="raise", nonexistent="raise"): if self.tz is not None: if tz is None: - new_dates = tzconversion.tz_convert(self.asi8, timezones.UTC, self.tz) + new_dates = tzconversion.tz_convert(self.asi8, self.tz) else: raise TypeError("Already tz-aware, use tz_convert to convert.") else: diff --git a/pandas/tests/tslibs/test_conversion.py b/pandas/tests/tslibs/test_conversion.py index 5a16fea47e90d..5a659dbdd35cc 100644 --- a/pandas/tests/tslibs/test_conversion.py +++ b/pandas/tests/tslibs/test_conversion.py @@ -12,9 +12,9 @@ def _compare_utc_to_local(tz_didx): def f(x): - return tzconversion.tz_convert_single(x, UTC, tz_didx.tz) + return tzconversion.tz_convert_single(x, tz_didx.tz) - result = tzconversion.tz_convert(tz_didx.asi8, UTC, tz_didx.tz) + result = tzconversion.tz_convert(tz_didx.asi8, tz_didx.tz) expected = np.vectorize(f)(tz_didx.asi8) tm.assert_numpy_array_equal(result, expected) @@ -22,9 +22,6 @@ def f(x): def _compare_local_to_utc(tz_didx, naive_didx): # Check that tz_localize behaves the same vectorized and pointwise. - def f(x): - return tzconversion.tz_convert_single(x, tz_didx.tz, UTC) - err1 = err2 = None try: result = tzconversion.tz_localize_to_utc(naive_didx.asi8, tz_didx.tz) @@ -71,7 +68,7 @@ def test_tz_convert_single_matches_tz_convert(tz_aware_fixture, freq): ], ) def test_tz_convert_corner(arr): - result = tzconversion.tz_convert(arr, UTC, timezones.maybe_get_tz("Asia/Tokyo")) + result = tzconversion.tz_convert(arr, timezones.maybe_get_tz("Asia/Tokyo")) tm.assert_numpy_array_equal(result, arr) diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index f94c8ef6550a5..f125d81dd87ac 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -21,7 +21,6 @@ ) from pandas._libs.tslibs.parsing import get_rule_month from pandas._libs.tslibs.resolution import month_position_check -from pandas._libs.tslibs.timezones import UTC from pandas.util._decorators import cache_readonly from pandas.core.dtypes.common import ( @@ -198,7 +197,7 @@ def __init__(self, index, warn: bool = True): # the timezone so they are in local time if hasattr(index, "tz"): if index.tz is not None: - self.i8values = tzconversion.tz_convert(self.i8values, UTC, index.tz) + self.i8values = tzconversion.tz_convert(self.i8values, index.tz) self.warn = warn From 6c171354dcaf4dfd38f793d990cb0eaa118a78fc Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 7 Jul 2020 09:44:07 -0700 Subject: [PATCH 18/24] fix docstring --- pandas/_libs/tslibs/tzconversion.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/tzconversion.pyx b/pandas/_libs/tslibs/tzconversion.pyx index e7dff3f71ac3f..36161e4cc5019 100644 --- a/pandas/_libs/tslibs/tzconversion.pyx +++ b/pandas/_libs/tslibs/tzconversion.pyx @@ -370,7 +370,7 @@ cpdef int64_t tz_convert_single(int64_t val, tzinfo tz): """ Convert the val (in i8) from UTC to tz - This is a single timezone version of tz_convert + This is a single value version of tz_convert. 
Parameters ---------- From 1855e7fb5c86b857eb6ada8e9fa3334236a3f7e7 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 8 Jul 2020 09:44:04 -0700 Subject: [PATCH 19/24] rename --- pandas/_libs/tslibs/__init__.py | 4 ++-- pandas/_libs/tslibs/offsets.pyx | 4 ++-- pandas/_libs/tslibs/timestamps.pyx | 6 +++--- pandas/_libs/tslibs/tzconversion.pxd | 2 +- pandas/_libs/tslibs/tzconversion.pyx | 8 ++++---- pandas/core/arrays/datetimes.py | 4 ++-- pandas/tests/scalar/timestamp/test_timezones.py | 2 +- pandas/tests/tslibs/test_api.py | 2 +- pandas/tests/tslibs/test_conversion.py | 6 +++--- pandas/tseries/frequencies.py | 4 +++- 10 files changed, 22 insertions(+), 20 deletions(-) diff --git a/pandas/_libs/tslibs/__init__.py b/pandas/_libs/tslibs/__init__.py index 76e356370de70..a758d79923035 100644 --- a/pandas/_libs/tslibs/__init__.py +++ b/pandas/_libs/tslibs/__init__.py @@ -14,7 +14,7 @@ "delta_to_nanoseconds", "ints_to_pytimedelta", "Timestamp", - "tz_convert_single", + "tz_convert_single_from_utc", "to_offset", "Tick", "BaseOffset", @@ -29,4 +29,4 @@ from .resolution import Resolution from .timedeltas import Timedelta, delta_to_nanoseconds, ints_to_pytimedelta from .timestamps import Timestamp -from .tzconversion import tz_convert_single +from .tzconversion import tz_convert_single_from_utc diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index ff3b8c4c079be..841d32b0d6d3e 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -47,7 +47,7 @@ from pandas._libs.tslibs.np_datetime cimport ( dt64_to_dtstruct, pydate_to_dtstruct, ) -from pandas._libs.tslibs.tzconversion cimport tz_convert_single +from pandas._libs.tslibs.tzconversion cimport tz_convert_single_from_utc from .dtypes cimport PeriodDtypeCode from .timedeltas cimport delta_to_nanoseconds @@ -262,7 +262,7 @@ cdef _to_dt64D(dt): # equiv `Timestamp(dt).value` or `dt.timestamp() * 10**9` nanos = getattr(dt, "nanosecond", 0) i8 = convert_datetime_to_tsobject(dt, tz=None, nanos=nanos).value - dt = tz_convert_single(i8, dt.tzinfo) + dt = tz_convert_single_from_utc(i8, dt.tzinfo) dt = np.int64(dt).astype('datetime64[ns]') else: dt = np.datetime64(dt) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 4fa851bc4baea..c634179fd7e66 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -59,7 +59,7 @@ from pandas._libs.tslibs.timezones cimport ( get_timezone, tz_compare, ) from pandas._libs.tslibs.tzconversion cimport ( - tz_convert_single, + tz_convert_single_from_utc, tz_localize_to_utc_single, ) @@ -1309,7 +1309,7 @@ default 'raise' else: if tz is None: # reset tz - value = tz_convert_single(self.value, self.tz) + value = tz_convert_single_from_utc(self.value, self.tz) return Timestamp(value, tz=tz, freq=self.freq) else: raise TypeError( @@ -1391,7 +1391,7 @@ default 'raise' tzobj = self.tzinfo value = self.value if tzobj is not None: - value = tz_convert_single(value, tzobj) + value = tz_convert_single_from_utc(value, tzobj) # setup components dt64_to_dtstruct(value, &dts) diff --git a/pandas/_libs/tslibs/tzconversion.pxd b/pandas/_libs/tslibs/tzconversion.pxd index 6934300457e88..fbcd139fa2f49 100644 --- a/pandas/_libs/tslibs/tzconversion.pxd +++ b/pandas/_libs/tslibs/tzconversion.pxd @@ -3,7 +3,7 @@ from numpy cimport int64_t cdef int64_t tz_convert_utc_to_tzlocal(int64_t utc_val, tzinfo tz, bint* fold=*) -cpdef int64_t tz_convert_single(int64_t val, tzinfo tz) +cpdef int64_t tz_convert_single_from_utc(int64_t 
val, tzinfo tz) cdef int64_t tz_localize_to_utc_single( int64_t val, tzinfo tz, object ambiguous=*, object nonexistent=* ) except? -1 diff --git a/pandas/_libs/tslibs/tzconversion.pyx b/pandas/_libs/tslibs/tzconversion.pyx index 36161e4cc5019..797e2b21585e5 100644 --- a/pandas/_libs/tslibs/tzconversion.pyx +++ b/pandas/_libs/tslibs/tzconversion.pyx @@ -366,7 +366,7 @@ cdef int64_t tz_convert_utc_to_tzlocal(int64_t utc_val, tzinfo tz, bint* fold=NU return _tz_convert_tzlocal_utc(utc_val, tz, to_utc=False, fold=fold) -cpdef int64_t tz_convert_single(int64_t val, tzinfo tz): +cpdef int64_t tz_convert_single_from_utc(int64_t val, tzinfo tz): """ Convert the val (in i8) from UTC to tz @@ -396,7 +396,7 @@ cpdef int64_t tz_convert_single(int64_t val, tzinfo tz): return _tz_convert_dst(arr, tz)[0] -def tz_convert(int64_t[:] vals, tzinfo tz): +def tz_convert_from_utc(int64_t[:] vals, tzinfo tz): """ Convert the values (in i8) from UTC to tz @@ -415,13 +415,13 @@ def tz_convert(int64_t[:] vals, tzinfo tz): if len(vals) == 0: return np.array([], dtype=np.int64) - converted = _tz_convert_one_way(vals, tz) + converted = _tz_convert_from_utc(vals, tz) return np.array(converted, dtype=np.int64) @cython.boundscheck(False) @cython.wraparound(False) -cdef int64_t[:] _tz_convert_one_way(int64_t[:] vals, tzinfo tz): +cdef int64_t[:] _tz_convert_from_utc(int64_t[:] vals, tzinfo tz): """ Convert the given values (in i8) either to UTC or from UTC. diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 8c8082cb8a6b8..26f476ce1426a 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -724,7 +724,7 @@ def _local_timestamps(self): This is used to calculate time-of-day information as if the timestamps were timezone-naive. 
""" - return tzconversion.tz_convert(self.asi8, self.tz) + return tzconversion.tz_convert_from_utc(self.asi8, self.tz) def tz_convert(self, tz): """ @@ -956,7 +956,7 @@ def tz_localize(self, tz, ambiguous="raise", nonexistent="raise"): if self.tz is not None: if tz is None: - new_dates = tzconversion.tz_convert(self.asi8, self.tz) + new_dates = tzconversion.tz_convert_from_utc(self.asi8, self.tz) else: raise TypeError("Already tz-aware, use tz_convert to convert.") else: diff --git a/pandas/tests/scalar/timestamp/test_timezones.py b/pandas/tests/scalar/timestamp/test_timezones.py index 9611c827be6fe..f65e699634ce5 100644 --- a/pandas/tests/scalar/timestamp/test_timezones.py +++ b/pandas/tests/scalar/timestamp/test_timezones.py @@ -334,7 +334,7 @@ def test_timestamp_to_datetime_tzoffset(self): def test_timestamp_constructor_near_dst_boundary(self): # GH#11481 & GH#15777 # Naive string timestamps were being localized incorrectly - # with tz_convert_single instead of tz_localize_to_utc + # with tz_convert_single_from_utc instead of tz_localize_to_utc for tz in ["Europe/Brussels", "Europe/Prague"]: result = Timestamp("2015-10-25 01:00", tz=tz) diff --git a/pandas/tests/tslibs/test_api.py b/pandas/tests/tslibs/test_api.py index 840a8c2fb68b1..8c314c74d4cf2 100644 --- a/pandas/tests/tslibs/test_api.py +++ b/pandas/tests/tslibs/test_api.py @@ -41,7 +41,7 @@ def test_namespace(): "delta_to_nanoseconds", "ints_to_pytimedelta", "localize_pydatetime", - "tz_convert_single", + "tz_convert_single_from_utc", "to_offset", ] diff --git a/pandas/tests/tslibs/test_conversion.py b/pandas/tests/tslibs/test_conversion.py index 5a659dbdd35cc..b35940c6bb95b 100644 --- a/pandas/tests/tslibs/test_conversion.py +++ b/pandas/tests/tslibs/test_conversion.py @@ -12,9 +12,9 @@ def _compare_utc_to_local(tz_didx): def f(x): - return tzconversion.tz_convert_single(x, tz_didx.tz) + return tzconversion.tz_convert_from_utc_single(x, tz_didx.tz) - result = tzconversion.tz_convert(tz_didx.asi8, tz_didx.tz) + result = tzconversion.tz_convert_from_utc(tz_didx.asi8, tz_didx.tz) expected = np.vectorize(f)(tz_didx.asi8) tm.assert_numpy_array_equal(result, expected) @@ -68,7 +68,7 @@ def test_tz_convert_single_matches_tz_convert(tz_aware_fixture, freq): ], ) def test_tz_convert_corner(arr): - result = tzconversion.tz_convert(arr, timezones.maybe_get_tz("Asia/Tokyo")) + result = tzconversion.tz_convert_from_utc(arr, timezones.maybe_get_tz("Asia/Tokyo")) tm.assert_numpy_array_equal(result, arr) diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index f125d81dd87ac..23e08c7550646 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -197,7 +197,9 @@ def __init__(self, index, warn: bool = True): # the timezone so they are in local time if hasattr(index, "tz"): if index.tz is not None: - self.i8values = tzconversion.tz_convert(self.i8values, index.tz) + self.i8values = tzconversion.tz_convert_from_utc( + self.i8values, index.tz + ) self.warn = warn From b928b543510bdb909f267747e99dc6d5feac22ea Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 8 Jul 2020 10:12:26 -0700 Subject: [PATCH 20/24] rename typo fixup --- pandas/_libs/tslibs/__init__.py | 4 ++-- pandas/_libs/tslibs/offsets.pyx | 4 ++-- pandas/_libs/tslibs/timestamps.pyx | 6 +++--- pandas/_libs/tslibs/tzconversion.pxd | 2 +- pandas/_libs/tslibs/tzconversion.pyx | 2 +- pandas/tests/scalar/timestamp/test_timezones.py | 2 +- pandas/tests/tslibs/test_api.py | 2 +- 7 files changed, 11 insertions(+), 11 deletions(-) diff --git 
a/pandas/_libs/tslibs/__init__.py b/pandas/_libs/tslibs/__init__.py index a758d79923035..188136a407665 100644 --- a/pandas/_libs/tslibs/__init__.py +++ b/pandas/_libs/tslibs/__init__.py @@ -14,7 +14,7 @@ "delta_to_nanoseconds", "ints_to_pytimedelta", "Timestamp", - "tz_convert_single_from_utc", + "tz_convert_from_utc_single", "to_offset", "Tick", "BaseOffset", @@ -29,4 +29,4 @@ from .resolution import Resolution from .timedeltas import Timedelta, delta_to_nanoseconds, ints_to_pytimedelta from .timestamps import Timestamp -from .tzconversion import tz_convert_single_from_utc +from .tzconversion import tz_convert_from_utc_single diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index 841d32b0d6d3e..91066881d6915 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -47,7 +47,7 @@ from pandas._libs.tslibs.np_datetime cimport ( dt64_to_dtstruct, pydate_to_dtstruct, ) -from pandas._libs.tslibs.tzconversion cimport tz_convert_single_from_utc +from pandas._libs.tslibs.tzconversion cimport tz_convert_from_utc_single from .dtypes cimport PeriodDtypeCode from .timedeltas cimport delta_to_nanoseconds @@ -262,7 +262,7 @@ cdef _to_dt64D(dt): # equiv `Timestamp(dt).value` or `dt.timestamp() * 10**9` nanos = getattr(dt, "nanosecond", 0) i8 = convert_datetime_to_tsobject(dt, tz=None, nanos=nanos).value - dt = tz_convert_single_from_utc(i8, dt.tzinfo) + dt = tz_convert_from_utc_single(i8, dt.tzinfo) dt = np.int64(dt).astype('datetime64[ns]') else: dt = np.datetime64(dt) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index c634179fd7e66..8cef685933863 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -59,7 +59,7 @@ from pandas._libs.tslibs.timezones cimport ( get_timezone, tz_compare, ) from pandas._libs.tslibs.tzconversion cimport ( - tz_convert_single_from_utc, + tz_convert_from_utc_single, tz_localize_to_utc_single, ) @@ -1309,7 +1309,7 @@ default 'raise' else: if tz is None: # reset tz - value = tz_convert_single_from_utc(self.value, self.tz) + value = tz_convert_from_utc_single(self.value, self.tz) return Timestamp(value, tz=tz, freq=self.freq) else: raise TypeError( @@ -1391,7 +1391,7 @@ default 'raise' tzobj = self.tzinfo value = self.value if tzobj is not None: - value = tz_convert_single_from_utc(value, tzobj) + value = tz_convert_from_utc_single(value, tzobj) # setup components dt64_to_dtstruct(value, &dts) diff --git a/pandas/_libs/tslibs/tzconversion.pxd b/pandas/_libs/tslibs/tzconversion.pxd index fbcd139fa2f49..1990afd77a8fb 100644 --- a/pandas/_libs/tslibs/tzconversion.pxd +++ b/pandas/_libs/tslibs/tzconversion.pxd @@ -3,7 +3,7 @@ from numpy cimport int64_t cdef int64_t tz_convert_utc_to_tzlocal(int64_t utc_val, tzinfo tz, bint* fold=*) -cpdef int64_t tz_convert_single_from_utc(int64_t val, tzinfo tz) +cpdef int64_t tz_convert_from_utc_single(int64_t val, tzinfo tz) cdef int64_t tz_localize_to_utc_single( int64_t val, tzinfo tz, object ambiguous=*, object nonexistent=* ) except? 
-1 diff --git a/pandas/_libs/tslibs/tzconversion.pyx b/pandas/_libs/tslibs/tzconversion.pyx index 797e2b21585e5..9f4f848f7099d 100644 --- a/pandas/_libs/tslibs/tzconversion.pyx +++ b/pandas/_libs/tslibs/tzconversion.pyx @@ -366,7 +366,7 @@ cdef int64_t tz_convert_utc_to_tzlocal(int64_t utc_val, tzinfo tz, bint* fold=NU return _tz_convert_tzlocal_utc(utc_val, tz, to_utc=False, fold=fold) -cpdef int64_t tz_convert_single_from_utc(int64_t val, tzinfo tz): +cpdef int64_t tz_convert_from_utc_single(int64_t val, tzinfo tz): """ Convert the val (in i8) from UTC to tz diff --git a/pandas/tests/scalar/timestamp/test_timezones.py b/pandas/tests/scalar/timestamp/test_timezones.py index f65e699634ce5..83764aa184392 100644 --- a/pandas/tests/scalar/timestamp/test_timezones.py +++ b/pandas/tests/scalar/timestamp/test_timezones.py @@ -334,7 +334,7 @@ def test_timestamp_to_datetime_tzoffset(self): def test_timestamp_constructor_near_dst_boundary(self): # GH#11481 & GH#15777 # Naive string timestamps were being localized incorrectly - # with tz_convert_single_from_utc instead of tz_localize_to_utc + # with tz_convert_from_utc_single instead of tz_localize_to_utc for tz in ["Europe/Brussels", "Europe/Prague"]: result = Timestamp("2015-10-25 01:00", tz=tz) diff --git a/pandas/tests/tslibs/test_api.py b/pandas/tests/tslibs/test_api.py index 8c314c74d4cf2..1bf59323fc085 100644 --- a/pandas/tests/tslibs/test_api.py +++ b/pandas/tests/tslibs/test_api.py @@ -41,7 +41,7 @@ def test_namespace(): "delta_to_nanoseconds", "ints_to_pytimedelta", "localize_pydatetime", - "tz_convert_single_from_utc", + "tz_convert_from_utc_single", "to_offset", ] From 4305c90202de11cbea05128ecc137d8d9ccb4c3e Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 8 Jul 2020 10:35:28 -0700 Subject: [PATCH 21/24] update import --- asv_bench/benchmarks/tslibs/tz_convert.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/asv_bench/benchmarks/tslibs/tz_convert.py b/asv_bench/benchmarks/tslibs/tz_convert.py index 2a1f559bdf6d4..50bb80adc080f 100644 --- a/asv_bench/benchmarks/tslibs/tz_convert.py +++ b/asv_bench/benchmarks/tslibs/tz_convert.py @@ -1,7 +1,7 @@ import numpy as np from pytz import UTC -from pandas._libs.tslibs.tzconversion import tz_convert, tz_localize_to_utc +from pandas._libs.tslibs.tzconversion import tz_convert_from_utc, tz_localize_to_utc from .tslib import _sizes, _tzs @@ -21,7 +21,7 @@ def time_tz_convert_from_utc(self, size, tz): # effectively: # dti = DatetimeIndex(self.i8data, tz=tz) # dti.tz_localize(None) - tz_convert(self.i8data, UTC, tz) + tz_convert_from_utc(self.i8data, UTC, tz) def time_tz_localize_to_utc(self, size, tz): # effectively: From 37a57962deba31417185f7457190bac73df3ac54 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 8 Jul 2020 11:38:14 -0700 Subject: [PATCH 22/24] asv backcompat --- asv_bench/benchmarks/tslibs/tz_convert.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/asv_bench/benchmarks/tslibs/tz_convert.py b/asv_bench/benchmarks/tslibs/tz_convert.py index 50bb80adc080f..f9bac6055c4a5 100644 --- a/asv_bench/benchmarks/tslibs/tz_convert.py +++ b/asv_bench/benchmarks/tslibs/tz_convert.py @@ -1,10 +1,15 @@ import numpy as np from pytz import UTC -from pandas._libs.tslibs.tzconversion import tz_convert_from_utc, tz_localize_to_utc +from pandas._libs.tslibs.tzconversion import tz_localize_to_utc from .tslib import _sizes, _tzs +try: + from pandas._libs.tslibs.tzconversion import tz_convert_from_utc +except ImportError: + from pandas._libs.tslibs.tzconversion 
import tz_convert as tz_convert_from_utc + class TimeTZConvert: params = ( From 4757798cb0c32d3b85390ea22feac6dbadb797ad Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 8 Jul 2020 13:06:35 -0700 Subject: [PATCH 23/24] dummy to force CI From 99e29af596b5ea152dde1e90d941d73fb8180f68 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 8 Jul 2020 17:35:57 -0700 Subject: [PATCH 24/24] update benchmark --- asv_bench/benchmarks/tslibs/tz_convert.py | 10 +++++++++- pandas/_libs/tslibs/tzconversion.pyx | 2 +- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/asv_bench/benchmarks/tslibs/tz_convert.py b/asv_bench/benchmarks/tslibs/tz_convert.py index 1a73a614c6455..c2c90024ca5bd 100644 --- a/asv_bench/benchmarks/tslibs/tz_convert.py +++ b/asv_bench/benchmarks/tslibs/tz_convert.py @@ -6,8 +6,10 @@ from .tslib import _sizes, _tzs try: + old_sig = False from pandas._libs.tslibs.tzconversion import tz_convert_from_utc except ImportError: + old_sig = True from pandas._libs.tslibs.tzconversion import tz_convert as tz_convert_from_utc @@ -26,7 +28,13 @@ def time_tz_convert_from_utc(self, size, tz): # effectively: # dti = DatetimeIndex(self.i8data, tz=tz) # dti.tz_localize(None) - tz_convert_from_utc(self.i8data, UTC, tz) + if size >= 10 ** 6 and str(tz) == "tzlocal()": + # asv fill will because each call takes 8+seconds + return + if old_sig: + tz_convert_from_utc(self.i8data, UTC, tz) + else: + tz_convert_from_utc(self.i8data, tz) def time_tz_localize_to_utc(self, size, tz): # effectively: diff --git a/pandas/_libs/tslibs/tzconversion.pyx b/pandas/_libs/tslibs/tzconversion.pyx index 9f4f848f7099d..a6afd47d93479 100644 --- a/pandas/_libs/tslibs/tzconversion.pyx +++ b/pandas/_libs/tslibs/tzconversion.pyx @@ -370,7 +370,7 @@ cpdef int64_t tz_convert_from_utc_single(int64_t val, tzinfo tz): """ Convert the val (in i8) from UTC to tz - This is a single value version of tz_convert. + This is a single value version of tz_convert_from_utc. Parameters ----------
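
A minimal usage sketch of the helpers as they stand at the end of this series (patch 24): the two-timezone tz_convert / tz_convert_single become the one-way tz_convert_from_utc / tz_convert_from_utc_single, while the wall-time-to-UTC direction goes through tz_localize_to_utc / tz_localize_to_utc_single. The module paths are pandas-internal (_libs.tslibs) and the example values below are illustrative, not taken from the patches.

import numpy as np
import pytz

from pandas._libs.tslibs import tz_convert_from_utc_single
from pandas._libs.tslibs.tzconversion import tz_convert_from_utc, tz_localize_to_utc

tz = pytz.timezone("US/Eastern")

# i8 nanoseconds since the epoch, interpreted as UTC
utc_vals = np.array([1_593_740_000_000_000_000], dtype="i8")

# UTC -> local wall time; the vectorized and scalar paths should agree
local_vals = tz_convert_from_utc(utc_vals, tz)
assert local_vals[0] == tz_convert_from_utc_single(utc_vals[0], tz)

# local wall time -> UTC now goes through tz_localize_to_utc, not tz_convert
roundtrip = tz_localize_to_utc(local_vals, tz, ambiguous="raise", nonexistent="raise")
assert (roundtrip == utc_vals).all()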