From 434b102af08fa3e4555c65323b81af385b23c8a7 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 17 Jun 2019 17:31:49 -0700 Subject: [PATCH 1/9] cleanup ported from different branch --- pandas/core/groupby/ops.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 38478be5a8e07..dd44bc6990d59 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -475,7 +475,8 @@ def _cython_operation(self, kind, values, how, axis, min_count=-1, else: if axis > 0: swapped = True - values = values.swapaxes(0, axis) + assert axis == 1, axis + values = values.T if arity > 1: raise NotImplementedError("arity of more than 1 is not " "supported for the 'how' argument") From 2e8a79a973e056d05bd0b042978bc68d48636b76 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 17 Jun 2019 20:31:52 -0700 Subject: [PATCH 2/9] Catch less inside try/except --- pandas/core/internals/blocks.py | 32 ++++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 652f70746f618..a131509a4ed10 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -722,16 +722,28 @@ def replace(self, to_replace, value, inplace=False, filter=None, try: values, to_replace = self._try_coerce_args(self.values, to_replace) - mask = missing.mask_missing(values, to_replace) - if filter is not None: - filtered_out = ~self.mgr_locs.isin(filter) - mask[filtered_out.nonzero()[0]] = False + except (TypeError, ValueError): + # GH 22083, TypeError or ValueError occurred within error handling + # causes infinite loop. Cast and retry only if not objectblock. + if is_object_dtype(self): + raise + + # try again with a compatible block + block = self.astype(object) + return block.replace(to_replace=original_to_replace, + value=value, + inplace=inplace, + filter=filter, + regex=regex, + convert=convert) + + mask = missing.mask_missing(values, to_replace) + if filter is not None: + filtered_out = ~self.mgr_locs.isin(filter) + mask[filtered_out.nonzero()[0]] = False + try: blocks = self.putmask(mask, value, inplace=inplace) - if convert: - blocks = [b.convert(by_item=True, numeric=False, - copy=not inplace) for b in blocks] - return blocks except (TypeError, ValueError): # GH 22083, TypeError or ValueError occurred within error handling # causes infinite loop. Cast and retry only if not objectblock. @@ -746,6 +758,10 @@ def replace(self, to_replace, value, inplace=False, filter=None, filter=filter, regex=regex, convert=convert) + if convert: + blocks = [b.convert(by_item=True, numeric=False, + copy=not inplace) for b in blocks] + return blocks def _replace_single(self, *args, **kwargs): """ no-op on a non-ObjectBlock """ From 5443217ae2e38e22668a22897ed8e32651fa930b Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 18 Jun 2019 09:07:56 -0700 Subject: [PATCH 3/9] remove no longer needed path --- pandas/core/internals/blocks.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index a131509a4ed10..925354f631c06 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -3061,11 +3061,6 @@ def make_block(values, placement, klass=None, ndim=None, dtype=None, dtype = dtype or values.dtype klass = get_block_type(values, dtype) - elif klass is DatetimeTZBlock and not is_datetime64tz_dtype(values): - # TODO: This is no longer hit internally; does it need to be retained - # for e.g. pyarrow? - values = DatetimeArray._simple_new(values, dtype=dtype) - return klass(values, ndim=ndim, placement=placement) From 930c85a8bfea265f902f43287a96f28790b8bbc8 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 20 Jun 2019 15:44:22 -0700 Subject: [PATCH 4/9] move _items_overlap_with_suffix --- pandas/core/internals/__init__.py | 2 +- pandas/core/internals/managers.py | 42 ------------------------ pandas/core/reshape/merge.py | 53 +++++++++++++++++++++++++++---- 3 files changed, 48 insertions(+), 49 deletions(-) diff --git a/pandas/core/internals/__init__.py b/pandas/core/internals/__init__.py index d24dd2edd4e1d..174319a40f12a 100644 --- a/pandas/core/internals/__init__.py +++ b/pandas/core/internals/__init__.py @@ -8,5 +8,5 @@ from .managers import ( # noqa:F401 BlockManager, SingleBlockManager, create_block_manager_from_arrays, create_block_manager_from_blocks, - items_overlap_with_suffix, # reshape.merge + _transform_index, # reshape.merge concatenate_block_managers) # reshape.concat, reshape.merge diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index aff39d765dc95..5494b75ff9e4e 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1859,48 +1859,6 @@ def _compare_or_regex_search(a, b, regex=False): return result -# TODO: this is no longer used in this module, could be moved to concat -def items_overlap_with_suffix(left, lsuffix, right, rsuffix): - """ - If two indices overlap, add suffixes to overlapping entries. - - If corresponding suffix is empty, the entry is simply converted to string. - - """ - to_rename = left.intersection(right) - if len(to_rename) == 0: - return left, right - else: - if not lsuffix and not rsuffix: - raise ValueError('columns overlap but no suffix specified: ' - '{rename}'.format(rename=to_rename)) - - def renamer(x, suffix): - """Rename the left and right indices. - - If there is overlap, and suffix is not None, add - suffix, otherwise, leave it as-is. - - Parameters - ---------- - x : original column name - suffix : str or None - - Returns - ------- - x : renamed column name - """ - if x in to_rename and suffix is not None: - return '{x}{suffix}'.format(x=x, suffix=suffix) - return x - - lrenamer = partial(renamer, suffix=lsuffix) - rrenamer = partial(renamer, suffix=rsuffix) - - return (_transform_index(left, lrenamer), - _transform_index(right, rrenamer)) - - def _transform_index(index, func, level=None): """ Apply function to all values found in index. diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index d21ad58e752c2..42d90773e124d 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -3,6 +3,7 @@ """ import copy +from functools import partial import string import warnings @@ -27,8 +28,7 @@ from pandas.core.arrays.categorical import _recode_for_categories import pandas.core.common as com from pandas.core.frame import _merge_doc -from pandas.core.internals import ( - concatenate_block_managers, items_overlap_with_suffix) +from pandas.core.internals import _transform_index, concatenate_block_managers import pandas.core.sorting as sorting from pandas.core.sorting import is_int64_overflow_possible @@ -555,8 +555,8 @@ def get_result(self): ldata, rdata = self.left._data, self.right._data lsuf, rsuf = self.suffixes - llabels, rlabels = items_overlap_with_suffix(ldata.items, lsuf, - rdata.items, rsuf) + llabels, rlabels = _items_overlap_with_suffix(ldata.items, lsuf, + rdata.items, rsuf) lindexers = {1: left_indexer} if left_indexer is not None else {} rindexers = {1: right_indexer} if right_indexer is not None else {} @@ -1303,8 +1303,8 @@ def get_result(self): ldata, rdata = self.left._data, self.right._data lsuf, rsuf = self.suffixes - llabels, rlabels = items_overlap_with_suffix(ldata.items, lsuf, - rdata.items, rsuf) + llabels, rlabels = _items_overlap_with_suffix(ldata.items, lsuf, + rdata.items, rsuf) if self.fill_method == 'ffill': left_join_indexer = libjoin.ffill_indexer(left_indexer) @@ -1809,3 +1809,44 @@ def validate_operand(obj): else: raise TypeError('Can only merge Series or DataFrame objects, ' 'a {obj} was passed'.format(obj=type(obj))) + + +def _items_overlap_with_suffix(left, lsuffix, right, rsuffix): + """ + If two indices overlap, add suffixes to overlapping entries. + + If corresponding suffix is empty, the entry is simply converted to string. + + """ + to_rename = left.intersection(right) + if len(to_rename) == 0: + return left, right + else: + if not lsuffix and not rsuffix: + raise ValueError('columns overlap but no suffix specified: ' + '{rename}'.format(rename=to_rename)) + + def renamer(x, suffix): + """Rename the left and right indices. + + If there is overlap, and suffix is not None, add + suffix, otherwise, leave it as-is. + + Parameters + ---------- + x : original column name + suffix : str or None + + Returns + ------- + x : renamed column name + """ + if x in to_rename and suffix is not None: + return '{x}{suffix}'.format(x=x, suffix=suffix) + return x + + lrenamer = partial(renamer, suffix=lsuffix) + rrenamer = partial(renamer, suffix=rsuffix) + + return (_transform_index(left, lrenamer), + _transform_index(right, rrenamer)) From 8c182581d8e451802e46370fb4755e4ac0acdbb7 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 20 Jun 2019 15:46:10 -0700 Subject: [PATCH 5/9] restore pyarrow compat code --- pandas/core/internals/blocks.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 925354f631c06..a131509a4ed10 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -3061,6 +3061,11 @@ def make_block(values, placement, klass=None, ndim=None, dtype=None, dtype = dtype or values.dtype klass = get_block_type(values, dtype) + elif klass is DatetimeTZBlock and not is_datetime64tz_dtype(values): + # TODO: This is no longer hit internally; does it need to be retained + # for e.g. pyarrow? + values = DatetimeArray._simple_new(values, dtype=dtype) + return klass(values, ndim=ndim, placement=placement) From 18a0c4dbc821515db929dce7f8e430d012f7aa7f Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 27 Jun 2019 14:14:29 -0500 Subject: [PATCH 6/9] mark tests that take 150s each --- pandas/tests/io/test_html.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/io/test_html.py b/pandas/tests/io/test_html.py index 9f9fcabbfe42c..33268b637d44a 100644 --- a/pandas/tests/io/test_html.py +++ b/pandas/tests/io/test_html.py @@ -262,6 +262,7 @@ def test_bad_url_protocol(self): self.read_html('git://github.com', match='.*Water.*') @network + @pytest.mark.slow def test_invalid_url(self): try: with pytest.raises(URLError): From c43fa0658217213a5a7f49e0c187abb4f90a8532 Mon Sep 17 00:00:00 2001 From: How Si Wei Date: Fri, 28 Jun 2019 22:10:20 +0800 Subject: [PATCH 7/9] ENH: Support multiple opening hours intervals for BusinessHour (#26628) --- doc/source/whatsnew/v0.25.0.rst | 1 + pandas/tests/tseries/offsets/test_offsets.py | 355 +++++++++++++++++-- pandas/tseries/offsets.py | 284 ++++++++++----- 3 files changed, 526 insertions(+), 114 deletions(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index f4dd94b7d918b..1fd0257d93f45 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -158,6 +158,7 @@ Other enhancements - :meth:`DataFrame.describe` now formats integer percentiles without decimal point (:issue:`26660`) - Added support for reading SPSS .sav files using :func:`read_spss` (:issue:`26537`) - Added new option ``plotting.backend`` to be able to select a plotting backend different than the existing ``matplotlib`` one. Use ``pandas.set_option('plotting.backend', '')`` where ``' assert repr(self.offset6) == '' assert repr(self.offset7) == '<-2 * BusinessHours: BH=21:30-06:30>' + assert (repr(self.offset8) == + '') + assert (repr(self.offset9) == + '<3 * BusinessHours: BH=09:00-13:00,22:00-03:00>') + assert (repr(self.offset10) == + '<-1 * BusinessHour: BH=13:00-17:00,23:00-02:00>') def test_with_offset(self): expected = Timestamp('2014-07-01 13:00') @@ -791,25 +848,59 @@ def test_with_offset(self): assert self.d + BusinessHour() * 3 == expected assert self.d + BusinessHour(n=3) == expected - def test_eq(self): - for offset in [self.offset1, self.offset2, self.offset3, self.offset4]: - assert offset == offset + @pytest.mark.parametrize("offset_name", [ + "offset1", + "offset2", + "offset3", + "offset4", + "offset8", + "offset9", + "offset10" + ]) + def test_eq_attribute(self, offset_name): + offset = getattr(self, offset_name) + assert offset == offset + + @pytest.mark.parametrize("offset1,offset2", [ + (BusinessHour(start='09:00'), BusinessHour()), + (BusinessHour(start=['23:00', '13:00'], end=['12:00', '17:00']), + BusinessHour(start=['13:00', '23:00'], end=['17:00', '12:00'])), + ]) + def test_eq(self, offset1, offset2): + assert offset1 == offset2 - assert BusinessHour() != BusinessHour(-1) - assert BusinessHour(start='09:00') == BusinessHour() - assert BusinessHour(start='09:00') != BusinessHour(start='09:01') - assert (BusinessHour(start='09:00', end='17:00') != - BusinessHour(start='17:00', end='09:01')) + @pytest.mark.parametrize("offset1,offset2", [ + (BusinessHour(), BusinessHour(-1)), + (BusinessHour(start='09:00'), BusinessHour(start='09:01')), + (BusinessHour(start='09:00', end='17:00'), + BusinessHour(start='17:00', end='09:01')), + (BusinessHour(start=['13:00', '23:00'], end=['18:00', '07:00']), + BusinessHour(start=['13:00', '23:00'], end=['17:00', '12:00'])), + ]) + def test_neq(self, offset1, offset2): + assert offset1 != offset2 - def test_hash(self): - for offset in [self.offset1, self.offset2, self.offset3, self.offset4]: - assert hash(offset) == hash(offset) + @pytest.mark.parametrize("offset_name", [ + "offset1", + "offset2", + "offset3", + "offset4", + "offset8", + "offset9", + "offset10" + ]) + def test_hash(self, offset_name): + offset = getattr(self, offset_name) + assert offset == offset def test_call(self): assert self.offset1(self.d) == datetime(2014, 7, 1, 11) assert self.offset2(self.d) == datetime(2014, 7, 1, 13) assert self.offset3(self.d) == datetime(2014, 6, 30, 17) assert self.offset4(self.d) == datetime(2014, 6, 30, 14) + assert self.offset8(self.d) == datetime(2014, 7, 1, 11) + assert self.offset9(self.d) == datetime(2014, 7, 1, 22) + assert self.offset10(self.d) == datetime(2014, 7, 1, 1) def test_sub(self): # we have to override test_sub here because self.offset2 is not @@ -830,6 +921,9 @@ def testRollback1(self): assert self.offset5.rollback(self.d) == datetime(2014, 6, 30, 14, 30) assert self.offset6.rollback(self.d) == datetime(2014, 7, 1, 5, 0) assert self.offset7.rollback(self.d) == datetime(2014, 7, 1, 6, 30) + assert self.offset8.rollback(self.d) == self.d + assert self.offset9.rollback(self.d) == self.d + assert self.offset10.rollback(self.d) == datetime(2014, 7, 1, 2) d = datetime(2014, 7, 1, 0) assert self.offset1.rollback(d) == datetime(2014, 6, 30, 17) @@ -839,6 +933,9 @@ def testRollback1(self): assert self.offset5.rollback(d) == datetime(2014, 6, 30, 14, 30) assert self.offset6.rollback(d) == d assert self.offset7.rollback(d) == d + assert self.offset8.rollback(d) == datetime(2014, 6, 30, 17) + assert self.offset9.rollback(d) == d + assert self.offset10.rollback(d) == d assert self._offset(5).rollback(self.d) == self.d @@ -857,6 +954,9 @@ def testRollforward1(self): datetime(2014, 7, 1, 20, 0)) assert (self.offset7.rollforward(self.d) == datetime(2014, 7, 1, 21, 30)) + assert self.offset8.rollforward(self.d) == self.d + assert self.offset9.rollforward(self.d) == self.d + assert self.offset10.rollforward(self.d) == datetime(2014, 7, 1, 13) d = datetime(2014, 7, 1, 0) assert self.offset1.rollforward(d) == datetime(2014, 7, 1, 9) @@ -866,6 +966,9 @@ def testRollforward1(self): assert self.offset5.rollforward(d) == datetime(2014, 7, 1, 11) assert self.offset6.rollforward(d) == d assert self.offset7.rollforward(d) == d + assert self.offset8.rollforward(d) == datetime(2014, 7, 1, 9) + assert self.offset9.rollforward(d) == d + assert self.offset10.rollforward(d) == d assert self._offset(5).rollforward(self.d) == self.d @@ -960,6 +1063,35 @@ def test_normalize(self, case): datetime(2014, 7, 6, 23, 0): False, datetime(2014, 7, 7, 3, 0): False})) + on_offset_cases.append((BusinessHour(start=['09:00', '13:00'], + end=['12:00', '17:00']), { + datetime(2014, 7, 1, 9): True, + datetime(2014, 7, 1, 8, 59): False, + datetime(2014, 7, 1, 8): False, + datetime(2014, 7, 1, 17): True, + datetime(2014, 7, 1, 17, 1): False, + datetime(2014, 7, 1, 18): False, + datetime(2014, 7, 5, 9): False, + datetime(2014, 7, 6, 12): False, + datetime(2014, 7, 1, 12, 30): False})) + + on_offset_cases.append((BusinessHour(start=['19:00', '23:00'], + end=['21:00', '05:00']), { + datetime(2014, 7, 1, 9, 0): False, + datetime(2014, 7, 1, 10, 0): False, + datetime(2014, 7, 1, 15): False, + datetime(2014, 7, 1, 15, 1): False, + datetime(2014, 7, 5, 12, 0): False, + datetime(2014, 7, 6, 12, 0): False, + datetime(2014, 7, 1, 19, 0): True, + datetime(2014, 7, 2, 0, 0): True, + datetime(2014, 7, 4, 23): True, + datetime(2014, 7, 5, 1): True, + datetime(2014, 7, 5, 5, 0): True, + datetime(2014, 7, 6, 23, 0): False, + datetime(2014, 7, 7, 3, 0): False, + datetime(2014, 7, 4, 22): False})) + @pytest.mark.parametrize('case', on_offset_cases) def test_onOffset(self, case): offset, cases = case @@ -1125,6 +1257,76 @@ def test_onOffset(self, case): datetime(2014, 7, 7, 18): (datetime(2014, 7, 7, 17), datetime(2014, 7, 8, 17))})) + opening_time_cases.append(([BusinessHour(start=['11:15', '15:00'], + end=['13:00', '20:00']), + BusinessHour(n=3, start=['11:15', '15:00'], + end=['12:00', '20:00']), + BusinessHour(start=['11:15', '15:00'], + end=['13:00', '17:00']), + BusinessHour(n=2, start=['11:15', '15:00'], + end=['12:00', '03:00']), + BusinessHour(n=3, start=['11:15', '15:00'], + end=['13:00', '16:00'])], { + datetime(2014, 7, 1, 11): (datetime(2014, 7, 1, 11, 15), + datetime(2014, 6, 30, 15)), + datetime(2014, 7, 1, 18): (datetime(2014, 7, 2, 11, 15), + datetime(2014, 7, 1, 15)), + datetime(2014, 7, 1, 23): (datetime(2014, 7, 2, 11, 15), + datetime(2014, 7, 1, 15)), + datetime(2014, 7, 2, 8): (datetime(2014, 7, 2, 11, 15), + datetime(2014, 7, 1, 15)), + datetime(2014, 7, 2, 9): (datetime(2014, 7, 2, 11, 15), + datetime(2014, 7, 1, 15)), + datetime(2014, 7, 2, 10): (datetime(2014, 7, 2, 11, 15), + datetime(2014, 7, 1, 15)), + datetime(2014, 7, 2, 11, 15): (datetime(2014, 7, 2, 11, 15), + datetime(2014, 7, 2, 11, 15)), + datetime(2014, 7, 2, 11, 15, 1): (datetime(2014, 7, 2, 15), + datetime(2014, 7, 2, 11, 15)), + datetime(2014, 7, 5, 10): (datetime(2014, 7, 7, 11, 15), + datetime(2014, 7, 4, 15)), + datetime(2014, 7, 4, 10): (datetime(2014, 7, 4, 11, 15), + datetime(2014, 7, 3, 15)), + datetime(2014, 7, 4, 23): (datetime(2014, 7, 7, 11, 15), + datetime(2014, 7, 4, 15)), + datetime(2014, 7, 6, 10): (datetime(2014, 7, 7, 11, 15), + datetime(2014, 7, 4, 15)), + datetime(2014, 7, 7, 5): (datetime(2014, 7, 7, 11, 15), + datetime(2014, 7, 4, 15)), + datetime(2014, 7, 7, 9, 1): (datetime(2014, 7, 7, 11, 15), + datetime(2014, 7, 4, 15)), + datetime(2014, 7, 7, 12): (datetime(2014, 7, 7, 15), + datetime(2014, 7, 7, 11, 15))})) + + opening_time_cases.append(([BusinessHour(n=-1, start=['17:00', '08:00'], + end=['05:00', '10:00']), + BusinessHour(n=-2, start=['08:00', '17:00'], + end=['10:00', '03:00'])], { + datetime(2014, 7, 1, 11): (datetime(2014, 7, 1, 8), + datetime(2014, 7, 1, 17)), + datetime(2014, 7, 1, 18): (datetime(2014, 7, 1, 17), + datetime(2014, 7, 2, 8)), + datetime(2014, 7, 1, 23): (datetime(2014, 7, 1, 17), + datetime(2014, 7, 2, 8)), + datetime(2014, 7, 2, 8): (datetime(2014, 7, 2, 8), + datetime(2014, 7, 2, 8)), + datetime(2014, 7, 2, 9): (datetime(2014, 7, 2, 8), + datetime(2014, 7, 2, 17)), + datetime(2014, 7, 2, 16, 59): (datetime(2014, 7, 2, 8), + datetime(2014, 7, 2, 17)), + datetime(2014, 7, 5, 10): (datetime(2014, 7, 4, 17), + datetime(2014, 7, 7, 8)), + datetime(2014, 7, 4, 10): (datetime(2014, 7, 4, 8), + datetime(2014, 7, 4, 17)), + datetime(2014, 7, 4, 23): (datetime(2014, 7, 4, 17), + datetime(2014, 7, 7, 8)), + datetime(2014, 7, 6, 10): (datetime(2014, 7, 4, 17), + datetime(2014, 7, 7, 8)), + datetime(2014, 7, 7, 5): (datetime(2014, 7, 4, 17), + datetime(2014, 7, 7, 8)), + datetime(2014, 7, 7, 18): (datetime(2014, 7, 7, 17), + datetime(2014, 7, 8, 8))})) + @pytest.mark.parametrize('case', opening_time_cases) def test_opening_time(self, case): _offsets, cases = case @@ -1303,6 +1505,81 @@ def test_opening_time(self, case): datetime(2014, 7, 7, 3, 30, 30): datetime(2014, 7, 4, 22, 30, 30), datetime(2014, 7, 7, 3, 30, 20): datetime(2014, 7, 4, 22, 30, 20)})) + # multiple business hours + apply_cases.append((BusinessHour(start=['09:00', '14:00'], + end=['12:00', '18:00']), { + datetime(2014, 7, 1, 11): datetime(2014, 7, 1, 14), + datetime(2014, 7, 1, 15): datetime(2014, 7, 1, 16), + datetime(2014, 7, 1, 19): datetime(2014, 7, 2, 10), + datetime(2014, 7, 1, 16): datetime(2014, 7, 1, 17), + datetime(2014, 7, 1, 16, 30, 15): datetime(2014, 7, 1, 17, 30, 15), + datetime(2014, 7, 1, 17): datetime(2014, 7, 2, 9), + datetime(2014, 7, 2, 11): datetime(2014, 7, 2, 14), + # out of business hours + datetime(2014, 7, 1, 13): datetime(2014, 7, 1, 15), + datetime(2014, 7, 2, 8): datetime(2014, 7, 2, 10), + datetime(2014, 7, 2, 19): datetime(2014, 7, 3, 10), + datetime(2014, 7, 2, 23): datetime(2014, 7, 3, 10), + datetime(2014, 7, 3, 0): datetime(2014, 7, 3, 10), + # saturday + datetime(2014, 7, 5, 15): datetime(2014, 7, 7, 10), + datetime(2014, 7, 4, 17): datetime(2014, 7, 7, 9), + datetime(2014, 7, 4, 17, 30): datetime(2014, 7, 7, 9, 30), + datetime(2014, 7, 4, 17, 30, 30): datetime(2014, 7, 7, 9, 30, 30)})) + + apply_cases.append((BusinessHour(n=4, start=['09:00', '14:00'], + end=['12:00', '18:00']), { + datetime(2014, 7, 1, 11): datetime(2014, 7, 1, 17), + datetime(2014, 7, 1, 13): datetime(2014, 7, 2, 9), + datetime(2014, 7, 1, 15): datetime(2014, 7, 2, 10), + datetime(2014, 7, 1, 16): datetime(2014, 7, 2, 11), + datetime(2014, 7, 1, 17): datetime(2014, 7, 2, 14), + datetime(2014, 7, 2, 11): datetime(2014, 7, 2, 17), + datetime(2014, 7, 2, 8): datetime(2014, 7, 2, 15), + datetime(2014, 7, 2, 19): datetime(2014, 7, 3, 15), + datetime(2014, 7, 2, 23): datetime(2014, 7, 3, 15), + datetime(2014, 7, 3, 0): datetime(2014, 7, 3, 15), + datetime(2014, 7, 5, 15): datetime(2014, 7, 7, 15), + datetime(2014, 7, 4, 17): datetime(2014, 7, 7, 14), + datetime(2014, 7, 4, 16, 30): datetime(2014, 7, 7, 11, 30), + datetime(2014, 7, 4, 16, 30, 30): datetime(2014, 7, 7, 11, 30, 30)})) + + apply_cases.append((BusinessHour(n=-4, start=['09:00', '14:00'], + end=['12:00', '18:00']), { + datetime(2014, 7, 1, 11): datetime(2014, 6, 30, 16), + datetime(2014, 7, 1, 13): datetime(2014, 6, 30, 17), + datetime(2014, 7, 1, 15): datetime(2014, 6, 30, 18), + datetime(2014, 7, 1, 16): datetime(2014, 7, 1, 10), + datetime(2014, 7, 1, 17): datetime(2014, 7, 1, 11), + datetime(2014, 7, 2, 11): datetime(2014, 7, 1, 16), + datetime(2014, 7, 2, 8): datetime(2014, 7, 1, 12), + datetime(2014, 7, 2, 19): datetime(2014, 7, 2, 12), + datetime(2014, 7, 2, 23): datetime(2014, 7, 2, 12), + datetime(2014, 7, 3, 0): datetime(2014, 7, 2, 12), + datetime(2014, 7, 5, 15): datetime(2014, 7, 4, 12), + datetime(2014, 7, 4, 18): datetime(2014, 7, 4, 12), + datetime(2014, 7, 7, 9, 30): datetime(2014, 7, 4, 14, 30), + datetime(2014, 7, 7, 9, 30, 30): datetime(2014, 7, 4, 14, 30, 30)})) + + apply_cases.append((BusinessHour(n=-1, start=['19:00', '03:00'], + end=['01:00', '05:00']), { + datetime(2014, 7, 1, 17): datetime(2014, 7, 1, 4), + datetime(2014, 7, 2, 14): datetime(2014, 7, 2, 4), + datetime(2014, 7, 2, 8): datetime(2014, 7, 2, 4), + datetime(2014, 7, 2, 13): datetime(2014, 7, 2, 4), + datetime(2014, 7, 2, 20): datetime(2014, 7, 2, 5), + datetime(2014, 7, 2, 19): datetime(2014, 7, 2, 4), + datetime(2014, 7, 2, 4): datetime(2014, 7, 2, 1), + datetime(2014, 7, 2, 19, 30): datetime(2014, 7, 2, 4, 30), + datetime(2014, 7, 3, 0): datetime(2014, 7, 2, 23), + datetime(2014, 7, 3, 6): datetime(2014, 7, 3, 4), + datetime(2014, 7, 4, 23): datetime(2014, 7, 4, 22), + datetime(2014, 7, 5, 0): datetime(2014, 7, 4, 23), + datetime(2014, 7, 5, 4): datetime(2014, 7, 5, 0), + datetime(2014, 7, 7, 3, 30): datetime(2014, 7, 5, 0, 30), + datetime(2014, 7, 7, 19, 30): datetime(2014, 7, 7, 4, 30), + datetime(2014, 7, 7, 19, 30, 30): datetime(2014, 7, 7, 4, 30, 30)})) + @pytest.mark.parametrize('case', apply_cases) def test_apply(self, case): offset, cases = case @@ -1359,6 +1636,42 @@ def test_apply(self, case): datetime(2014, 7, 7, 1): datetime(2014, 7, 15, 0), datetime(2014, 7, 7, 23, 30): datetime(2014, 7, 15, 21, 30)})) + # large n for multiple opening hours (3 days and 1 hour before) + apply_large_n_cases.append((BusinessHour(n=-25, start=['09:00', '14:00'], + end=['12:00', '19:00']), { + datetime(2014, 7, 1, 11): datetime(2014, 6, 26, 10), + datetime(2014, 7, 1, 13): datetime(2014, 6, 26, 11), + datetime(2014, 7, 1, 9): datetime(2014, 6, 25, 18), + datetime(2014, 7, 1, 10): datetime(2014, 6, 25, 19), + datetime(2014, 7, 3, 11): datetime(2014, 6, 30, 10), + datetime(2014, 7, 3, 8): datetime(2014, 6, 27, 18), + datetime(2014, 7, 3, 19): datetime(2014, 6, 30, 18), + datetime(2014, 7, 3, 23): datetime(2014, 6, 30, 18), + datetime(2014, 7, 4, 9): datetime(2014, 6, 30, 18), + datetime(2014, 7, 5, 15): datetime(2014, 7, 1, 18), + datetime(2014, 7, 6, 18): datetime(2014, 7, 1, 18), + datetime(2014, 7, 7, 9, 30): datetime(2014, 7, 1, 18, 30), + datetime(2014, 7, 7, 10, 30, 30): datetime(2014, 7, 2, 9, 30, 30)})) + + # 5 days and 3 hours later + apply_large_n_cases.append((BusinessHour(28, start=['21:00', '03:00'], + end=['01:00', '04:00']), { + datetime(2014, 7, 1, 11): datetime(2014, 7, 9, 0), + datetime(2014, 7, 1, 22): datetime(2014, 7, 9, 3), + datetime(2014, 7, 1, 23): datetime(2014, 7, 9, 21), + datetime(2014, 7, 2, 2): datetime(2014, 7, 9, 23), + datetime(2014, 7, 3, 21): datetime(2014, 7, 11, 0), + datetime(2014, 7, 4, 1): datetime(2014, 7, 11, 23), + datetime(2014, 7, 4, 2): datetime(2014, 7, 11, 23), + datetime(2014, 7, 4, 3): datetime(2014, 7, 11, 23), + datetime(2014, 7, 4, 21): datetime(2014, 7, 12, 0), + datetime(2014, 7, 5, 0): datetime(2014, 7, 14, 22), + datetime(2014, 7, 5, 1): datetime(2014, 7, 14, 23), + datetime(2014, 7, 5, 15): datetime(2014, 7, 14, 23), + datetime(2014, 7, 6, 18): datetime(2014, 7, 14, 23), + datetime(2014, 7, 7, 1): datetime(2014, 7, 14, 23), + datetime(2014, 7, 7, 23, 30): datetime(2014, 7, 15, 21, 30)})) + @pytest.mark.parametrize('case', apply_large_n_cases) def test_apply_large_n(self, case): offset, cases = case diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index ac20ad1669638..087c05574090c 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -17,6 +17,7 @@ from pandas.util._decorators import Appender, Substitution, cache_readonly from pandas.core.dtypes.generic import ABCPeriod +from pandas.core.dtypes.inference import is_list_like from pandas.core.tools.datetimes import to_datetime @@ -581,9 +582,44 @@ class BusinessHourMixin(BusinessMixin): def __init__(self, start='09:00', end='17:00', offset=timedelta(0)): # must be validated here to equality check - start = liboffsets._validate_business_time(start) + if not is_list_like(start): + start = [start] + if not len(start): + raise ValueError('Must include at least 1 start time') + + if not is_list_like(end): + end = [end] + if not len(end): + raise ValueError('Must include at least 1 end time') + + start = np.array([liboffsets._validate_business_time(x) + for x in start]) + end = np.array([liboffsets._validate_business_time(x) for x in end]) + + # Validation of input + if len(start) != len(end): + raise ValueError('number of starting time and ending time ' + 'must be the same') + num_openings = len(start) + + # sort starting and ending time by starting time + index = np.argsort(start) + + # convert to tuple so that start and end are hashable + start = tuple(start[index]) + end = tuple(end[index]) + + total_secs = 0 + for i in range(num_openings): + total_secs += self._get_business_hours_by_sec(start[i], end[i]) + total_secs += self._get_business_hours_by_sec( + end[i], start[(i + 1) % num_openings]) + if total_secs != 24 * 60 * 60: + raise ValueError('invalid starting and ending time(s): ' + 'opening hours should not touch or overlap with ' + 'one another') + object.__setattr__(self, "start", start) - end = liboffsets._validate_business_time(end) object.__setattr__(self, "end", end) object.__setattr__(self, "_offset", offset) @@ -605,62 +641,93 @@ def next_bday(self): else: return BusinessDay(n=nb_offset) - @cache_readonly - def _get_daytime_flag(self): - if self.start == self.end: - raise ValueError('start and end must not be the same') - elif self.start < self.end: - return True - else: - return False - - def _next_opening_time(self, other): + def _next_opening_time(self, other, sign=1): """ - If n is positive, return tomorrow's business day opening time. - Otherwise yesterday's business day's opening time. + If self.n and sign have the same sign, return the earliest opening time + later than or equal to current time. + Otherwise the latest opening time earlier than or equal to current + time. Opening time always locates on BusinessDay. - Otherwise, closing time may not if business hour extends over midnight. + However, closing time may not if business hour extends over midnight. + + Parameters + ---------- + other : datetime + Current time. + sign : int, default 1. + Either 1 or -1. Going forward in time if it has the same sign as + self.n. Going backward in time otherwise. + + Returns + ------- + result : datetime + Next opening time. """ + earliest_start = self.start[0] + latest_start = self.start[-1] + if not self.next_bday.onOffset(other): - other = other + self.next_bday + # today is not business day + other = other + sign * self.next_bday + if self.n * sign >= 0: + hour, minute = earliest_start.hour, earliest_start.minute + else: + hour, minute = latest_start.hour, latest_start.minute else: - if self.n >= 0 and self.start < other.time(): - other = other + self.next_bday - elif self.n < 0 and other.time() < self.start: - other = other + self.next_bday - return datetime(other.year, other.month, other.day, - self.start.hour, self.start.minute) + if self.n * sign >= 0: + if latest_start < other.time(): + # current time is after latest starting time in today + other = other + sign * self.next_bday + hour, minute = earliest_start.hour, earliest_start.minute + else: + # find earliest starting time no earlier than current time + for st in self.start: + if other.time() <= st: + hour, minute = st.hour, st.minute + break + else: + if other.time() < earliest_start: + # current time is before earliest starting time in today + other = other + sign * self.next_bday + hour, minute = latest_start.hour, latest_start.minute + else: + # find latest starting time no later than current time + for st in reversed(self.start): + if other.time() >= st: + hour, minute = st.hour, st.minute + break + + return datetime(other.year, other.month, other.day, hour, minute) def _prev_opening_time(self, other): """ - If n is positive, return yesterday's business day opening time. - Otherwise yesterday business day's opening time. + If n is positive, return the latest opening time earlier than or equal + to current time. + Otherwise the earliest opening time later than or equal to current + time. + + Parameters + ---------- + other : datetime + Current time. + + Returns + ------- + result : datetime + Previous opening time. """ - if not self.next_bday.onOffset(other): - other = other - self.next_bday - else: - if self.n >= 0 and other.time() < self.start: - other = other - self.next_bday - elif self.n < 0 and other.time() > self.start: - other = other - self.next_bday - return datetime(other.year, other.month, other.day, - self.start.hour, self.start.minute) + return self._next_opening_time(other, sign=-1) - @cache_readonly - def _get_business_hours_by_sec(self): + def _get_business_hours_by_sec(self, start, end): """ Return business hours in a day by seconds. """ - if self._get_daytime_flag: - # create dummy datetime to calculate businesshours in a day - dtstart = datetime(2014, 4, 1, self.start.hour, self.start.minute) - until = datetime(2014, 4, 1, self.end.hour, self.end.minute) - return (until - dtstart).total_seconds() - else: - dtstart = datetime(2014, 4, 1, self.start.hour, self.start.minute) - until = datetime(2014, 4, 2, self.end.hour, self.end.minute) - return (until - dtstart).total_seconds() + # create dummy datetime to calculate businesshours in a day + dtstart = datetime(2014, 4, 1, start.hour, start.minute) + day = 1 if start < end else 2 + until = datetime(2014, 4, day, end.hour, end.minute) + return int((until - dtstart).total_seconds()) @apply_wraps def rollback(self, dt): @@ -668,13 +735,11 @@ def rollback(self, dt): Roll provided date backward to next offset only if not on offset. """ if not self.onOffset(dt): - businesshours = self._get_business_hours_by_sec if self.n >= 0: - dt = self._prev_opening_time( - dt) + timedelta(seconds=businesshours) + dt = self._prev_opening_time(dt) else: - dt = self._next_opening_time( - dt) + timedelta(seconds=businesshours) + dt = self._next_opening_time(dt) + return self._get_closing_time(dt) return dt @apply_wraps @@ -689,11 +754,28 @@ def rollforward(self, dt): return self._prev_opening_time(dt) return dt + def _get_closing_time(self, dt): + """ + Get the closing time of a business hour interval by its opening time. + + Parameters + ---------- + dt : datetime + Opening time of a business hour interval. + + Returns + ------- + result : datetime + Corresponding closing time. + """ + for i, st in enumerate(self.start): + if st.hour == dt.hour and st.minute == dt.minute: + return dt + timedelta( + seconds=self._get_business_hours_by_sec(st, self.end[i])) + assert False + @apply_wraps def apply(self, other): - businesshours = self._get_business_hours_by_sec - bhdelta = timedelta(seconds=businesshours) - if isinstance(other, datetime): # used for detecting edge condition nanosecond = getattr(other, 'nanosecond', 0) @@ -703,63 +785,75 @@ def apply(self, other): other.hour, other.minute, other.second, other.microsecond) n = self.n + + # adjust other to reduce number of cases to handle if n >= 0: - if (other.time() == self.end or - not self._onOffset(other, businesshours)): + if (other.time() in self.end or + not self._onOffset(other)): other = self._next_opening_time(other) else: - if other.time() == self.start: + if other.time() in self.start: # adjustment to move to previous business day other = other - timedelta(seconds=1) - if not self._onOffset(other, businesshours): + if not self._onOffset(other): other = self._next_opening_time(other) - other = other + bhdelta + other = self._get_closing_time(other) + + # get total business hours by sec in one business day + businesshours = sum(self._get_business_hours_by_sec(st, en) + for st, en in zip(self.start, self.end)) bd, r = divmod(abs(n * 60), businesshours // 60) if n < 0: bd, r = -bd, -r + # adjust by business days first if bd != 0: skip_bd = BusinessDay(n=bd) # midnight business hour may not on BusinessDay if not self.next_bday.onOffset(other): - remain = other - self._prev_opening_time(other) - other = self._next_opening_time(other + skip_bd) + remain + prev_open = self._prev_opening_time(other) + remain = other - prev_open + other = prev_open + skip_bd + remain else: other = other + skip_bd - hours, minutes = divmod(r, 60) - result = other + timedelta(hours=hours, minutes=minutes) - - # because of previous adjustment, time will be larger than start - if n >= 0: - bday_edge = self._prev_opening_time(other) + bhdelta - if bday_edge < result: - bday_remain = result - bday_edge - result = self._next_opening_time(other) - result += bday_remain - else: - bday_edge = self._next_opening_time(other) - if bday_edge > result: - bday_remain = result - bday_edge - result = self._next_opening_time(result) + bhdelta - result += bday_remain + # remaining business hours to adjust + bhour_remain = timedelta(minutes=r) - # edge handling if n >= 0: - if result.time() == self.end: - result = self._next_opening_time(result) + while bhour_remain != timedelta(0): + # business hour left in this business time interval + bhour = self._get_closing_time( + self._prev_opening_time(other)) - other + if bhour_remain < bhour: + # finish adjusting if possible + other += bhour_remain + bhour_remain = timedelta(0) + else: + # go to next business time interval + bhour_remain -= bhour + other = self._next_opening_time(other + bhour) else: - if result.time() == self.start and nanosecond == 0: - # adjustment to move to previous business day - result = self._next_opening_time( - result - timedelta(seconds=1)) + bhdelta + while bhour_remain != timedelta(0): + # business hour left in this business time interval + bhour = self._next_opening_time(other) - other + if (bhour_remain > bhour or + bhour_remain == bhour and nanosecond != 0): + # finish adjusting if possible + other += bhour_remain + bhour_remain = timedelta(0) + else: + # go to next business time interval + bhour_remain -= bhour + other = self._get_closing_time( + self._next_opening_time( + other + bhour - timedelta(seconds=1))) - return result + return other else: - # TODO: Figure out the end of this sente raise ApplyTypeError( - 'Only know how to combine business hour with ') + 'Only know how to combine business hour with datetime') def onOffset(self, dt): if self.normalize and not _is_normalized(dt): @@ -770,10 +864,9 @@ def onOffset(self, dt): dt.minute, dt.second, dt.microsecond) # Valid BH can be on the different BusinessDay during midnight # Distinguish by the time spent from previous opening time - businesshours = self._get_business_hours_by_sec - return self._onOffset(dt, businesshours) + return self._onOffset(dt) - def _onOffset(self, dt, businesshours): + def _onOffset(self, dt): """ Slight speedups using calculated values. """ @@ -786,6 +879,11 @@ def _onOffset(self, dt, businesshours): else: op = self._next_opening_time(dt) span = (dt - op).total_seconds() + businesshours = 0 + for i, st in enumerate(self.start): + if op.hour == st.hour and op.minute == st.minute: + businesshours = self._get_business_hours_by_sec( + st, self.end[i]) if span <= businesshours: return True else: @@ -793,17 +891,17 @@ def _onOffset(self, dt, businesshours): def _repr_attrs(self): out = super()._repr_attrs() - start = self.start.strftime('%H:%M') - end = self.end.strftime('%H:%M') - attrs = ['{prefix}={start}-{end}'.format(prefix=self._prefix, - start=start, end=end)] + hours = ','.join('{}-{}'.format( + st.strftime('%H:%M'), en.strftime('%H:%M')) + for st, en in zip(self.start, self.end)) + attrs = ['{prefix}={hours}'.format(prefix=self._prefix, hours=hours)] out += ': ' + ', '.join(attrs) return out class BusinessHour(BusinessHourMixin, SingleConstructorOffset): """ - DateOffset subclass representing possibly n business days. + DateOffset subclass representing possibly n business hours. .. versionadded:: 0.16.1 """ From 05f3d439d268780b5672a73857714f6440f2df74 Mon Sep 17 00:00:00 2001 From: Mak Sze Chun Date: Fri, 28 Jun 2019 22:12:23 +0800 Subject: [PATCH 8/9] =?UTF-8?q?[CI]=20Add=20pytest-azurepipelines=20in=20?= =?UTF-8?q?=E2=80=8Bpandas-dev=20(#26620)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ci/deps/azure-35-compat.yaml | 1 + ci/deps/azure-36-locale.yaml | 1 + ci/deps/azure-36-locale_slow.yaml | 1 + ci/deps/azure-37-locale.yaml | 1 + ci/deps/azure-37-numpydev.yaml | 1 + ci/deps/azure-macos-35.yaml | 1 + ci/deps/azure-windows-36.yaml | 1 + ci/deps/azure-windows-37.yaml | 1 + 8 files changed, 8 insertions(+) diff --git a/ci/deps/azure-35-compat.yaml b/ci/deps/azure-35-compat.yaml index c783670e78d52..fe207d122657b 100644 --- a/ci/deps/azure-35-compat.yaml +++ b/ci/deps/azure-35-compat.yaml @@ -22,6 +22,7 @@ dependencies: - hypothesis>=3.58.0 - pytest-xdist - pytest-mock + - pytest-azurepipelines - pip - pip: # for python 3.5, pytest>=4.0.2 is not available in conda diff --git a/ci/deps/azure-36-locale.yaml b/ci/deps/azure-36-locale.yaml index fbb240734d45d..99fa4d5c9e160 100644 --- a/ci/deps/azure-36-locale.yaml +++ b/ci/deps/azure-36-locale.yaml @@ -23,6 +23,7 @@ dependencies: - pytest>=4.0.2 - pytest-xdist - pytest-mock + - pytest-azurepipelines - hypothesis>=3.58.0 - pip - pip: diff --git a/ci/deps/azure-36-locale_slow.yaml b/ci/deps/azure-36-locale_slow.yaml index 9ddc782da930e..2bf2bd74795d2 100644 --- a/ci/deps/azure-36-locale_slow.yaml +++ b/ci/deps/azure-36-locale_slow.yaml @@ -29,6 +29,7 @@ dependencies: - pytest>=4.0.2 - pytest-xdist - pytest-mock + - pytest-azurepipelines - moto - pip - pip: diff --git a/ci/deps/azure-37-locale.yaml b/ci/deps/azure-37-locale.yaml index 2ebb7dda86e36..bd8ba912d5298 100644 --- a/ci/deps/azure-37-locale.yaml +++ b/ci/deps/azure-37-locale.yaml @@ -28,6 +28,7 @@ dependencies: - pytest>=4.0.2 - pytest-xdist - pytest-mock + - pytest-azurepipelines - pip - pip: - hypothesis>=3.58.0 diff --git a/ci/deps/azure-37-numpydev.yaml b/ci/deps/azure-37-numpydev.yaml index 831f13fb421f0..c56dc819a90b1 100644 --- a/ci/deps/azure-37-numpydev.yaml +++ b/ci/deps/azure-37-numpydev.yaml @@ -17,3 +17,4 @@ dependencies: - "--pre" - "numpy" - "scipy" + - pytest-azurepipelines diff --git a/ci/deps/azure-macos-35.yaml b/ci/deps/azure-macos-35.yaml index 24c753e16d98d..0b96dd9762ef5 100644 --- a/ci/deps/azure-macos-35.yaml +++ b/ci/deps/azure-macos-35.yaml @@ -29,3 +29,4 @@ dependencies: - pytest-xdist - pytest-mock - hypothesis>=3.58.0 + - pytest-azurepipelines diff --git a/ci/deps/azure-windows-36.yaml b/ci/deps/azure-windows-36.yaml index b1795059091b9..b0f3f5389ac85 100644 --- a/ci/deps/azure-windows-36.yaml +++ b/ci/deps/azure-windows-36.yaml @@ -26,4 +26,5 @@ dependencies: - pytest>=4.0.2 - pytest-xdist - pytest-mock + - pytest-azurepipelines - hypothesis>=3.58.0 diff --git a/ci/deps/azure-windows-37.yaml b/ci/deps/azure-windows-37.yaml index 5bdc29e0eec80..43504dec26953 100644 --- a/ci/deps/azure-windows-37.yaml +++ b/ci/deps/azure-windows-37.yaml @@ -28,6 +28,7 @@ dependencies: - pytest>=4.0.2 - pytest-xdist - pytest-mock + - pytest-azurepipelines - moto - hypothesis>=3.58.0 - pyreadstat From 5bee2e39975ed1dc8bbd21d8603fa08f201284f3 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 28 Jun 2019 11:01:34 -0500 Subject: [PATCH 9/9] dedent --- pandas/core/reshape/merge.py | 59 ++++++++++++++++++------------------ 1 file changed, 30 insertions(+), 29 deletions(-) diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 42d90773e124d..549c69486ebfa 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -1821,32 +1821,33 @@ def _items_overlap_with_suffix(left, lsuffix, right, rsuffix): to_rename = left.intersection(right) if len(to_rename) == 0: return left, right - else: - if not lsuffix and not rsuffix: - raise ValueError('columns overlap but no suffix specified: ' - '{rename}'.format(rename=to_rename)) - - def renamer(x, suffix): - """Rename the left and right indices. - - If there is overlap, and suffix is not None, add - suffix, otherwise, leave it as-is. - - Parameters - ---------- - x : original column name - suffix : str or None - - Returns - ------- - x : renamed column name - """ - if x in to_rename and suffix is not None: - return '{x}{suffix}'.format(x=x, suffix=suffix) - return x - - lrenamer = partial(renamer, suffix=lsuffix) - rrenamer = partial(renamer, suffix=rsuffix) - - return (_transform_index(left, lrenamer), - _transform_index(right, rrenamer)) + + if not lsuffix and not rsuffix: + raise ValueError('columns overlap but no suffix specified: ' + '{rename}'.format(rename=to_rename)) + + def renamer(x, suffix): + """ + Rename the left and right indices. + + If there is overlap, and suffix is not None, add + suffix, otherwise, leave it as-is. + + Parameters + ---------- + x : original column name + suffix : str or None + + Returns + ------- + x : renamed column name + """ + if x in to_rename and suffix is not None: + return '{x}{suffix}'.format(x=x, suffix=suffix) + return x + + lrenamer = partial(renamer, suffix=lsuffix) + rrenamer = partial(renamer, suffix=rsuffix) + + return (_transform_index(left, lrenamer), + _transform_index(right, rrenamer))