From 2e1800d5360c6997400592d55241bb75cff4fbb2 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 15 Sep 2016 19:14:00 +0200 Subject: [PATCH 1/2] BUG: fix alignment in series ops (GH14227) --- pandas/core/ops.py | 13 ++++++++----- pandas/tests/series/test_operators.py | 8 ++++++++ 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/pandas/core/ops.py b/pandas/core/ops.py index 237b9394dfc25..4fe40451b5452 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -622,11 +622,14 @@ def _align_method_SERIES(left, right, align_asobject=False): left, right = left.align(right, copy=False) - index, lidx, ridx = left.index.join(right.index, how='outer', - return_indexers=True) - # if DatetimeIndex have different tz, convert to UTC - left.index = index - right.index = index + if is_datetime64tz_dtype(left.index): + if left.index.tz != right.index.tz: + # if DatetimeIndex have different tz, convert to UTC + index, lidx, ridx = left.index.join( + right.index, how='outer', return_indexers=True) + + left.index = index + right.index = index return left, right diff --git a/pandas/tests/series/test_operators.py b/pandas/tests/series/test_operators.py index 24c26276ea24d..f688ec2d43789 100644 --- a/pandas/tests/series/test_operators.py +++ b/pandas/tests/series/test_operators.py @@ -1810,3 +1810,11 @@ def test_dti_tz_convert_to_utc(self): res = Series([1, 2], index=idx1) + Series([1, 1], index=idx2) assert_series_equal(res, Series([np.nan, 3, np.nan], index=base)) + + def test_op_duplicate_index(self): + # GH14227 + s1 = Series([1, 2], index=[1, 1]) + s2 = Series([10, 10], index=[1, 2]) + result = s1 + s2 + expected = pd.Series([11, 12, np.nan], index=[1, 1, 2]) + assert_series_equal(result, expected) From 66d35d19c8a9f481bfba6805e427411d9130b198 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 30 Sep 2016 10:43:56 +0200 Subject: [PATCH 2/2] BUG: fix align in case of different tz but aligning values --- pandas/core/generic.py | 22 
+++++++++++++++++++++- pandas/core/ops.py | 9 --------- pandas/tseries/tests/test_timezones.py | 22 ++++++++++++++++++++++ 3 files changed, 43 insertions(+), 10 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index a3cac2d6f9f2f..4aa1ac4a47090 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -20,6 +20,7 @@ is_numeric_dtype, is_datetime64_dtype, is_timedelta64_dtype, + is_datetime64tz_dtype, is_list_like, is_dict_like, is_re_compilable) @@ -4438,13 +4439,23 @@ def _align_frame(self, other, join='outer', axis=None, level=None, left = left.fillna(axis=fill_axis, method=method, limit=limit) right = right.fillna(axis=fill_axis, method=method, limit=limit) + # if DatetimeIndex have different tz, convert to UTC + if is_datetime64tz_dtype(left.index): + if left.index.tz != right.index.tz: + if join_index is not None: + left.index = join_index + right.index = join_index + return left.__finalize__(self), right.__finalize__(other) def _align_series(self, other, join='outer', axis=None, level=None, copy=True, fill_value=None, method=None, limit=None, fill_axis=0): + + is_series = isinstance(self, ABCSeries) + # series/series compat, other must always be a Series - if isinstance(self, ABCSeries): + if is_series: if axis: raise ValueError('cannot align series to a series other than ' 'axis 0') @@ -4503,6 +4514,15 @@ def _align_series(self, other, join='outer', axis=None, level=None, left = left.fillna(fill_value, method=method, limit=limit, axis=fill_axis) right = right.fillna(fill_value, method=method, limit=limit) + + # if DatetimeIndex have different tz, convert to UTC + if is_series or (not is_series and axis == 0): + if is_datetime64tz_dtype(left.index): + if left.index.tz != right.index.tz: + if join_index is not None: + left.index = join_index + right.index = join_index + return left.__finalize__(self), right.__finalize__(other) def _where(self, cond, other=np.nan, inplace=False, axis=None, level=None, diff --git 
a/pandas/core/ops.py b/pandas/core/ops.py index 4fe40451b5452..7cff1104c50be 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -622,15 +622,6 @@ def _align_method_SERIES(left, right, align_asobject=False): left, right = left.align(right, copy=False) - if is_datetime64tz_dtype(left.index): - if left.index.tz != right.index.tz: - # if DatetimeIndex have different tz, convert to UTC - index, lidx, ridx = left.index.join( - right.index, how='outer', return_indexers=True) - - left.index = index - right.index = index - return left, right diff --git a/pandas/tseries/tests/test_timezones.py b/pandas/tseries/tests/test_timezones.py index b8247fe01b3f2..a85a606075911 100644 --- a/pandas/tseries/tests/test_timezones.py +++ b/pandas/tseries/tests/test_timezones.py @@ -1290,6 +1290,28 @@ def test_align_aware(self): self.assertEqual(df1.index.tz, new1.index.tz) self.assertEqual(df2.index.tz, new2.index.tz) + # different timezones convert to UTC + + # frame + df1_central = df1.tz_convert('US/Central') + new1, new2 = df1.align(df1_central) + self.assertEqual(new1.index.tz, pytz.UTC) + self.assertEqual(new2.index.tz, pytz.UTC) + + # series + new1, new2 = df1[0].align(df1_central[0]) + self.assertEqual(new1.index.tz, pytz.UTC) + self.assertEqual(new2.index.tz, pytz.UTC) + + # combination + new1, new2 = df1.align(df1_central[0], axis=0) + self.assertEqual(new1.index.tz, pytz.UTC) + self.assertEqual(new2.index.tz, pytz.UTC) + + new1, new2 = df1[0].align(df1_central, axis=0) + self.assertEqual(new1.index.tz, pytz.UTC) + self.assertEqual(new2.index.tz, pytz.UTC) + def test_append_aware(self): rng1 = date_range('1/1/2011 01:00', periods=1, freq='H', tz='US/Eastern')