Skip to content

Commit f6a1c41

Browse files
sinhrks authored and jreback committed
BUG: subclassed .align returns normal DataFrame
closes #12983

Author: sinhrks <sinhrks@gmail.com>

Closes #13037 from sinhrks/align_subclass and squashes the following commits:

32ced80 [sinhrks] BUG: subclassed .align returns normal DataFrame
1 parent db35ff4 commit f6a1c41

File tree

5 files changed

+98
-16
lines changed

5 files changed

+98
-16
lines changed

doc/source/whatsnew/v0.18.1.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -510,7 +510,8 @@ Bug Fixes
510510
- Potential segfault in ``DataFrame.to_json`` when serialising ``datetime.time`` (:issue:`11473`).
511511
- Potential segfault in ``DataFrame.to_json`` when attempting to serialise 0d array (:issue:`11299`).
512512
- Segfault in ``to_json`` when attempting to serialise a ``DataFrame`` or ``Series`` with non-ndarray values (:issue:`10778`).
513-
513+
- Bug in ``.align`` when sub-classing not returning the sub-class (:issue:`12983`)
514+
- Bug in aligning a ``Series`` with a ``DataFrame`` (:issue:`13037`)
514515

515516

516517

pandas/core/generic.py

Lines changed: 20 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -4144,18 +4144,19 @@ def align(self, other, join='outer', axis=None, level=None, copy=True,
41444144
if isinstance(self, Series):
41454145
# this means other is a DataFrame, and we need to broadcast
41464146
# self
4147-
df = DataFrame(
4148-
dict((c, self) for c in other.columns),
4149-
**other._construct_axes_dict())
4147+
cons = self._constructor_expanddim
4148+
df = cons(dict((c, self) for c in other.columns),
4149+
**other._construct_axes_dict())
41504150
return df._align_frame(other, join=join, axis=axis,
41514151
level=level, copy=copy,
41524152
fill_value=fill_value, method=method,
41534153
limit=limit, fill_axis=fill_axis)
41544154
elif isinstance(other, Series):
41554155
# this means self is a DataFrame, and we need to broadcast
41564156
# other
4157-
df = DataFrame(dict((c, other) for c in self.columns),
4158-
**self._construct_axes_dict())
4157+
cons = other._constructor_expanddim
4158+
df = cons(dict((c, other) for c in self.columns),
4159+
**self._construct_axes_dict())
41594160
return self._align_frame(df, join=join, axis=axis, level=level,
41604161
copy=copy, fill_value=fill_value,
41614162
method=method, limit=limit,
@@ -4184,20 +4185,27 @@ def _align_frame(self, other, join='outer', axis=None, level=None,
41844185
ilidx, iridx = None, None
41854186
clidx, cridx = None, None
41864187

4188+
is_series = isinstance(self, ABCSeries)
4189+
41874190
if axis is None or axis == 0:
41884191
if not self.index.equals(other.index):
41894192
join_index, ilidx, iridx = self.index.join(
41904193
other.index, how=join, level=level, return_indexers=True)
41914194

41924195
if axis is None or axis == 1:
4193-
if not self.columns.equals(other.columns):
4196+
if not is_series and not self.columns.equals(other.columns):
41944197
join_columns, clidx, cridx = self.columns.join(
41954198
other.columns, how=join, level=level, return_indexers=True)
41964199

4197-
left = self._reindex_with_indexers({0: [join_index, ilidx],
4198-
1: [join_columns, clidx]},
4199-
copy=copy, fill_value=fill_value,
4200+
if is_series:
4201+
reindexers = {0: [join_index, ilidx]}
4202+
else:
4203+
reindexers = {0: [join_index, ilidx], 1: [join_columns, clidx]}
4204+
4205+
left = self._reindex_with_indexers(reindexers, copy=copy,
4206+
fill_value=fill_value,
42004207
allow_dups=True)
4208+
# other must be always DataFrame
42014209
right = other._reindex_with_indexers({0: [join_index, iridx],
42024210
1: [join_columns, cridx]},
42034211
copy=copy, fill_value=fill_value,
@@ -4212,10 +4220,8 @@ def _align_frame(self, other, join='outer', axis=None, level=None,
42124220
def _align_series(self, other, join='outer', axis=None, level=None,
42134221
copy=True, fill_value=None, method=None, limit=None,
42144222
fill_axis=0):
4215-
from pandas import DataFrame
4216-
4217-
# series/series compat
4218-
if isinstance(self, ABCSeries) and isinstance(other, ABCSeries):
4223+
# series/series compat, other must always be a Series
4224+
if isinstance(self, ABCSeries):
42194225
if axis:
42204226
raise ValueError('cannot align series to a series other than '
42214227
'axis 0')
@@ -4261,7 +4267,7 @@ def _align_series(self, other, join='outer', axis=None, level=None,
42614267
if copy and fdata is self._data:
42624268
fdata = fdata.copy()
42634269

4264-
left = DataFrame(fdata)
4270+
left = self._constructor(fdata)
42654271

42664272
if ridx is None:
42674273
right = other

pandas/tests/frame/test_axis_select_reindex.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -597,6 +597,27 @@ def test_align_multiindex(self):
597597
assert_frame_equal(expr, res1r)
598598
assert_frame_equal(expr, res2l)
599599

600+
def test_align_series_combinations(self):
601+
df = pd.DataFrame({'a': [1, 3, 5],
602+
'b': [1, 3, 5]}, index=list('ACE'))
603+
s = pd.Series([1, 2, 4], index=list('ABD'), name='x')
604+
605+
# frame + series
606+
res1, res2 = df.align(s, axis=0)
607+
exp1 = pd.DataFrame({'a': [1, np.nan, 3, np.nan, 5],
608+
'b': [1, np.nan, 3, np.nan, 5]},
609+
index=list('ABCDE'))
610+
exp2 = pd.Series([1, 2, np.nan, 4, np.nan],
611+
index=list('ABCDE'), name='x')
612+
613+
tm.assert_frame_equal(res1, exp1)
614+
tm.assert_series_equal(res2, exp2)
615+
616+
# series + frame
617+
res1, res2 = s.align(df)
618+
tm.assert_series_equal(res1, exp2)
619+
tm.assert_frame_equal(res2, exp1)
620+
600621
def test_filter(self):
601622
# items
602623
filtered = self.frame.filter(['A', 'B', 'E'])

pandas/tests/frame/test_subclass.py

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22

33
from __future__ import print_function
44

5+
import numpy as np
6+
57
from pandas import DataFrame, Series, MultiIndex, Panel
68
import pandas as pd
79
import pandas.util.testing as tm
@@ -156,3 +158,55 @@ def bar(self):
156158
return self.i_dont_exist
157159
with tm.assertRaisesRegexp(AttributeError, '.*i_dont_exist.*'):
158160
A().bar
161+
162+
def test_subclass_align(self):
163+
# GH 12983
164+
df1 = tm.SubclassedDataFrame({'a': [1, 3, 5],
165+
'b': [1, 3, 5]}, index=list('ACE'))
166+
df2 = tm.SubclassedDataFrame({'c': [1, 2, 4],
167+
'd': [1, 2, 4]}, index=list('ABD'))
168+
169+
res1, res2 = df1.align(df2, axis=0)
170+
exp1 = tm.SubclassedDataFrame({'a': [1, np.nan, 3, np.nan, 5],
171+
'b': [1, np.nan, 3, np.nan, 5]},
172+
index=list('ABCDE'))
173+
exp2 = tm.SubclassedDataFrame({'c': [1, 2, np.nan, 4, np.nan],
174+
'd': [1, 2, np.nan, 4, np.nan]},
175+
index=list('ABCDE'))
176+
tm.assertIsInstance(res1, tm.SubclassedDataFrame)
177+
tm.assert_frame_equal(res1, exp1)
178+
tm.assertIsInstance(res2, tm.SubclassedDataFrame)
179+
tm.assert_frame_equal(res2, exp2)
180+
181+
res1, res2 = df1.a.align(df2.c)
182+
tm.assertIsInstance(res1, tm.SubclassedSeries)
183+
tm.assert_series_equal(res1, exp1.a)
184+
tm.assertIsInstance(res2, tm.SubclassedSeries)
185+
tm.assert_series_equal(res2, exp2.c)
186+
187+
def test_subclass_align_combinations(self):
188+
# GH 12983
189+
df = tm.SubclassedDataFrame({'a': [1, 3, 5],
190+
'b': [1, 3, 5]}, index=list('ACE'))
191+
s = tm.SubclassedSeries([1, 2, 4], index=list('ABD'), name='x')
192+
193+
# frame + series
194+
res1, res2 = df.align(s, axis=0)
195+
exp1 = pd.DataFrame({'a': [1, np.nan, 3, np.nan, 5],
196+
'b': [1, np.nan, 3, np.nan, 5]},
197+
index=list('ABCDE'))
198+
# name is lost when
199+
exp2 = pd.Series([1, 2, np.nan, 4, np.nan],
200+
index=list('ABCDE'), name='x')
201+
202+
tm.assertIsInstance(res1, tm.SubclassedDataFrame)
203+
tm.assert_frame_equal(res1, exp1)
204+
tm.assertIsInstance(res2, tm.SubclassedSeries)
205+
tm.assert_series_equal(res2, exp2)
206+
207+
# series + frame
208+
res1, res2 = s.align(df)
209+
tm.assertIsInstance(res1, tm.SubclassedSeries)
210+
tm.assert_series_equal(res1, exp2)
211+
tm.assertIsInstance(res2, tm.SubclassedDataFrame)
212+
tm.assert_frame_equal(res2, exp1)

pandas/util/testing.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2420,7 +2420,7 @@ def inner(*args, **kwargs):
24202420

24212421

24222422
class SubclassedSeries(Series):
2423-
_metadata = ['testattr']
2423+
_metadata = ['testattr', 'name']
24242424

24252425
@property
24262426
def _constructor(self):

0 commit comments

Comments
 (0)