-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
ENH: accept dict of column:dtype as dtype argument in DataFrame.astype #12086
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
0aeee8d
58dd71b
43989fd
f0e47a9
d0734ba
2a99394
4aa6323
4de83d2
c9ffd78
e5c18b4
00d4ec3
82f54bd
01dd111
feee089
b385799
2de2884
f637aa3
62bed0e
4e4a7d9
62fc481
20ea406
00e0f3e
2429ec5
009d1df
86f68e6
4b50149
eeccd05
070e877
2a120cf
fecb2ca
123f2ee
cc25040
72164a8
9d44e63
8e2f70b
f5c24d2
d2b5819
6f90340
82bdc1d
b88eb35
19ebee5
f8a11dd
afde718
9a6ce07
8662cb9
75714de
69ad08b
e0a2e3b
b638f18
8749273
da5fc17
b4e2d34
f2ce0ac
57ea76f
9662d91
a67ac2a
5d67720
456dcae
db43824
40b4bb4
4b05055
0f1666d
e8d9e79
ae2ca83
c2ea8fb
af4ed0f
70be8a9
721be62
ed4cd3a
cc1025a
d6f814c
9e7bfdd
c0850ea
352ae44
132c1c5
d191640
f3d7c18
8bbd2bc
2e3c82e
45bab82
fcd73ad
99e78da
ce56542
0c6226c
2061e9e
103f7d3
faf9b7d
eca7891
863cbc5
5a9b498
e90d411
b722222
3600bca
29ecec0
95a029b
9d8e1b5
c960523
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -143,7 +143,7 @@ def _init_mgr(self, mgr, axes=None, dtype=None, copy=False): | |
|
||
@property | ||
def _constructor(self): | ||
"""Used when a manipulation result has the same dimesions as the | ||
"""Used when a manipulation result has the same dimensions as the | ||
original. | ||
""" | ||
raise AbstractMethodError(self) | ||
|
@@ -2930,22 +2930,30 @@ def blocks(self): | |
"""Internal property, property synonym for as_blocks()""" | ||
return self.as_blocks() | ||
|
||
def astype(self, dtype, copy=True, raise_on_error=True, **kwargs): | ||
def astype(self, dtype, copy=True, inplace=False, raise_on_error=True, | ||
**kwargs): | ||
""" | ||
Cast object to input numpy.dtype | ||
Return a copy when copy = True (be really careful with this!) | ||
|
||
Parameters | ||
---------- | ||
dtype : numpy.dtype or Python type | ||
copy : deprecated; use inplace instead | ||
inplace : boolean, default False | ||
Modify the NDFrame in place (do not create a new object) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. version added tag There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. actually I don't think we can have an `inplace` argument. The copy flag was just to prevent copies if we are already the same dtype. So let's leave all of that alone. |
||
raise_on_error : raise on invalid input | ||
kwargs : keyword arguments to pass on to the constructor | ||
|
||
Returns | ||
------- | ||
casted : type of caller | ||
""" | ||
|
||
if inplace: | ||
new_data = self._data.astype(dtype=dtype, copy=False, | ||
raise_on_error=raise_on_error, | ||
**kwargs) | ||
self._update_inplace(new_data) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. verbose, you can do this like:
|
||
return | ||
mgr = self._data.astype(dtype=dtype, copy=copy, | ||
raise_on_error=raise_on_error, **kwargs) | ||
return self._constructor(mgr).__finalize__(self) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -372,6 +372,76 @@ def test_astype_str(self): | |
expected = DataFrame(['1.12345678901']) | ||
assert_frame_equal(result, expected) | ||
|
||
def test_astype_dict(self): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. you can test here, but test for There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Is there a more elegant way to handle the There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. you can just test it's OK, e.g. if |
||
# GH7271 | ||
a = Series(date_range('2010-01-04', periods=5)) | ||
b = Series(range(5)) | ||
c = Series([0.0, 0.2, 0.4, 0.6, 0.8]) | ||
d = Series(['1.0', '2', '3.14', '4', '5.4']) | ||
df = DataFrame({'a': a, 'b': b, 'c': c, 'd': d}) | ||
original = df.copy(deep=True) | ||
|
||
# change type of a subset of columns | ||
expected = DataFrame({ | ||
'a': a, | ||
'b': Series(['0', '1', '2', '3', '4']), | ||
'c': c, | ||
'd': Series([1.0, 2.0, 3.14, 4.0, 5.4], dtype='float32')}) | ||
astyped = df.astype({'b': 'str', 'd': 'float32'}) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
assert_frame_equal(astyped, expected) | ||
assert_frame_equal(df, original) | ||
self.assertEqual(astyped.b.dtype, 'object') | ||
self.assertEqual(astyped.d.dtype, 'float32') | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. don't use assertEqual like this, simply use |
||
|
||
# change all columns | ||
assert_frame_equal(df.astype({'a': str, 'b': str, 'c': str, 'd': str}), | ||
df.astype(str)) | ||
assert_frame_equal(df, original) | ||
|
||
# error should be raised when using something other than column labels | ||
# in the keys of the dtype dict | ||
self.assertRaises(KeyError, df.astype, {'b': str, 2: str}) | ||
self.assertRaises(KeyError, df.astype, {'e': str}) | ||
assert_frame_equal(df, original) | ||
|
||
# if the dtypes provided are the same as the original dtypes, the | ||
# resulting DataFrame should be the same as the original DataFrame | ||
equiv = df.astype({col: df[col].dtype for col in df.columns}) | ||
assert_frame_equal(df, equiv) | ||
assert_frame_equal(df, original) | ||
|
||
# using inplace=True, the df should be changed | ||
output = df.astype({'b': 'str', 'd': 'float32'}, inplace=True) | ||
self.assertEqual(output, None) | ||
assert_frame_equal(df, expected) | ||
df.astype({'b': np.float32, 'c': 'float32', 'd': np.float32}, | ||
inplace=True) | ||
self.assertEqual(df.a.dtype, original.a.dtype) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. again construct an expected |
||
self.assertEqual(df.b.dtype, 'float32') | ||
self.assertEqual(df.c.dtype, 'float32') | ||
self.assertEqual(df.d.dtype, 'float32') | ||
self.assertEqual(df.b[0], 0.0) | ||
df.astype({'b': str, 'c': 'float64', 'd': np.float64}, inplace=True) | ||
self.assertEqual(df.a.dtype, original.a.dtype) | ||
self.assertEqual(df.b.dtype, 'object') | ||
self.assertEqual(df.c.dtype, 'float64') | ||
self.assertEqual(df.d.dtype, 'float64') | ||
self.assertEqual(df.b[0], '0.0') | ||
|
||
def test_astype_inplace(self): | ||
# GH7271 | ||
df = DataFrame({'a': range(10), | ||
'b': range(2, 12), | ||
'c': np.arange(4.0, 14.0, dtype='float64')}) | ||
df.astype('float', inplace=True) | ||
for col in df.columns: | ||
self.assertTrue(df[col].map(lambda x: type(x) == float).all()) | ||
self.assertEqual(df[col].dtype, 'float64') | ||
df.astype('str', inplace=True) | ||
for col in df.columns: | ||
self.assertTrue(df[col].map(lambda x: type(x) == str).all()) | ||
self.assertEqual(df[col].dtype, 'object') | ||
|
||
def test_timedeltas(self): | ||
df = DataFrame(dict(A=Series(date_range('2012-1-1', periods=3, | ||
freq='D')), | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -133,6 +133,19 @@ def test_astype_unicode(self): | |
reload(sys) # noqa | ||
sys.setdefaultencoding(former_encoding) | ||
|
||
def test_astype_inplace(self): | ||
s = Series(np.random.randn(5), name='foo') | ||
|
||
for dtype in ['float32', 'float64', 'int64', 'int32']: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
astyped = s.astype(dtype, inplace=False) | ||
self.assertEqual(astyped.dtype, dtype) | ||
self.assertEqual(astyped.name, s.name) | ||
|
||
for dtype in ['float32', 'float64', 'int64', 'int32']: | ||
s.astype(dtype, inplace=True) | ||
self.assertEqual(s.dtype, dtype) | ||
self.assertEqual(s.name, 'foo') | ||
|
||
def test_complexx(self): | ||
# GH4819 | ||
# complex access for ndarray compat | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why are you adding a method here? This should all be done in
generic.py