diff --git a/doc/source/release.rst b/doc/source/release.rst index a7469ba2e707b..7a271688c318b 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -316,6 +316,8 @@ pandas 0.12 - Better error messages on inserting incompatible columns to a frame (:issue:`4107`) - Fixed bug in ``DataFrame.replace`` where a nested dict wasn't being iterated over when regex=False (:issue:`4115`) + - Fixed bug in ``convert_objects(convert_numeric=True)`` where a mixed numeric and + object Series/Frame was not converting properly (:issue:`4119`) pandas 0.11.0 diff --git a/pandas/core/series.py b/pandas/core/series.py index 5ea029b414fef..06abd1d5b4127 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -928,7 +928,7 @@ def astype(self, dtype): return self._constructor(values, index=self.index, name=self.name, dtype=values.dtype) - def convert_objects(self, convert_dates=True, convert_numeric=True, copy=True): + def convert_objects(self, convert_dates=True, convert_numeric=False, copy=True): """ Attempt to infer better dtype diff --git a/pandas/src/inference.pyx b/pandas/src/inference.pyx index 270fb01a42033..f4474bfb5f853 100644 --- a/pandas/src/inference.pyx +++ b/pandas/src/inference.pyx @@ -379,11 +379,14 @@ def maybe_convert_numeric(ndarray[object] values, set na_values, elif util.is_float_object(val): floats[i] = complexes[i] = val seen_float = 1 + elif util.is_integer_object(val): + floats[i] = ints[i] = val + seen_int = 1 elif val is None: floats[i] = complexes[i] = nan seen_float = 1 - elif len(val) == 0: - if convert_empty: + elif hasattr(val,'__len__') and len(val) == 0: + if convert_empty or coerce_numeric: floats[i] = complexes[i] = nan seen_float = 1 else: diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index aae15fa6fd09f..3e45b69fb740a 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -5476,6 +5476,12 @@ def test_convert_objects(self): converted = self.mixed_frame.copy() self.assertRaises(Exception, converted['H'].astype, 'int32') + # mixed in a single column + df = DataFrame(dict(s = Series([1, 'na', 3 ,4]))) + result = df.convert_objects(convert_numeric=True) + expected = DataFrame(dict(s = Series([1, np.nan, 3 ,4]))) + assert_frame_equal(result, expected) + def test_convert_objects_no_conversion(self): mixed1 = DataFrame( {'a': [1, 2, 3], 'b': [4.0, 5, 6], 'c': ['x', 'y', 'z']}) diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py index 09f3cc7b61f33..b4ad172ddf340 100644 --- a/pandas/tests/test_series.py +++ b/pandas/tests/test_series.py @@ -3430,25 +3430,36 @@ def test_convert_objects(self): s = Series([1., 2, 3],index=['a','b','c']) result = s.convert_objects(convert_dates=False,convert_numeric=True) - assert_series_equal(s,result) + assert_series_equal(result, s) # force numeric conversion r = s.copy().astype('O') r['a'] = '1' result = r.convert_objects(convert_dates=False,convert_numeric=True) - assert_series_equal(s,result) + assert_series_equal(result, s) r = s.copy().astype('O') r['a'] = '1.' result = r.convert_objects(convert_dates=False,convert_numeric=True) - assert_series_equal(s,result) + assert_series_equal(result, s) r = s.copy().astype('O') r['a'] = 'garbled' expected = s.copy() expected['a'] = np.nan result = r.convert_objects(convert_dates=False,convert_numeric=True) - assert_series_equal(expected,result) + assert_series_equal(result, expected) + + # GH 4119, not converting a mixed type (e.g.floats and object) + s = Series([1, 'na', 3 ,4]) + result = s.convert_objects(convert_numeric=True) + expected = Series([1,np.nan,3,4]) + assert_series_equal(result, expected) + + s = Series([1, '', 3 ,4]) + result = s.convert_objects(convert_numeric=True) + expected = Series([1,np.nan,3,4]) + assert_series_equal(result, expected) # dates s = Series([datetime(2001,1,1,0,0), datetime(2001,1,2,0,0), datetime(2001,1,3,0,0) ]) @@ -3456,18 +3467,17 @@ def test_convert_objects(self): result = s.convert_objects(convert_dates=True,convert_numeric=False) expected = Series([Timestamp('20010101'),Timestamp('20010102'),Timestamp('20010103')],dtype='M8[ns]') - assert_series_equal(expected,result) + assert_series_equal(result, expected) result = s.convert_objects(convert_dates='coerce',convert_numeric=False) - assert_series_equal(expected,result) result = s.convert_objects(convert_dates='coerce',convert_numeric=True) - assert_series_equal(expected,result) + assert_series_equal(result, expected) expected = Series([Timestamp('20010101'),Timestamp('20010102'),Timestamp('20010103'),lib.NaT,lib.NaT,lib.NaT,Timestamp('20010104'),Timestamp('20010105')],dtype='M8[ns]') result = s2.convert_objects(convert_dates='coerce',convert_numeric=False) - assert_series_equal(expected,result) + assert_series_equal(result, expected) result = s2.convert_objects(convert_dates='coerce',convert_numeric=True) - assert_series_equal(expected,result) + assert_series_equal(result, expected) # preserver all-nans (if convert_dates='coerce') s = Series(['foo','bar',1,1.0],dtype='O')