diff --git a/doc/source/v0.15.0.txt b/doc/source/v0.15.0.txt index 128ddbd4a9ec3..eb58f46f0f3fe 100644 --- a/doc/source/v0.15.0.txt +++ b/doc/source/v0.15.0.txt @@ -186,7 +186,7 @@ Bug Fixes ~~~~~~~~~ - Bug in ``Series.astype("unicode")`` not calling ``unicode`` on the values correctly (:issue:`7758`) - +- Bug in ``DataFrame.as_matrix()`` with mixed ``datetime64[ns]`` and ``timedelta64[ns]`` dtypes (:issue:`7778`) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index a461dd0e247f2..17bef8dd28cf4 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3539,6 +3539,7 @@ def _apply_standard(self, func, axis, ignore_failures=False, reduce=True): except Exception: pass + dtype = object if self._is_mixed_type else None if axis == 0: series_gen = (self.icol(i) for i in range(len(self.columns))) res_index = self.columns @@ -3547,7 +3548,7 @@ def _apply_standard(self, func, axis, ignore_failures=False, reduce=True): res_index = self.index res_columns = self.columns values = self.values - series_gen = (Series.from_array(arr, index=res_columns, name=name) + series_gen = (Series.from_array(arr, index=res_columns, name=name, dtype=dtype) for i, (arr, name) in enumerate(zip(values, res_index))) else: # pragma : no cover diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 2bd318ec2430f..f649baeb16278 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -25,7 +25,7 @@ from pandas.util.decorators import cache_readonly from pandas.tslib import Timestamp -from pandas import compat +from pandas import compat, _np_version_under1p7 from pandas.compat import range, map, zip, u from pandas.tseries.timedeltas import _coerce_scalar_to_timedelta_type @@ -1290,6 +1290,16 @@ def to_native_types(self, slicer=None, na_rep=None, **kwargs): return rvalues.tolist() + def get_values(self, dtype=None): + # return object dtypes as datetime.timedeltas + if dtype == object: + if _np_version_under1p7: + return self.values.astype('object') + return lib.map_infer(self.values.ravel(), + lambda x: timedelta(microseconds=x.item()/1000) + ).reshape(self.values.shape) + return self.values + class BoolBlock(NumericBlock): __slots__ = () is_bool = True @@ -2595,7 +2605,7 @@ def as_matrix(self, items=None): else: mgr = self - if self._is_single_block: + if self._is_single_block or not self.is_mixed_type: return mgr.blocks[0].get_values() else: return mgr._interleave() @@ -3647,9 +3657,11 @@ def _lcd_dtype(l): has_non_numeric = have_dt64 or have_td64 or have_cat if (have_object or - (have_bool and have_numeric) or + (have_bool and (have_numeric or have_dt64 or have_td64)) or (have_numeric and has_non_numeric) or - have_cat): + have_cat or + have_dt64 or + have_td64): return np.dtype(object) elif have_bool: return np.dtype(bool) @@ -3670,10 +3682,6 @@ def _lcd_dtype(l): return np.dtype('int%s' % (lcd.itemsize * 8 * 2)) return lcd - elif have_dt64 and not have_float and not have_complex: - return np.dtype('M8[ns]') - elif have_td64 and not have_float and not have_complex: - return np.dtype('m8[ns]') elif have_complex: return np.dtype('c16') else: diff --git a/pandas/core/series.py b/pandas/core/series.py index eff558d875c4a..9abc8f22009b3 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -237,14 +237,14 @@ def __init__(self, data=None, index=None, dtype=None, name=None, self._set_axis(0, index, fastpath=True) @classmethod - def from_array(cls, arr, index=None, name=None, copy=False, + def from_array(cls, arr, index=None, name=None, dtype=None, copy=False, fastpath=False): # return a sparse series here if isinstance(arr, ABCSparseArray): from pandas.sparse.series import SparseSeries cls = SparseSeries - return cls(arr, index=index, name=name, copy=copy, fastpath=fastpath) + return cls(arr, index=index, name=name, dtype=dtype, copy=copy, fastpath=fastpath) @property def _constructor(self): diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index 2e1bbc88e36ff..df00edc46eed2 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -9635,6 +9635,15 @@ def test_apply(self): [[1, 2, 3], [4, 5, 6], [7, 8, 9]], index=['a', 'a', 'c']) self.assertRaises(ValueError, df.apply, lambda x: x, 2) + def test_apply_mixed_datetimelike(self): + tm._skip_if_not_numpy17_friendly() + + # mixed datetimelike + # GH 7778 + df = DataFrame({ 'A' : date_range('20130101',periods=3), 'B' : pd.to_timedelta(np.arange(3),unit='s') }) + result = df.apply(lambda x: x, axis=1) + assert_frame_equal(result, df) + def test_apply_empty(self): # empty applied = self.empty.apply(np.sqrt) diff --git a/pandas/tests/test_internals.py b/pandas/tests/test_internals.py index 8a9010084fd99..36dbced6eda8c 100644 --- a/pandas/tests/test_internals.py +++ b/pandas/tests/test_internals.py @@ -3,7 +3,7 @@ import nose import numpy as np -from pandas import Index, MultiIndex, DataFrame, Series +from pandas import Index, MultiIndex, DataFrame, Series, Categorical from pandas.compat import OrderedDict, lrange from pandas.sparse.array import SparseArray from pandas.core.internals import * @@ -41,9 +41,11 @@ def create_block(typestr, placement, item_shape=None, num_offset=0): * complex, c16, c8 * bool * object, string, O - * datetime, dt + * datetime, dt, M8[ns] + * timedelta, td, m8[ns] * sparse (SparseArray with fill_value=0.0) * sparse_na (SparseArray with fill_value=np.nan) + * category, category2 """ placement = BlockPlacement(placement) @@ -67,8 +69,14 @@ def create_block(typestr, placement, item_shape=None, num_offset=0): shape) elif typestr in ('bool'): values = np.ones(shape, dtype=np.bool_) - elif typestr in ('datetime', 'dt'): + elif typestr in ('datetime', 'dt', 'M8[ns]'): values = (mat * 1e9).astype('M8[ns]') + elif typestr in ('timedelta', 'td', 'm8[ns]'): + values = (mat * 1).astype('m8[ns]') + elif typestr in ('category'): + values = Categorical([1,1,2,2,3,3,3,3,4,4]) + elif typestr in ('category2'): + values = Categorical(['a','a','a','a','b','b','c','c','c','d']) elif typestr in ('sparse', 'sparse_na'): # FIXME: doesn't support num_rows != 10 assert shape[-1] == 10 @@ -556,7 +564,54 @@ def _compare(old_mgr, new_mgr): self.assertEqual(new_mgr.get('h').dtype, np.float16) def test_interleave(self): - pass + + + # self + for dtype in ['f8','i8','object','bool','complex','M8[ns]','m8[ns]']: + mgr = create_mgr('a: {0}'.format(dtype)) + self.assertEqual(mgr.as_matrix().dtype,dtype) + mgr = create_mgr('a: {0}; b: {0}'.format(dtype)) + self.assertEqual(mgr.as_matrix().dtype,dtype) + + # will be converted according the actual dtype of the underlying + mgr = create_mgr('a: category') + self.assertEqual(mgr.as_matrix().dtype,'i8') + mgr = create_mgr('a: category; b: category') + self.assertEqual(mgr.as_matrix().dtype,'i8'), + mgr = create_mgr('a: category; b: category2') + self.assertEqual(mgr.as_matrix().dtype,'object') + mgr = create_mgr('a: category2') + self.assertEqual(mgr.as_matrix().dtype,'object') + mgr = create_mgr('a: category2; b: category2') + self.assertEqual(mgr.as_matrix().dtype,'object') + + # combinations + mgr = create_mgr('a: f8') + self.assertEqual(mgr.as_matrix().dtype,'f8') + mgr = create_mgr('a: f8; b: i8') + self.assertEqual(mgr.as_matrix().dtype,'f8') + mgr = create_mgr('a: f4; b: i8') + self.assertEqual(mgr.as_matrix().dtype,'f4') + mgr = create_mgr('a: f4; b: i8; d: object') + self.assertEqual(mgr.as_matrix().dtype,'object') + mgr = create_mgr('a: bool; b: i8') + self.assertEqual(mgr.as_matrix().dtype,'object') + mgr = create_mgr('a: complex') + self.assertEqual(mgr.as_matrix().dtype,'complex') + mgr = create_mgr('a: f8; b: category') + self.assertEqual(mgr.as_matrix().dtype,'object') + mgr = create_mgr('a: M8[ns]; b: category') + self.assertEqual(mgr.as_matrix().dtype,'object') + mgr = create_mgr('a: M8[ns]; b: bool') + self.assertEqual(mgr.as_matrix().dtype,'object') + mgr = create_mgr('a: M8[ns]; b: i8') + self.assertEqual(mgr.as_matrix().dtype,'object') + mgr = create_mgr('a: m8[ns]; b: bool') + self.assertEqual(mgr.as_matrix().dtype,'object') + mgr = create_mgr('a: m8[ns]; b: i8') + self.assertEqual(mgr.as_matrix().dtype,'object') + mgr = create_mgr('a: M8[ns]; b: m8[ns]') + self.assertEqual(mgr.as_matrix().dtype,'object') def test_interleave_non_unique_cols(self): df = DataFrame([