|
19 | 19 | import pandas.core.algorithms as algos
|
20 | 20 | import pandas.core.common as com
|
21 | 21 | from pandas.core.common import(_possibly_downcast_to_dtype, isnull,
|
22 |
| - notnull, _DATELIKE_DTYPES) |
| 22 | + notnull, _DATELIKE_DTYPES, is_numeric_dtype, |
| 23 | + is_timedelta64_dtype, is_datetime64_dtype) |
23 | 24 |
|
24 | 25 | import pandas.lib as lib
|
| 26 | +from pandas.lib import Timestamp |
25 | 27 | import pandas.algos as _algos
|
26 | 28 | import pandas.hashtable as _hash
|
27 | 29 |
|
@@ -257,6 +259,16 @@ def indices(self):
|
257 | 259 | """ dict {group name -> group indices} """
|
258 | 260 | return self.grouper.indices
|
259 | 261 |
|
| 262 | + def _get_index(self, name): |
| 263 | + """ safe get index """ |
| 264 | + try: |
| 265 | + return self.indices[name] |
| 266 | + except: |
| 267 | + if isinstance(name, Timestamp): |
| 268 | + name = name.value |
| 269 | + return self.indices[name] |
| 270 | + raise |
| 271 | + |
260 | 272 | @property
|
261 | 273 | def name(self):
|
262 | 274 | if self._selection is None:
|
@@ -350,7 +362,7 @@ def get_group(self, name, obj=None):
|
350 | 362 | if obj is None:
|
351 | 363 | obj = self.obj
|
352 | 364 |
|
353 |
| - inds = self.indices[name] |
| 365 | + inds = self._get_index(name) |
354 | 366 | return obj.take(inds, axis=self.axis, convert=False)
|
355 | 367 |
|
356 | 368 | def __iter__(self):
|
@@ -676,7 +688,7 @@ def _try_cast(self, result, obj):
|
676 | 688 | def _cython_agg_general(self, how, numeric_only=True):
|
677 | 689 | output = {}
|
678 | 690 | for name, obj in self._iterate_slices():
|
679 |
| - is_numeric = _is_numeric_dtype(obj.dtype) |
| 691 | + is_numeric = is_numeric_dtype(obj.dtype) |
680 | 692 | if numeric_only and not is_numeric:
|
681 | 693 | continue
|
682 | 694 |
|
@@ -714,7 +726,7 @@ def _python_agg_general(self, func, *args, **kwargs):
|
714 | 726 |
|
715 | 727 | # since we are masking, make sure that we have a float object
|
716 | 728 | values = result
|
717 |
| - if _is_numeric_dtype(values.dtype): |
| 729 | + if is_numeric_dtype(values.dtype): |
718 | 730 | values = com.ensure_float(values)
|
719 | 731 |
|
720 | 732 | output[name] = self._try_cast(values[mask], result)
|
@@ -1080,7 +1092,7 @@ def aggregate(self, values, how, axis=0):
|
1080 | 1092 | raise NotImplementedError
|
1081 | 1093 | out_shape = (self.ngroups,) + values.shape[1:]
|
1082 | 1094 |
|
1083 |
| - if _is_numeric_dtype(values.dtype): |
| 1095 | + if is_numeric_dtype(values.dtype): |
1084 | 1096 | values = com.ensure_float(values)
|
1085 | 1097 | is_numeric = True
|
1086 | 1098 | else:
|
@@ -1474,6 +1486,15 @@ def __init__(self, index, grouper=None, name=None, level=None,
|
1474 | 1486 | self.grouper = None # Try for sanity
|
1475 | 1487 | raise AssertionError(errmsg)
|
1476 | 1488 |
|
| 1489 | + # if we have a date/time-like grouper, convert it so that its values are Timestamp-/Timedelta-like |
| 1490 | + if getattr(self.grouper,'dtype',None) is not None: |
| 1491 | + if is_datetime64_dtype(self.grouper): |
| 1492 | + from pandas import to_datetime |
| 1493 | + self.grouper = to_datetime(self.grouper) |
| 1494 | + elif is_timedelta64_dtype(self.grouper): |
| 1495 | + from pandas import to_timedelta |
| 1496 | + self.grouper = to_timedelta(self.grouper) |
| 1497 | + |
1477 | 1498 | def __repr__(self):
|
1478 | 1499 | return 'Grouping(%s)' % self.name
|
1479 | 1500 |
|
@@ -1821,7 +1842,7 @@ def transform(self, func, *args, **kwargs):
|
1821 | 1842 | # need to do a safe put here, as the dtype may be different
|
1822 | 1843 | # this needs to be an ndarray
|
1823 | 1844 | result = Series(result)
|
1824 |
| - result.iloc[self.indices[name]] = res |
| 1845 | + result.iloc[self._get_index(name)] = res |
1825 | 1846 | result = result.values
|
1826 | 1847 |
|
1827 | 1848 | # downcast if we can (and need)
|
@@ -1860,7 +1881,7 @@ def true_and_notnull(x, *args, **kwargs):
|
1860 | 1881 | return b and notnull(b)
|
1861 | 1882 |
|
1862 | 1883 | try:
|
1863 |
| - indices = [self.indices[name] if true_and_notnull(group) else [] |
| 1884 | + indices = [self._get_index(name) if true_and_notnull(group) else [] |
1864 | 1885 | for name, group in self]
|
1865 | 1886 | except ValueError:
|
1866 | 1887 | raise TypeError("the filter must return a boolean result")
|
@@ -1921,7 +1942,7 @@ def _cython_agg_blocks(self, how, numeric_only=True):
|
1921 | 1942 | for block in data.blocks:
|
1922 | 1943 | values = block.values
|
1923 | 1944 |
|
1924 |
| - is_numeric = _is_numeric_dtype(values.dtype) |
| 1945 | + is_numeric = is_numeric_dtype(values.dtype) |
1925 | 1946 |
|
1926 | 1947 | if numeric_only and not is_numeric:
|
1927 | 1948 | continue
|
@@ -2412,7 +2433,7 @@ def filter(self, func, dropna=True, *args, **kwargs):
|
2412 | 2433 | res = path(group)
|
2413 | 2434 |
|
2414 | 2435 | def add_indices():
|
2415 |
| - indices.append(self.indices[name]) |
| 2436 | + indices.append(self._get_index(name)) |
2416 | 2437 |
|
2417 | 2438 | # interpret the result of the filter
|
2418 | 2439 | if isinstance(res, (bool, np.bool_)):
|
@@ -2973,12 +2994,6 @@ def _reorder_by_uniques(uniques, labels):
|
2973 | 2994 | }
|
2974 | 2995 |
|
2975 | 2996 |
|
2976 |
| -def _is_numeric_dtype(dt): |
2977 |
| - typ = dt.type |
2978 |
| - return (issubclass(typ, (np.number, np.bool_)) |
2979 |
| - and not issubclass(typ, (np.datetime64, np.timedelta64))) |
2980 |
| - |
2981 |
| - |
2982 | 2997 | def _intercept_function(func):
|
2983 | 2998 | return _func_table.get(func, func)
|
2984 | 2999 |
|
|
0 commit comments