Skip to content

Commit 0532ba3

Browse files
committed
Merge pull request #8264 from jreback/mi
BUG: Bug in inference in a MultiIndex with datetime.date inputs (GH7888)
2 parents 8b4bf16 + eff52e0 commit 0532ba3

File tree

4 files changed

+34
-13
lines changed

4 files changed

+34
-13
lines changed

doc/source/v0.15.0.txt

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -676,8 +676,8 @@ Enhancements
676676

677677

678678

679-
- ``tz_localize`` now accepts the ``ambiguous`` keyword which allows for passing an array of bools
680-
indicating whether the date belongs in DST or not, 'NaT' for setting transition times to NaT,
679+
- ``tz_localize`` now accepts the ``ambiguous`` keyword which allows for passing an array of bools
680+
indicating whether the date belongs in DST or not, 'NaT' for setting transition times to NaT,
681681
'infer' for inferring DST/non-DST, and 'raise' (default) for an AmbiguousTimeError to be raised (:issue:`7943`).
682682
See :ref:`the docs<timeseries.timezone_ambiguous>` for more details.
683683

@@ -756,7 +756,7 @@ Bug Fixes
756756
- Bug in HDFStore iteration when passing a where (:issue:`8014`)
757757
- Bug in DataFrameGroupby.transform when transforming with a passed non-sorted key (:issue:`8046`)
758758
- Bug where repeated timeseries line and area plots may result in ``ValueError`` or incorrect kind (:issue:`7733`)
759-
759+
- Bug in inference in a MultiIndex with ``datetime.date`` inputs (:issue:`7888`)
760760

761761
- Bug in ``offsets.apply``, ``rollforward`` and ``rollback`` may reset nanosecond (:issue:`7697`)
762762
- Bug in ``offsets.apply``, ``rollforward`` and ``rollback`` may raise ``AttributeError`` if ``Timestamp`` has ``dateutil`` tzinfo (:issue:`7697`)

pandas/core/categorical.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -232,7 +232,7 @@ def __init__(self, values, levels=None, ordered=None, name=None, fastpath=False,
232232
# which is fine, but since factorize does this correctly no need here
233233
# this is an issue because _sanitize_array also coerces np.nan to a string
234234
# under certain versions of numpy as well
235-
values = com._possibly_infer_to_datetimelike(values)
235+
values = com._possibly_infer_to_datetimelike(values, convert_dates=True)
236236
if not isinstance(values, np.ndarray):
237237
values = _convert_to_list_like(values)
238238
from pandas.core.series import _sanitize_array

pandas/core/common.py

Lines changed: 17 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1961,15 +1961,24 @@ def _possibly_cast_to_datetime(value, dtype, coerce=False):
19611961
return value
19621962

19631963

1964-
def _possibly_infer_to_datetimelike(value):
1965-
# we might have a array (or single object) that is datetime like,
1966-
# and no dtype is passed don't change the value unless we find a
1967-
# datetime/timedelta set
1964+
def _possibly_infer_to_datetimelike(value, convert_dates=False):
1965+
"""
1966+
we might have an array (or single object) that is datetime like,
1967+
and no dtype is passed; don't change the value unless we find a
1968+
datetime/timedelta set
1969+
1970+
this is pretty strict in that a datetime/timedelta is REQUIRED
1971+
in addition to possible nulls/string likes
1972+
1973+
ONLY strings are NOT datetimelike
19681974
1969-
# this is pretty strict in that a datetime/timedelta is REQUIRED
1970-
# in addition to possible nulls/string likes
1975+
Parameters
1976+
----------
1977+
convert_dates : boolean, default False
1978+
if True try really hard to convert dates (such as datetime.date), otherwise
1979+
leave inferred dtype 'date' alone
19711980
1972-
# ONLY strings are NOT datetimelike
1981+
"""
19731982

19741983
v = value
19751984
if not is_list_like(v):
@@ -2011,7 +2020,7 @@ def _try_timedelta(v):
20112020
sample = v[:min(3,len(v))]
20122021
inferred_type = lib.infer_dtype(sample)
20132022

2014-
if inferred_type in ['datetime', 'datetime64']:
2023+
if inferred_type in ['datetime', 'datetime64'] or (convert_dates and inferred_type in ['date']):
20152024
value = _try_datetime(v)
20162025
elif inferred_type in ['timedelta', 'timedelta64']:
20172026
value = _try_timedelta(v)

pandas/tests/test_multilevel.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,13 @@
11
# pylint: disable-msg=W0612,E1101,W0141
22
import datetime
3+
import itertools
34
import nose
45

56
from numpy.random import randn
67
import numpy as np
78

89
from pandas.core.index import Index, MultiIndex
9-
from pandas import Panel, DataFrame, Series, notnull, isnull
10+
from pandas import Panel, DataFrame, Series, notnull, isnull, Timestamp
1011

1112
from pandas.util.testing import (assert_almost_equal,
1213
assert_series_equal,
@@ -2066,6 +2067,17 @@ def test_datetimeindex(self):
20662067
self.assertTrue(idx.levels[0].equals(expected1))
20672068
self.assertTrue(idx.levels[1].equals(idx2))
20682069

2070+
# from datetime combos
2071+
# GH 7888
2072+
date1 = datetime.date.today()
2073+
date2 = datetime.datetime.today()
2074+
date3 = Timestamp.today()
2075+
2076+
for d1, d2 in itertools.product([date1,date2,date3],[date1,date2,date3]):
2077+
index = pd.MultiIndex.from_product([[d1],[d2]])
2078+
self.assertIsInstance(index.levels[0],pd.DatetimeIndex)
2079+
self.assertIsInstance(index.levels[1],pd.DatetimeIndex)
2080+
20692081
def test_set_index_datetime(self):
20702082
# GH 3950
20712083
df = pd.DataFrame({'label':['a', 'a', 'a', 'b', 'b', 'b'],

0 commit comments

Comments
 (0)