Description
Setting values on datetime index with DST changes
start_date = pd.Timestamp('2017-10-27 00:00:00+0200', tz='Europe/Madrid')
end_date = pd.Timestamp('2017-10-30 00:00:00+0100', tz='Europe/Madrid')
df = pd.DataFrame(index=pd.date_range(start_date, end_date, closed='left', freq="10T"), columns=['value'])
# set a value for a label that is not in the index (the date is in 2016); this raises an exception
df.loc[pd.Timestamp('2016-10-10 03:00:00', tz='Europe/Madrid'), 'value'] = 12
Exception raised from `df.loc`
AmbiguousTimeError                        Traceback (most recent call last)
<ipython-input> in <module>()
----> 1 df.loc[pd.Timestamp('2016-10-10 03:00:00', tz='Europe/Madrid'), 'value'] = 12

~/Envs/jupyter/lib/python3.6/site-packages/pandas/core/indexing.py in __setitem__(self, key, value)
192 key = com._apply_if_callable(key, self.obj)
193 indexer = self._get_setitem_indexer(key)
--> 194 self._setitem_with_indexer(indexer, value)
195
196 def _has_valid_type(self, k, axis):
~/Envs/jupyter/lib/python3.6/site-packages/pandas/core/indexing.py in _setitem_with_indexer(self, indexer, value)
368 # so the object is the same
369 index = self.obj._get_axis(i)
--> 370 labels = index.insert(len(index), key)
371 self.obj._data = self.obj.reindex(labels, axis=i)._data
372 self.obj._maybe_update_cacher(clear=True)
~/Envs/jupyter/lib/python3.6/site-packages/pandas/core/indexes/datetimes.py in insert(self, loc, item)
1734 new_dates = libts.tz_convert(new_dates, 'UTC', self.tz)
1735 return DatetimeIndex(new_dates, name=self.name, freq=freq,
-> 1736 tz=self.tz)
1737
1738 except (AttributeError, TypeError):
~/Envs/jupyter/lib/python3.6/site-packages/pandas/util/_decorators.py in wrapper(*args, **kwargs)
116 else:
117 kwargs[new_arg_name] = new_arg_value
--> 118 return func(*args, **kwargs)
119 return wrapper
120 return _deprecate_kwarg
~/Envs/jupyter/lib/python3.6/site-packages/pandas/core/indexes/datetimes.py in __new__(cls, data, freq, start, end, periods, copy, name, tz, verify_integrity, normalize, closed, ambiguous, dtype, **kwargs)
388 ints = subarr.view('i8')
389 subarr = libts.tz_localize_to_utc(ints, tz,
--> 390 ambiguous=ambiguous)
391 subarr = subarr.view(_NS_DTYPE)
392
pandas/_libs/tslib.pyx in pandas._libs.tslib.tz_localize_to_utc()
AmbiguousTimeError: Cannot infer dst time from Timestamp('2017-10-29 02:00:00'), try using the 'ambiguous' argument
Setting values on datetime index with no DST changes
start_date = pd.Timestamp('2017-09-27 00:00:00+0200', tz='Europe/Madrid')
end_date = pd.Timestamp('2017-09-30 00:00:00+0100', tz='Europe/Madrid')
df = pd.DataFrame(index=pd.date_range(start_date, end_date, closed='left', freq="10T"), columns=['value'])
df.index
# setting a value for a label that is not in the index creates a new row
df.loc[pd.Timestamp('2016-10-10 03:00:00', tz='Europe/Madrid'), 'value'] = 12
Problem description
There seems to be conflicting behavior when setting a value with `.loc` on a non-existent index label (a datetime that is not already in the index).
(First code sample) When the datetime index range includes a DST change, `.loc[index, field] = value` raises an `AmbiguousTimeError`.
(Second code sample) When the datetime index range does not include a DST change, `.loc[index, field] = value` creates a new row as normal.
Output of pd.show_versions()
[paste the output of pd.show_versions()
here below this line]
INSTALLED VERSIONS
commit: None
python: 3.6.3.final.0
python-bits: 64
OS: Darwin
OS-release: 17.2.0
machine: x86_64
processor: i386
byteorder: little
LC_ALL: en_US.UTF-8
LANG: en_US.utf8
LOCALE: en_US.UTF-8
pandas: 0.21.0
pytest: None
pip: 9.0.1
setuptools: 36.7.2
Cython: None
numpy: 1.13.3
scipy: None
pyarrow: None
xarray: None
IPython: 6.2.1
sphinx: None
patsy: None
dateutil: 2.6.1
pytz: 2017.3
blosc: None
bottleneck: None
tables: None
numexpr: None
feather: None
matplotlib: None
openpyxl: None
xlrd: None
xlwt: None
xlsxwriter: None
lxml: None
bs4: None
html5lib: 1.0b10
sqlalchemy: None
pymysql: None
psycopg2: None
jinja2: 2.10
s3fs: None
fastparquet: None
pandas_gbq: None
pandas_datareader: None