Closed
Description
I'm encountering a bug when I query an HDFStore for a MultiIndex DataFrame that has a timezone-aware DatetimeIndex as one of its index levels.
This only happens when all of the following conditions hold:
- one of the MultiIndex levels consists of timestamps with a timezone (as seen in [1]). If the timestamps have no timezone set, there is no issue (as seen in [2]).
- the query returns no rows
- the pandas version is 0.17.*. This was working fine in pandas 0.16.*.
In [1]: periods = 10
...: dts = pd.date_range('20151201', periods=periods, freq='D', tz='UTC') #WITH TIMEZONE
...: mi = pd.MultiIndex.from_arrays([dts, range(periods)], names = ['DATE', 'NO'])
...: df = pd.DataFrame({'MYCOL':0}, index=mi)
...: file_path = 'table.h5'
...: key = 'mykey'
...: with pd.HDFStore(file_path, 'w') as store:
...: store.append(key, df, format='table', append=True)
...: dfres = store.select(key, where="""DATE > '20151220'""")
...: print(dfres)
...:
...:
Traceback (most recent call last):
File "<ipython-input-1-e0b7db50fd4d>", line 9, in <module>
dfres = store.select(key, where="""DATE > '20151220'""")
File "/export/data/anaconda/anaconda3.2.4/lib/python3.5/site-packages/pandas/io/pytables.py", line 669, in select
return it.get_result()
File "/export/data/anaconda/anaconda3.2.4/lib/python3.5/site-packages/pandas/io/pytables.py", line 1352, in get_result
results = self.func(self.start, self.stop, where)
File "/export/data/anaconda/anaconda3.2.4/lib/python3.5/site-packages/pandas/io/pytables.py", line 662, in func
columns=columns, **kwargs)
File "/export/data/anaconda/anaconda3.2.4/lib/python3.5/site-packages/pandas/io/pytables.py", line 4170, in read
df = super(AppendableMultiFrameTable, self).read(**kwargs)
File "/export/data/anaconda/anaconda3.2.4/lib/python3.5/site-packages/pandas/io/pytables.py", line 4029, in read
df = concat(frames, axis=1, verify_integrity=False).consolidate()
File "/export/data/anaconda/anaconda3.2.4/lib/python3.5/site-packages/pandas/tools/merge.py", line 813, in concat
return op.get_result()
File "/export/data/anaconda/anaconda3.2.4/lib/python3.5/site-packages/pandas/tools/merge.py", line 995, in get_result
mgrs_indexers, self.new_axes, concat_axis=self.axis, copy=self.copy)
File "/export/data/anaconda/anaconda3.2.4/lib/python3.5/site-packages/pandas/core/internals.py", line 4456, in concatenate_block_managers
for placement, join_units in concat_plan]
File "/export/data/anaconda/anaconda3.2.4/lib/python3.5/site-packages/pandas/core/internals.py", line 4456, in <listcomp>
for placement, join_units in concat_plan]
File "/export/data/anaconda/anaconda3.2.4/lib/python3.5/site-packages/pandas/core/internals.py", line 4553, in concatenate_join_units
for ju in join_units]
File "/export/data/anaconda/anaconda3.2.4/lib/python3.5/site-packages/pandas/core/internals.py", line 4553, in <listcomp>
for ju in join_units]
File "/export/data/anaconda/anaconda3.2.4/lib/python3.5/site-packages/pandas/core/internals.py", line 4801, in get_reindexed_values
missing_arr = np.empty(self.shape, dtype=empty_dtype)
TypeError: data type not understood
In [2]: periods = 10
...: dts = pd.date_range('20151201', periods=periods, freq='D') #WITHOUT TIMEZONE
...: mi = pd.MultiIndex.from_arrays([dts, range(periods)], names = ['DATE', 'NO'])
...: df = pd.DataFrame({'MYCOL':0}, index=mi)
...: file_path = 'table.h5'
...: key = 'mykey'
...: with pd.HDFStore(file_path, 'w') as store:
...: store.append(key, df, format='table', append=True)
...: dfres = store.select(key, where="""DATE > '20151220'""")
...: print(dfres)
...:
...:
Empty DataFrame
Columns: [MYCOL]
Index: []
In [3]: pd.show_versions()
INSTALLED VERSIONS
------------------
commit: None
python: 3.5.1.final.0
python-bits: 64
OS: Linux
OS-release: 2.6.32-431.11.2.el6.x86_64
machine: x86_64
processor: x86_64
byteorder: little
LC_ALL: None
LANG: en_US.UTF-8
pandas: 0.17.1
nose: 1.3.7
pip: 7.1.2
setuptools: 19.1.1
Cython: 0.23.4
numpy: 1.10.2
scipy: 0.16.1
statsmodels: None
IPython: 4.0.1
sphinx: 1.3.1
patsy: 0.4.0
dateutil: 2.4.2
pytz: 2015.7
blosc: None
bottleneck: 1.0.0
tables: 3.2.2
numexpr: 2.4.4
matplotlib: 1.5.0
openpyxl: 2.2.6
xlrd: 0.9.4
xlwt: 1.0.0
xlsxwriter: 0.7.7
lxml: 3.5.0
bs4: 4.4.1
html5lib: None
httplib2: None
apiclient: None
sqlalchemy: 1.0.10
pymysql: None
psycopg2: None
Jinja2: None