diff --git a/doc/source/release.rst b/doc/source/release.rst index 4d628fac78cf0..0ef2c29af8139 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -277,6 +277,7 @@ API Changes - ``numexpr`` 2.2.2 fixes incompatiblity in PyTables 2.4 (:issue:`4908`) - ``flush`` now accepts an ``fsync`` parameter, which defaults to ``False`` (:issue:`5364`) + - ``unicode`` indices not supported on ``table`` formats (:issue:`5386`) - ``JSON`` - added ``date_unit`` parameter to specify resolution of timestamps. diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 5919589978903..97dc8dcdec73a 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -1919,6 +1919,10 @@ def set_version(self): def pandas_type(self): return _ensure_decoded(getattr(self.group._v_attrs, 'pandas_type', None)) + @property + def format_type(self): + return 'fixed' + def __unicode__(self): """ return a pretty representation of myself """ self.infer_axes() @@ -2146,7 +2150,8 @@ def write_index(self, key, index): self.write_sparse_intindex(key, index) else: setattr(self.attrs, '%s_variety' % key, 'regular') - converted = _convert_index(index, self.encoding).set_name('index') + converted = _convert_index(index, self.encoding, + self.format_type).set_name('index') self.write_array(key, converted.values) node = getattr(self.group, key) node._v_attrs.kind = converted.kind @@ -2192,7 +2197,8 @@ def write_multi_index(self, key, index): index.names)): # write the level level_key = '%s_level%d' % (key, i) - conv_level = _convert_index(lev, self.encoding).set_name(level_key) + conv_level = _convert_index(lev, self.encoding, + self.format_type).set_name(level_key) self.write_array(level_key, conv_level.values) node = getattr(self.group, level_key) node._v_attrs.kind = conv_level.kind @@ -2609,6 +2615,10 @@ def __init__(self, *args, **kwargs): def table_type_short(self): return self.table_type.split('_')[0] + @property + def format_type(self): + return 'table' + def __unicode__(self): """ 
return a pretty representatgion of myself """ self.infer_axes() @@ -2991,7 +3001,7 @@ def create_axes(self, axes, obj, validate=True, nan_rep=None, if i in axes: name = obj._AXIS_NAMES[i] index_axes_map[i] = _convert_index( - a, self.encoding).set_name(name).set_axis(i) + a, self.encoding, self.format_type).set_name(name).set_axis(i) else: # we might be able to change the axes on the appending data if @@ -3823,7 +3833,7 @@ def _get_info(info, name): idx = info[name] = dict() return idx -def _convert_index(index, encoding=None): +def _convert_index(index, encoding=None, format_type=None): index_name = getattr(index, 'name', None) if isinstance(index, DatetimeIndex): @@ -3870,9 +3880,13 @@ def _convert_index(index, encoding=None): converted, 'string', _tables().StringCol(itemsize), itemsize=itemsize, index_name=index_name) elif inferred_type == 'unicode': - atom = _tables().ObjectAtom() - return IndexCol(np.asarray(values, dtype='O'), 'object', atom, - index_name=index_name) + if format_type == 'fixed': + atom = _tables().ObjectAtom() + return IndexCol(np.asarray(values, dtype='O'), 'object', atom, + index_name=index_name) + raise TypeError( + "[unicode] is not supported as an index type for [{0}] formats".format(format_type)) + elif inferred_type == 'integer': # take a guess for now, hope the values fit atom = _tables().Int64Col() diff --git a/pandas/io/tests/test_pytables.py b/pandas/io/tests/test_pytables.py index a08073bd7bd35..598f374e0fcf7 100644 --- a/pandas/io/tests/test_pytables.py +++ b/pandas/io/tests/test_pytables.py @@ -755,6 +755,38 @@ def test_append_series(self): store.append('mi', s) tm.assert_series_equal(store['mi'], s) + def test_store_index_types(self): + # GH5386 + # test storing various index types + + with ensure_clean(self.path) as store: + + def check(format,index): + df = DataFrame(np.random.randn(10,2),columns=list('AB')) + df.index = index(len(df)) + + _maybe_remove(store, 'df') + store.put('df',df,format=format) + 
assert_frame_equal(df,store['df']) + + for index in [ tm.makeFloatIndex, tm.makeStringIndex, tm.makeIntIndex, + tm.makeDateIndex, tm.makePeriodIndex ]: + + check('table',index) + check('fixed',index) + + # unicode + index = tm.makeUnicodeIndex + if compat.PY3: + check('table',index) + check('fixed',index) + else: + + # only support for fixed types (and they have a perf warning) + self.assertRaises(TypeError, check, 'table', index) + with tm.assert_produces_warning(expected_warning=PerformanceWarning): + check('fixed',index) + def test_encoding(self): if sys.byteorder != 'little': diff --git a/pandas/util/testing.py b/pandas/util/testing.py index f40a8e1a5a9d6..2e4d1f3e8df74 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -336,7 +336,8 @@ def ensure_clean(filename=None, return_filelike=False): yield filename finally: try: - os.remove(filename) + if os.path.exists(filename): + os.remove(filename) except Exception as e: print(e)