From 1477e0587f52f95c065f0145e29a004bf4a2a439 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 5 Dec 2019 09:14:07 -0800 Subject: [PATCH] REF: do string itemsize casting earlier --- pandas/io/pytables.py | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 1d24e15c504f2..3f0c7b93e428c 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -2313,7 +2313,7 @@ def set_kind(self): if self.typ is None: self.typ = getattr(self.description, self.cname, None) - def set_atom(self, block, itemsize: int, data_converted, use_str: bool): + def set_atom(self, block, data_converted, use_str: bool): """ create and setup my atom from the block b """ # short-cut certain block types @@ -2329,7 +2329,7 @@ def set_atom(self, block, itemsize: int, data_converted, use_str: bool): self.set_atom_complex(block) elif use_str: - self.set_atom_string(itemsize, data_converted) + self.set_atom_string(data_converted) else: # set as a data block self.set_atom_data(block) @@ -2337,11 +2337,12 @@ def set_atom(self, block, itemsize: int, data_converted, use_str: bool): def get_atom_string(self, shape, itemsize): return _tables().StringCol(itemsize=itemsize, shape=shape[0]) - def set_atom_string(self, itemsize: int, data_converted: np.ndarray): + def set_atom_string(self, data_converted: np.ndarray): + itemsize = data_converted.dtype.itemsize self.itemsize = itemsize self.kind = "string" self.typ = self.get_atom_string(data_converted.shape, itemsize) - self.set_data(data_converted.astype(f"|S{itemsize}", copy=False)) + self.set_data(data_converted) def get_atom_coltype(self, kind=None): """ return the PyTables column class for this column """ @@ -3896,7 +3897,7 @@ def get_blk_items(mgr, blocks): existing_col = None new_name = name or f"values_block_{i}" - itemsize, data_converted, use_str = _maybe_convert_for_string_atom( + data_converted, use_str = _maybe_convert_for_string_atom( new_name, b, existing_col=existing_col, @@ -3908,12 +3909,7 @@ def get_blk_items(mgr, blocks): col = klass.create_for_block(i=i, name=new_name, version=self.version) col.values = list(b_items) - col.set_atom( - block=b, - itemsize=itemsize, - data_converted=data_converted, - use_str=use_str, - ) + col.set_atom(block=b, data_converted=data_converted, use_str=use_str) col.update_info(self.info) col.set_pos(j) @@ -4785,7 +4781,7 @@ def _maybe_convert_for_string_atom( use_str = False if not block.is_object: - return block.dtype.itemsize, block.values, use_str + return block.values, use_str dtype_name = block.dtype.name inferred_type = lib.infer_dtype(block.values, skipna=False) @@ -4800,7 +4796,7 @@ def _maybe_convert_for_string_atom( ) elif not (inferred_type == "string" or dtype_name == "object"): - return block.dtype.itemsize, block.values, use_str + return block.values, use_str use_str = True @@ -4844,7 +4840,8 @@ def _maybe_convert_for_string_atom( if eci > itemsize: itemsize = eci - return itemsize, data_converted, use_str + data_converted = data_converted.astype(f"|S{itemsize}", copy=False) + return data_converted, use_str def _convert_string_array(data, encoding, errors, itemsize=None):