Skip to content

REF: Standardize coercion in set_data #30084

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Dec 5, 2019
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 25 additions & 21 deletions pandas/io/pytables.py
Original file line number Diff line number Diff line change
Expand Up @@ -2270,15 +2270,25 @@ def __eq__(self, other: Any) -> bool:
for a in ["name", "cname", "dtype", "pos"]
)

def set_data(self, data, dtype=None):
def set_data(self, data: Union[np.ndarray, ABCExtensionArray]):
assert data is not None

if is_categorical_dtype(data.dtype):
data = data.codes

# For datetime64tz we need to drop the TZ in tests. TODO: why?
dtype_name = data.dtype.name.split("[")[0]

if data.dtype.kind in ["m", "M"]:
data = np.asarray(data.view("i8"))
# TODO: we used to reshape for the dt64tz case, but no longer
# doing that doesn't seem to break anything. Why?

self.data = data
if data is not None:
if dtype is not None:
self.dtype = dtype
self.set_kind()
elif self.dtype is None:
self.dtype = data.dtype.name
self.set_kind()

if self.dtype is None:
self.dtype = dtype_name
self.set_kind()

def take_data(self):
""" return the data & release the memory """
Expand Down Expand Up @@ -2363,12 +2373,12 @@ def set_atom_complex(self, block):
self.kind = block.dtype.name
itemsize = int(self.kind.split("complex")[-1]) // 8
self.typ = _tables().ComplexCol(itemsize=itemsize, shape=block.shape[0])
self.set_data(block.values.astype(self.typ.type, copy=False))
self.set_data(block.values)

def set_atom_data(self, block):
self.kind = block.dtype.name
self.typ = self.get_atom_data(block)
self.set_data(block.values.astype(self.typ.type, copy=False))
self.set_data(block.values)

def set_atom_categorical(self, block):
# currently only supports a 1-D categorical
Expand All @@ -2384,7 +2394,7 @@ def set_atom_categorical(self, block):
# write the codes; must be in a block shape
self.ordered = values.ordered
self.typ = self.get_atom_data(block, kind=codes.dtype.name)
self.set_data(codes)
self.set_data(block.values)

# write the categories
self.meta = "category"
Expand All @@ -2396,31 +2406,24 @@ def get_atom_datetime64(self, block):
def set_atom_datetime64(self, block):
self.kind = "datetime64"
self.typ = self.get_atom_datetime64(block)
values = block.values.view("i8")
self.set_data(values, "datetime64")
self.set_data(block.values)

def set_atom_datetime64tz(self, block):

values = block.values

# convert this column to i8 in UTC, and save the tz
values = values.asi8.reshape(block.shape)

# store a converted timezone
self.tz = _get_tz(block.values.tz)

self.kind = "datetime64"
self.typ = self.get_atom_datetime64(block)
self.set_data(values, "datetime64")
self.set_data(block.values)

def get_atom_timedelta64(self, block):
return _tables().Int64Col(shape=block.shape[0])

def set_atom_timedelta64(self, block):
self.kind = "timedelta64"
self.typ = self.get_atom_timedelta64(block)
values = block.values.view("i8")
self.set_data(values, "timedelta64")
self.set_data(block.values)

@property
def shape(self):
Expand Down Expand Up @@ -2454,6 +2457,7 @@ def convert(self, values, nan_rep, encoding, errors, start=None, stop=None):
if values.dtype.fields is not None:
values = values[self.cname]

# NB: unlike in the other calls to set_data, self.dtype may not be None here
self.set_data(values)

# use the meta if needed
Expand Down