-
-
Notifications
You must be signed in to change notification settings - Fork 18.6k
REF: remove last major pytables state-altering #30372
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from 2 commits
Commits
Show all changes
6 commits
Select commit
Hold shift + click to select a range
5ce5aa5
REF: create a new table instead of altering old one
jbrockmendel d01606d
Merge branch 'master' of https://github.com/pandas-dev/pandas into re…
jbrockmendel e1a12fe
Merge branch 'master' of https://github.com/pandas-dev/pandas into re…
jbrockmendel 09cfa5c
typo fixup
jbrockmendel ce6b445
Merge branch 'master' of https://github.com/pandas-dev/pandas into re…
jbrockmendel 444e02e
formalize docstring
jbrockmendel File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3145,15 +3145,25 @@ class Table(Fixed): | |
info: Dict | ||
|
||
def __init__( | ||
self, parent: HDFStore, group: "Node", encoding=None, errors: str = "strict" | ||
self, | ||
parent: HDFStore, | ||
group: "Node", | ||
encoding=None, | ||
errors: str = "strict", | ||
index_axes=None, | ||
non_index_axes=None, | ||
values_axes=None, | ||
data_columns=None, | ||
info=None, | ||
nan_rep=None, | ||
): | ||
super().__init__(parent, group, encoding=encoding, errors=errors) | ||
self.index_axes = [] | ||
self.non_index_axes = [] | ||
self.values_axes = [] | ||
self.data_columns = [] | ||
self.info = dict() | ||
self.nan_rep = None | ||
self.index_axes = index_axes or [] | ||
self.non_index_axes = non_index_axes or [] | ||
self.values_axes = values_axes or [] | ||
self.data_columns = data_columns or [] | ||
self.info = info or dict() | ||
self.nan_rep = nan_rep | ||
|
||
@property | ||
def table_type_short(self) -> str: | ||
|
@@ -3635,23 +3645,21 @@ def _create_axes( | |
data_columns=None, | ||
min_itemsize=None, | ||
): | ||
""" create and return the axes | ||
legacy tables create an indexable column, indexable index, | ||
non-indexable fields | ||
|
||
Parameters | ||
---------- | ||
axes: a list of the axes in order to create (names or numbers of | ||
the axes) | ||
obj : the object to create axes on | ||
validate: validate the obj against an existing object already | ||
written | ||
min_itemsize: a dict of the min size for a column in bytes | ||
nan_rep : a values to use for string column nan_rep | ||
encoding : the encoding for string values | ||
data_columns : a list of columns that we want to create separate to | ||
allow indexing (or True will force all columns) | ||
""" | ||
Create and return the axes. | ||
|
||
Parameters | ||
---------- | ||
axes: a list of the axes in order to create (names or numbers of the axes) | ||
obj : DataFrame | ||
The object to create axes on. | ||
validate: bool, default True | ||
Whether to validate the obj against an existing object already written. | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Whether |
||
min_itemsize: a dict of the min size for a column in bytes | ||
nan_rep : a value to use for string column nan_rep | ||
encoding : the encoding for string values | ||
data_columns : a list of columns that we want to create separate to | ||
allow indexing (or True will force all columns) | ||
""" | ||
|
||
if not isinstance(obj, DataFrame): | ||
|
@@ -3670,15 +3678,15 @@ def _create_axes( | |
|
||
# do we have an existing table (if so, use its axes & data_columns) | ||
if self.infer_axes(): | ||
existing_table = self.copy() | ||
table_exists = True | ||
axes = [a.axis for a in self.index_axes] | ||
data_columns = self.data_columns | ||
data_columns = list(self.data_columns) | ||
nan_rep = self.nan_rep | ||
new_info = self.info | ||
# TODO: do we always have validate=True here? | ||
else: | ||
existing_table = None | ||
new_info = self.info | ||
table_exists = False | ||
|
||
new_info = self.info | ||
|
||
assert self.ndim == 2 # with next check, we must have len(axes) == 1 | ||
# currently support on ndim-1 axes | ||
|
@@ -3700,9 +3708,9 @@ def _create_axes( | |
a = obj.axes[idx] | ||
# we might be able to change the axes on the appending data if necessary | ||
append_axis = list(a) | ||
if existing_table is not None: | ||
if table_exists: | ||
indexer = len(new_non_index_axes) # i.e. 0 | ||
exist_axis = existing_table.non_index_axes[indexer][1] | ||
exist_axis = self.non_index_axes[indexer][1] | ||
if not array_equivalent(np.array(append_axis), np.array(exist_axis)): | ||
|
||
# ahah! -> reindex | ||
|
@@ -3721,8 +3729,8 @@ def _create_axes( | |
# Now we can construct our new index axis | ||
idx = axes[0] | ||
a = obj.axes[idx] | ||
index_name = obj._AXIS_NAMES[idx] | ||
new_index = _convert_index(index_name, a, self.encoding, self.errors) | ||
axis_name = obj._AXIS_NAMES[idx] | ||
new_index = _convert_index(axis_name, a, self.encoding, self.errors) | ||
new_index.axis = idx | ||
|
||
# Because we are always 2D, there is only one new_index, so | ||
|
@@ -3749,9 +3757,11 @@ def get_blk_items(mgr, blocks): | |
data_columns = self.validate_data_columns( | ||
data_columns, min_itemsize, new_non_index_axes | ||
) | ||
|
||
block_obj = self.get_object(obj, transposed)._consolidate() | ||
|
||
blocks, blk_items = self._get_blocks_and_items( | ||
block_obj, existing_table, new_non_index_axes, data_columns | ||
block_obj, table_exists, new_non_index_axes, self.values_axes, data_columns | ||
) | ||
|
||
# add my values | ||
|
@@ -3772,13 +3782,15 @@ def get_blk_items(mgr, blocks): | |
|
||
# make sure that we match up the existing columns | ||
# if we have an existing table | ||
if existing_table is not None and validate: | ||
existing_col: Optional[DataCol] | ||
|
||
if table_exists and validate: | ||
try: | ||
existing_col = existing_table.values_axes[i] | ||
existing_col = self.values_axes[i] | ||
except (IndexError, KeyError): | ||
raise ValueError( | ||
f"Incompatible appended table [{blocks}]" | ||
f"with existing table [{existing_table.values_axes}]" | ||
f"with existing table [{self.values_axes}]" | ||
) | ||
else: | ||
existing_col = None | ||
|
@@ -3827,22 +3839,34 @@ def get_blk_items(mgr, blocks): | |
|
||
j += 1 | ||
|
||
self.nan_rep = nan_rep | ||
self.data_columns = [col.name for col in vaxes if col.is_data_indexable] | ||
self.values_axes = vaxes | ||
self.index_axes = new_index_axes | ||
self.non_index_axes = new_non_index_axes | ||
dcs = [col.name for col in vaxes if col.is_data_indexable] | ||
|
||
# validate our min_itemsize | ||
self.validate_min_itemsize(min_itemsize) | ||
new_table = type(self)( | ||
parent=self.parent, | ||
group=self.group, | ||
encoding=self.encoding, | ||
errors=self.errors, | ||
index_axes=new_index_axes, | ||
non_index_axes=new_non_index_axes, | ||
values_axes=vaxes, | ||
data_columns=dcs, | ||
info=new_info, | ||
nan_rep=nan_rep, | ||
) | ||
if hasattr(self, "levels"): | ||
# TODO: get this into constructor, only for appropriate subclass | ||
new_table.levels = self.levels | ||
|
||
new_table.validate_min_itemsize(min_itemsize) | ||
|
||
if validate and table_exists: | ||
new_table.validate(self) | ||
|
||
# validate the axes if we have an existing table | ||
if validate: | ||
self.validate(existing_table) | ||
return new_table | ||
|
||
@staticmethod | ||
def _get_blocks_and_items( | ||
block_obj, existing_table, new_non_index_axes, data_columns | ||
block_obj, table_exists, new_non_index_axes, values_axes, data_columns | ||
): | ||
# Helper to clarify non-state-altering parts of _create_axes | ||
|
||
|
@@ -3864,15 +3888,15 @@ def get_blk_items(mgr, blocks): | |
blocks.extend(mgr.blocks) | ||
blk_items.extend(get_blk_items(mgr, mgr.blocks)) | ||
|
||
# reorder the blocks in the same order as the existing_table if we can | ||
if existing_table is not None: | ||
# reorder the blocks in the same order as the existing table if we can | ||
if table_exists: | ||
by_items = { | ||
tuple(b_items.tolist()): (b, b_items) | ||
for b, b_items in zip(blocks, blk_items) | ||
} | ||
new_blocks = [] | ||
new_blk_items = [] | ||
for ea in existing_table.values_axes: | ||
for ea in values_axes: | ||
items = tuple(ea.values) | ||
try: | ||
b, b_items = by_items.pop(items) | ||
|
@@ -4103,7 +4127,7 @@ def write( | |
self._handle.remove_node(self.group, "table") | ||
|
||
# create the axes | ||
self._create_axes( | ||
table = self._create_axes( | ||
axes=axes, | ||
obj=obj, | ||
validate=append, | ||
|
@@ -4112,34 +4136,34 @@ def write( | |
data_columns=data_columns, | ||
) | ||
|
||
for a in self.axes: | ||
for a in table.axes: | ||
a.validate_names() | ||
|
||
if not self.is_exists: | ||
if not table.is_exists: | ||
|
||
# create the table | ||
options = self.create_description( | ||
options = table.create_description( | ||
complib=complib, | ||
complevel=complevel, | ||
fletcher32=fletcher32, | ||
expectedrows=expectedrows, | ||
) | ||
|
||
# set the table attributes | ||
self.set_attrs() | ||
table.set_attrs() | ||
|
||
# create the table | ||
self._handle.create_table(self.group, **options) | ||
table._handle.create_table(table.group, **options) | ||
|
||
# update my info | ||
self.attrs.info = self.info | ||
table.attrs.info = table.info | ||
|
||
# validate the axes and set the kinds | ||
for a in self.axes: | ||
a.validate_and_set(self, append) | ||
for a in table.axes: | ||
a.validate_and_set(table, append) | ||
|
||
# add the rows | ||
self.write_data(chunksize, dropna=dropna) | ||
table.write_data(chunksize, dropna=dropna) | ||
|
||
def write_data(self, chunksize: Optional[int], dropna: bool = False): | ||
""" we form the data into a 2-d including indexes,values,mask | ||
|
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
can you edit these to be proper doc-strings
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
formalized it to the extent I could. A lot of these docstrings could use improvement