Skip to content

Commit fc7f9c3

Browse files
author
Nick Eubank
committed
Revised and rebased
1 parent 3c0e66d commit fc7f9c3

File tree

3 files changed

+47
-7
lines changed

3 files changed

+47
-7
lines changed

doc/source/whatsnew/v0.16.1.txt

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -268,6 +268,37 @@ API changes
268268

269269
- By default, ``read_csv`` and ``read_table`` will now try to infer the compression type based on the file extension. Set ``compression=None`` to restore the previous behavior (no decompression). (:issue:`9770`)
270270

271+
Backwards incompatible API changes
272+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
273+
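- The default behavior of HDF write functions using the "table" format is now to keep rows in which every value is missing (only the index is populated); previously such rows were silently dropped. Pass ``dropna=True`` or set the option ``io.hdf.dropna_table`` to ``True`` to restore the previous behavior. (:issue:`9382`)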
274+
275+
Previously,
276+
277+
.. code-block:: python
278+
279+
In [1]:
280+
df_with_missing = pd.DataFrame({'col1':[0, np.nan, 2], 'col2':[1, np.nan, 3]})
281+
df_with_missing.to_hdf('file.h5', 'df_with_missing', format = 't')
282+
283+
pd.read_hdf('file.h5', 'df_with_missing')
284+
285+
Out[1]:
286+
col1 col2
287+
0 0 1
288+
2 2 3
289+
290+
291+
New behavior:
292+
293+
.. ipython:: python
294+
295+
df_with_missing = pd.DataFrame({'col1':[0, np.nan, 2], 'col2':[1, np.nan, 3]})
296+
df_with_missing.to_hdf('file.h5', 'df_with_missing', format = 't')
297+
298+
pd.read_hdf('file.h5', 'df_with_missing')
299+
300+
301+
271302
.. _whatsnew_0161.deprecations:
272303

273304
Deprecations

pandas/io/pytables.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -220,7 +220,7 @@ class DuplicateWarning(Warning):
220220
"""
221221

222222
with config.config_prefix('io.hdf'):
223-
config.register_option('dropna_table', True, dropna_doc,
223+
config.register_option('dropna_table', False, dropna_doc,
224224
validator=config.is_bool)
225225
config.register_option(
226226
'default_format', None, format_doc,
@@ -802,7 +802,7 @@ def put(self, key, value, format=None, append=False, **kwargs):
802802
This will force Table format, append the input data to the
803803
existing.
804804
encoding : default None, provide an encoding for strings
805-
dropna : boolean, default True, do not write an ALL nan row to
805+
dropna : boolean, default False; if True, do not write an ALL nan row to
806806
the store settable by the option 'io.hdf.dropna_table'
807807
"""
808808
if format is None:
@@ -884,7 +884,7 @@ def append(self, key, value, format=None, append=True, columns=None,
884884
chunksize : size to chunk the writing
885885
expectedrows : expected TOTAL row size of this table
886886
encoding : default None, provide an encoding for strings
887-
dropna : boolean, default True, do not write an ALL nan row to
887+
dropna : boolean, default False; if True, do not write an ALL nan row to
888888
the store settable by the option 'io.hdf.dropna_table'
889889
Notes
890890
-----
@@ -904,7 +904,7 @@ def append(self, key, value, format=None, append=True, columns=None,
904904
**kwargs)
905905

906906
def append_to_multiple(self, d, value, selector, data_columns=None,
907-
axes=None, dropna=True, **kwargs):
907+
axes=None, dropna=False, **kwargs):
908908
"""
909909
Append to multiple tables
910910
@@ -919,7 +919,7 @@ def append_to_multiple(self, d, value, selector, data_columns=None,
919919
data_columns : list of columns to create as data columns, or True to
920920
use all columns
921921
dropna : if evaluates to True, drop rows from all tables if any single
922-
row in each table has all NaN
922+
row in each table has all NaN. Default False.
923923
924924
Notes
925925
-----
@@ -3742,7 +3742,7 @@ class AppendableTable(LegacyTable):
37423742

37433743
def write(self, obj, axes=None, append=False, complib=None,
37443744
complevel=None, fletcher32=None, min_itemsize=None,
3745-
chunksize=None, expectedrows=None, dropna=True, **kwargs):
3745+
chunksize=None, expectedrows=None, dropna=False, **kwargs):
37463746

37473747
if not append and self.is_exists:
37483748
self._handle.remove_node(self.group, 'table')
@@ -3779,7 +3779,7 @@ def write(self, obj, axes=None, append=False, complib=None,
37793779
# add the rows
37803780
self.write_data(chunksize, dropna=dropna)
37813781

3782-
def write_data(self, chunksize, dropna=True):
3782+
def write_data(self, chunksize, dropna=False):
37833783
""" we form the data into a 2-d including indexes,values,mask
37843784
write chunk-by-chunk """
37853785

pandas/io/tests/test_pytables.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4617,6 +4617,15 @@ def test_preserve_timedeltaindex_type(self):
46174617
store['df'] = df
46184618
assert_frame_equal(store['df'], df)
46194619

4620+
def test_all_missing_values(self):
4621+
# Test corresponding to Issue 9382
4622+
df_with_missing = DataFrame({'col1':[0, np.nan, 2], 'col2':[1, np.nan, np.nan]})
4623+
4624+
with ensure_clean_path(self.path) as path:
4625+
df_with_missing.to_hdf(path, 'df_with_missing', format = 't')
4626+
reloaded = read_hdf(path, 'df_with_missing')
4627+
tm.assert_frame_equal(df_with_missing, reloaded)
4628+
46204629

46214630
def _test_sort(obj):
46224631
if isinstance(obj, DataFrame):

0 commit comments

Comments
 (0)