Revised and rebased

Nick Eubank · Nick Eubank · commit fc7f9c3240e4 · 2015-05-09T15:45:01.000-07:00
diff --git a/doc/source/whatsnew/v0.16.1.txt b/doc/source/whatsnew/v0.16.1.txt
@@ -268,6 +268,37 @@ API changes
 
 - By default, ``read_csv`` and ``read_table`` will now try to infer the compression type based on the file extension. Set ``compression=None`` to restore the previous behavior (no decompression). (:issue:`9770`)
 
+Backwards incompatible API changes
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
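+- The default behavior of the HDF write functions with ``format='table'`` is now to keep rows in which every value other than the index is missing. Set the option ``io.hdf.dropna_table`` to ``True`` to restore the previous behavior of dropping such rows. (:issue:`9382`)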
+
+Previous behavior:
+
+.. code-block:: python
+
+   In [1]: 
+   df_with_missing = pd.DataFrame({'col1':[0, np.nan, 2], 'col2':[1, np.nan, 3]})
+   df_with_missing.to_hdf('file.h5', 'df_with_missing', format = 't')
+   
+   pd.read_hdf('file.h5', 'df_with_missing')
+   
+   Out [1]: 
+         col1  col2
+     0     0     1
+     2     2     3
+
+
+New behavior:
+
+.. ipython:: python
+
+   df_with_missing = pd.DataFrame({'col1':[0, np.nan, 2], 'col2':[1, np.nan, 3]})
+   df_with_missing.to_hdf('file.h5', 'df_with_missing', format = 't')
+   
+   pd.read_hdf('file.h5', 'df_with_missing')
+
+
+
 .. _whatsnew_0161.deprecations:
 
 Deprecations
diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py
@@ -220,7 +220,7 @@ class DuplicateWarning(Warning):
 """
 
 with config.config_prefix('io.hdf'):
-    config.register_option('dropna_table', True, dropna_doc,
+    config.register_option('dropna_table', False, dropna_doc,
                            validator=config.is_bool)
     config.register_option(
         'default_format', None, format_doc,
@@ -802,7 +802,7 @@ def put(self, key, value, format=None, append=False, **kwargs):
             This will force Table format, append the input data to the
             existing.
         encoding : default None, provide an encoding for strings
-        dropna   : boolean, default True, do not write an ALL nan row to
+        dropna   : boolean, default False, do not write an ALL nan row to
             the store settable by the option 'io.hdf.dropna_table'
         """
         if format is None:
@@ -884,7 +884,7 @@ def append(self, key, value, format=None, append=True, columns=None,
         chunksize    : size to chunk the writing
         expectedrows : expected TOTAL row size of this table
         encoding     : default None, provide an encoding for strings
-        dropna       : boolean, default True, do not write an ALL nan row to
+        dropna       : boolean, default False, do not write an ALL nan row to
             the store settable by the option 'io.hdf.dropna_table'
         Notes
         -----
@@ -904,7 +904,7 @@ def append(self, key, value, format=None, append=True, columns=None,
                              **kwargs)
 
     def append_to_multiple(self, d, value, selector, data_columns=None,
-                           axes=None, dropna=True, **kwargs):
+                           axes=None, dropna=False, **kwargs):
         """
         Append to multiple tables
 
@@ -919,7 +919,7 @@ def append_to_multiple(self, d, value, selector, data_columns=None,
         data_columns : list of columns to create as data columns, or True to
             use all columns
         dropna : if evaluates to True, drop rows from all tables if any single
-                 row in each table has all NaN
+                 row in each table has all NaN. Default False.
 
         Notes
         -----
@@ -3742,7 +3742,7 @@ class AppendableTable(LegacyTable):
 
     def write(self, obj, axes=None, append=False, complib=None,
               complevel=None, fletcher32=None, min_itemsize=None,
-              chunksize=None, expectedrows=None, dropna=True, **kwargs):
+              chunksize=None, expectedrows=None, dropna=False, **kwargs):
 
         if not append and self.is_exists:
             self._handle.remove_node(self.group, 'table')
@@ -3779,7 +3779,7 @@ def write(self, obj, axes=None, append=False, complib=None,
         # add the rows
         self.write_data(chunksize, dropna=dropna)
 
-    def write_data(self, chunksize, dropna=True):
+    def write_data(self, chunksize, dropna=False):
         """ we form the data into a 2-d including indexes,values,mask
             write chunk-by-chunk """
 
diff --git a/pandas/io/tests/test_pytables.py b/pandas/io/tests/test_pytables.py
@@ -4617,6 +4617,15 @@ def test_preserve_timedeltaindex_type(self):
             store['df'] = df
             assert_frame_equal(store['df'], df)
 
+    def test_all_missing_values(self):
+        # GH 9382: an all-NaN row must survive a table-format HDF round trip
+        df_with_missing = DataFrame({'col1':[0, np.nan, 2], 'col2':[1, np.nan, np.nan]})
+
+        with ensure_clean_path(self.path) as path:
+            df_with_missing.to_hdf(path, 'df_with_missing', format = 't')
+            reloaded = read_hdf(path, 'df_with_missing')
+            tm.assert_frame_equal(df_with_missing, reloaded)
+
 
 def _test_sort(obj):
     if isinstance(obj, DataFrame):