BUG: set keyword argument so zipfile actually compresses #21144


Merged: 13 commits, May 29, 2018
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.23.1.txt
@@ -83,6 +83,7 @@ Indexing
I/O
^^^

- Bug in :class:`pandas.io.common.BytesZipFile` where zip compression produces uncompressed zip archive (:issue:`17778`)
Contributor

Can you reference 21144 (as well is fine)? That other issue was closed for 0.23.0.

Contributor Author

Hi @jreback @WillAyd, I tested 21144 on an older version of pandas (0.20) on Windows and the same issue occurred, so I think it's a pre-existing issue unrelated to this PR. This PR only addresses zip compression and doesn't touch gzip at all.

- Bug in :meth:`DataFrame.to_stata` which prevented exporting DataFrames to buffers and most file-like objects (:issue:`21041`)
-
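The whatsnew entry above can be reproduced with the standard library alone: `zipfile.ZipFile` defaults to `ZIP_STORED`, so unless `compression=zipfile.ZIP_DEFLATED` is passed, the "compressed" archive stores its members verbatim. A minimal stdlib-only sketch (not pandas code) of the before/after behavior:

```python
import io
import zipfile

data = b"0.123456,0.234567,0.567567\n" * 1000

# Default compression is ZIP_STORED: members are archived uncompressed.
stored = io.BytesIO()
with zipfile.ZipFile(stored, "w") as zf:
    zf.writestr("data.csv", data)

# Passing ZIP_DEFLATED (the change in this PR) actually deflates members.
deflated = io.BytesIO()
with zipfile.ZipFile(deflated, "w", compression=zipfile.ZIP_DEFLATED) as zf:
    zf.writestr("data.csv", data)

print(len(stored.getvalue()), len(deflated.getvalue()))
```

The stored archive is slightly larger than the raw data (zip headers plus the uncompressed member), while the deflated one is much smaller.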

8 changes: 4 additions & 4 deletions pandas/io/common.py
@@ -5,7 +5,7 @@
import codecs
import mmap
from contextlib import contextmanager, closing
-from zipfile import ZipFile
+import zipfile

from pandas.compat import StringIO, BytesIO, string_types, text_type
from pandas import compat
@@ -428,7 +428,7 @@ def _get_handle(path_or_buf, mode, encoding=None, compression=None,
return f, handles


-class BytesZipFile(ZipFile, BytesIO):
+class BytesZipFile(zipfile.ZipFile, BytesIO):
"""
Wrapper for standard library class ZipFile and allow the returned file-like
handle to accept byte strings via `write` method.
@@ -437,10 +437,10 @@ class BytesZipFile(ZipFile, BytesIO):
bytes strings into a member of the archive.
"""
# GH 17778
-def __init__(self, file, mode='r', **kwargs):
+def __init__(self, file, mode, compression=zipfile.ZIP_DEFLATED, **kwargs):
if mode in ['wb', 'rb']:
mode = mode.replace('b', '')
-super(BytesZipFile, self).__init__(file, mode, **kwargs)
+super(BytesZipFile, self).__init__(file, mode, compression, **kwargs)
Member

Can we add tests and a whatsnew for this?

Member
@gfyoung May 21, 2018

Also, because you are modifying the default behavior, I'm not sure if we need a deprecation cycle for this (to be safe, I would imagine we should).

Contributor

No, this is a bug.

Member
@gfyoung May 21, 2018

Fair enough, though tests and whatsnew are still needed (just to be clear).

Contributor Author

Thanks. Added whatsnew and tests.


def write(self, data):
super(BytesZipFile, self).writestr(self.filename, data)
16 changes: 16 additions & 0 deletions pandas/tests/frame/test_to_csv.py
@@ -943,6 +943,22 @@ def test_to_csv_compression(self, compression):
with tm.decompress_file(filename, compression) as fh:
assert_frame_equal(df, read_csv(fh, index_col=0))

def test_to_csv_compression_size(self, compression):
Member
Since these are all the same tests I think it makes more sense to put in test_common and parametrize for the different writers, rather than splitting out across the various modules

Contributor Author
@minggli May 27, 2018

Makes sense. Done.


df = pd.concat(100 * [DataFrame([[0.123456, 0.234567, 0.567567],
[12.32112, 123123.2, 321321.2]],
columns=['X', 'Y', 'Z'])])

with ensure_clean() as filename:
import os
df.to_csv(filename, compression=compression)
file_size = os.path.getsize(filename)

if compression:
df.to_csv(filename, compression=None)
uncompressed_file_size = os.path.getsize(filename)
assert uncompressed_file_size > file_size
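Besides comparing on-disk sizes as the tests above do, the regression can be pinned down more directly by inspecting the archive member's `compress_type`. A stdlib-only sketch of that check (the member name `frame.csv` is illustrative, not from the PR):

```python
import io
import zipfile

buf = io.BytesIO()
# Mirrors the fixed behavior: members are written with ZIP_DEFLATED.
with zipfile.ZipFile(buf, "w", compression=zipfile.ZIP_DEFLATED) as zf:
    zf.writestr("frame.csv",
                b"X,Y,Z\n" + b"0.123456,0.234567,0.567567\n" * 500)

with zipfile.ZipFile(buf) as zf:
    info = zf.infolist()[0]
    # A stored (uncompressed) member would instead report ZIP_STORED
    # and compress_size == file_size.
    print(info.compress_type == zipfile.ZIP_DEFLATED,
          info.compress_size < info.file_size)
```

This asserts on *how* the member was written rather than on a size delta, so it would catch the bug even for inputs too small to shrink noticeably.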

def test_to_csv_date_format(self):
with ensure_clean('__tmp_to_csv_date_format__') as path:
dt_index = self.tsframe.index
18 changes: 18 additions & 0 deletions pandas/tests/io/json/test_compression.py
@@ -21,6 +21,24 @@ def test_compression_roundtrip(compression):
assert_frame_equal(df, pd.read_json(result))


def test_to_json_compression_size(compression):

df = pd.concat(100 * [pd.DataFrame([[0.123456, 0.234567, 0.567567],
[12.32112, 123123.2, 321321.2]],
columns=['X', 'Y', 'Z'])],
ignore_index=True)

with tm.ensure_clean() as filename:
import os
df.to_json(filename, compression=compression)
file_size = os.path.getsize(filename)

if compression:
df.to_json(filename, compression=None)
uncompressed_file_size = os.path.getsize(filename)
assert uncompressed_file_size > file_size


def test_read_zipped_json():
uncompressed_path = tm.get_data_path("tsframe_v012.json")
uncompressed_df = pd.read_json(uncompressed_path)
15 changes: 15 additions & 0 deletions pandas/tests/io/test_pickle.py
@@ -457,6 +457,21 @@ def test_read_infer(self, ext, get_random_path):

tm.assert_frame_equal(df, df2)

def test_compression_size(self, compression):

df = pd.concat(100 * [pd.DataFrame([[0.123456, 0.234567, 0.567567],
[12.32112, 123123.2, 321321.2]],
columns=['X', 'Y', 'Z'])])

with tm.ensure_clean() as filename:
df.to_pickle(filename, compression=compression)
file_size = os.path.getsize(filename)

if compression:
df.to_pickle(filename, compression=None)
uncompressed_file_size = os.path.getsize(filename)
assert uncompressed_file_size > file_size


# ---------------------
# test pickle compression
14 changes: 14 additions & 0 deletions pandas/tests/series/test_io.py
@@ -161,6 +161,20 @@ def test_to_csv_compression(self, compression):
index_col=0,
squeeze=True))

def test_to_csv_compression_size(self, compression):

s = Series(100 * [0.123456, 0.234567, 0.567567], name='X')

with ensure_clean() as filename:
import os
s.to_csv(filename, compression=compression, header=True)
file_size = os.path.getsize(filename)

if compression:
s.to_csv(filename, compression=None, header=True)
uncompressed_file_size = os.path.getsize(filename)
assert uncompressed_file_size > file_size


class TestSeriesIO(TestData):
