API: Add compression argument to Series.to_csv

reidy-p · reidy-p · commit 588a8f4455f0 · 2018-01-13T00:04:45.000Z
diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt
@@ -271,6 +271,7 @@ Other API Changes
 - :class:`IntervalIndex` and ``IntervalDtype`` no longer support categorical, object, and string subtypes (:issue:`19016`)
 - The default ``Timedelta`` constructor now accepts an ``ISO 8601 Duration`` string as an argument (:issue:`19040`)
 - ``IntervalDtype`` now returns ``True`` when compared against ``'interval'`` regardless of subtype, and ``IntervalDtype.name`` now returns ``'interval'`` regardless of subtype (:issue:`18980`)
+- :func:`Series.to_csv` now accepts a ``compression`` argument that works in the same way as the ``compression`` argument in :func:`DataFrame.to_csv` (:issue:`18958`)
 
 .. _whatsnew_0230.deprecations:
 
diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -2881,7 +2881,8 @@ def from_csv(cls, path, sep=',', parse_dates=True, header=None,
 
     def to_csv(self, path=None, index=True, sep=",", na_rep='',
                float_format=None, header=False, index_label=None,
-               mode='w', encoding=None, date_format=None, decimal='.'):
+               mode='w', encoding=None, compression=None, date_format=None,
+               decimal='.'):
         """
         Write Series to a comma-separated values (csv) file
 
@@ -2908,6 +2909,10 @@ def to_csv(self, path=None, index=True, sep=",", na_rep='',
         encoding : string, optional
             a string representing the encoding to use if the contents are
             non-ascii, for python versions prior to 3
+        compression : string, optional
+            a string representing the compression to use in the output file,
+            allowed values are 'gzip', 'bz2', 'xz', only used when the first
+            argument is a filename
         date_format: string, default None
             Format string for datetime objects.
         decimal: string, default '.'
@@ -2920,10 +2925,10 @@ def to_csv(self, path=None, index=True, sep=",", na_rep='',
         result = df.to_csv(path, index=index, sep=sep, na_rep=na_rep,
                            float_format=float_format, header=header,
                            index_label=index_label, mode=mode,
-                           encoding=encoding, date_format=date_format,
-                           decimal=decimal)
+                           encoding=encoding, compression=compression,
+                           date_format=date_format, decimal=decimal)
         if path is None:
             return result
 
     @Appender(generic._shared_docs['to_excel'] % _shared_doc_kwargs)
     def to_excel(self, excel_writer, sheet_name='Sheet1', na_rep='',
diff --git a/pandas/tests/series/test_io.py b/pandas/tests/series/test_io.py
@@ -8,12 +8,13 @@
 import numpy as np
 import pandas as pd
 
-from pandas import Series, DataFrame
+from pandas import Series, DataFrame, compat
 
 from pandas.compat import StringIO, u
 from pandas.util.testing import (assert_series_equal, assert_almost_equal,
                                  assert_frame_equal, ensure_clean)
 import pandas.util.testing as tm
+import pandas.util._test_decorators as td
 
 from .common import TestData
 
@@ -138,6 +139,69 @@ def test_to_csv_path_is_none(self):
         csv_str = s.to_csv(path=None)
         assert isinstance(csv_str, str)
 
+    def test_to_csv_compression_gzip(self):
+        """Writing with compression='gzip' must produce a real gzip file."""
+        s = Series([0.123456, 0.234567, 0.567567], index=['A', 'B', 'C'],
+                   name='X')
+
+        with ensure_clean() as filename:
+
+            s.to_csv(filename, compression="gzip", header=True)
+
+            # test the round trip - to_csv -> read_csv
+            rs = pd.read_csv(filename, compression="gzip", index_col=0,
+                             squeeze=True)
+            assert_series_equal(s, rs)
+
+            # explicitly make sure file is gzipped
+            import gzip
+            with gzip.open(filename, 'rb') as f:
+                # context manager closes the handle even if the read fails
+                text = f.read().decode('utf8')
+            assert s.name in text
+
+    def test_to_csv_compression_bz2(self):
+        """Writing with compression='bz2' must produce a real bz2 file."""
+        s = Series([0.123456, 0.234567, 0.567567], index=['A', 'B', 'C'],
+                   name='X')
+
+        with ensure_clean() as filename:
+
+            s.to_csv(filename, compression="bz2", header=True)
+
+            # test the round trip - to_csv -> read_csv
+            rs = pd.read_csv(filename, compression="bz2", index_col=0,
+                             squeeze=True)
+            assert_series_equal(s, rs)
+
+            # explicitly make sure file is bz2ed
+            import bz2
+            with bz2.BZ2File(filename, 'rb') as f:
+                # context manager closes the handle even if the read fails
+                text = f.read().decode('utf8')
+            assert s.name in text
+
+    @td.skip_if_no_lzma
+    def test_to_csv_compression_xz(self):
+        """Writing with compression='xz' must produce a real xz file."""
+        s = Series([0.123456, 0.234567, 0.567567], index=['A', 'B', 'C'],
+                   name='X')
+
+        with ensure_clean() as filename:
+
+            s.to_csv(filename, compression="xz", header=True)
+
+            # test the round trip - to_csv -> read_csv
+            rs = pd.read_csv(filename, compression="xz", index_col=0,
+                             squeeze=True)
+            assert_series_equal(s, rs)
+
+            # explicitly make sure file is xzipped
+            lzma = compat.import_lzma()
+            with lzma.open(filename, 'rb') as f:
+                raw = pd.read_csv(f, index_col=0, squeeze=True)
+            assert_series_equal(s, raw)
+
 
 class TestSeriesIO(TestData):