diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index a0205a8d64cb7..61d79c943c0e4 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -271,6 +271,7 @@ Other API Changes - :class:`IntervalIndex` and ``IntervalDtype`` no longer support categorical, object, and string subtypes (:issue:`19016`) - The default ``Timedelta`` constructor now accepts an ``ISO 8601 Duration`` string as an argument (:issue:`19040`) - ``IntervalDtype`` now returns ``True`` when compared against ``'interval'`` regardless of subtype, and ``IntervalDtype.name`` now returns ``'interval'`` regardless of subtype (:issue:`18980`) +- :func:`Series.to_csv` now accepts a ``compression`` argument that works in the same way as the ``compression`` argument in :func:`DataFrame.to_csv` (:issue:`18958`) .. _whatsnew_0230.deprecations: diff --git a/pandas/core/series.py b/pandas/core/series.py index 71cded4f9c888..4b6e6690eac0a 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2881,7 +2881,8 @@ def from_csv(cls, path, sep=',', parse_dates=True, header=None, def to_csv(self, path=None, index=True, sep=",", na_rep='', float_format=None, header=False, index_label=None, - mode='w', encoding=None, date_format=None, decimal='.'): + mode='w', encoding=None, compression=None, date_format=None, + decimal='.'): """ Write Series to a comma-separated values (csv) file @@ -2908,6 +2909,10 @@ def to_csv(self, path=None, index=True, sep=",", na_rep='', encoding : string, optional a string representing the encoding to use if the contents are non-ascii, for python versions prior to 3 + compression : string, optional + a string representing the compression to use in the output file, + allowed values are 'gzip', 'bz2', 'xz', only used when the first + argument is a filename date_format: string, default None Format string for datetime objects. decimal: string, default '.' 
@pytest.mark.parametrize('compression', [
    None,
    'gzip',
    'bz2',
    pytest.param('xz', marks=td.skip_if_no_lzma),
])
def test_to_csv_compression(self, compression):
    """
    Check that ``Series.to_csv`` honours the ``compression`` argument.

    The Series is written with each parametrized compression and then
    verified three ways: a ``read_csv`` round trip by filename, a raw
    read of the decompressed bytes, and a ``read_csv`` round trip on the
    already-decompressed file object.

    Parameters
    ----------
    compression : {None, 'gzip', 'bz2', 'xz'}
        Compression passed to ``to_csv``, ``read_csv`` and
        ``tm.decompress_file``.
    """
    s = Series([0.123456, 0.234567, 0.567567], index=['A', 'B', 'C'],
               name='X')

    with ensure_clean() as filename:

        s.to_csv(filename, compression=compression, header=True)

        # test the round trip - to_csv -> read_csv
        rs = pd.read_csv(filename, compression=compression, index_col=0,
                         squeeze=True)
        assert_series_equal(s, rs)

        # Read the file back through the matching decompressor and make
        # sure the header (the Series name) is present. The handle is
        # opened in a ``with`` block so it is closed even when the
        # assertion below fails.
        with tm.decompress_file(filename, compression=compression) as f:
            text = f.read().decode('utf8')
        assert s.name in text

        # Same guarantee for the second handle: close it on both the
        # success and the failure path before ensure_clean cleans up.
        with tm.decompress_file(filename, compression=compression) as f:
            assert_series_equal(s, pd.read_csv(f, index_col=0,
                                               squeeze=True))
def decompress_file(path, compression):
    """
    Open a file for binary reading through the requested decompressor.

    The returned object is left open; closing it is up to the caller.

    Parameters
    ----------
    path : str
        Location of the file to open.

    compression : {'gzip', 'bz2', 'xz', None}
        Which decompression to apply. ``None`` opens the file as-is.

    Returns
    -------
    f : file object
        Binary-mode handle yielding the decompressed contents.

    Raises
    ------
    ValueError
        If ``compression`` is not one of the recognised values.
    """
    # No compression: hand back a plain binary handle.
    if compression is None:
        return open(path, 'rb')

    if compression == 'gzip':
        import gzip
        return gzip.open(path, 'rb')

    if compression == 'bz2':
        import bz2
        return bz2.BZ2File(path, 'rb')

    if compression == 'xz':
        # The lzma module is obtained through the compat helper rather
        # than imported directly.
        lzma_module = compat.import_lzma()
        return lzma_module.LZMAFile(path, 'rb')

    raise ValueError(
        'Unrecognized compression type: {}'.format(compression))