Skip to content

Commit 588a8f4

Browse files
committed
API: Add compression argument to Series.to_csv
1 parent 8347ff8 commit 588a8f4

File tree

3 files changed

+74
-6
lines changed

3 files changed

+74
-6
lines changed

doc/source/whatsnew/v0.23.0.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -271,6 +271,7 @@ Other API Changes
271271
- :class:`IntervalIndex` and ``IntervalDtype`` no longer support categorical, object, and string subtypes (:issue:`19016`)
272272
- The default ``Timedelta`` constructor now accepts an ``ISO 8601 Duration`` string as an argument (:issue:`19040`)
273273
- ``IntervalDtype`` now returns ``True`` when compared against ``'interval'`` regardless of subtype, and ``IntervalDtype.name`` now returns ``'interval'`` regardless of subtype (:issue:`18980`)
274+
- :func:`Series.to_csv` now accepts a ``compression`` argument that works in the same way as the ``compression`` argument in :func:`DataFrame.to_csv` (:issue:`18958`)
274275

275276
.. _whatsnew_0230.deprecations:
276277

pandas/core/series.py

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2881,7 +2881,8 @@ def from_csv(cls, path, sep=',', parse_dates=True, header=None,
28812881

28822882
def to_csv(self, path=None, index=True, sep=",", na_rep='',
28832883
float_format=None, header=False, index_label=None,
2884-
mode='w', encoding=None, date_format=None, decimal='.'):
2884+
mode='w', encoding=None, compression=None, date_format=None,
2885+
decimal='.'):
28852886
"""
28862887
Write Series to a comma-separated values (csv) file
28872888
@@ -2908,6 +2909,10 @@ def to_csv(self, path=None, index=True, sep=",", na_rep='',
29082909
encoding : string, optional
29092910
a string representing the encoding to use if the contents are
29102911
non-ascii, for python versions prior to 3
2912+
compression : string, optional
2913+
a string representing the compression to use in the output file,
2914+
allowed values are 'gzip', 'bz2', 'xz', only used when the first
2915+
argument is a filename
29112916
date_format: string, default None
29122917
Format string for datetime objects.
29132918
decimal: string, default '.'
@@ -2920,10 +2925,8 @@ def to_csv(self, path=None, index=True, sep=",", na_rep='',
29202925
result = df.to_csv(path, index=index, sep=sep, na_rep=na_rep,
29212926
float_format=float_format, header=header,
29222927
index_label=index_label, mode=mode,
2923-
encoding=encoding, date_format=date_format,
2924-
decimal=decimal)
2925-
if path is None:
2926-
return result
2928+
encoding=encoding, compression=compression,
2929+
date_format=date_format, decimal=decimal)
29272930

29282931
@Appender(generic._shared_docs['to_excel'] % _shared_doc_kwargs)
29292932
def to_excel(self, excel_writer, sheet_name='Sheet1', na_rep='',

pandas/tests/series/test_io.py

Lines changed: 65 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,13 @@
88
import numpy as np
99
import pandas as pd
1010

11-
from pandas import Series, DataFrame
11+
from pandas import Series, DataFrame, compat
1212

1313
from pandas.compat import StringIO, u
1414
from pandas.util.testing import (assert_series_equal, assert_almost_equal,
1515
assert_frame_equal, ensure_clean)
1616
import pandas.util.testing as tm
17+
import pandas.util._test_decorators as td
1718

1819
from .common import TestData
1920

@@ -138,6 +139,69 @@ def test_to_csv_path_is_none(self):
138139
csv_str = s.to_csv(path=None)
139140
assert isinstance(csv_str, str)
140141

142+
def test_to_csv_compression_gzip(self):
    """Round-trip a Series through a gzip-compressed CSV file.

    Writes the Series with ``compression="gzip"``, reads it back with
    ``read_csv`` using the same compression, and then opens the file
    with the ``gzip`` module directly to check that the Series name
    appears in the decompressed text.
    """
    s = Series([0.123456, 0.234567, 0.567567], index=['A', 'B', 'C'],
               name='X')

    with ensure_clean() as filename:

        s.to_csv(filename, compression="gzip", header=True)

        # test the round trip - to_csv -> read_csv
        rs = pd.read_csv(filename, compression="gzip", index_col=0,
                         squeeze=True)
        assert_series_equal(s, rs)

        # explicitly make sure file is gzipped; the with-block closes
        # the handle even if reading or decoding raises
        import gzip
        with gzip.open(filename, 'rb') as f:
            text = f.read().decode('utf8')
        assert s.name in text
162+
163+
def test_to_csv_compression_bz2(self):
    """Round-trip a Series through a bz2-compressed CSV file.

    Writes the Series with ``compression="bz2"``, reads it back with
    ``read_csv`` using the same compression, and then opens the file
    with ``bz2.BZ2File`` directly to check that the Series name appears
    in the decompressed text.
    """
    s = Series([0.123456, 0.234567, 0.567567], index=['A', 'B', 'C'],
               name='X')

    with ensure_clean() as filename:

        s.to_csv(filename, compression="bz2", header=True)

        # test the round trip - to_csv -> read_csv
        rs = pd.read_csv(filename, compression="bz2", index_col=0,
                         squeeze=True)
        assert_series_equal(s, rs)

        # explicitly make sure file is bz2-compressed; the with-block
        # closes the handle even if reading or decoding raises
        import bz2
        with bz2.BZ2File(filename, 'rb') as f:
            text = f.read().decode('utf8')
        assert s.name in text
183+
184+
@td.skip_if_no_lzma
def test_to_csv_compression_xz(self):
    """Round-trip a Series through an xz-compressed CSV file.

    Writes the Series with ``compression="xz"``, reads it back with
    ``read_csv`` using the same compression, and then re-reads it
    through a handle opened with the lzma module to check that the
    file on disk is xz-compressed.
    """
    s = Series([0.123456, 0.234567, 0.567567], index=['A', 'B', 'C'],
               name='X')

    with ensure_clean() as filename:

        s.to_csv(filename, compression="xz", header=True)

        # test the round trip - to_csv -> read_csv
        rs = pd.read_csv(filename, compression="xz", index_col=0,
                         squeeze=True)
        assert_series_equal(s, rs)

        # explicitly make sure file is xzipped; the with-block closes
        # the handle even when the comparison below fails
        lzma = compat.import_lzma()
        with lzma.open(filename, 'rb') as f:
            assert_series_equal(s, pd.read_csv(f, index_col=0,
                                               squeeze=True))
204+
141205

142206
class TestSeriesIO(TestData):
143207

0 commit comments

Comments
 (0)