Skip to content

Commit ffc977e

Browse files
takluyver authored and wesm committed
BUG: Use stdlib csv module to write csv, so commas in data are escaped
correctly.
1 parent 56e35a9 commit ffc977e

File tree

3 files changed

+22
-15
lines changed

3 files changed

+22
-15
lines changed

pandas/core/frame.py

Lines changed: 9 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -13,6 +13,7 @@
1313
# pylint: disable=W0212,W0231,W0703,W0622
1414

1515
from StringIO import StringIO
16+
import csv
1617
import operator
1718
import warnings
1819

@@ -485,13 +486,13 @@ def to_csv(self, path, nanRep='', cols=None, header=True,
485486
mode : Python write mode, default 'wb'
486487
"""
487488
f = open(path, mode)
489+
csvout = csv.writer(f)
488490

489491
if cols is None:
490492
cols = self.columns
491493

492494
series = self._series
493495
if header:
494-
joined_cols = ','.join([str(c) for c in cols])
495496
if index:
496497
# should write something for index label
497498
if index_label is None:
@@ -509,31 +510,26 @@ def to_csv(self, path, nanRep='', cols=None, header=True,
509510
elif not isinstance(index_label, (list, tuple, np.ndarray)):
510511
# given a string for a DF with Index
511512
index_label = [index_label]
512-
f.write('%s,%s' % (",".join(index_label), joined_cols))
513+
csvout.writerow(list(index_label) + list(cols))
513514
else:
514-
f.write(joined_cols)
515-
f.write('\n')
515+
csvout.writerow(cols)
516516

517517
nlevels = getattr(self.index, 'nlevels', 1)
518518
for idx in self.index:
519+
row_fields = []
519520
if index:
520521
if nlevels == 1:
521-
f.write(str(idx))
522+
row_fields = [idx]
522523
else: # handle MultiIndex
523-
f.write(",".join([str(i) for i in idx]))
524+
row_fields = list(idx)
524525
for i, col in enumerate(cols):
525526
val = series[col].get(idx)
526527
if isnull(val):
527528
val = nanRep
528-
else:
529-
val = str(val)
530529

531-
if i > 0 or index:
532-
f.write(',%s' % val)
533-
else:
534-
f.write('%s' % val)
530+
row_fields.append(val)
535531

536-
f.write('\n')
532+
csvout.writerow(row_fields)
537533

538534
f.close()
539535

pandas/core/series.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
# pylint: disable=E1101,E1103
66
# pylint: disable=W0703,W0622,W0613,W0201
77

8+
import csv
89
import itertools
910
import operator
1011
import sys
@@ -1589,8 +1590,8 @@ def to_csv(self, path):
15891590
Output filepath. If None, write to stdout
15901591
"""
15911592
f = open(path, 'wb')
1592-
for idx, value in self.iteritems():
1593-
f.write(str(idx) + ',' + str(value) + '\n')
1593+
csvout = csv.writer(f)
1594+
csvout.writerows(self.iteritems())
15941595
f.close()
15951596

15961597
def dropna(self):

pandas/tests/test_frame.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1582,6 +1582,16 @@ def test_to_csv_float32_nanrep(self):
15821582
lines = open(pth).readlines()
15831583
self.assert_(lines[1].split(',')[2] == '999')
15841584
os.remove(pth)
1585+
1586+
def test_to_csv_withcommas(self):
1587+
"Commas inside fields should be correctly escaped when saving as CSV."
1588+
path = '__tmp__'
1589+
df = DataFrame({'A':[1,2,3], 'B':['5,6','7,8','9,0']})
1590+
df.to_csv(path)
1591+
df2 = DataFrame.from_csv(path)
1592+
assert_frame_equal(df2, df)
1593+
1594+
os.remove(path)
15851595

15861596
def test_info(self):
15871597
io = StringIO()

0 commit comments

Comments
 (0)