Skip to content

Commit 904933a

Browse files
committed
FIX: Stata writer no longer ignores encoding when writing strings
The encoding was not used when writing strings. Fixes #7286
1 parent e0bb6f1 commit 904933a

File tree

3 files changed

+14
-3
lines changed

3 files changed

+14
-3
lines changed

doc/source/v0.14.1.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -221,7 +221,7 @@ Bug Fixes
221221
(:issue:`7357`)
222222
- Bug in ``StataReader.data`` where reading a 0-observation dta failed (:issue:`7369`)
223223
- Bug when reading Stata 13 (117) files containing fixed-width strings (:issue:`7360`)
224-
224+
- Bug when writing Stata files where the encoding was ignored (:issue:`7286`)
225225

226226

227227

pandas/io/stata.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1328,7 +1328,10 @@ def _write_data_nodates(self):
13281328
var = _pad_bytes('', typ)
13291329
if len(var) < typ:
13301330
var = _pad_bytes(var, typ)
1331-
self._write(var)
1331+
if compat.PY3:
1332+
self._write(var)
1333+
else:
1334+
self._write(var.encode(self._encoding))
13321335
else:
13331336
try:
13341337
self._file.write(struct.pack(byteorder + TYPE_MAP[typ],
@@ -1358,7 +1361,10 @@ def _write_data_dates(self):
13581361
if typ <= 244: # we've got a string
13591362
if len(var) < typ:
13601363
var = _pad_bytes(var, typ)
1361-
self._write(var)
1364+
if compat.PY3:
1365+
self._write(var)
1366+
else:
1367+
self._write(var.encode(self._encoding))
13621368
else:
13631369
self._file.write(struct.pack(byteorder+TYPE_MAP[typ], var))
13641370

pandas/io/tests/test_stata.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -283,6 +283,11 @@ def test_encoding(self):
283283
self.assertEqual(result, expected)
284284
self.assertIsInstance(result, unicode)
285285

286+
with tm.ensure_clean() as path:
287+
encoded.to_stata(path,encoding='latin-1', write_index=False)
288+
reread_encoded = read_stata(path, encoding='latin-1')
289+
tm.assert_frame_equal(encoded, reread_encoded)
290+
286291
def test_read_write_dta11(self):
287292
original = DataFrame([(1, 2, 3, 4)],
288293
columns=['good', compat.u('b\u00E4d'), '8number', 'astringwithmorethan32characters______'])

0 commit comments

Comments
 (0)