Skip to content

Commit 60f49a1

Browse files
committed
BUG: Ensure incomplete stata files are deleted
Attempt to delete the output file after a failed write, and warn if it cannot be deleted
1 parent 2b5058e commit 60f49a1

File tree

3 files changed

+29
-2
lines changed

3 files changed

+29
-2
lines changed

doc/source/whatsnew/v0.24.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1576,6 +1576,7 @@ Notice how we now instead output ``np.nan`` itself instead of a stringified form
15761576
- :func:`DataFrame.to_string()`, :func:`DataFrame.to_html()`, :func:`DataFrame.to_latex()` will correctly format output when a string is passed as the ``float_format`` argument (:issue:`21625`, :issue:`22270`)
15771577
- Bug in :func:`read_csv` that caused it to raise ``OverflowError`` when trying to use 'inf' as ``na_value`` with integer index column (:issue:`17128`)
15781578
- Bug in :func:`pandas.io.json.json_normalize` that caused it to raise ``TypeError`` when two consecutive elements of ``record_path`` are dicts (:issue:`22706`)
1579+
- Bug in :meth:`DataFrame.to_stata`, :class:`pandas.io.stata.StataWriter` and :class:`pandas.io.stata.StataWriter117` where an exception would leave a partially written and invalid dta file (:issue:`23573`)
15791580
- Bug in :meth:`DataFrame.to_stata` and :class:`pandas.io.stata.StataWriter117` that produced invalid files when using strLs with non-ASCII characters (:issue:`23573`)
15801581

15811582
Plotting

pandas/io/stata.py

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212

1313
from collections import OrderedDict
1414
import datetime
15+
import os
1516
import struct
1617
import sys
1718
import warnings
@@ -503,6 +504,10 @@ class InvalidColumnName(Warning):
503504
"""
504505

505506

507+
class IOWarning(Warning):
508+
pass
509+
510+
506511
def _cast_to_stata_types(data):
507512
"""Checks the dtypes of the columns of a pandas DataFrame for
508513
compatibility with the data types and ranges supported by Stata, and
@@ -2209,7 +2214,17 @@ def write_file(self):
22092214
self._write_value_labels()
22102215
self._write_file_close_tag()
22112216
self._write_map()
2212-
finally:
2217+
except Exception as exc:
2218+
self._close()
2219+
try:
2220+
if self._own_file:
2221+
os.unlink(self._fname)
2222+
except Exception:
2223+
warnings.warn('This save was not successful but {0} could not '
2224+
'be deleted. This file is not '
2225+
'valid.'.format(self._fname), IOWarning)
2226+
raise exc
2227+
else:
22132228
self._close()
22142229

22152230
def _close(self):

pandas/tests/io/test_stata.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,8 @@
2121
from pandas.core.frame import DataFrame, Series
2222
from pandas.io.parsers import read_csv
2323
from pandas.io.stata import (InvalidColumnName, PossiblePrecisionLoss,
24-
StataMissingValue, StataReader, read_stata)
24+
StataMissingValue, StataReader, read_stata,
25+
IOWarning)
2526

2627

2728
@pytest.fixture
@@ -1547,6 +1548,16 @@ def test_all_none_exception(self, version):
15471548
assert 'Only string-like' in excinfo.value.args[0]
15481549
assert 'Column `none`' in excinfo.value.args[0]
15491550

1551+
@pytest.mark.parametrize('version', [114, 117])
1552+
def test_invalid_file_not_written(self, version):
1553+
content = 'Here is one __�__ Another one __·__ Another one __½__'
1554+
df = DataFrame([content], columns=['invalid'])
1555+
expected_exc = UnicodeEncodeError if PY3 else UnicodeDecodeError
1556+
with tm.ensure_clean() as path:
1557+
with pytest.raises(expected_exc):
1558+
with tm.assert_produces_warning(IOWarning):
1559+
df.to_stata(path)
1560+
15501561
def test_strl_latin1(self):
15511562
# GH 23573, correct GSO data to reflect correct size
15521563
output = DataFrame([[u'pandas'] * 2, [u'þâÑÐŧ'] * 2],

0 commit comments

Comments
 (0)