Skip to content

Commit e0bb6f1

Browse files
committed
FIX: Enable fixed width strings to be read from Stata 13 (117) files
Fixes a bug that prevented files containing fixed-width string data from being read. Stata 13 files also allow variable-length strings, which are not supported in the current version; an explicit exception is now raised when this type is encountered. Added tests covering these cases, along with Stata 13 format test files. Fixes #7360.
1 parent 8d209de commit e0bb6f1

File tree

5 files changed

+13
-2
lines changed

5 files changed

+13
-2
lines changed

doc/source/v0.14.1.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -220,6 +220,8 @@ Bug Fixes
220220
- Bug where ``nanops._has_infs`` doesn't work with many dtypes
221221
(:issue:`7357`)
222222
- Bug in ``StataReader.data`` where reading a 0-observation dta failed (:issue:`7369`)
223+
- Bug when reading Stata 13 (117) files containing fixed width strings (:issue:`7360`)
224+
223225

224226

225227

pandas/io/stata.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -512,8 +512,10 @@ def _read_header(self):
512512
try:
513513
i = 0
514514
for typ in typlist:
515-
if typ <= 2045 or typ == 32768:
516-
self.typlist[i] = None
515+
if typ <= 2045:
516+
self.typlist[i] = typ
517+
elif typ == 32768:
518+
raise ValueError("Long strings are not supported")
517519
else:
518520
self.typlist[i] = self.TYPE_MAP_XML[typ]
519521
i += 1

pandas/io/tests/data/stata5_117.dta

5.24 KB
Binary file not shown.

pandas/io/tests/data/stata6_117.dta

3.41 KB
Binary file not shown.

pandas/io/tests/test_stata.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,11 +60,13 @@ def setUp(self):
6060
self.dta14_113 = os.path.join(self.dirpath, 'stata5_113.dta')
6161
self.dta14_114 = os.path.join(self.dirpath, 'stata5_114.dta')
6262
self.dta14_115 = os.path.join(self.dirpath, 'stata5_115.dta')
63+
self.dta14_117 = os.path.join(self.dirpath, 'stata5_117.dta')
6364

6465
self.csv15 = os.path.join(self.dirpath, 'stata6.csv')
6566
self.dta15_113 = os.path.join(self.dirpath, 'stata6_113.dta')
6667
self.dta15_114 = os.path.join(self.dirpath, 'stata6_114.dta')
6768
self.dta15_115 = os.path.join(self.dirpath, 'stata6_115.dta')
69+
self.dta15_117 = os.path.join(self.dirpath, 'stata6_117.dta')
6870

6971
def read_dta(self, file):
7072
return read_stata(file, convert_dates=True)
@@ -354,9 +356,12 @@ def test_read_write_reread_dta14(self):
354356
parsed_114.index.name = 'index'
355357
parsed_115 = self.read_dta(self.dta14_115)
356358
parsed_115.index.name = 'index'
359+
parsed_117 = self.read_dta(self.dta14_117)
360+
parsed_117.index.name = 'index'
357361

358362
tm.assert_frame_equal(parsed_114, parsed_113)
359363
tm.assert_frame_equal(parsed_114, parsed_115)
364+
tm.assert_frame_equal(parsed_114, parsed_117)
360365

361366
with tm.ensure_clean() as path:
362367
parsed_114.to_stata(path, {'date_td': 'td'})
@@ -375,10 +380,12 @@ def test_read_write_reread_dta15(self):
375380
parsed_113 = self.read_dta(self.dta15_113)
376381
parsed_114 = self.read_dta(self.dta15_114)
377382
parsed_115 = self.read_dta(self.dta15_115)
383+
parsed_117 = self.read_dta(self.dta15_117)
378384

379385
tm.assert_frame_equal(expected, parsed_114)
380386
tm.assert_frame_equal(parsed_113, parsed_114)
381387
tm.assert_frame_equal(parsed_114, parsed_115)
388+
tm.assert_frame_equal(parsed_114, parsed_117)
382389

383390
def test_timestamp_and_label(self):
384391
original = DataFrame([(1,)], columns=['var'])

0 commit comments

Comments
 (0)