diff --git a/doc/source/whatsnew/v0.16.0.txt b/doc/source/whatsnew/v0.16.0.txt index b221a7df373a4..f4b0024f5d5b8 100644 --- a/doc/source/whatsnew/v0.16.0.txt +++ b/doc/source/whatsnew/v0.16.0.txt @@ -99,7 +99,7 @@ Bug Fixes - +- Fixed bug on big endian platforms which produced incorrect results in ``StataReader`` (:issue:`8688`). - Bug in ``MultiIndex.has_duplicates`` when having many levels causes an indexer overflow (:issue:`9075`, :issue:`5873`) - Bug in ``pivot`` and `unstack`` where ``nan`` values would break index alignment (:issue:`7466`) diff --git a/pandas/io/stata.py b/pandas/io/stata.py index ccfe8468813c7..0d6e554b8b474 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -833,6 +833,7 @@ def __init__(self, path_or_buf, encoding='iso-8859-1'): self._missing_values = False self._data_read = False self._value_labels_read = False + self._native_byteorder = _set_endianness(sys.byteorder) if isinstance(path_or_buf, str): path_or_buf, encoding = get_filepath_or_buffer( path_or_buf, encoding=self._default_encoding @@ -1195,13 +1196,16 @@ def data(self, convert_dates=True, convert_categoricals=True, index=None, dtype = [] # Convert struct data types to numpy data type for i, typ in enumerate(self.typlist): if typ in self.NUMPY_TYPE_MAP: - dtype.append(('s' + str(i), self.NUMPY_TYPE_MAP[typ])) + dtype.append(('s' + str(i), self.byteorder + self.NUMPY_TYPE_MAP[typ])) else: dtype.append(('s' + str(i), 'S' + str(typ))) dtype = np.dtype(dtype) read_len = count * dtype.itemsize self.path_or_buf.seek(self.data_location) data = np.frombuffer(self.path_or_buf.read(read_len),dtype=dtype,count=count) + # if necessary, swap the byte order to native here + if self.byteorder != self._native_byteorder: + data = data.byteswap().newbyteorder() self._data_read = True if convert_categoricals: