Skip to content

Reading binary file handle (mode "rb") doesn't work with read_fwf #18035

Closed
@prcastro

Description

@prcastro

Code Sample, a copy-pastable example if possible

with open("file.txt", "rb") as fh:
    data = pd.read_fwf(fh)

Contents of "file.txt":

aas aas aas
bba bab b a

Problem description

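Passing a file handle opened in binary mode ("rb") to pd.read_fwf raises a TypeError while the column specifications are being inferred: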

TypeError                                 Traceback (most recent call last)
<ipython-input-12-524e61d2241f> in <module>()
      1 with open("file.txt", "rb") as fh:
----> 2     data = pd.read_fwf(fh)

/disk1/home/_/.Anaconda3/lib/python3.6/site-packages/pandas/io/parsers.py in read_fwf(filepath_or_buffer, colspecs, widths, **kwds)
    685     kwds['colspecs'] = colspecs
    686     kwds['engine'] = 'python-fwf'
--> 687     return _read(filepath_or_buffer, kwds)
    688 
    689 

/disk1/home/_/.Anaconda3/lib/python3.6/site-packages/pandas/io/parsers.py in _read(filepath_or_buffer, kwds)
    403 
    404     # Create the parser.
--> 405     parser = TextFileReader(filepath_or_buffer, **kwds)
    406 
    407     if chunksize or iterator:

/disk1/home/_/.Anaconda3/lib/python3.6/site-packages/pandas/io/parsers.py in __init__(self, f, engine, **kwds)
    762             self.options['has_index_names'] = kwds['has_index_names']
    763 
--> 764         self._make_engine(self.engine)
    765 
    766     def close(self):

/disk1/home/_/.Anaconda3/lib/python3.6/site-packages/pandas/io/parsers.py in _make_engine(self, engine)
    993                                  ' "c", "python", or' ' "python-fwf")'.format(
    994                                      engine=engine))
--> 995             self._engine = klass(self.f, **self.options)
    996 
    997     def _failover_to_python(self):

/disk1/home/_/.Anaconda3/lib/python3.6/site-packages/pandas/io/parsers.py in __init__(self, f, **kwds)
   3313         # Support iterators, convert to a list.
   3314         self.colspecs = kwds.pop('colspecs')
-> 3315         PythonParser.__init__(self, f, **kwds)
   3316 
   3317     def _make_reader(self, f):

/disk1/home/_/.Anaconda3/lib/python3.6/site-packages/pandas/io/parsers.py in __init__(self, f, **kwds)
   1988         # Set self.data to something that can read lines.
   1989         if hasattr(f, 'readline'):
-> 1990             self._make_reader(f)
   1991         else:
   1992             self.data = f

/disk1/home/_/.Anaconda3/lib/python3.6/site-packages/pandas/io/parsers.py in _make_reader(self, f)
   3317     def _make_reader(self, f):
   3318         self.data = FixedWidthReader(f, self.colspecs, self.delimiter,
-> 3319                                      self.comment, self.skiprows)

/disk1/home/_/.Anaconda3/lib/python3.6/site-packages/pandas/io/parsers.py in __init__(self, f, colspecs, delimiter, comment, skiprows)
   3217         self.comment = comment
   3218         if colspecs == 'infer':
-> 3219             self.colspecs = self.detect_colspecs(skiprows=skiprows)
   3220         else:
   3221             self.colspecs = colspecs

/disk1/home/_/.Anaconda3/lib/python3.6/site-packages/pandas/io/parsers.py in detect_colspecs(self, n, skiprows)
   3282             rows = [row.partition(self.comment)[0] for row in rows]
   3283         for row in rows:
-> 3284             for m in pattern.finditer(row):
   3285                 mask[m.start():m.end()] = 1
   3286         shifted = np.roll(mask, 1)

TypeError: cannot use a string pattern on a bytes-like object

Expected Output

The file is parsed and the data is loaded into a DataFrame without raising an error.

Output of pd.show_versions()

INSTALLED VERSIONS
------------------
commit: None
python: 3.6.2.final.0
python-bits: 64
OS: Linux
OS-release: 3.10.0-693.1.1.el7.x86_64
machine: x86_64
processor: x86_64
byteorder: little
LC_ALL: None
LANG: en_US.UTF-8
LOCALE: en_US.UTF-8

pandas: 0.20.3
pytest: 3.2.1
pip: 9.0.1
setuptools: 27.2.0
Cython: 0.26
numpy: 1.12.1
scipy: 0.19.1
xarray: None
IPython: 6.1.0
sphinx: 1.6.3
patsy: 0.4.1
dateutil: 2.6.1
pytz: 2017.2
blosc: None
bottleneck: 1.2.1
tables: 3.4.2
numexpr: 2.6.2
feather: None
matplotlib: 2.1.0
openpyxl: 2.4.8
xlrd: 1.1.0
xlwt: 1.3.0
xlsxwriter: 0.9.8
lxml: 3.8.0
bs4: 4.6.0
html5lib: 0.9999999
sqlalchemy: 1.1.13
pymysql: None
psycopg2: 2.7.3 (dt dec pq3 ext lo64)
jinja2: 2.9.6
s3fs: None
pandas_gbq: None
pandas_datareader: None

Metadata

Metadata

Assignees

No one assigned

    Type

    No type

    Projects

    No projects

    Milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions