From 8633d23f14fd2df4beec40e338c1bfea3a75eb2b Mon Sep 17 00:00:00 2001
From: Jeffrey Tratner <jeffrey.tratner@gmail.com>
Date: Sun, 8 Sep 2013 23:11:08 -0400
Subject: [PATCH 1/2] BUG: Fix read_fwf with compressed files.

In Python 3, `gzip` and `bz2` file objects return `bytes` rather than `str`,
so `read_fwf` needs to check each line for bytes and decode it as necessary.
---
 doc/source/release.rst          |  2 ++
 pandas/io/parsers.py            | 19 ++++++++++++++-----
 pandas/io/tests/test_parsers.py | 25 +++++++++++++++++++++++++
 3 files changed, 41 insertions(+), 5 deletions(-)

diff --git a/doc/source/release.rst b/doc/source/release.rst
index f32ea44ed6242..53c50100072f9 100644
--- a/doc/source/release.rst
+++ b/doc/source/release.rst
@@ -369,6 +369,8 @@ Bug Fixes
   - Bug in ``iloc`` with a slice index failing (:issue:`4771`)
   - Incorrect error message with no colspecs or width in ``read_fwf``. (:issue:`4774`)
   - Fix bugs in indexing in a Series with a duplicate index (:issue:`4548`, :issue:`4550`)
+  - Fixed bug with reading compressed files with ``read_fwf`` in Python 3.
+    (:issue:`3963`)
 
 pandas 0.12.0
 -------------
diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
index f05b0a676cde4..5ca0a498d1e07 100644
--- a/pandas/io/parsers.py
+++ b/pandas/io/parsers.py
@@ -1937,11 +1937,20 @@ def __init__(self, f, colspecs, filler, thousands=None):
                        isinstance(colspec[1], int) ):
                 raise AssertionError()
 
-    def next(self):
-        line = next(self.f)
-        # Note: 'colspecs' is a sequence of half-open intervals.
-        return [line[fromm:to].strip(self.filler or ' ')
-                for (fromm, to) in self.colspecs]
+    if compat.PY3:
+        def next(self):
+            line = next(self.f)
+            if isinstance(line, bytes):
+                line = line.decode('utf-8')
+            # Note: 'colspecs' is a sequence of half-open intervals.
+            return [line[fromm:to].strip(self.filler or ' ')
+                    for (fromm, to) in self.colspecs]
+    else:
+        def next(self):
+            line = next(self.f)
+            # Note: 'colspecs' is a sequence of half-open intervals.
+            return [line[fromm:to].strip(self.filler or ' ')
+                    for (fromm, to) in self.colspecs]
 
     # Iterator protocol in Python 3 uses __next__()
     __next__ = next
diff --git a/pandas/io/tests/test_parsers.py b/pandas/io/tests/test_parsers.py
index 9d751de6645ce..f872ddd793935 100644
--- a/pandas/io/tests/test_parsers.py
+++ b/pandas/io/tests/test_parsers.py
@@ -2028,6 +2028,31 @@ def test_fwf_regression(self):
             res = df.loc[:,c]
             self.assert_(len(res))
 
+    def test_fwf_compression(self):
+        try:
+            import gzip
+            import bz2
+        except ImportError:
+            raise nose.SkipTest("Need gzip and bz2 to run this test")
+
+        data = """1111111111
+        2222222222
+        3333333333""".strip()
+        widths = [5, 5]
+        names = ['one', 'two']
+        expected = read_fwf(StringIO(data), widths=widths, names=names)
+        if compat.PY3:
+            data = bytes(data, encoding='utf-8')
+        for comp_name, compresser in [('gzip', gzip.GzipFile),
+                                      ('bz2', bz2.BZ2File)]:
+            with tm.ensure_clean() as path:
+                tmp = compresser(path, mode='wb')
+                tmp.write(data)
+                tmp.close()
+                result = read_fwf(path, widths=widths, names=names,
+                                  compression=comp_name)
+                tm.assert_frame_equal(result, expected)
+
     def test_verbose_import(self):
         text = """a,b,c,d
 one,1,2,3

From c3dae2676de45b3391c1b71fbb7af58ff9a16108 Mon Sep 17 00:00:00 2001
From: Jeffrey Tratner <jeffrey.tratner@gmail.com>
Date: Mon, 9 Sep 2013 01:37:53 -0400
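Subject: [PATCH 2/2] Use passed encoding to decode bytes

Decode bytes lines in FixedWidthReader with the encoding passed to the
parser instead of a hard-coded 'utf-8'. When no encoding is given, fall
back to the 'display.encoding' option.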
---
 pandas/io/parsers.py | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
index 5ca0a498d1e07..e1b09eb76415f 100644
--- a/pandas/io/parsers.py
+++ b/pandas/io/parsers.py
@@ -14,6 +14,7 @@
 from pandas.core.frame import DataFrame
 import datetime
 import pandas.core.common as com
+from pandas.core.config import get_option
 from pandas import compat
 from pandas.io.date_converters import generic_parser
 from pandas.io.common import get_filepath_or_buffer
@@ -1921,11 +1922,14 @@ class FixedWidthReader(object):
     """
     A reader of fixed-width lines.
     """
-    def __init__(self, f, colspecs, filler, thousands=None):
+    def __init__(self, f, colspecs, filler, thousands=None, encoding=None):
         self.f = f
         self.colspecs = colspecs
         self.filler = filler  # Empty characters between fields.
         self.thousands = thousands
+        if encoding is None:
+            encoding = get_option('display.encoding')
+        self.encoding = encoding
 
         if not ( isinstance(colspecs, (tuple, list))):
             raise AssertionError()
@@ -1941,7 +1945,7 @@ def __init__(self, f, colspecs, filler, thousands=None):
         def next(self):
             line = next(self.f)
             if isinstance(line, bytes):
-                line = line.decode('utf-8')
+                line = line.decode(self.encoding)
             # Note: 'colspecs' is a sequence of half-open intervals.
             return [line[fromm:to].strip(self.filler or ' ')
                     for (fromm, to) in self.colspecs]
@@ -1968,7 +1972,8 @@ def __init__(self, f, **kwds):
         PythonParser.__init__(self, f, **kwds)
 
     def _make_reader(self, f):
-        self.data = FixedWidthReader(f, self.colspecs, self.delimiter)
+        self.data = FixedWidthReader(f, self.colspecs, self.delimiter,
+                                     encoding=self.encoding)
 
 
 ##### deprecations in 0.12 #####