diff --git a/doc/source/v0.14.1.txt b/doc/source/v0.14.1.txt
index bb4ecddd58f16..20620f15944f0 100644
--- a/doc/source/v0.14.1.txt
+++ b/doc/source/v0.14.1.txt
@@ -58,6 +58,9 @@ Known Issues
 
 Enhancements
 ~~~~~~~~~~~~
 - Tests for basic reading of public S3 buckets now exist (:issue:`7281`).
+- ``read_html`` now sports an ``encoding`` argument that is passed to the
+  underlying parser library. You can use this to read non-ASCII encoded web
+  pages (:issue:`7323`).
 - Support for dateutil timezones, which can now be used in the same way as
   pytz timezones across pandas. (:issue:`4688`)
diff --git a/pandas/io/html.py b/pandas/io/html.py
index 4375d08abc37c..5ea6ca36ac764 100644
--- a/pandas/io/html.py
+++ b/pandas/io/html.py
@@ -98,30 +98,33 @@ def _get_skiprows(skiprows):
                         type(skiprows).__name__)
 
 
-def _read(io):
+def _read(obj):
     """Try to read from a url, file or string.
 
     Parameters
     ----------
-    io : str, unicode, or file-like
+    obj : str, unicode, or file-like
 
     Returns
     -------
     raw_text : str
     """
-    if _is_url(io):
-        with urlopen(io) as url:
-            raw_text = url.read()
-    elif hasattr(io, 'read'):
-        raw_text = io.read()
-    elif os.path.isfile(io):
-        with open(io) as f:
-            raw_text = f.read()
-    elif isinstance(io, string_types):
-        raw_text = io
+    if _is_url(obj):
+        with urlopen(obj) as url:
+            text = url.read()
+    elif hasattr(obj, 'read'):
+        text = obj.read()
+    elif isinstance(obj, string_types):
+        text = obj
+        try:
+            if os.path.isfile(text):
+                with open(text, 'rb') as f:
+                    return f.read()
+        except TypeError:
+            pass
     else:
-        raise TypeError("Cannot read object of type %r" % type(io).__name__)
-    return raw_text
+        raise TypeError("Cannot read object of type %r" % type(obj).__name__)
+    return text
 
 
 class _HtmlFrameParser(object):
@@ -165,10 +168,11 @@ class _HtmlFrameParser(object):
     See each method's respective documentation for details on their
     functionality.
""" - def __init__(self, io, match, attrs): + def __init__(self, io, match, attrs, encoding): self.io = io self.match = match self.attrs = attrs + self.encoding = encoding def parse_tables(self): tables = self._parse_tables(self._build_doc(), self.match, self.attrs) @@ -422,7 +426,8 @@ def _setup_build_doc(self): def _build_doc(self): from bs4 import BeautifulSoup - return BeautifulSoup(self._setup_build_doc(), features='html5lib') + return BeautifulSoup(self._setup_build_doc(), features='html5lib', + from_encoding=self.encoding) def _build_xpath_expr(attrs): @@ -519,7 +524,7 @@ def _build_doc(self): from lxml.html import parse, fromstring, HTMLParser from lxml.etree import XMLSyntaxError - parser = HTMLParser(recover=False) + parser = HTMLParser(recover=False, encoding=self.encoding) try: # try to parse the input in the simplest way @@ -689,7 +694,7 @@ def _validate_flavor(flavor): def _parse(flavor, io, match, header, index_col, skiprows, infer_types, - parse_dates, tupleize_cols, thousands, attrs): + parse_dates, tupleize_cols, thousands, attrs, encoding): flavor = _validate_flavor(flavor) compiled_match = re.compile(match) # you can pass a compiled regex here @@ -697,7 +702,7 @@ def _parse(flavor, io, match, header, index_col, skiprows, infer_types, retained = None for flav in flavor: parser = _parser_dispatch(flav) - p = parser(io, compiled_match, attrs) + p = parser(io, compiled_match, attrs, encoding) try: tables = p.parse_tables() @@ -715,7 +720,7 @@ def _parse(flavor, io, match, header, index_col, skiprows, infer_types, def read_html(io, match='.+', flavor=None, header=None, index_col=None, skiprows=None, infer_types=None, attrs=None, parse_dates=False, - tupleize_cols=False, thousands=','): + tupleize_cols=False, thousands=',', encoding=None): r"""Read HTML tables into a ``list`` of ``DataFrame`` objects. Parameters @@ -792,6 +797,12 @@ def read_html(io, match='.+', flavor=None, header=None, index_col=None, thousands : str, optional Separator to use to parse thousands. Defaults to ``','``. + encoding : str or None, optional + The encoding used to decode the web page. Defaults to ``None``.``None`` + preserves the previous encoding behavior, which depends on the + underlying parser library (e.g., the parser library will try to use + the encoding provided by the document). 
+
     Returns
     -------
     dfs : list of DataFrames
@@ -837,4 +848,4 @@ def read_html(io, match='.+', flavor=None, header=None, index_col=None,
         raise ValueError('cannot skip rows starting from the end of the '
                          'data (you passed a negative value)')
     return _parse(flavor, io, match, header, index_col, skiprows, infer_types,
-                  parse_dates, tupleize_cols, thousands, attrs)
+                  parse_dates, tupleize_cols, thousands, attrs, encoding)
diff --git a/pandas/io/tests/data/html_encoding/chinese_utf16.html b/pandas/io/tests/data/html_encoding/chinese_utf16.html
new file mode 100644
index 0000000000000..59fffc0d19c57
Binary files /dev/null and b/pandas/io/tests/data/html_encoding/chinese_utf16.html differ
diff --git a/pandas/io/tests/data/html_encoding/chinese_utf32.html b/pandas/io/tests/data/html_encoding/chinese_utf32.html
new file mode 100644
index 0000000000000..365c44bf08ea1
Binary files /dev/null and b/pandas/io/tests/data/html_encoding/chinese_utf32.html differ
diff --git a/pandas/io/tests/data/html_encoding/chinese_utf8.html b/pandas/io/tests/data/html_encoding/chinese_utf8.html
new file mode 100644
index 0000000000000..ad1ca33a78a65
--- /dev/null
+++ b/pandas/io/tests/data/html_encoding/chinese_utf8.html
@@ -0,0 +1,26 @@
+<table border="1" class="dataframe">
+  <thead>
+    <tr style="text-align: right;">
+      <th></th>
+      <th>0</th>
+      <th>1</th>
+    </tr>
+  </thead>
+  <tbody>
+    <tr>
+      <th>0</th>
+      <td>漊煻獌</td>
+      <td>漊煻獌</td>
+    </tr>
+    <tr>
+      <th>1</th>
+      <td>袟袘觕</td>
+      <td>袟袘觕</td>
+    </tr>
+    <tr>
+      <th>2</th>
+      <td>埱娵徖</td>
+      <td>埱娵徖</td>
+    </tr>
+  </tbody>
+</table>
\ No newline at end of file
diff --git a/pandas/io/tests/data/html_encoding/letz_latin1.html b/pandas/io/tests/data/html_encoding/letz_latin1.html
new file mode 100644
index 0000000000000..7b4b99cb33388
--- /dev/null
+++ b/pandas/io/tests/data/html_encoding/letz_latin1.html
@@ -0,0 +1,26 @@
+<table border="1" class="dataframe">
+  <thead>
+    <tr style="text-align: right;">
+      <th></th>
+      <th>0</th>
+      <th>1</th>
+    </tr>
+  </thead>
+  <tbody>
+    <tr>
+      <th>0</th>
+      <td>Gt</td>
+      <td>Gt</td>
+    </tr>
+    <tr>
+      <th>1</th>
+      <td>m</td>
+      <td>m</td>
+    </tr>
+    <tr>
+      <th>2</th>
+      <td>iech</td>
+      <td>iech</td>
+    </tr>
+  </tbody>
+</table>
\ No newline at end of file
diff --git a/pandas/io/tests/test_html.py b/pandas/io/tests/test_html.py
index 12da26c0c7e50..a20a8945eeb11 100644
--- a/pandas/io/tests/test_html.py
+++ b/pandas/io/tests/test_html.py
@@ -1,5 +1,6 @@
 from __future__ import print_function
 
+import glob
 import os
 import re
 import warnings
@@ -110,15 +111,14 @@ def test_to_html_compat(self):
         df = mkdf(4, 3, data_gen_f=lambda *args: rand(), c_idx_names=False,
                   r_idx_names=False).applymap('{0:.3f}'.format).astype(float)
         out = df.to_html()
-        res = self.read_html(out, attrs={'class': 'dataframe'},
-                             index_col=0)[0]
+        res = self.read_html(out, attrs={'class': 'dataframe'}, index_col=0)[0]
         tm.assert_frame_equal(res, df)
 
     @network
     def test_banklist_url(self):
         url = 'http://www.fdic.gov/bank/individual/failed/banklist.html'
         df1 = self.read_html(url, 'First Federal Bank of Florida',
-                            attrs={"id": 'table'})
+                             attrs={"id": 'table'})
         df2 = self.read_html(url, 'Metcalf Bank', attrs={'id': 'table'})
 
         assert_framelist_equal(df1, df2)
@@ -135,9 +135,9 @@ def test_spam_url(self):
     @slow
     def test_banklist(self):
         df1 = self.read_html(self.banklist_data, '.*Florida.*',
-                            attrs={'id': 'table'})
+                             attrs={'id': 'table'})
         df2 = self.read_html(self.banklist_data, 'Metcalf Bank',
-                            attrs={'id': 'table'})
+                             attrs={'id': 'table'})
 
         assert_framelist_equal(df1, df2)
@@ -183,8 +183,7 @@ def test_skiprows_int(self):
         assert_framelist_equal(df1, df2)
 
     def test_skiprows_xrange(self):
-        df1 = self.read_html(self.spam_data, '.*Water.*',
-                             skiprows=range(2))[0]
+        df1 = self.read_html(self.spam_data, '.*Water.*', skiprows=range(2))[0]
         df2 = self.read_html(self.spam_data, 'Unit', skiprows=range(2))[0]
         tm.assert_frame_equal(df1, df2)
@@ -195,8 +194,7 @@ def test_skiprows_list(self):
         assert_framelist_equal(df1, df2)
 
     def test_skiprows_set(self):
-        df1 = self.read_html(self.spam_data, '.*Water.*',
-                             skiprows=set([1, 2]))
+        df1 = self.read_html(self.spam_data, '.*Water.*', skiprows=set([1, 2]))
         df2 = self.read_html(self.spam_data, 'Unit', skiprows=set([2, 1]))
 
         assert_framelist_equal(df1, df2)
@@ -208,23 +206,20 @@ def test_skiprows_slice(self):
         assert_framelist_equal(df1, df2)
 
     def test_skiprows_slice_short(self):
-        df1 = self.read_html(self.spam_data, '.*Water.*',
-                             skiprows=slice(2))
+        df1 = self.read_html(self.spam_data, '.*Water.*', skiprows=slice(2))
         df2 = self.read_html(self.spam_data, 'Unit', skiprows=slice(2))
 
         assert_framelist_equal(df1, df2)
 
     def test_skiprows_slice_long(self):
-        df1 = self.read_html(self.spam_data, '.*Water.*',
-                             skiprows=slice(2, 5))
-        df2 = self.read_html(self.spam_data, 'Unit',
-                             skiprows=slice(4, 1, -1))
+        df1 = self.read_html(self.spam_data, '.*Water.*', skiprows=slice(2, 5))
+        df2 = self.read_html(self.spam_data, 'Unit', skiprows=slice(4, 1, -1))
 
         assert_framelist_equal(df1, df2)
 
     def test_skiprows_ndarray(self):
         df1 = self.read_html(self.spam_data, '.*Water.*',
-                            skiprows=np.arange(2))
+                             skiprows=np.arange(2))
         df2 = self.read_html(self.spam_data, 'Unit', skiprows=np.arange(2))
 
         assert_framelist_equal(df1, df2)
@@ -242,30 +237,30 @@ def test_index(self):
 
     def test_header_and_index_no_types(self):
         with tm.assert_produces_warning(FutureWarning):
             df1 = self.read_html(self.spam_data, '.*Water.*', header=1,
-                                index_col=0, infer_types=False)
+                                 index_col=0, infer_types=False)
         with tm.assert_produces_warning(FutureWarning):
-            df2 = self.read_html(self.spam_data, 'Unit', header=1,
-                                 index_col=0, infer_types=False)
+            df2 = self.read_html(self.spam_data, 'Unit', header=1, index_col=0,
+                                 infer_types=False)
         assert_framelist_equal(df1, df2)
 
     def test_header_and_index_with_types(self):
         df1 = self.read_html(self.spam_data, '.*Water.*', header=1,
-                            index_col=0)
+                             index_col=0)
         df2 = self.read_html(self.spam_data, 'Unit', header=1, index_col=0)
         assert_framelist_equal(df1, df2)
 
     def test_infer_types(self):
         with tm.assert_produces_warning(FutureWarning):
             df1 = self.read_html(self.spam_data, '.*Water.*', index_col=0,
-                                infer_types=False)
+                                 infer_types=False)
         with tm.assert_produces_warning(FutureWarning):
             df2 = self.read_html(self.spam_data, 'Unit', index_col=0,
-                                infer_types=False)
+                                 infer_types=False)
 
         assert_framelist_equal(df1, df2)
 
         with tm.assert_produces_warning(FutureWarning):
             df2 = self.read_html(self.spam_data, 'Unit', index_col=0,
-                                infer_types=True)
+                                 infer_types=True)
 
         with tm.assertRaises(AssertionError):
             assert_framelist_equal(df1, df2)
@@ -308,14 +303,16 @@ def test_bad_url_protocol(self):
     def test_invalid_url(self):
         try:
             with tm.assertRaises(URLError):
-                self.read_html('http://www.a23950sdfa908sd.com', match='.*Water.*')
+                self.read_html('http://www.a23950sdfa908sd.com',
+                               match='.*Water.*')
         except ValueError as e:
             tm.assert_equal(str(e), 'No tables found')
 
     @slow
     def test_file_url(self):
         url = self.banklist_data
-        dfs = self.read_html(file_path_to_url(url), 'First', attrs={'id': 'table'})
+        dfs = self.read_html(file_path_to_url(url), 'First',
+                             attrs={'id': 'table'})
         tm.assert_isinstance(dfs, list)
         for df in dfs:
             tm.assert_isinstance(df, DataFrame)
@@ -367,8 +364,8 @@ def test_multiindex_header_index_skiprows(self):
     def test_regex_idempotency(self):
         url = self.banklist_data
         dfs = self.read_html(file_path_to_url(url),
-                            match=re.compile(re.compile('Florida')),
-                            attrs={'id': 'table'})
+                             match=re.compile(re.compile('Florida')),
+                             attrs={'id': 'table'})
         tm.assert_isinstance(dfs, list)
         for df in dfs:
             tm.assert_isinstance(df, DataFrame)
@@ -381,15 +378,13 @@ def test_negative_skiprows(self):
     @network
     def test_multiple_matches(self):
         url = 'http://code.google.com/p/pythonxy/wiki/StandardPlugins'
-        dfs = self.read_html(url, match='Python',
-                             attrs={'class': 'wikitable'})
+        dfs = self.read_html(url, match='Python', attrs={'class': 'wikitable'})
        self.assertTrue(len(dfs) > 1)
 
     @network
     def test_pythonxy_plugins_table(self):
         url = 'http://code.google.com/p/pythonxy/wiki/StandardPlugins'
-        dfs = self.read_html(url, match='Python',
-                             attrs={'class': 'wikitable'})
+        dfs = self.read_html(url, match='Python', attrs={'class': 'wikitable'})
         zz = [df.iloc[0, 0] for df in dfs]
         self.assertEqual(sorted(zz), sorted(['Python', 'SciTE']))
@@ -471,7 +466,7 @@ def try_remove_ws(x):
             return x
 
         df = self.read_html(self.banklist_data, 'Metcalf',
-                           attrs={'id': 'table'})[0]
+                            attrs={'id': 'table'})[0]
         ground_truth = read_csv(os.path.join(DATA_PATH, 'banklist.csv'),
                                 converters={'Updated Date': Timestamp,
                                             'Closing Date': Timestamp})
@@ -505,7 +500,7 @@ def test_gold_canyon(self):
             self.assertIn(gc, raw_text)
 
         df = self.read_html(self.banklist_data, 'Gold Canyon',
-                           attrs={'id': 'table'})[0]
+                            attrs={'id': 'table'})[0]
         self.assertIn(gc, df.to_string())
 
     def test_different_number_of_rows(self):
@@ -594,6 +589,35 @@ def test_computer_sales_page(self):
         self.read_html(data, infer_types=False, header=[0, 1])
 
 
+def _lang_enc(filename):
+    return os.path.splitext(os.path.basename(filename))[0].split('_')
+
+
+class TestReadHtmlEncoding(tm.TestCase):
+    files = glob.glob(os.path.join(DATA_PATH, 'html_encoding', '*.html'))
+
+    def read_filename(self, f, encoding):
+        return read_html(f, encoding=encoding, index_col=0)
+
+    def read_file_like(self, f, encoding):
+        with open(f, 'rb') as fobj:
+            return read_html(StringIO(fobj.read()), encoding=encoding,
+                             index_col=0)
+
+    def read_string(self, f, encoding):
+        with open(f, 'rb') as fobj:
+            return read_html(fobj.read(), encoding=encoding, index_col=0)
+
+    def test_encode(self):
+        for f in self.files:
+            _, encoding = _lang_enc(f)
+            from_string = self.read_string(f, encoding).pop()
+            from_file_like = self.read_file_like(f, encoding).pop()
+            from_filename = self.read_filename(f, encoding).pop()
+            tm.assert_frame_equal(from_string, from_file_like)
+            tm.assert_frame_equal(from_string, from_filename)
+
+
 class TestReadHtmlLxml(tm.TestCase):
     @classmethod
     def setUpClass(cls):
@@ -644,7 +668,6 @@ def test_parse_dates_combine(self):
         tm.assert_frame_equal(newdf, res[0])
 
     def test_computer_sales_page(self):
-        import pandas as pd
         data = os.path.join(DATA_PATH, 'computer_sales_page.html')
         with tm.assert_produces_warning(FutureWarning):
             self.read_html(data, infer_types=False, header=[0, 1])
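For reference, encoded fixtures like the two checked in above can be generated straight from a ``DataFrame`` and round-tripped with the new argument. A minimal sketch using only the public API touched by this patch; the words and the file name are illustrative, not the actual fixture contents:

# -*- coding: utf-8 -*-
import pandas as pd

# Illustrative data only: the checked-in fixture words differ.
words = [u'd\xe9j\xe0', u'na\xefve', u'fa\xe7ade']
df = pd.DataFrame({0: words, 1: words})

# Write the rendered table with an explicit, non-default encoding.
with open('demo_latin1.html', 'wb') as f:
    f.write(df.to_html().encode('latin-1'))

# Reading it back requires telling the parser how to decode the bytes.
result, = pd.read_html('demo_latin1.html', encoding='latin-1', index_col=0)
print(result)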