From 7e43c78a4e310955f3f214f58d1b77dc03c2ec0d Mon Sep 17 00:00:00 2001
From: "Graham R. Jeffries" <graham.r.jeffries@gmail.com>
Date: Mon, 27 Mar 2017 13:36:19 -0400
Subject: [PATCH 1/3] Remove NotImplementedError for parse_dates keyword in
 read_excel

Rebase and update of PR https://github.com/pydata/pandas/pull/12051

Author: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Author: Graham R. Jeffries <graham.r.jeffries@gmail.com>

This patch had conflicts when merged, resolved by
Committer: Jeff Reback <jeff@reback.net>

Closes #14326 from jorisvandenbossche/pr/12051 and squashes the following commits:

0b65a7a [Joris Van den Bossche] update wording
656ec44 [Joris Van den Bossche] Fix detection to raise warning
b1c7f87 [Joris Van den Bossche] add whatsnew
925ce1b [Joris Van den Bossche] Update tests
0e10a9d [Graham R. Jeffries] remove read_excel kwd NotImplemented error, update documentation #11544
---
 doc/source/io.rst               | 14 +++++++++++++
 doc/source/whatsnew/v0.19.0.txt |  4 ++++
 pandas/io/excel.py              |  9 +++------
 pandas/tests/io/test_excel.py   | 36 ++++++++++++++++++++-------------
 4 files changed, 43 insertions(+), 20 deletions(-)

diff --git a/doc/source/io.rst b/doc/source/io.rst
index faeea9d448cf2..e72224c6fa1fe 100644
--- a/doc/source/io.rst
+++ b/doc/source/io.rst
@@ -2767,6 +2767,20 @@ indices to be parsed.
 
    read_excel('path_to_file.xls', 'Sheet1', parse_cols=[0, 2, 3])
 
+
+Parsing Dates
++++++++++++++
+
+Datetime-like values are normally automatically converted to the appropriate
+dtype when reading the Excel file. But if you have a column of strings that
+*look* like dates (but are not actually formatted as dates in Excel), you can
+use the ``parse_dates`` keyword to parse those strings to datetimes:
+
+.. code-block:: python
+
+   read_excel('path_to_file.xls', 'Sheet1', parse_dates=['date_strings'])
+
+
 Cell Converters
 +++++++++++++++
 
diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt
index 9b003034aa94a..11df0afb144ea 100644
--- a/doc/source/whatsnew/v0.19.0.txt
+++ b/doc/source/whatsnew/v0.19.0.txt
@@ -517,6 +517,7 @@ Other enhancements
 - The ``pd.read_json`` and ``DataFrame.to_json`` has gained support for reading and writing json lines with ``lines`` option see :ref:`Line delimited json <io.jsonl>` (:issue:`9180`)
 - :func:`read_excel` now supports the true_values and false_values keyword arguments (:issue:`13347`)
 - ``groupby()`` will now accept a scalar and a single-element list for specifying ``level`` on a non-``MultiIndex`` grouper. (:issue:`13907`)
+<<<<<<< HEAD
 - Non-convertible dates in an excel date column will be returned without conversion and the column will be ``object`` dtype, rather than raising an exception (:issue:`10001`).
 - ``pd.Timedelta(None)`` is now accepted and will return ``NaT``, mirroring ``pd.Timestamp`` (:issue:`13687`)
 - ``pd.read_stata()`` can now handle some format 111 files, which are produced by SAS when generating Stata dta files (:issue:`11526`)
@@ -524,6 +525,9 @@ Other enhancements
   series or indices. This behaves like a standard binary operator with regards
   to broadcasting rules (:issue:`14208`).
 
+=======
+- Re-enable the ``parse_dates`` keyword of ``read_excel`` to parse string columns as dates (:issue:`14326`)
+>>>>>>> PR_TOOL_MERGE_PR_14326
 
 .. _whatsnew_0190.api:
 
diff --git a/pandas/io/excel.py b/pandas/io/excel.py
index 82ea2e8a46592..e7a8b71a5f6c9 100644
--- a/pandas/io/excel.py
+++ b/pandas/io/excel.py
@@ -343,13 +343,10 @@ def _parse_excel(self, sheetname=0, header=0, skiprows=None, names=None,
         if 'chunksize' in kwds:
             raise NotImplementedError("chunksize keyword of read_excel "
                                       "is not implemented")
-        if parse_dates:
-            raise NotImplementedError("parse_dates keyword of read_excel "
-                                      "is not implemented")
 
-        if date_parser is not None:
-            raise NotImplementedError("date_parser keyword of read_excel "
-                                      "is not implemented")
+        if parse_dates is True and not index_col:
+            warn("The 'parse_dates=True' keyword of read_excel was provided"
+                 " without an 'index_col' keyword value.")
 
         import xlrd
         from xlrd import (xldate, XL_CELL_DATE,
diff --git a/pandas/tests/io/test_excel.py b/pandas/tests/io/test_excel.py
index b66cb24bf44d8..df77708232dd2 100644
--- a/pandas/tests/io/test_excel.py
+++ b/pandas/tests/io/test_excel.py
@@ -924,17 +924,27 @@ def test_read_excel_chunksize(self):
                           chunksize=100)
 
     def test_read_excel_parse_dates(self):
-        # GH 11544
-        with tm.assertRaises(NotImplementedError):
-            pd.read_excel(os.path.join(self.dirpath, 'test1' + self.ext),
-                          parse_dates=True)
+        # GH 11544, 12051
 
-    def test_read_excel_date_parser(self):
-        # GH 11544
-        with tm.assertRaises(NotImplementedError):
-            dateparse = lambda x: pd.datetime.strptime(x, '%Y-%m-%d %H:%M:%S')
-            pd.read_excel(os.path.join(self.dirpath, 'test1' + self.ext),
-                          date_parser=dateparse)
+        df = DataFrame(
+            {'col': [1, 2, 3],
+             'date_strings': pd.date_range('2012-01-01', periods=3)})
+        df2 = df.copy()
+        df2['date_strings'] = df2['date_strings'].dt.strftime('%m/%d/%Y')
+
+        with ensure_clean(self.ext) as pth:
+            df2.to_excel(pth)
+
+            res = read_excel(pth)
+            tm.assert_frame_equal(df2, res)
+
+            res = read_excel(pth, parse_dates=['date_strings'])
+            tm.assert_frame_equal(df, res)
+
+            dateparser = lambda x: pd.datetime.strptime(x, '%m/%d/%Y')
+            res = read_excel(pth, parse_dates=['date_strings'],
+                             date_parser=dateparser)
+            tm.assert_frame_equal(df, res)
 
     def test_read_excel_skiprows_list(self):
         # GH 4903
@@ -1382,8 +1392,7 @@ def test_to_excel_multiindex(self):
             # round trip
             frame.to_excel(path, 'test1', merge_cells=self.merge_cells)
             reader = ExcelFile(path)
-            df = read_excel(reader, 'test1', index_col=[0, 1],
-                            parse_dates=False)
+            df = read_excel(reader, 'test1', index_col=[0, 1])
             tm.assert_frame_equal(frame, df)
 
     # GH13511
@@ -1424,8 +1433,7 @@ def test_to_excel_multiindex_cols(self):
             frame.to_excel(path, 'test1', merge_cells=self.merge_cells)
             reader = ExcelFile(path)
             df = read_excel(reader, 'test1', header=header,
-                            index_col=[0, 1],
-                            parse_dates=False)
+                            index_col=[0, 1])
             if not self.merge_cells:
                 fm = frame.columns.format(sparsify=False,
                                           adjoin=False, names=False)

From 5d5989876f5fc5841f2f7e37cd16e7630623fa80 Mon Sep 17 00:00:00 2001
From: Jeff Reback <jeff@reback.net>
Date: Mon, 27 Mar 2017 13:37:32 -0400
Subject: [PATCH 2/3] BUG: index_names can be None when processing date
 conversions

---
 doc/source/whatsnew/v0.19.0.txt |  4 ----
 doc/source/whatsnew/v0.20.0.txt |  3 ++-
 pandas/io/excel.py              |  2 +-
 pandas/io/parsers.py            | 13 ++++++++++---
 pandas/tests/io/test_excel.py   |  5 +++--
 5 files changed, 16 insertions(+), 11 deletions(-)

diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt
index 11df0afb144ea..9b003034aa94a 100644
--- a/doc/source/whatsnew/v0.19.0.txt
+++ b/doc/source/whatsnew/v0.19.0.txt
@@ -517,7 +517,6 @@ Other enhancements
 - The ``pd.read_json`` and ``DataFrame.to_json`` has gained support for reading and writing json lines with ``lines`` option see :ref:`Line delimited json <io.jsonl>` (:issue:`9180`)
 - :func:`read_excel` now supports the true_values and false_values keyword arguments (:issue:`13347`)
 - ``groupby()`` will now accept a scalar and a single-element list for specifying ``level`` on a non-``MultiIndex`` grouper. (:issue:`13907`)
-<<<<<<< HEAD
 - Non-convertible dates in an excel date column will be returned without conversion and the column will be ``object`` dtype, rather than raising an exception (:issue:`10001`).
 - ``pd.Timedelta(None)`` is now accepted and will return ``NaT``, mirroring ``pd.Timestamp`` (:issue:`13687`)
 - ``pd.read_stata()`` can now handle some format 111 files, which are produced by SAS when generating Stata dta files (:issue:`11526`)
@@ -525,9 +524,6 @@ Other enhancements
   series or indices. This behaves like a standard binary operator with regards
   to broadcasting rules (:issue:`14208`).
 
-=======
-- Re-enable the ``parse_dates`` keyword of ``read_excel`` to parse string columns as dates (:issue:`14326`)
->>>>>>> PR_TOOL_MERGE_PR_14326
 
 .. _whatsnew_0190.api:
 
diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt
index 3ab69e1ff409b..fdf34e0d11572 100644
--- a/doc/source/whatsnew/v0.20.0.txt
+++ b/doc/source/whatsnew/v0.20.0.txt
@@ -270,7 +270,7 @@ To convert a ``SparseDataFrame`` back to sparse SciPy matrix in COO format, you
 
 .. _whatsnew_0200.enhancements.other:
 
-Other enhancements
+Other Enhancements
 ^^^^^^^^^^^^^^^^^^
 
 - Integration with the ``feather-format``, including a new top-level ``pd.read_feather()`` and ``DataFrame.to_feather()`` method, see :ref:`here <io.feather>`.
@@ -314,6 +314,7 @@ Other enhancements
 - ``pd.types.concat.union_categoricals`` gained the ``ignore_ordered`` argument to allow ignoring the ordered attribute of unioned categoricals (:issue:`13410`). See the :ref:`categorical union docs <categorical.union>` for more information.
 - ``pandas.io.json.json_normalize()`` with an empty ``list`` will return an empty ``DataFrame`` (:issue:`15534`)
 - ``pd.DataFrame.to_latex`` and ``pd.DataFrame.to_string`` now allow optional header aliases. (:issue:`15536`)
+- Re-enable the ``parse_dates`` keyword of ``read_excel`` to parse string columns as dates (:issue:`14326`)
 
 .. _ISO 8601 duration: https://en.wikipedia.org/wiki/ISO_8601#Durations
 
diff --git a/pandas/io/excel.py b/pandas/io/excel.py
index e7a8b71a5f6c9..d324855bc2f4d 100644
--- a/pandas/io/excel.py
+++ b/pandas/io/excel.py
@@ -344,7 +344,7 @@ def _parse_excel(self, sheetname=0, header=0, skiprows=None, names=None,
             raise NotImplementedError("chunksize keyword of read_excel "
                                       "is not implemented")
 
-        if parse_dates is True and not index_col:
+        if parse_dates is True and index_col is None:
             warn("The 'parse_dates=True' keyword of read_excel was provided"
                  " without an 'index_col' keyword value.")
 
diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
index 45c62b224ef4e..30b88de91ef76 100755
--- a/pandas/io/parsers.py
+++ b/pandas/io/parsers.py
@@ -1176,13 +1176,18 @@ def _should_parse_dates(self, i):
         if isinstance(self.parse_dates, bool):
             return self.parse_dates
         else:
-            name = self.index_names[i]
+            if self.index_names is not None:
+                name = self.index_names[i]
+            else:
+                name = None
             j = self.index_col[i]
 
             if is_scalar(self.parse_dates):
-                return (j == self.parse_dates) or (name == self.parse_dates)
+                return ((j == self.parse_dates) or
+                        (name is not None and name == self.parse_dates))
             else:
-                return (j in self.parse_dates) or (name in self.parse_dates)
+                return ((j in self.parse_dates) or
+                        (name is not None and name in self.parse_dates))
 
     def _extract_multi_indexer_columns(self, header, index_names, col_names,
                                        passed_names=False):
@@ -1352,6 +1357,7 @@ def _get_name(icol):
 
     def _agg_index(self, index, try_parse_dates=True):
         arrays = []
+
         for i, arr in enumerate(index):
 
             if (try_parse_dates and self._should_parse_dates(i)):
@@ -1512,6 +1518,7 @@ def _cast_types(self, values, cast_type, column):
 
     def _do_date_conversions(self, names, data):
         # returns data, columns
+
         if self.parse_dates is not None:
             data, names = _process_date_conversion(
                 data, self._date_conv, self.parse_dates, self.index_col,
diff --git a/pandas/tests/io/test_excel.py b/pandas/tests/io/test_excel.py
index df77708232dd2..87e87bc4aba65 100644
--- a/pandas/tests/io/test_excel.py
+++ b/pandas/tests/io/test_excel.py
@@ -935,15 +935,16 @@ def test_read_excel_parse_dates(self):
         with ensure_clean(self.ext) as pth:
             df2.to_excel(pth)
 
+            # no index_col specified
             res = read_excel(pth)
             tm.assert_frame_equal(df2, res)
 
-            res = read_excel(pth, parse_dates=['date_strings'])
+            res = read_excel(pth, parse_dates=['date_strings'], index_col=0)
             tm.assert_frame_equal(df, res)
 
             dateparser = lambda x: pd.datetime.strptime(x, '%m/%d/%Y')
             res = read_excel(pth, parse_dates=['date_strings'],
-                             date_parser=dateparser)
+                             date_parser=dateparser, index_col=0)
             tm.assert_frame_equal(df, res)
 
     def test_read_excel_skiprows_list(self):

From a1eee6789df1ffbc171704f79bb7438555f398c9 Mon Sep 17 00:00:00 2001
From: Jeff Reback <jeff@reback.net>
Date: Mon, 27 Mar 2017 15:46:09 -0400
Subject: [PATCH 3/3] test for warning

---
 pandas/tests/io/test_excel.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/pandas/tests/io/test_excel.py b/pandas/tests/io/test_excel.py
index 87e87bc4aba65..256a37e922177 100644
--- a/pandas/tests/io/test_excel.py
+++ b/pandas/tests/io/test_excel.py
@@ -935,10 +935,14 @@ def test_read_excel_parse_dates(self):
         with ensure_clean(self.ext) as pth:
             df2.to_excel(pth)
 
-            # no index_col specified
             res = read_excel(pth)
             tm.assert_frame_equal(df2, res)
 
+            # no index_col specified when parse_dates is True
+            with tm.assert_produces_warning():
+                res = read_excel(pth, parse_dates=True)
+                tm.assert_frame_equal(df2, res)
+
             res = read_excel(pth, parse_dates=['date_strings'], index_col=0)
             tm.assert_frame_equal(df, res)