From f2be8618e73f71133535a6a06e165c40bdf6242c Mon Sep 17 00:00:00 2001
From: Gianluca Rossi
Date: Sat, 8 Aug 2015 22:14:34 +0100
Subject: [PATCH 01/12] Add ability to 'read_sql_table' to read views and
 implement unit test to check behaviour

---
 pandas/io/sql.py            |  2 +-
 pandas/io/tests/test_sql.py | 24 ++++++++++++++++++++++++
 2 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/pandas/io/sql.py b/pandas/io/sql.py
index 8eefe4ba98876..b587ec128c016 100644
--- a/pandas/io/sql.py
+++ b/pandas/io/sql.py
@@ -337,7 +337,7 @@ def read_sql_table(table_name, con, schema=None, index_col=None,
     from sqlalchemy.schema import MetaData
     meta = MetaData(con, schema=schema)
     try:
-        meta.reflect(only=[table_name])
+        meta.reflect(only=[table_name], views=True)
     except sqlalchemy.exc.InvalidRequestError:
         raise ValueError("Table %s not found" % table_name)

diff --git a/pandas/io/tests/test_sql.py b/pandas/io/tests/test_sql.py
index 859c6d3250121..434f8c4b71e85 100644
--- a/pandas/io/tests/test_sql.py
+++ b/pandas/io/tests/test_sql.py
@@ -161,6 +161,20 @@
         SELECT * FROM iris WHERE
         "Name"=%(name)s AND "SepalLength"=%(length)s
         """
+    },
+    'create_view': {
+        'sqlite': """
+            CREATE VIEW iris_view AS
+            SELECT * FROM iris;
+            """,
+        'mysql': """
+            CREATE VIEW iris_view AS
+            SELECT * FROM iris;
+            """,
+        'postgresql': """
+            CREATE VIEW iris_view AS
+            SELECT * FROM iris;
+            """
     }
 }

@@ -244,6 +258,10 @@ def _load_iris_data(self):
             for row in r:
                 self._get_exec().execute(ins, row)

+    def _load_iris_view(self):
+        self.drop_table('iris_view')
+        self._get_exec().execute(SQL_STRINGS['create_view'][self.flavor])
+
     def _check_iris_loaded_frame(self, iris_frame):
         pytype = iris_frame.dtypes[0].type
         row = iris_frame.iloc[0]
@@ -482,6 +500,7 @@ class _TestSQLApi(PandasSQLTest):
     def setUp(self):
         self.conn = self.connect()
         self._load_iris_data()
+        self._load_iris_view()
         self._load_test1_data()
         self._load_test2_data()
         self._load_test3_data()
@@ -492,6 +511,11 @@ def test_read_sql_iris(self):
             "SELECT * FROM iris", self.conn)
         self._check_iris_loaded_frame(iris_frame)

+    def test_read_sql_view(self):
+        iris_frame = sql.read_sql_query(
+            "SELECT * FROM iris_view", self.conn)
+        self._check_iris_loaded_frame(iris_frame)
+
     def test_legacy_read_frame(self):
         with tm.assert_produces_warning(FutureWarning):
             iris_frame = sql.read_frame(
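The patch above passes ``views=True`` through to SQLAlchemy's ``MetaData.reflect``, so ``read_sql_table`` resolves views the same way it resolves tables. A minimal sketch of the resulting behaviour, assuming an in-memory SQLite engine and SQLAlchemy's pre-2.0 ``engine.execute`` API (names here are illustrative, not part of the patch):

.. code-block:: python

   import pandas as pd
   from sqlalchemy import create_engine

   engine = create_engine("sqlite:///:memory:")

   # seed a table, then expose it through a view
   pd.DataFrame({"SepalLength": [5.1, 4.9],
                 "Name": ["setosa", "setosa"]}).to_sql("iris", engine, index=False)
   engine.execute("CREATE VIEW iris_view AS SELECT * FROM iris")

   # before this patch, reflecting a view raised ValueError("Table iris_view not found");
   # with views=True the view is reflected like an ordinary table
   df = pd.read_sql_table("iris_view", engine)
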
From 69a28c764b61669f89a9ba9f9717c78d77ee983f Mon Sep 17 00:00:00 2001
From: Gianluca Rossi
Date: Sun, 2 Aug 2015 12:24:53 +0100
Subject: [PATCH 02/12] BUG: Fix bug preventing the inheritance of Series'
 names when only a few of them exist, and new column names are not provided
 via the 'keys' argument. Closes #10698

---
 doc/source/whatsnew/v0.17.0.txt  | 25 +++++++++++++++++++++++++
 pandas/tools/merge.py            | 12 +++++++++---
 pandas/tools/tests/test_merge.py | 13 +++++++++++--
 3 files changed, 45 insertions(+), 5 deletions(-)

diff --git a/doc/source/whatsnew/v0.17.0.txt b/doc/source/whatsnew/v0.17.0.txt
index 70d616ca72c1b..843cc32a8ab9b 100644
--- a/doc/source/whatsnew/v0.17.0.txt
+++ b/doc/source/whatsnew/v0.17.0.txt
@@ -137,6 +137,7 @@ Other enhancements

 - ``.as_blocks`` will now take a ``copy`` optional argument to return a copy of the data, default is to copy (no change in behavior from prior versions), (:issue:`9607`)
 - ``regex`` argument to ``DataFrame.filter`` now handles numeric column names instead of raising ``ValueError`` (:issue:`10384`).
+- ``pd.read_stata`` will now read Stata 118 type files. (:issue:`9882`)
 - ``pd.merge`` will now allow duplicate column names if they are not merged upon (:issue:`10639`).
@@ -152,6 +153,30 @@ Other enhancements

   s.drop_duplicates(keep=False)

+- ``concat`` will now inherit the existing series names (even when some are missing), if new ones are not provided through the ``keys`` argument (:issue:`10698`).
+
+  Previous Behavior:
+
+  .. code-block:: python
+
+     In [1]: foo = pd.Series([1,2], name='foo')
+     In [2]: bar = pd.Series([1,2])
+     In [3]: baz = pd.Series([4,5])
+     In [4]: pd.concat([foo, bar, baz], 1)
+     Out[4]:
+        0  1  2
+     0  1  1  4
+     1  2  2  5
+
+  New Behavior:
+
+  .. ipython:: python
+
+     foo = pd.Series([1,2], name='foo')
+     bar = pd.Series([1,2])
+     baz = pd.Series([4,5])
+     pd.concat([foo, bar, baz], 1)
+
 .. _whatsnew_0170.api:

 .. _whatsnew_0170.api_breaking:

diff --git a/pandas/tools/merge.py b/pandas/tools/merge.py
index 430828a3db31b..d04cc8c4a7754 100644
--- a/pandas/tools/merge.py
+++ b/pandas/tools/merge.py
@@ -16,7 +16,7 @@
 from pandas.core.internals import (items_overlap_with_suffix,
                                    concatenate_block_managers)
 from pandas.util.decorators import Appender, Substitution
-from pandas.core.common import ABCSeries
+from pandas.core.common import ABCSeries, isnull
 from pandas.io.parsers import TextFileReader
 import pandas.core.common as com

@@ -896,8 +896,14 @@ def get_result(self):
                 data = dict(zip(range(len(self.objs)), self.objs))
                 index, columns = self.new_axes
                 tmpdf = DataFrame(data, index=index)
-                if columns is not None:
-                    tmpdf.columns = columns
+                # check whether 'columns' already stores valid column names (set via
+                # the 'keys' argument of 'concat'); if not, use the series names
+                if columns.equals(Index(np.arange(len(self.objs)))):
+                    columns = np.array([data[i].name for i in range(len(data))], dtype='object')
+                    indexer = isnull(columns)
+                    if indexer.any():
+                        columns[indexer] = np.arange(len(indexer[indexer]))
+                tmpdf.columns = columns
                 return tmpdf.__finalize__(self, method='concat')

     # combine block managers

diff --git a/pandas/tools/tests/test_merge.py b/pandas/tools/tests/test_merge.py
index 8b1457e7fd490..3be283eff1bb4 100644
--- a/pandas/tools/tests/test_merge.py
+++ b/pandas/tools/tests/test_merge.py
@@ -1797,6 +1797,15 @@ def test_concat_dataframe_keys_bug(self):
         self.assertEqual(list(result.columns), [('t1', 'value'),
                                                 ('t2', 'value')])

+    def test_concat_series_partial_columns_names(self):
+        foo = pd.Series([1,2], name='foo')
+        bar = pd.Series([1,2])
+        baz = pd.Series([4,5])
+
+        result = pd.concat([foo, bar, baz], 1)
+        expected = DataFrame({'foo' : [1,2], 0 : [1,2], 1 : [4,5]}, columns=['foo',0,1])
+        tm.assert_frame_equal(result, expected)
+
     def test_concat_dict(self):
         frames = {'foo': DataFrame(np.random.randn(4, 3)),
                   'bar': DataFrame(np.random.randn(4, 3)),
@@ -2330,7 +2339,7 @@ def test_concat_series_axis1(self):
         s2.name = None

         result = concat([s, s2], axis=1)
-        self.assertTrue(np.array_equal(result.columns, lrange(2)))
+        self.assertTrue(np.array_equal(result.columns, Index(['A', 0], dtype='object')))

         # must reindex, #2603
         s = Series(randn(3), index=['c', 'a', 'b'], name='A')
@@ -2431,7 +2440,7 @@ def test_concat_series_axis1_same_names_ignore_index(self):
         s2 = Series(randn(len(dates)), index=dates, name='value')

         result = concat([s1, s2], axis=1, ignore_index=True)
-        self.assertTrue(np.array_equal(result.columns, [0, 1]))
+        self.assertTrue(np.array_equal(result.columns, ['value', 'value']))

     def test_concat_iterables(self):
         from collections import deque, Iterable
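For quick reference, a minimal sketch of the behaviour the patch above establishes, mirroring the whatsnew example; the expected columns come from the new ``test_concat_series_partial_columns_names`` test:

.. code-block:: python

   import pandas as pd

   foo = pd.Series([1, 2], name='foo')
   bar = pd.Series([1, 2])
   baz = pd.Series([4, 5])

   # named series keep their names; unnamed series fall back to their position
   result = pd.concat([foo, bar, baz], axis=1)
   print(list(result.columns))  # ['foo', 0, 1]
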
From 9493f002bbbb77b82dead6a5ffdd2fcd2b769c1c Mon Sep 17 00:00:00 2001
From: sinhrks
Date: Sat, 1 Aug 2015 23:06:52 +0900
Subject: [PATCH 03/12] BUG: Categorical doesn't show tzinfo properly

---
 doc/source/whatsnew/v0.17.0.txt  |   3 +
 pandas/core/categorical.py       |  33 +-
 pandas/core/format.py            |  37 +-
 pandas/core/index.py             |   9 +
 pandas/tests/test_categorical.py | 576 +++++++++++++++++++++++++++++++
 pandas/tests/test_index.py       |   9 +
 pandas/tseries/period.py         |   7 +
 7 files changed, 653 insertions(+), 21 deletions(-)

diff --git a/doc/source/whatsnew/v0.17.0.txt b/doc/source/whatsnew/v0.17.0.txt
index 770ad8a268f11..86bb78f4066ab 100644
--- a/doc/source/whatsnew/v0.17.0.txt
+++ b/doc/source/whatsnew/v0.17.0.txt
@@ -606,6 +606,9 @@ Bug Fixes

 - Bug in ``read_stata`` when reading a file with a different order set in ``columns`` (:issue:`10757`)

+- Bug in ``Categorical`` may not be represented properly when categories contain ``tz`` or ``Period`` data (:issue:`10713`)
+- Bug in ``Categorical.__iter__`` may not return the correct ``datetime`` and ``Period`` values (:issue:`10713`)
+
 - Reading "famafrench" data via ``DataReader`` results in HTTP 404 error because of the website url is changed (:issue:`10591`).

 - Bug in ``read_msgpack`` where DataFrame to decode has duplicate column names (:issue:`9618`)

diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py
index b0d564caa5826..c9e30ea31dab8 100644
--- a/pandas/core/categorical.py
+++ b/pandas/core/categorical.py
@@ -12,7 +12,7 @@
 import pandas.core.common as com
 from pandas.util.decorators import cache_readonly, deprecate_kwarg

-from pandas.core.common import (CategoricalDtype, ABCSeries, ABCIndexClass, ABCPeriodIndex, ABCCategoricalIndex,
+from pandas.core.common import (CategoricalDtype, ABCSeries, ABCIndexClass, ABCCategoricalIndex,
                                 isnull, notnull, is_dtype_equal,
                                 is_categorical_dtype, is_integer_dtype, is_object_dtype,
                                 _possibly_infer_to_datetimelike, get_dtype_kinds,
@@ -1053,15 +1053,12 @@ def get_values(self):

         Returns
         -------
         values : numpy array
-            A numpy array of the same dtype as categorical.categories.dtype or dtype string if
-            periods
+            A numpy array of the same dtype as categorical.categories.dtype or
+            Index if datetime / periods
         """
-
-        # if we are a period index, return a string repr
-        if isinstance(self.categories, ABCPeriodIndex):
-            return take_1d(np.array(self.categories.to_native_types(), dtype=object),
-                           self._codes)
-
+        # if we are a datetime or period index, return Index to keep metadata
+        if com.is_datetimelike(self.categories):
+            return self.categories.take(self._codes)
         return np.array(self)

     def check_for_ordered(self, op):
@@ -1308,7 +1305,7 @@ def __len__(self):

     def __iter__(self):
         """Returns an Iterator over the values of this Categorical."""
-        return iter(np.array(self))
+        return iter(self.get_values())

     def _tidy_repr(self, max_vals=10, footer=True):
         """ a short repr displaying only max_vals and an optional (but default footer) """
@@ -1328,7 +1325,7 @@ def _repr_categories(self):
             max_categories = (10 if get_option("display.max_categories") == 0
                               else get_option("display.max_categories"))
         from pandas.core import format as fmt
-        category_strs = fmt.format_array(self.categories.get_values(), None)
+        category_strs = fmt.format_array(self.categories, None)
         if len(category_strs) > max_categories:
             num = max_categories // 2
             head = category_strs[:num]
@@ -1343,8 +1340,9 @@ def _repr_categories_info(self):
         """ Returns a string representation of the footer."""

         category_strs = self._repr_categories()
-        levheader = "Categories (%d, %s): " %
(len(self.categories), - self.categories.dtype) + dtype = getattr(self.categories, 'dtype_str', str(self.categories.dtype)) + + levheader = "Categories (%d, %s): " % (len(self.categories), dtype) width, height = get_terminal_size() max_width = get_option("display.width") or width if com.in_ipython_frontend(): @@ -1352,13 +1350,14 @@ def _repr_categories_info(self): max_width = 0 levstring = "" start = True - cur_col_len = len(levheader) + cur_col_len = len(levheader) # header sep_len, sep = (3, " < ") if self.ordered else (2, ", ") + linesep = sep.rstrip() + "\n" # remove whitespace for val in category_strs: if max_width != 0 and cur_col_len + sep_len + len(val) > max_width: - levstring += "\n" + (" "* len(levheader)) - cur_col_len = len(levheader) - if not start: + levstring += linesep + (" " * (len(levheader) + 1)) + cur_col_len = len(levheader) + 1 # header + a whitespace + elif not start: levstring += sep cur_col_len += len(val) levstring += val diff --git a/pandas/core/format.py b/pandas/core/format.py index a18d0cfa6f195..4ec4375349764 100644 --- a/pandas/core/format.py +++ b/pandas/core/format.py @@ -207,7 +207,7 @@ def _get_formatted_index(self): return fmt_index, have_header def _get_formatted_values(self): - return format_array(self.tr_series.get_values(), None, + return format_array(self.tr_series.values, None, float_format=self.float_format, na_rep=self.na_rep) @@ -681,7 +681,7 @@ def _format_col(self, i): frame = self.tr_frame formatter = self._get_formatter(i) return format_array( - (frame.iloc[:, i]).get_values(), + frame.iloc[:, i].values, formatter, float_format=self.float_format, na_rep=self.na_rep, space=self.col_space ) @@ -1895,8 +1895,13 @@ def get_formatted_cells(self): def format_array(values, formatter, float_format=None, na_rep='NaN', digits=None, space=None, justify='right'): - if com.is_float_dtype(values.dtype): + + if com.is_categorical_dtype(values): + fmt_klass = CategoricalArrayFormatter + elif com.is_float_dtype(values.dtype): fmt_klass = FloatArrayFormatter + elif com.is_period_arraylike(values): + fmt_klass = PeriodArrayFormatter elif com.is_integer_dtype(values.dtype): fmt_klass = IntArrayFormatter elif com.is_datetime64_dtype(values.dtype): @@ -1963,6 +1968,8 @@ def _format(x): return '%s' % formatter(x) vals = self.values + if isinstance(vals, Index): + vals = vals.values is_float = lib.map_infer(vals, com.is_float) & notnull(vals) leading_space = is_float.any() @@ -2076,8 +2083,30 @@ def _format_strings(self): values = values.asobject is_dates_only = _is_dates_only(values) formatter = (self.formatter or _get_format_datetime64(is_dates_only, values, date_format=self.date_format)) - fmt_values = [ formatter(x) for x in self.values ] + fmt_values = [ formatter(x) for x in values ] + + return fmt_values + +class PeriodArrayFormatter(IntArrayFormatter): + + def _format_strings(self): + values = np.array(self.values.to_native_types(), dtype=object) + formatter = self.formatter or (lambda x: '%s' % x) + fmt_values = [formatter(x) for x in values] + return fmt_values + + +class CategoricalArrayFormatter(GenericArrayFormatter): + + def __init__(self, values, *args, **kwargs): + GenericArrayFormatter.__init__(self, values, *args, **kwargs) + + def _format_strings(self): + fmt_values = format_array(self.values.get_values(), self.formatter, + float_format=self.float_format, + na_rep=self.na_rep, digits=self.digits, + space=self.space, justify=self.justify) return fmt_values diff --git a/pandas/core/index.py b/pandas/core/index.py index 
a9878f493251b..a9631d7aabedd 100644 --- a/pandas/core/index.py +++ b/pandas/core/index.py @@ -276,6 +276,11 @@ def dtype(self): """ return the dtype object of the underlying data """ return self._data.dtype + @cache_readonly + def dtype_str(self): + """ return the dtype str of the underlying data """ + return str(self.dtype) + @property def values(self): """ return the underlying data as an ndarray """ @@ -2994,6 +2999,10 @@ def equals(self, other): return False + @property + def _formatter_func(self): + return self.categories._formatter_func + def _format_attrs(self): """ Return a list of tuples of the (attr,formatted_value) diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py index a065d03d4ad72..680b370cbca41 100755 --- a/pandas/tests/test_categorical.py +++ b/pandas/tests/test_categorical.py @@ -1736,6 +1736,582 @@ def test_repr(self): "Categories (26, object): [a < b < c < d ... w < x < y < z]") self.assertEqual(exp,a.__unicode__()) + def test_categorical_repr(self): + c = pd.Categorical([1, 2 ,3]) + exp = """[1, 2, 3] +Categories (3, int64): [1, 2, 3]""" + self.assertEqual(repr(c), exp) + + c = pd.Categorical([1, 2 ,3, 1, 2 ,3], categories=[1, 2, 3]) + exp = """[1, 2, 3, 1, 2, 3] +Categories (3, int64): [1, 2, 3]""" + self.assertEqual(repr(c), exp) + + c = pd.Categorical([1, 2, 3, 4, 5] * 10) + exp = """[1, 2, 3, 4, 5, ..., 1, 2, 3, 4, 5] +Length: 50 +Categories (5, int64): [1, 2, 3, 4, 5]""" + self.assertEqual(repr(c), exp) + + c = pd.Categorical(np.arange(20)) + exp = """[0, 1, 2, 3, 4, ..., 15, 16, 17, 18, 19] +Length: 20 +Categories (20, int64): [0, 1, 2, 3, ..., 16, 17, 18, 19]""" + self.assertEqual(repr(c), exp) + + def test_categorical_repr_ordered(self): + c = pd.Categorical([1, 2 ,3], ordered=True) + exp = """[1, 2, 3] +Categories (3, int64): [1 < 2 < 3]""" + self.assertEqual(repr(c), exp) + + c = pd.Categorical([1, 2 ,3, 1, 2 ,3], categories=[1, 2, 3], ordered=True) + exp = """[1, 2, 3, 1, 2, 3] +Categories (3, int64): [1 < 2 < 3]""" + self.assertEqual(repr(c), exp) + + c = pd.Categorical([1, 2, 3, 4, 5] * 10, ordered=True) + exp = """[1, 2, 3, 4, 5, ..., 1, 2, 3, 4, 5] +Length: 50 +Categories (5, int64): [1 < 2 < 3 < 4 < 5]""" + self.assertEqual(repr(c), exp) + + c = pd.Categorical(np.arange(20), ordered=True) + exp = """[0, 1, 2, 3, 4, ..., 15, 16, 17, 18, 19] +Length: 20 +Categories (20, int64): [0 < 1 < 2 < 3 ... 
16 < 17 < 18 < 19]""" + self.assertEqual(repr(c), exp) + + def test_categorical_repr_datetime(self): + idx = pd.date_range('2011-01-01 09:00', freq='H', periods=5) + c = pd.Categorical(idx) + exp = """[2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00, 2011-01-01 13:00:00] +Categories (5, datetime64[ns]): [2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, + 2011-01-01 12:00:00, 2011-01-01 13:00:00]""" + self.assertEqual(repr(c), exp) + + c = pd.Categorical(idx.append(idx), categories=idx) + exp = """[2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00, 2011-01-01 13:00:00, 2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00, 2011-01-01 13:00:00] +Categories (5, datetime64[ns]): [2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, + 2011-01-01 12:00:00, 2011-01-01 13:00:00]""" + self.assertEqual(repr(c), exp) + + idx = pd.date_range('2011-01-01 09:00', freq='H', periods=5, tz='US/Eastern') + c = pd.Categorical(idx) + exp = """[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00] +Categories (5, datetime64[ns]): [2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, + 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, + 2011-01-01 13:00:00-05:00]""" + self.assertEqual(repr(c), exp) + + c = pd.Categorical(idx.append(idx), categories=idx) + exp = """[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00, 2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00] +Categories (5, datetime64[ns]): [2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, + 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, + 2011-01-01 13:00:00-05:00]""" + self.assertEqual(repr(c), exp) + + def test_categorical_repr_datetime_ordered(self): + idx = pd.date_range('2011-01-01 09:00', freq='H', periods=5) + c = pd.Categorical(idx, ordered=True) + exp = """[2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00, 2011-01-01 13:00:00] +Categories (5, datetime64[ns]): [2011-01-01 09:00:00 < 2011-01-01 10:00:00 < 2011-01-01 11:00:00 < + 2011-01-01 12:00:00 < 2011-01-01 13:00:00]""" + self.assertEqual(repr(c), exp) + + c = pd.Categorical(idx.append(idx), categories=idx, ordered=True) + exp = """[2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00, 2011-01-01 13:00:00, 2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00, 2011-01-01 13:00:00] +Categories (5, datetime64[ns]): [2011-01-01 09:00:00 < 2011-01-01 10:00:00 < 2011-01-01 11:00:00 < + 2011-01-01 12:00:00 < 2011-01-01 13:00:00]""" + self.assertEqual(repr(c), exp) + + idx = pd.date_range('2011-01-01 09:00', freq='H', periods=5, tz='US/Eastern') + c = pd.Categorical(idx, ordered=True) + exp = """[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00] +Categories (5, datetime64[ns]): [2011-01-01 09:00:00-05:00 < 2011-01-01 10:00:00-05:00 < + 2011-01-01 11:00:00-05:00 < 2011-01-01 12:00:00-05:00 < + 2011-01-01 13:00:00-05:00]""" + self.assertEqual(repr(c), exp) + + c = pd.Categorical(idx.append(idx), categories=idx, ordered=True) + exp = """[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 
2011-01-01 13:00:00-05:00, 2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00] +Categories (5, datetime64[ns]): [2011-01-01 09:00:00-05:00 < 2011-01-01 10:00:00-05:00 < + 2011-01-01 11:00:00-05:00 < 2011-01-01 12:00:00-05:00 < + 2011-01-01 13:00:00-05:00]""" + self.assertEqual(repr(c), exp) + + def test_categorical_repr_period(self): + idx = pd.period_range('2011-01-01 09:00', freq='H', periods=5) + c = pd.Categorical(idx) + exp = """[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00] +Categories (5, period): [2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, + 2011-01-01 13:00]""" + self.assertEqual(repr(c), exp) + + c = pd.Categorical(idx.append(idx), categories=idx) + exp = """[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00, 2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00] +Categories (5, period): [2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, + 2011-01-01 13:00]""" + self.assertEqual(repr(c), exp) + + idx = pd.period_range('2011-01', freq='M', periods=5) + c = pd.Categorical(idx) + exp = """[2011-01, 2011-02, 2011-03, 2011-04, 2011-05] +Categories (5, period): [2011-01, 2011-02, 2011-03, 2011-04, 2011-05]""" + self.assertEqual(repr(c), exp) + + c = pd.Categorical(idx.append(idx), categories=idx) + exp = """[2011-01, 2011-02, 2011-03, 2011-04, 2011-05, 2011-01, 2011-02, 2011-03, 2011-04, 2011-05] +Categories (5, period): [2011-01, 2011-02, 2011-03, 2011-04, 2011-05]""" + self.assertEqual(repr(c), exp) + + def test_categorical_repr_period_ordered(self): + idx = pd.period_range('2011-01-01 09:00', freq='H', periods=5) + c = pd.Categorical(idx, ordered=True) + exp = """[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00] +Categories (5, period): [2011-01-01 09:00 < 2011-01-01 10:00 < 2011-01-01 11:00 < 2011-01-01 12:00 < + 2011-01-01 13:00]""" + self.assertEqual(repr(c), exp) + + c = pd.Categorical(idx.append(idx), categories=idx, ordered=True) + exp = """[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00, 2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00] +Categories (5, period): [2011-01-01 09:00 < 2011-01-01 10:00 < 2011-01-01 11:00 < 2011-01-01 12:00 < + 2011-01-01 13:00]""" + self.assertEqual(repr(c), exp) + + idx = pd.period_range('2011-01', freq='M', periods=5) + c = pd.Categorical(idx, ordered=True) + exp = """[2011-01, 2011-02, 2011-03, 2011-04, 2011-05] +Categories (5, period): [2011-01 < 2011-02 < 2011-03 < 2011-04 < 2011-05]""" + self.assertEqual(repr(c), exp) + + c = pd.Categorical(idx.append(idx), categories=idx, ordered=True) + exp = """[2011-01, 2011-02, 2011-03, 2011-04, 2011-05, 2011-01, 2011-02, 2011-03, 2011-04, 2011-05] +Categories (5, period): [2011-01 < 2011-02 < 2011-03 < 2011-04 < 2011-05]""" + self.assertEqual(repr(c), exp) + + def test_categorical_repr_timedelta(self): + idx = pd.timedelta_range('1 days', periods=5) + c = pd.Categorical(idx) + exp = """[1 days, 2 days, 3 days, 4 days, 5 days] +Categories (5, timedelta64[ns]): [1 days, 2 days, 3 days, 4 days, 5 days]""" + self.assertEqual(repr(c), exp) + + c = pd.Categorical(idx.append(idx), categories=idx) + exp = """[1 days, 2 days, 3 days, 4 days, 5 days, 1 days, 2 days, 3 days, 4 days, 5 days] +Categories (5, timedelta64[ns]): [1 days, 2 days, 3 
days, 4 days, 5 days]""" + self.assertEqual(repr(c), exp) + + idx = pd.timedelta_range('1 hours', periods=20) + c = pd.Categorical(idx) + exp = """[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, 4 days 01:00:00, ..., 15 days 01:00:00, 16 days 01:00:00, 17 days 01:00:00, 18 days 01:00:00, 19 days 01:00:00] +Length: 20 +Categories (20, timedelta64[ns]): [0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, + 3 days 01:00:00, ..., 16 days 01:00:00, 17 days 01:00:00, + 18 days 01:00:00, 19 days 01:00:00]""" + self.assertEqual(repr(c), exp) + + c = pd.Categorical(idx.append(idx), categories=idx) + exp = """[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, 4 days 01:00:00, ..., 15 days 01:00:00, 16 days 01:00:00, 17 days 01:00:00, 18 days 01:00:00, 19 days 01:00:00] +Length: 40 +Categories (20, timedelta64[ns]): [0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, + 3 days 01:00:00, ..., 16 days 01:00:00, 17 days 01:00:00, + 18 days 01:00:00, 19 days 01:00:00]""" + self.assertEqual(repr(c), exp) + + def test_categorical_repr_timedelta_ordered(self): + idx = pd.timedelta_range('1 days', periods=5) + c = pd.Categorical(idx, ordered=True) + exp = """[1 days, 2 days, 3 days, 4 days, 5 days] +Categories (5, timedelta64[ns]): [1 days < 2 days < 3 days < 4 days < 5 days]""" + self.assertEqual(repr(c), exp) + + c = pd.Categorical(idx.append(idx), categories=idx, ordered=True) + exp = """[1 days, 2 days, 3 days, 4 days, 5 days, 1 days, 2 days, 3 days, 4 days, 5 days] +Categories (5, timedelta64[ns]): [1 days < 2 days < 3 days < 4 days < 5 days]""" + self.assertEqual(repr(c), exp) + + idx = pd.timedelta_range('1 hours', periods=20) + c = pd.Categorical(idx, ordered=True) + exp = """[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, 4 days 01:00:00, ..., 15 days 01:00:00, 16 days 01:00:00, 17 days 01:00:00, 18 days 01:00:00, 19 days 01:00:00] +Length: 20 +Categories (20, timedelta64[ns]): [0 days 01:00:00 < 1 days 01:00:00 < 2 days 01:00:00 < + 3 days 01:00:00 ... 16 days 01:00:00 < 17 days 01:00:00 < + 18 days 01:00:00 < 19 days 01:00:00]""" + self.assertEqual(repr(c), exp) + + c = pd.Categorical(idx.append(idx), categories=idx, ordered=True) + exp = """[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, 4 days 01:00:00, ..., 15 days 01:00:00, 16 days 01:00:00, 17 days 01:00:00, 18 days 01:00:00, 19 days 01:00:00] +Length: 40 +Categories (20, timedelta64[ns]): [0 days 01:00:00 < 1 days 01:00:00 < 2 days 01:00:00 < + 3 days 01:00:00 ... 16 days 01:00:00 < 17 days 01:00:00 < + 18 days 01:00:00 < 19 days 01:00:00]""" + self.assertEqual(repr(c), exp) + + def test_categorical_series_repr(self): + s = pd.Series(pd.Categorical([1, 2 ,3])) + exp = """0 1 +1 2 +2 3 +dtype: category +Categories (3, int64): [1, 2, 3]""" + self.assertEqual(repr(s), exp) + + s = pd.Series(pd.Categorical(np.arange(10))) + exp = """0 0 +1 1 +2 2 +3 3 +4 4 +5 5 +6 6 +7 7 +8 8 +9 9 +dtype: category +Categories (10, int64): [0, 1, 2, 3, ..., 6, 7, 8, 9]""" + self.assertEqual(repr(s), exp) + + def test_categorical_series_repr_ordered(self): + s = pd.Series(pd.Categorical([1, 2 ,3], ordered=True)) + exp = """0 1 +1 2 +2 3 +dtype: category +Categories (3, int64): [1 < 2 < 3]""" + self.assertEqual(repr(s), exp) + + s = pd.Series(pd.Categorical(np.arange(10), ordered=True)) + exp = """0 0 +1 1 +2 2 +3 3 +4 4 +5 5 +6 6 +7 7 +8 8 +9 9 +dtype: category +Categories (10, int64): [0 < 1 < 2 < 3 ... 
6 < 7 < 8 < 9]""" + self.assertEqual(repr(s), exp) + + def test_categorical_series_repr_datetime(self): + idx = pd.date_range('2011-01-01 09:00', freq='H', periods=5) + s = pd.Series(pd.Categorical(idx)) + exp = """0 2011-01-01 09:00:00 +1 2011-01-01 10:00:00 +2 2011-01-01 11:00:00 +3 2011-01-01 12:00:00 +4 2011-01-01 13:00:00 +dtype: category +Categories (5, datetime64[ns]): [2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, + 2011-01-01 12:00:00, 2011-01-01 13:00:00]""" + self.assertEqual(repr(s), exp) + + idx = pd.date_range('2011-01-01 09:00', freq='H', periods=5, tz='US/Eastern') + s = pd.Series(pd.Categorical(idx)) + exp = """0 2011-01-01 09:00:00-05:00 +1 2011-01-01 10:00:00-05:00 +2 2011-01-01 11:00:00-05:00 +3 2011-01-01 12:00:00-05:00 +4 2011-01-01 13:00:00-05:00 +dtype: category +Categories (5, datetime64[ns]): [2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, + 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, + 2011-01-01 13:00:00-05:00]""" + self.assertEqual(repr(s), exp) + + def test_categorical_series_repr_datetime_ordered(self): + idx = pd.date_range('2011-01-01 09:00', freq='H', periods=5) + s = pd.Series(pd.Categorical(idx, ordered=True)) + exp = """0 2011-01-01 09:00:00 +1 2011-01-01 10:00:00 +2 2011-01-01 11:00:00 +3 2011-01-01 12:00:00 +4 2011-01-01 13:00:00 +dtype: category +Categories (5, datetime64[ns]): [2011-01-01 09:00:00 < 2011-01-01 10:00:00 < 2011-01-01 11:00:00 < + 2011-01-01 12:00:00 < 2011-01-01 13:00:00]""" + self.assertEqual(repr(s), exp) + + idx = pd.date_range('2011-01-01 09:00', freq='H', periods=5, tz='US/Eastern') + s = pd.Series(pd.Categorical(idx, ordered=True)) + exp = """0 2011-01-01 09:00:00-05:00 +1 2011-01-01 10:00:00-05:00 +2 2011-01-01 11:00:00-05:00 +3 2011-01-01 12:00:00-05:00 +4 2011-01-01 13:00:00-05:00 +dtype: category +Categories (5, datetime64[ns]): [2011-01-01 09:00:00-05:00 < 2011-01-01 10:00:00-05:00 < + 2011-01-01 11:00:00-05:00 < 2011-01-01 12:00:00-05:00 < + 2011-01-01 13:00:00-05:00]""" + self.assertEqual(repr(s), exp) + + def test_categorical_series_repr_period(self): + idx = pd.period_range('2011-01-01 09:00', freq='H', periods=5) + s = pd.Series(pd.Categorical(idx)) + exp = """0 2011-01-01 09:00 +1 2011-01-01 10:00 +2 2011-01-01 11:00 +3 2011-01-01 12:00 +4 2011-01-01 13:00 +dtype: category +Categories (5, period): [2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, + 2011-01-01 13:00]""" + self.assertEqual(repr(s), exp) + + idx = pd.period_range('2011-01', freq='M', periods=5) + s = pd.Series(pd.Categorical(idx)) + exp = """0 2011-01 +1 2011-02 +2 2011-03 +3 2011-04 +4 2011-05 +dtype: category +Categories (5, period): [2011-01, 2011-02, 2011-03, 2011-04, 2011-05]""" + self.assertEqual(repr(s), exp) + + def test_categorical_series_repr_period_ordered(self): + idx = pd.period_range('2011-01-01 09:00', freq='H', periods=5) + s = pd.Series(pd.Categorical(idx, ordered=True)) + exp = """0 2011-01-01 09:00 +1 2011-01-01 10:00 +2 2011-01-01 11:00 +3 2011-01-01 12:00 +4 2011-01-01 13:00 +dtype: category +Categories (5, period): [2011-01-01 09:00 < 2011-01-01 10:00 < 2011-01-01 11:00 < 2011-01-01 12:00 < + 2011-01-01 13:00]""" + self.assertEqual(repr(s), exp) + + idx = pd.period_range('2011-01', freq='M', periods=5) + s = pd.Series(pd.Categorical(idx, ordered=True)) + exp = """0 2011-01 +1 2011-02 +2 2011-03 +3 2011-04 +4 2011-05 +dtype: category +Categories (5, period): [2011-01 < 2011-02 < 2011-03 < 2011-04 < 2011-05]""" + self.assertEqual(repr(s), exp) + + def 
test_categorical_series_repr_timedelta(self): + idx = pd.timedelta_range('1 days', periods=5) + s = pd.Series(pd.Categorical(idx)) + exp = """0 1 days +1 2 days +2 3 days +3 4 days +4 5 days +dtype: category +Categories (5, timedelta64[ns]): [1 days, 2 days, 3 days, 4 days, 5 days]""" + self.assertEqual(repr(s), exp) + + idx = pd.timedelta_range('1 hours', periods=10) + s = pd.Series(pd.Categorical(idx)) + exp = """0 0 days 01:00:00 +1 1 days 01:00:00 +2 2 days 01:00:00 +3 3 days 01:00:00 +4 4 days 01:00:00 +5 5 days 01:00:00 +6 6 days 01:00:00 +7 7 days 01:00:00 +8 8 days 01:00:00 +9 9 days 01:00:00 +dtype: category +Categories (10, timedelta64[ns]): [0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, + 3 days 01:00:00, ..., 6 days 01:00:00, 7 days 01:00:00, + 8 days 01:00:00, 9 days 01:00:00]""" + self.assertEqual(repr(s), exp) + + def test_categorical_series_repr_timedelta_ordered(self): + idx = pd.timedelta_range('1 days', periods=5) + s = pd.Series(pd.Categorical(idx, ordered=True)) + exp = """0 1 days +1 2 days +2 3 days +3 4 days +4 5 days +dtype: category +Categories (5, timedelta64[ns]): [1 days < 2 days < 3 days < 4 days < 5 days]""" + self.assertEqual(repr(s), exp) + + idx = pd.timedelta_range('1 hours', periods=10) + s = pd.Series(pd.Categorical(idx, ordered=True)) + exp = """0 0 days 01:00:00 +1 1 days 01:00:00 +2 2 days 01:00:00 +3 3 days 01:00:00 +4 4 days 01:00:00 +5 5 days 01:00:00 +6 6 days 01:00:00 +7 7 days 01:00:00 +8 8 days 01:00:00 +9 9 days 01:00:00 +dtype: category +Categories (10, timedelta64[ns]): [0 days 01:00:00 < 1 days 01:00:00 < 2 days 01:00:00 < + 3 days 01:00:00 ... 6 days 01:00:00 < 7 days 01:00:00 < + 8 days 01:00:00 < 9 days 01:00:00]""" + self.assertEqual(repr(s), exp) + + def test_categorical_index_repr(self): + idx = pd.CategoricalIndex(pd.Categorical([1, 2 ,3])) + exp = """CategoricalIndex([1, 2, 3], categories=[1, 2, 3], ordered=False, dtype='category')""" + self.assertEqual(repr(idx), exp) + + i = pd.CategoricalIndex(pd.Categorical(np.arange(10))) + exp = """CategoricalIndex([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], categories=[0, 1, 2, 3, 4, 5, 6, 7, ...], ordered=False, dtype='category')""" + self.assertEqual(repr(i), exp) + + def test_categorical_index_repr_ordered(self): + i = pd.CategoricalIndex(pd.Categorical([1, 2 ,3], ordered=True)) + exp = """CategoricalIndex([1, 2, 3], categories=[1, 2, 3], ordered=True, dtype='category')""" + self.assertEqual(repr(i), exp) + + i = pd.CategoricalIndex(pd.Categorical(np.arange(10), ordered=True)) + exp = """CategoricalIndex([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], categories=[0, 1, 2, 3, 4, 5, 6, 7, ...], ordered=True, dtype='category')""" + self.assertEqual(repr(i), exp) + + def test_categorical_index_repr_datetime(self): + idx = pd.date_range('2011-01-01 09:00', freq='H', periods=5) + i = pd.CategoricalIndex(pd.Categorical(idx)) + exp = """CategoricalIndex(['2011-01-01 09:00:00', '2011-01-01 10:00:00', + '2011-01-01 11:00:00', '2011-01-01 12:00:00', + '2011-01-01 13:00:00'], + categories=[2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00, 2011-01-01 13:00:00], ordered=False, dtype='category')""" + self.assertEqual(repr(i), exp) + + idx = pd.date_range('2011-01-01 09:00', freq='H', periods=5, tz='US/Eastern') + i = pd.CategoricalIndex(pd.Categorical(idx)) + exp = """CategoricalIndex(['2011-01-01 09:00:00-05:00', '2011-01-01 10:00:00-05:00', + '2011-01-01 11:00:00-05:00', '2011-01-01 12:00:00-05:00', + '2011-01-01 13:00:00-05:00'], + categories=[2011-01-01 09:00:00-05:00, 2011-01-01 
10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00], ordered=False, dtype='category')""" + self.assertEqual(repr(i), exp) + + def test_categorical_index_repr_datetime_ordered(self): + idx = pd.date_range('2011-01-01 09:00', freq='H', periods=5) + i = pd.CategoricalIndex(pd.Categorical(idx, ordered=True)) + exp = """CategoricalIndex(['2011-01-01 09:00:00', '2011-01-01 10:00:00', + '2011-01-01 11:00:00', '2011-01-01 12:00:00', + '2011-01-01 13:00:00'], + categories=[2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00, 2011-01-01 13:00:00], ordered=True, dtype='category')""" + self.assertEqual(repr(i), exp) + + idx = pd.date_range('2011-01-01 09:00', freq='H', periods=5, tz='US/Eastern') + i = pd.CategoricalIndex(pd.Categorical(idx, ordered=True)) + exp = """CategoricalIndex(['2011-01-01 09:00:00-05:00', '2011-01-01 10:00:00-05:00', + '2011-01-01 11:00:00-05:00', '2011-01-01 12:00:00-05:00', + '2011-01-01 13:00:00-05:00'], + categories=[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00], ordered=True, dtype='category')""" + self.assertEqual(repr(i), exp) + + i = pd.CategoricalIndex(pd.Categorical(idx.append(idx), ordered=True)) + exp = """CategoricalIndex(['2011-01-01 09:00:00-05:00', '2011-01-01 10:00:00-05:00', + '2011-01-01 11:00:00-05:00', '2011-01-01 12:00:00-05:00', + '2011-01-01 13:00:00-05:00', '2011-01-01 09:00:00-05:00', + '2011-01-01 10:00:00-05:00', '2011-01-01 11:00:00-05:00', + '2011-01-01 12:00:00-05:00', '2011-01-01 13:00:00-05:00'], + categories=[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00], ordered=True, dtype='category')""" + self.assertEqual(repr(i), exp) + + def test_categorical_index_repr_period(self): + # test all length + idx = pd.period_range('2011-01-01 09:00', freq='H', periods=1) + i = pd.CategoricalIndex(pd.Categorical(idx)) + exp = """CategoricalIndex(['2011-01-01 09:00'], categories=[2011-01-01 09:00], ordered=False, dtype='category')""" + self.assertEqual(repr(i), exp) + + idx = pd.period_range('2011-01-01 09:00', freq='H', periods=2) + i = pd.CategoricalIndex(pd.Categorical(idx)) + exp = """CategoricalIndex(['2011-01-01 09:00', '2011-01-01 10:00'], categories=[2011-01-01 09:00, 2011-01-01 10:00], ordered=False, dtype='category')""" + self.assertEqual(repr(i), exp) + + idx = pd.period_range('2011-01-01 09:00', freq='H', periods=3) + i = pd.CategoricalIndex(pd.Categorical(idx)) + exp = """CategoricalIndex(['2011-01-01 09:00', '2011-01-01 10:00', '2011-01-01 11:00'], categories=[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00], ordered=False, dtype='category')""" + self.assertEqual(repr(i), exp) + + idx = pd.period_range('2011-01-01 09:00', freq='H', periods=5) + i = pd.CategoricalIndex(pd.Categorical(idx)) + exp = """CategoricalIndex(['2011-01-01 09:00', '2011-01-01 10:00', '2011-01-01 11:00', + '2011-01-01 12:00', '2011-01-01 13:00'], + categories=[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00], ordered=False, dtype='category')""" + self.assertEqual(repr(i), exp) + + i = pd.CategoricalIndex(pd.Categorical(idx.append(idx))) + exp = """CategoricalIndex(['2011-01-01 09:00', '2011-01-01 10:00', '2011-01-01 11:00', + '2011-01-01 12:00', '2011-01-01 13:00', '2011-01-01 09:00', + '2011-01-01 10:00', '2011-01-01 11:00', '2011-01-01 12:00', + '2011-01-01 13:00'], + 
categories=[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00], ordered=False, dtype='category')""" + self.assertEqual(repr(i), exp) + + idx = pd.period_range('2011-01', freq='M', periods=5) + i = pd.CategoricalIndex(pd.Categorical(idx)) + exp = """CategoricalIndex(['2011-01', '2011-02', '2011-03', '2011-04', '2011-05'], categories=[2011-01, 2011-02, 2011-03, 2011-04, 2011-05], ordered=False, dtype='category')""" + self.assertEqual(repr(i), exp) + + def test_categorical_index_repr_period_ordered(self): + idx = pd.period_range('2011-01-01 09:00', freq='H', periods=5) + i = pd.CategoricalIndex(pd.Categorical(idx, ordered=True)) + exp = """CategoricalIndex(['2011-01-01 09:00', '2011-01-01 10:00', '2011-01-01 11:00', + '2011-01-01 12:00', '2011-01-01 13:00'], + categories=[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00], ordered=True, dtype='category')""" + self.assertEqual(repr(i), exp) + + idx = pd.period_range('2011-01', freq='M', periods=5) + i = pd.CategoricalIndex(pd.Categorical(idx, ordered=True)) + exp = """CategoricalIndex(['2011-01', '2011-02', '2011-03', '2011-04', '2011-05'], categories=[2011-01, 2011-02, 2011-03, 2011-04, 2011-05], ordered=True, dtype='category')""" + self.assertEqual(repr(i), exp) + + def test_categorical_index_repr_timedelta(self): + idx = pd.timedelta_range('1 days', periods=5) + i = pd.CategoricalIndex(pd.Categorical(idx)) + exp = """CategoricalIndex(['1 days', '2 days', '3 days', '4 days', '5 days'], categories=[1 days 00:00:00, 2 days 00:00:00, 3 days 00:00:00, 4 days 00:00:00, 5 days 00:00:00], ordered=False, dtype='category')""" + self.assertEqual(repr(i), exp) + + idx = pd.timedelta_range('1 hours', periods=10) + i = pd.CategoricalIndex(pd.Categorical(idx)) + exp = """CategoricalIndex(['0 days 01:00:00', '1 days 01:00:00', '2 days 01:00:00', + '3 days 01:00:00', '4 days 01:00:00', '5 days 01:00:00', + '6 days 01:00:00', '7 days 01:00:00', '8 days 01:00:00', + '9 days 01:00:00'], + categories=[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, 4 days 01:00:00, 5 days 01:00:00, 6 days 01:00:00, 7 days 01:00:00, ...], ordered=False, dtype='category')""" + self.assertEqual(repr(i), exp) + + def test_categorical_index_repr_timedelta_ordered(self): + idx = pd.timedelta_range('1 days', periods=5) + i = pd.CategoricalIndex(pd.Categorical(idx, ordered=True)) + exp = """CategoricalIndex(['1 days', '2 days', '3 days', '4 days', '5 days'], categories=[1 days 00:00:00, 2 days 00:00:00, 3 days 00:00:00, 4 days 00:00:00, 5 days 00:00:00], ordered=True, dtype='category')""" + self.assertEqual(repr(i), exp) + + idx = pd.timedelta_range('1 hours', periods=10) + i = pd.CategoricalIndex(pd.Categorical(idx, ordered=True)) + exp = """CategoricalIndex(['0 days 01:00:00', '1 days 01:00:00', '2 days 01:00:00', + '3 days 01:00:00', '4 days 01:00:00', '5 days 01:00:00', + '6 days 01:00:00', '7 days 01:00:00', '8 days 01:00:00', + '9 days 01:00:00'], + categories=[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, 4 days 01:00:00, 5 days 01:00:00, 6 days 01:00:00, 7 days 01:00:00, ...], ordered=True, dtype='category')""" + self.assertEqual(repr(i), exp) + + def test_categorical_frame(self): + # normal DataFrame + dt = pd.date_range('2011-01-01 09:00', freq='H', periods=5, tz='US/Eastern') + p = pd.period_range('2011-01', freq='M', periods=5) + df = pd.DataFrame({'dt': dt, 'p': p}) + exp = """ dt p +0 2011-01-01 09:00:00-05:00 2011-01 +1 2011-01-01 10:00:00-05:00 2011-02 
+2 2011-01-01 11:00:00-05:00 2011-03 +3 2011-01-01 12:00:00-05:00 2011-04 +4 2011-01-01 13:00:00-05:00 2011-05""" + + df = pd.DataFrame({'dt': pd.Categorical(dt), 'p': pd.Categorical(p)}) + self.assertEqual(repr(df), exp) + def test_info(self): # make sure it works diff --git a/pandas/tests/test_index.py b/pandas/tests/test_index.py index 15023b77694e6..c7418a5651ad7 100644 --- a/pandas/tests/test_index.py +++ b/pandas/tests/test_index.py @@ -132,6 +132,15 @@ def test_str(self): self.assertTrue("'foo'" in str(idx)) self.assertTrue(idx.__class__.__name__ in str(idx)) + def test_dtype_str(self): + for idx in self.indices.values(): + dtype = idx.dtype_str + self.assertIsInstance(dtype, compat.string_types) + if isinstance(idx, PeriodIndex): + self.assertEqual(dtype, 'period') + else: + self.assertEqual(dtype, str(idx.dtype)) + def test_repr_max_seq_item_setting(self): # GH10182 idx = self.create_index() diff --git a/pandas/tseries/period.py b/pandas/tseries/period.py index bb0eda8260704..e7b229e91cbc8 100644 --- a/pandas/tseries/period.py +++ b/pandas/tseries/period.py @@ -21,6 +21,8 @@ _values_from_object, ABCSeries, is_integer, is_float, is_object_dtype) from pandas import compat +from pandas.util.decorators import cache_readonly + from pandas.lib import Timestamp, Timedelta import pandas.lib as lib import pandas.tslib as tslib @@ -534,6 +536,11 @@ def shift(self, n): values[mask] = tslib.iNaT return PeriodIndex(data=values, name=self.name, freq=self.freq) + @cache_readonly + def dtype_str(self): + """ return the dtype str of the underlying data """ + return self.inferred_type + @property def inferred_type(self): # b/c data is represented as ints make sure we can't have ambiguous From b410381375c4c6d3ad5bd45c8db631d257e8deec Mon Sep 17 00:00:00 2001 From: sinhrks Date: Sun, 30 Mar 2014 21:18:22 +0900 Subject: [PATCH 04/12] ENH: duplicated and drop_duplicates now accept take=all kw --- doc/source/indexing.rst | 10 +- doc/source/whatsnew/v0.17.0.txt | 10 ++ pandas/core/base.py | 27 +++-- pandas/core/frame.py | 27 +++-- pandas/core/index.py | 22 ++-- pandas/core/series.py | 13 ++- pandas/hashtable.pyx | 28 ++++- pandas/lib.pyx | 26 +++-- pandas/tests/test_base.py | 69 ++++++++--- pandas/tests/test_frame.py | 197 +++++++++++++++++++++++++++++++- pandas/tests/test_index.py | 6 +- pandas/tests/test_multilevel.py | 15 +++ pandas/tests/test_series.py | 72 +++++++++--- pandas/tests/test_tseries.py | 16 ++- 14 files changed, 448 insertions(+), 90 deletions(-) diff --git a/doc/source/indexing.rst b/doc/source/indexing.rst index 9f58ee2f8b99b..251d94cbdd911 100644 --- a/doc/source/indexing.rst +++ b/doc/source/indexing.rst @@ -1178,8 +1178,7 @@ takes as an argument the columns to use to identify duplicated rows. - ``drop_duplicates`` removes duplicate rows. By default, the first observed row of a duplicate set is considered unique, but -each method has a ``take_last`` parameter that indicates the last observed row -should be taken instead. +each method has a ``keep`` parameter to specify targets to be kept. .. ipython:: python @@ -1187,8 +1186,11 @@ should be taken instead. 
   df2 = pd.DataFrame({'a' : ['one', 'one', 'two', 'two', 'two', 'three', 'four'],
                       'b' : ['x', 'y', 'y', 'x', 'y', 'x', 'x'],
                       'c' : np.random.randn(7)})
    df2.duplicated(['a','b'])
+   df2.duplicated(['a','b'], keep='last')
+   df2.duplicated(['a','b'], keep=False)
    df2.drop_duplicates(['a','b'])
-   df2.drop_duplicates(['a','b'], take_last=True)
+   df2.drop_duplicates(['a','b'], keep='last')
+   df2.drop_duplicates(['a','b'], keep=False)

 An alternative way to drop duplicates on the index is ``.groupby(level=0)`` combined with ``first()`` or ``last()``.

@@ -1199,7 +1201,7 @@ An alternative way to drop duplicates on the index is ``.groupby(level=0)`` comb
    df3.groupby(level=0).first()

    # a bit more verbose
-   df3.reset_index().drop_duplicates(subset='b', take_last=False).set_index('b')
+   df3.reset_index().drop_duplicates(subset='b', keep='first').set_index('b')

 .. _indexing.dictionarylike:

diff --git a/doc/source/whatsnew/v0.17.0.txt b/doc/source/whatsnew/v0.17.0.txt
index 86bb78f4066ab..70d616ca72c1b 100644
--- a/doc/source/whatsnew/v0.17.0.txt
+++ b/doc/source/whatsnew/v0.17.0.txt
@@ -142,6 +142,15 @@ Other enhancements
 - ``pd.merge`` will now allow duplicate column names if they are not merged upon (:issue:`10639`).
 - ``pd.pivot`` will now allow passing index as ``None`` (:issue:`3962`).

+- ``drop_duplicates`` and ``duplicated`` now accept a ``keep`` keyword to target first, last, and all duplicates. The ``take_last`` keyword is deprecated, see :ref:`deprecations <whatsnew_0170.deprecations>` (:issue:`6511`, :issue:`8505`)
+
+.. ipython:: python
+
+   s = pd.Series(['A', 'B', 'C', 'A', 'B', 'D'])
+   s.drop_duplicates()
+   s.drop_duplicates(keep='last')
+   s.drop_duplicates(keep=False)
+
 .. _whatsnew_0170.api:

@@ -520,6 +529,7 @@ Deprecations
 =====================   =================================

 - ``Categorical.name`` was deprecated to make ``Categorical`` more ``numpy.ndarray`` like. Use ``Series(cat, name="whatever")`` instead (:issue:`10482`).
+- ``drop_duplicates`` and ``duplicated``'s ``take_last`` keyword was deprecated in favor of ``keep``. (:issue:`6511`, :issue:`8505`)

 .. _whatsnew_0170.prior_deprecations:

diff --git a/pandas/core/base.py b/pandas/core/base.py
index c3004aec60cc5..6d1c89a7a2f89 100644
--- a/pandas/core/base.py
+++ b/pandas/core/base.py
@@ -6,7 +6,7 @@
 from pandas.core import common as com
 import pandas.core.nanops as nanops
 import pandas.lib as lib
-from pandas.util.decorators import Appender, cache_readonly
+from pandas.util.decorators import Appender, cache_readonly, deprecate_kwarg
 from pandas.core.strings import StringMethods
 from pandas.core.common import AbstractMethodError

@@ -543,8 +543,12 @@ def _dir_deletions(self):

         Parameters
         ----------
-        take_last : boolean, default False
-            Take the last observed index in a group. Default first
+
+        keep : {'first', 'last', False}, default 'first'
+            - ``first`` : Drop duplicates except for the first occurrence.
+            - ``last`` : Drop duplicates except for the last occurrence.
+            - False : Drop all duplicates.
+ take_last : deprecated %(inplace)s Returns @@ -552,9 +556,10 @@ def _dir_deletions(self): deduplicated : %(klass)s """) + @deprecate_kwarg('take_last', 'keep', mapping={True: 'last', False: 'first'}) @Appender(_shared_docs['drop_duplicates'] % _indexops_doc_kwargs) - def drop_duplicates(self, take_last=False, inplace=False): - duplicated = self.duplicated(take_last=take_last) + def drop_duplicates(self, keep='first', inplace=False): + duplicated = self.duplicated(keep=keep) result = self[np.logical_not(duplicated)] if inplace: return self._update_inplace(result) @@ -566,18 +571,22 @@ def drop_duplicates(self, take_last=False, inplace=False): Parameters ---------- - take_last : boolean, default False - Take the last observed index in a group. Default first + keep : {'first', 'last', False}, default 'first' + - ``first`` : Mark duplicates as ``True`` except for the first occurrence. + - ``last`` : Mark duplicates as ``True`` except for the last occurrence. + - False : Mark all duplicates as ``True``. + take_last : deprecated Returns ------- duplicated : %(duplicated)s """) + @deprecate_kwarg('take_last', 'keep', mapping={True: 'last', False: 'first'}) @Appender(_shared_docs['duplicated'] % _indexops_doc_kwargs) - def duplicated(self, take_last=False): + def duplicated(self, keep='first'): keys = com._ensure_object(self.values) - duplicated = lib.duplicated(keys, take_last=take_last) + duplicated = lib.duplicated(keys, keep=keep) try: return self._constructor(duplicated, index=self.index).__finalize__(self) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index d8948bc82fe61..fe9c9bece1f79 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2866,8 +2866,9 @@ def dropna(self, axis=0, how='any', thresh=None, subset=None, else: return result + @deprecate_kwarg('take_last', 'keep', mapping={True: 'last', False: 'first'}) @deprecate_kwarg(old_arg_name='cols', new_arg_name='subset') - def drop_duplicates(self, subset=None, take_last=False, inplace=False): + def drop_duplicates(self, subset=None, keep='first', inplace=False): """ Return DataFrame with duplicate rows removed, optionally only considering certain columns @@ -2877,8 +2878,11 @@ def drop_duplicates(self, subset=None, take_last=False, inplace=False): subset : column label or sequence of labels, optional Only consider certain columns for identifying duplicates, by default use all of the columns - take_last : boolean, default False - Take the last observed row in a row. Defaults to the first row + keep : {'first', 'last', False}, default 'first' + - ``first`` : Drop duplicates except for the first occurrence. + - ``last`` : Drop duplicates except for the last occurrence. + - False : Drop all duplicates. 
+ take_last : deprecated inplace : boolean, default False Whether to drop duplicates in place or to return a copy cols : kwargs only argument of subset [deprecated] @@ -2887,7 +2891,7 @@ def drop_duplicates(self, subset=None, take_last=False, inplace=False): ------- deduplicated : DataFrame """ - duplicated = self.duplicated(subset, take_last=take_last) + duplicated = self.duplicated(subset, keep=keep) if inplace: inds, = (-duplicated).nonzero() @@ -2896,8 +2900,9 @@ def drop_duplicates(self, subset=None, take_last=False, inplace=False): else: return self[-duplicated] + @deprecate_kwarg('take_last', 'keep', mapping={True: 'last', False: 'first'}) @deprecate_kwarg(old_arg_name='cols', new_arg_name='subset') - def duplicated(self, subset=None, take_last=False): + def duplicated(self, subset=None, keep='first'): """ Return boolean Series denoting duplicate rows, optionally only considering certain columns @@ -2907,9 +2912,13 @@ def duplicated(self, subset=None, take_last=False): subset : column label or sequence of labels, optional Only consider certain columns for identifying duplicates, by default use all of the columns - take_last : boolean, default False - For a set of distinct duplicate rows, flag all but the last row as - duplicated. Default is for all but the first row to be flagged + keep : {'first', 'last', False}, default 'first' + - ``first`` : Mark duplicates as ``True`` except for the + first occurrence. + - ``last`` : Mark duplicates as ``True`` except for the + last occurrence. + - False : Mark all duplicates as ``True``. + take_last : deprecated cols : kwargs only argument of subset [deprecated] Returns @@ -2935,7 +2944,7 @@ def f(vals): labels, shape = map(list, zip( * map(f, vals))) ids = get_group_index(labels, shape, sort=False, xnull=False) - return Series(duplicated_int64(ids, take_last), index=self.index) + return Series(duplicated_int64(ids, keep), index=self.index) #---------------------------------------------------------------------- # Sorting diff --git a/pandas/core/index.py b/pandas/core/index.py index a9631d7aabedd..febcfa37994a3 100644 --- a/pandas/core/index.py +++ b/pandas/core/index.py @@ -16,7 +16,7 @@ from pandas.lib import Timestamp, Timedelta, is_datetime_array from pandas.core.base import PandasObject, FrozenList, FrozenNDArray, IndexOpsMixin, _shared_docs, PandasDelegate from pandas.util.decorators import (Appender, Substitution, cache_readonly, - deprecate) + deprecate, deprecate_kwarg) import pandas.core.common as com from pandas.core.common import (isnull, array_equivalent, is_dtype_equal, is_object_dtype, _values_from_object, is_float, is_integer, is_iterator, is_categorical_dtype, @@ -2628,13 +2628,15 @@ def drop(self, labels, errors='raise'): indexer = indexer[~mask] return self.delete(indexer) + @deprecate_kwarg('take_last', 'keep', mapping={True: 'last', False: 'first'}) @Appender(_shared_docs['drop_duplicates'] % _index_doc_kwargs) - def drop_duplicates(self, take_last=False): - return super(Index, self).drop_duplicates(take_last=take_last) + def drop_duplicates(self, keep='first'): + return super(Index, self).drop_duplicates(keep=keep) + @deprecate_kwarg('take_last', 'keep', mapping={True: 'last', False: 'first'}) @Appender(_shared_docs['duplicated'] % _index_doc_kwargs) - def duplicated(self, take_last=False): - return super(Index, self).duplicated(take_last=take_last) + def duplicated(self, keep='first'): + return super(Index, self).duplicated(keep=keep) def _evaluate_with_timedelta_like(self, other, op, opstr): raise TypeError("can only 
perform ops with timedelta like values")

@@ -3065,10 +3067,11 @@ def _engine(self):
     def is_unique(self):
         return not self.duplicated().any()

+    @deprecate_kwarg('take_last', 'keep', mapping={True: 'last', False: 'first'})
     @Appender(_shared_docs['duplicated'] % _index_doc_kwargs)
-    def duplicated(self, take_last=False):
+    def duplicated(self, keep='first'):
         from pandas.hashtable import duplicated_int64
-        return duplicated_int64(self.codes.astype('i8'), take_last)
+        return duplicated_int64(self.codes.astype('i8'), keep)

     def get_loc(self, key, method=None):
         """
@@ -4228,15 +4231,16 @@ def _has_complex_internals(self):
     def is_unique(self):
         return not self.duplicated().any()

+    @deprecate_kwarg('take_last', 'keep', mapping={True: 'last', False: 'first'})
     @Appender(_shared_docs['duplicated'] % _index_doc_kwargs)
-    def duplicated(self, take_last=False):
+    def duplicated(self, keep='first'):
         from pandas.core.groupby import get_group_index
         from pandas.hashtable import duplicated_int64

         shape = map(len, self.levels)
         ids = get_group_index(self.labels, shape, sort=False, xnull=False)

-        return duplicated_int64(ids, take_last)
+        return duplicated_int64(ids, keep)

     def get_value(self, series, key):
         # somewhat broken encapsulation

diff --git a/pandas/core/series.py b/pandas/core/series.py
index 6586fa10935e6..87fde996aaa67 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -46,7 +46,7 @@
 import pandas.core.datetools as datetools
 import pandas.core.format as fmt
 import pandas.core.nanops as nanops
-from pandas.util.decorators import Appender, cache_readonly
+from pandas.util.decorators import Appender, cache_readonly, deprecate_kwarg
 import pandas.lib as lib
 import pandas.tslib as tslib

@@ -1155,14 +1155,15 @@ def mode(self):
         from pandas.core.algorithms import mode
         return mode(self)

+    @deprecate_kwarg('take_last', 'keep', mapping={True: 'last', False: 'first'})
     @Appender(base._shared_docs['drop_duplicates'] % _shared_doc_kwargs)
-    def drop_duplicates(self, take_last=False, inplace=False):
-        return super(Series, self).drop_duplicates(take_last=take_last,
-                                                   inplace=inplace)
+    def drop_duplicates(self, keep='first', inplace=False):
+        return super(Series, self).drop_duplicates(keep=keep, inplace=inplace)

+    @deprecate_kwarg('take_last', 'keep', mapping={True: 'last', False: 'first'})
     @Appender(base._shared_docs['duplicated'] % _shared_doc_kwargs)
-    def duplicated(self, take_last=False):
-        return super(Series, self).duplicated(take_last=take_last)
+    def duplicated(self, keep='first'):
+        return super(Series, self).duplicated(keep=keep)

     def idxmin(self, axis=None, out=None, skipna=True):
         """

diff --git a/pandas/hashtable.pyx b/pandas/hashtable.pyx
index 3b3ea9fa032f8..7dbd1b45c938f 100644
--- a/pandas/hashtable.pyx
+++ b/pandas/hashtable.pyx
@@ -1026,25 +1026,43 @@ def mode_int64(int64_t[:] values):

 @cython.wraparound(False)
 @cython.boundscheck(False)
-def duplicated_int64(ndarray[int64_t, ndim=1] values, int take_last):
+def duplicated_int64(ndarray[int64_t, ndim=1] values, object keep='first'):
     cdef:
         int ret = 0
+        int64_t value              # declared 64-bit so values are not truncated
+        Py_ssize_t k
         Py_ssize_t i, n = len(values)
         kh_int64_t * table = kh_init_int64()
         ndarray[uint8_t, ndim=1, cast=True] out = np.empty(n, dtype='bool')

     kh_resize_int64(table, min(n, _SIZE_HINT_LIMIT))

-    with nogil:
-        if take_last:
+    if keep not in ('last', 'first', False):
+        raise ValueError('keep must be either "first", "last" or False')
+
+    if keep == 'last':
+        with nogil:
             for i from n > i >= 0:
                 kh_put_int64(table, values[i], &ret)
                 out[i] = ret == 0
-        else:
+    elif keep == 'first':
with nogil: for i from 0 <= i < n: kh_put_int64(table, values[i], &ret) out[i] = ret == 0 - + else: + with nogil: + for i from 0 <= i < n: + value = values[i] + k = kh_get_int64(table, value) + if k != table.n_buckets: + out[table.vals[k]] = 1 + out[i] = 1 + else: + k = kh_put_int64(table, value, &ret) + table.keys[k] = value + table.vals[k] = i + out[i] = 0 kh_destroy_int64(table) return out diff --git a/pandas/lib.pyx b/pandas/lib.pyx index e839210fbbada..07f0c89535a77 100644 --- a/pandas/lib.pyx +++ b/pandas/lib.pyx @@ -1348,35 +1348,47 @@ def fast_zip_fillna(list ndarrays, fill_value=pandas_null): return result -def duplicated(ndarray[object] values, take_last=False): + +def duplicated(ndarray[object] values, object keep='first'): cdef: Py_ssize_t i, n - set seen = set() + dict seen = dict() object row n = len(values) cdef ndarray[uint8_t] result = np.zeros(n, dtype=np.uint8) - if take_last: + if keep == 'last': for i from n > i >= 0: row = values[i] - if row in seen: result[i] = 1 else: - seen.add(row) + seen[row] = i result[i] = 0 - else: + elif keep == 'first': for i from 0 <= i < n: row = values[i] if row in seen: result[i] = 1 else: - seen.add(row) + seen[row] = i result[i] = 0 + elif keep is False: + for i from 0 <= i < n: + row = values[i] + if row in seen: + result[i] = 1 + result[seen[row]] = 1 + else: + seen[row] = i + result[i] = 0 + else: + raise ValueError('keep must be either "first", "last" or False') return result.view(np.bool_) + def generate_slices(ndarray[int64_t] labels, Py_ssize_t ngroups): cdef: Py_ssize_t i, group_size, n, start diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py index d47e7dbe751c7..066b359d72b5c 100644 --- a/pandas/tests/test_base.py +++ b/pandas/tests/test_base.py @@ -683,6 +683,10 @@ def test_factorize(self): def test_duplicated_drop_duplicates(self): # GH 4060 + + import warnings + warnings.simplefilter('always') + for original in self.objs: if isinstance(original, Index): @@ -714,15 +718,36 @@ def test_duplicated_drop_duplicates(self): self.assertTrue(duplicated.dtype == bool) tm.assert_index_equal(idx.drop_duplicates(), original) - last_base = [False] * len(idx) - last_base[3] = True - last_base[5] = True - expected = np.array(last_base) - duplicated = idx.duplicated(take_last=True) + base = [False] * len(idx) + base[3] = True + base[5] = True + expected = np.array(base) + + duplicated = idx.duplicated(keep='last') + tm.assert_numpy_array_equal(duplicated, expected) + self.assertTrue(duplicated.dtype == bool) + result = idx.drop_duplicates(keep='last') + tm.assert_index_equal(result, idx[~expected]) + + # deprecate take_last + with tm.assert_produces_warning(FutureWarning): + duplicated = idx.duplicated(take_last=True) + tm.assert_numpy_array_equal(duplicated, expected) + self.assertTrue(duplicated.dtype == bool) + with tm.assert_produces_warning(FutureWarning): + result = idx.drop_duplicates(take_last=True) + tm.assert_index_equal(result, idx[~expected]) + + base = [False] * len(original) + [True, True] + base[3] = True + base[5] = True + expected = np.array(base) + + duplicated = idx.duplicated(keep=False) tm.assert_numpy_array_equal(duplicated, expected) self.assertTrue(duplicated.dtype == bool) - tm.assert_index_equal(idx.drop_duplicates(take_last=True), - idx[~np.array(last_base)]) + result = idx.drop_duplicates(keep=False) + tm.assert_index_equal(result, idx[~expected]) with tm.assertRaisesRegexp(TypeError, "drop_duplicates\(\) got an unexpected keyword argument"): @@ -745,13 +770,29 @@ def 
test_duplicated_drop_duplicates(self): tm.assert_series_equal(s.duplicated(), expected) tm.assert_series_equal(s.drop_duplicates(), original) - last_base = [False] * len(idx) - last_base[3] = True - last_base[5] = True - expected = Series(last_base, index=idx, name='a') - tm.assert_series_equal(s.duplicated(take_last=True), expected) - tm.assert_series_equal(s.drop_duplicates(take_last=True), - s[~np.array(last_base)]) + base = [False] * len(idx) + base[3] = True + base[5] = True + expected = Series(base, index=idx, name='a') + + tm.assert_series_equal(s.duplicated(keep='last'), expected) + tm.assert_series_equal(s.drop_duplicates(keep='last'), + s[~np.array(base)]) + + # deprecate take_last + with tm.assert_produces_warning(FutureWarning): + tm.assert_series_equal(s.duplicated(take_last=True), expected) + with tm.assert_produces_warning(FutureWarning): + tm.assert_series_equal(s.drop_duplicates(take_last=True), + s[~np.array(base)]) + base = [False] * len(original) + [True, True] + base[3] = True + base[5] = True + expected = Series(base, index=idx, name='a') + + tm.assert_series_equal(s.duplicated(keep=False), expected) + tm.assert_series_equal(s.drop_duplicates(keep=False), + s[~np.array(base)]) s.drop_duplicates(inplace=True) tm.assert_series_equal(s, original) diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index 77ef5fecf22c9..72eea5162caa5 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -7848,7 +7848,7 @@ def test_dropna_multiple_axes(self): inp.dropna(how='all', axis=(0, 1), inplace=True) assert_frame_equal(inp, expected) - def test_drop_duplicates(self): + def test_aaa_drop_duplicates(self): df = DataFrame({'AAA': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'bar', 'foo'], 'B': ['one', 'one', 'two', 'two', @@ -7861,10 +7861,21 @@ def test_drop_duplicates(self): expected = df[:2] assert_frame_equal(result, expected) - result = df.drop_duplicates('AAA', take_last=True) + result = df.drop_duplicates('AAA', keep='last') expected = df.ix[[6, 7]] assert_frame_equal(result, expected) + result = df.drop_duplicates('AAA', keep=False) + expected = df.ix[[]] + assert_frame_equal(result, expected) + self.assertEqual(len(result), 0) + + # deprecate take_last + with tm.assert_produces_warning(FutureWarning): + result = df.drop_duplicates('AAA', take_last=True) + expected = df.ix[[6, 7]] + assert_frame_equal(result, expected) + # multi column expected = df.ix[[0, 1, 2, 3]] result = df.drop_duplicates(np.array(['AAA', 'B'])) @@ -7872,6 +7883,15 @@ def test_drop_duplicates(self): result = df.drop_duplicates(['AAA', 'B']) assert_frame_equal(result, expected) + result = df.drop_duplicates(('AAA', 'B'), keep='last') + expected = df.ix[[0, 5, 6, 7]] + assert_frame_equal(result, expected) + + result = df.drop_duplicates(('AAA', 'B'), keep=False) + expected = df.ix[[0]] + assert_frame_equal(result, expected) + + # deprecate take_last result = df.drop_duplicates(('AAA', 'B'), take_last=True) expected = df.ix[[0, 5, 6, 7]] assert_frame_equal(result, expected) @@ -7884,10 +7904,53 @@ def test_drop_duplicates(self): expected = df2.drop_duplicates(['AAA', 'B']) assert_frame_equal(result, expected) + result = df2.drop_duplicates(keep='last') + expected = df2.drop_duplicates(['AAA', 'B'], keep='last') + assert_frame_equal(result, expected) + + result = df2.drop_duplicates(keep=False) + expected = df2.drop_duplicates(['AAA', 'B'], keep=False) + assert_frame_equal(result, expected) + + # deprecate take_last result = df2.drop_duplicates(take_last=True) expected = 
df2.drop_duplicates(['AAA', 'B'], take_last=True) assert_frame_equal(result, expected) + def test_drop_duplicates_for_take_all(self): + df = DataFrame({'AAA': ['foo', 'bar', 'baz', 'bar', + 'foo', 'bar', 'qux', 'foo'], + 'B': ['one', 'one', 'two', 'two', + 'two', 'two', 'one', 'two'], + 'C': [1, 1, 2, 2, 2, 2, 1, 2], + 'D': lrange(8)}) + + # single column + result = df.drop_duplicates('AAA') + expected = df.iloc[[0, 1, 2, 6]] + assert_frame_equal(result, expected) + + result = df.drop_duplicates('AAA', keep='last') + expected = df.iloc[[2, 5, 6, 7]] + assert_frame_equal(result, expected) + + result = df.drop_duplicates('AAA', keep=False) + expected = df.iloc[[2, 6]] + assert_frame_equal(result, expected) + + # multiple columns + result = df.drop_duplicates(['AAA', 'B']) + expected = df.iloc[[0, 1, 2, 3, 4, 6]] + assert_frame_equal(result, expected) + + result = df.drop_duplicates(['AAA', 'B'], keep='last') + expected = df.iloc[[0, 1, 2, 5, 6, 7]] + assert_frame_equal(result, expected) + + result = df.drop_duplicates(['AAA', 'B'], keep=False) + expected = df.iloc[[0, 1, 2, 6]] + assert_frame_equal(result, expected) + def test_drop_duplicates_deprecated_warning(self): df = DataFrame({'AAA': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'bar', 'foo'], @@ -7914,6 +7977,14 @@ def test_drop_duplicates_deprecated_warning(self): self.assertRaises(TypeError, df.drop_duplicates, kwargs={'subset': 'AAA', 'bad_arg': True}) + # deprecate take_last + # Raises warning + with tm.assert_produces_warning(FutureWarning): + result = df.drop_duplicates(take_last=False, subset='AAA') + assert_frame_equal(result, expected) + + self.assertRaises(ValueError, df.drop_duplicates, keep='invalid_name') + def test_drop_duplicates_tuple(self): df = DataFrame({('AA', 'AB'): ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'bar', 'foo'], @@ -7927,6 +7998,16 @@ def test_drop_duplicates_tuple(self): expected = df[:2] assert_frame_equal(result, expected) + result = df.drop_duplicates(('AA', 'AB'), keep='last') + expected = df.ix[[6, 7]] + assert_frame_equal(result, expected) + + result = df.drop_duplicates(('AA', 'AB'), keep=False) + expected = df.ix[[]] # empty df + self.assertEqual(len(result), 0) + assert_frame_equal(result, expected) + + # deprecate take_last result = df.drop_duplicates(('AA', 'AB'), take_last=True) expected = df.ix[[6, 7]] assert_frame_equal(result, expected) @@ -7950,6 +8031,16 @@ def test_drop_duplicates_NA(self): expected = df.ix[[0, 2, 3]] assert_frame_equal(result, expected) + result = df.drop_duplicates('A', keep='last') + expected = df.ix[[1, 6, 7]] + assert_frame_equal(result, expected) + + result = df.drop_duplicates('A', keep=False) + expected = df.ix[[]] # empty df + assert_frame_equal(result, expected) + self.assertEqual(len(result), 0) + + # deprecate take_last result = df.drop_duplicates('A', take_last=True) expected = df.ix[[1, 6, 7]] assert_frame_equal(result, expected) @@ -7959,6 +8050,15 @@ def test_drop_duplicates_NA(self): expected = df.ix[[0, 2, 3, 6]] assert_frame_equal(result, expected) + result = df.drop_duplicates(['A', 'B'], keep='last') + expected = df.ix[[1, 5, 6, 7]] + assert_frame_equal(result, expected) + + result = df.drop_duplicates(['A', 'B'], keep=False) + expected = df.ix[[6]] + assert_frame_equal(result, expected) + + # deprecate take_last result = df.drop_duplicates(['A', 'B'], take_last=True) expected = df.ix[[1, 5, 6, 7]] assert_frame_equal(result, expected) @@ -7976,6 +8076,16 @@ def test_drop_duplicates_NA(self): expected = df[:2] assert_frame_equal(result, expected) + 
result = df.drop_duplicates('C', keep='last') + expected = df.ix[[3, 7]] + assert_frame_equal(result, expected) + + result = df.drop_duplicates('C', keep=False) + expected = df.ix[[]] # empty df + assert_frame_equal(result, expected) + self.assertEqual(len(result), 0) + + # deprecate take_last result = df.drop_duplicates('C', take_last=True) expected = df.ix[[3, 7]] assert_frame_equal(result, expected) @@ -7985,10 +8095,53 @@ def test_drop_duplicates_NA(self): expected = df.ix[[0, 1, 2, 4]] assert_frame_equal(result, expected) + result = df.drop_duplicates(['C', 'B'], keep='last') + expected = df.ix[[1, 3, 6, 7]] + assert_frame_equal(result, expected) + + result = df.drop_duplicates(['C', 'B'], keep=False) + expected = df.ix[[1]] + assert_frame_equal(result, expected) + + # deprecate take_last result = df.drop_duplicates(['C', 'B'], take_last=True) expected = df.ix[[1, 3, 6, 7]] assert_frame_equal(result, expected) + def test_drop_duplicates_NA_for_take_all(self): + # none + df = DataFrame({'A': [None, None, 'foo', 'bar', + 'foo', 'baz', 'bar', 'qux'], + 'C': [1.0, np.nan, np.nan, np.nan, 1., 2., 3, 1.]}) + + # single column + result = df.drop_duplicates('A') + expected = df.iloc[[0, 2, 3, 5, 7]] + assert_frame_equal(result, expected) + + result = df.drop_duplicates('A', keep='last') + expected = df.iloc[[1, 4, 5, 6, 7]] + assert_frame_equal(result, expected) + + result = df.drop_duplicates('A', keep=False) + expected = df.iloc[[5, 7]] + assert_frame_equal(result, expected) + + # nan + + # single column + result = df.drop_duplicates('C') + expected = df.iloc[[0, 1, 5, 6]] + assert_frame_equal(result, expected) + + result = df.drop_duplicates('C', keep='last') + expected = df.iloc[[3, 5, 6, 7]] + assert_frame_equal(result, expected) + + result = df.drop_duplicates('C', keep=False) + expected = df.iloc[[5, 6]] + assert_frame_equal(result, expected) + def test_drop_duplicates_inplace(self): orig = DataFrame({'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'bar', 'foo'], @@ -8004,6 +8157,20 @@ def test_drop_duplicates_inplace(self): result = df assert_frame_equal(result, expected) + df = orig.copy() + df.drop_duplicates('A', keep='last', inplace=True) + expected = orig.ix[[6, 7]] + result = df + assert_frame_equal(result, expected) + + df = orig.copy() + df.drop_duplicates('A', keep=False, inplace=True) + expected = orig.ix[[]] + result = df + assert_frame_equal(result, expected) + self.assertEqual(len(df), 0) + + # deprecate take_last df = orig.copy() df.drop_duplicates('A', take_last=True, inplace=True) expected = orig.ix[[6, 7]] @@ -8017,6 +8184,19 @@ def test_drop_duplicates_inplace(self): result = df assert_frame_equal(result, expected) + df = orig.copy() + df.drop_duplicates(['A', 'B'], keep='last', inplace=True) + expected = orig.ix[[0, 5, 6, 7]] + result = df + assert_frame_equal(result, expected) + + df = orig.copy() + df.drop_duplicates(['A', 'B'], keep=False, inplace=True) + expected = orig.ix[[0]] + result = df + assert_frame_equal(result, expected) + + # deprecate take_last df = orig.copy() df.drop_duplicates(['A', 'B'], take_last=True, inplace=True) expected = orig.ix[[0, 5, 6, 7]] @@ -8033,6 +8213,19 @@ def test_drop_duplicates_inplace(self): result = df2 assert_frame_equal(result, expected) + df2 = orig2.copy() + df2.drop_duplicates(keep='last', inplace=True) + expected = orig2.drop_duplicates(['A', 'B'], keep='last') + result = df2 + assert_frame_equal(result, expected) + + df2 = orig2.copy() + df2.drop_duplicates(keep=False, inplace=True) + expected = 
orig2.drop_duplicates(['A', 'B'], keep=False) + result = df2 + assert_frame_equal(result, expected) + + # deprecate take_last df2 = orig2.copy() df2.drop_duplicates(take_last=True, inplace=True) expected = orig2.drop_duplicates(['A', 'B'], take_last=True) diff --git a/pandas/tests/test_index.py b/pandas/tests/test_index.py index c7418a5651ad7..d6e57e76d0ec9 100644 --- a/pandas/tests/test_index.py +++ b/pandas/tests/test_index.py @@ -4720,9 +4720,9 @@ def check(nlevels, with_nulls): labels = [np.random.choice(n, k * n) for lev in levels] mi = MultiIndex(levels=levels, labels=labels) - for take_last in [False, True]: - left = mi.duplicated(take_last=take_last) - right = pd.lib.duplicated(mi.values, take_last=take_last) + for keep in ['first', 'last', False]: + left = mi.duplicated(keep=keep) + right = pd.lib.duplicated(mi.values, keep=keep) tm.assert_numpy_array_equal(left, right) # GH5873 diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index 65ba5fd036a35..fbe4eefabe02d 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -2135,6 +2135,21 @@ def test_duplicated_drop_duplicates(self): expected = MultiIndex.from_arrays(([1, 2, 3, 2 ,3], [1, 1, 1, 2, 2])) tm.assert_index_equal(idx.drop_duplicates(), expected) + expected = np.array([True, False, False, False, False, False]) + duplicated = idx.duplicated(keep='last') + tm.assert_numpy_array_equal(duplicated, expected) + self.assertTrue(duplicated.dtype == bool) + expected = MultiIndex.from_arrays(([2, 3, 1, 2 ,3], [1, 1, 1, 2, 2])) + tm.assert_index_equal(idx.drop_duplicates(keep='last'), expected) + + expected = np.array([True, False, False, True, False, False]) + duplicated = idx.duplicated(keep=False) + tm.assert_numpy_array_equal(duplicated, expected) + self.assertTrue(duplicated.dtype == bool) + expected = MultiIndex.from_arrays(([2, 3, 2 ,3], [1, 1, 2, 2])) + tm.assert_index_equal(idx.drop_duplicates(keep=False), expected) + + # deprecate take_last expected = np.array([True, False, False, False, False, False]) duplicated = idx.duplicated(take_last=True) tm.assert_numpy_array_equal(duplicated, expected) diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py index 66a38cd858846..31843616956f6 100644 --- a/pandas/tests/test_series.py +++ b/pandas/tests/test_series.py @@ -4782,29 +4782,63 @@ def test_axis_alias(self): self.assertEqual(s._get_axis_name('rows'), 'index') def test_drop_duplicates(self): - s = Series([1, 2, 3, 3]) + # check both int and object + for s in [Series([1, 2, 3, 3]), Series(['1', '2', '3', '3'])]: + expected = Series([False, False, False, True]) + assert_series_equal(s.duplicated(), expected) + assert_series_equal(s.drop_duplicates(), s[~expected]) + sc = s.copy() + sc.drop_duplicates(inplace=True) + assert_series_equal(sc, s[~expected]) - result = s.duplicated() - expected = Series([False, False, False, True]) - assert_series_equal(result, expected) + expected = Series([False, False, True, False]) + assert_series_equal(s.duplicated(keep='last'), expected) + assert_series_equal(s.drop_duplicates(keep='last'), s[~expected]) + sc = s.copy() + sc.drop_duplicates(keep='last', inplace=True) + assert_series_equal(sc, s[~expected]) + # deprecate take_last + assert_series_equal(s.duplicated(take_last=True), expected) + assert_series_equal(s.drop_duplicates(take_last=True), s[~expected]) + sc = s.copy() + sc.drop_duplicates(take_last=True, inplace=True) + assert_series_equal(sc, s[~expected]) - result = s.duplicated(take_last=True) - expected = 
Series([False, False, True, False]) - assert_series_equal(result, expected) + expected = Series([False, False, True, True]) + assert_series_equal(s.duplicated(keep=False), expected) + assert_series_equal(s.drop_duplicates(keep=False), s[~expected]) + sc = s.copy() + sc.drop_duplicates(keep=False, inplace=True) + assert_series_equal(sc, s[~expected]) + + for s in [Series([1, 2, 3, 5, 3, 2, 4]), + Series(['1', '2', '3', '5', '3', '2', '4'])]: + expected = Series([False, False, False, False, True, True, False]) + assert_series_equal(s.duplicated(), expected) + assert_series_equal(s.drop_duplicates(), s[~expected]) + sc = s.copy() + sc.drop_duplicates(inplace=True) + assert_series_equal(sc, s[~expected]) - result = s.drop_duplicates() - expected = s[[True, True, True, False]] - assert_series_equal(result, expected) - sc = s.copy() - sc.drop_duplicates(inplace=True) - assert_series_equal(sc, expected) + expected = Series([False, True, True, False, False, False, False]) + assert_series_equal(s.duplicated(keep='last'), expected) + assert_series_equal(s.drop_duplicates(keep='last'), s[~expected]) + sc = s.copy() + sc.drop_duplicates(keep='last', inplace=True) + assert_series_equal(sc, s[~expected]) + # deprecate take_last + assert_series_equal(s.duplicated(take_last=True), expected) + assert_series_equal(s.drop_duplicates(take_last=True), s[~expected]) + sc = s.copy() + sc.drop_duplicates(take_last=True, inplace=True) + assert_series_equal(sc, s[~expected]) - result = s.drop_duplicates(take_last=True) - expected = s[[True, True, False, True]] - assert_series_equal(result, expected) - sc = s.copy() - sc.drop_duplicates(take_last=True, inplace=True) - assert_series_equal(sc, expected) + expected = Series([False, True, True, False, True, True, False]) + assert_series_equal(s.duplicated(keep=False), expected) + assert_series_equal(s.drop_duplicates(keep=False), s[~expected]) + sc = s.copy() + sc.drop_duplicates(keep=False, inplace=True) + assert_series_equal(sc, s[~expected]) def test_sort(self): ts = self.ts.copy() diff --git a/pandas/tests/test_tseries.py b/pandas/tests/test_tseries.py index 035b3ac07342d..f10d541a7e23b 100644 --- a/pandas/tests/test_tseries.py +++ b/pandas/tests/test_tseries.py @@ -275,10 +275,18 @@ def test_duplicated_with_nas(): expected = [False, False, False, True, False, True] assert(np.array_equal(result, expected)) - result = lib.duplicated(keys, take_last=True) + result = lib.duplicated(keys, keep='first') + expected = [False, False, False, True, False, True] + assert(np.array_equal(result, expected)) + + result = lib.duplicated(keys, keep='last') expected = [True, False, True, False, False, False] assert(np.array_equal(result, expected)) + result = lib.duplicated(keys, keep=False) + expected = [True, False, True, True, False, True] + assert(np.array_equal(result, expected)) + keys = np.empty(8, dtype=object) for i, t in enumerate(zip([0, 0, nan, nan] * 2, [0, nan, 0, nan] * 2)): keys[i] = t @@ -289,10 +297,14 @@ def test_duplicated_with_nas(): expected = falses + trues assert(np.array_equal(result, expected)) - result = lib.duplicated(keys, take_last=True) + result = lib.duplicated(keys, keep='last') expected = trues + falses assert(np.array_equal(result, expected)) + result = lib.duplicated(keys, keep=False) + expected = trues + trues + assert(np.array_equal(result, expected)) + def test_maybe_booleans_to_slice(): arr = np.array([0, 0, 1, 1, 1, 0, 1], dtype=np.uint8) From d44ecc3c8716883675cf76c9d1a3c5828b18047e Mon Sep 17 00:00:00 2001 From: sinhrks Date: Fri, 26 Jun 
2015 04:54:37 +0900 Subject: [PATCH 05/12] TST: make assertion messages more understandable --- pandas/io/tests/test_json/test_pandas.py | 28 +- pandas/src/testing.pyx | 83 +++++- pandas/tests/test_index.py | 35 ++- pandas/tests/test_testing.py | 353 +++++++++++++++++++++- pandas/util/testing.py | 357 ++++++++++++++++++----- 5 files changed, 763 insertions(+), 93 deletions(-) diff --git a/pandas/io/tests/test_json/test_pandas.py b/pandas/io/tests/test_json/test_pandas.py index c145c717df4c4..66c2bbde0b3f8 100644 --- a/pandas/io/tests/test_json/test_pandas.py +++ b/pandas/io/tests/test_json/test_pandas.py @@ -178,7 +178,10 @@ def _check_orient(df, orient, dtype=None, numpy=False, self.assertTrue(df.columns.equals(unser.columns)) elif orient == "values": # index and cols are not captured in this orientation - assert_almost_equal(df.values, unser.values) + if numpy is True and df.shape == (0, 0): + assert unser.shape[0] == 0 + else: + assert_almost_equal(df.values, unser.values) elif orient == "split": # index and col labels might not be strings unser.index = [str(i) for i in unser.index] @@ -670,15 +673,20 @@ def test_doc_example(self): def test_misc_example(self): # parsing unordered input fails - result = read_json('[{"a": 1, "b": 2}, {"b":2, "a" :1}]',numpy=True) - expected = DataFrame([[1,2],[1,2]],columns=['a','b']) - with tm.assertRaisesRegexp(AssertionError, - '\[index\] left \[.+\], right \[.+\]'): + result = read_json('[{"a": 1, "b": 2}, {"b":2, "a" :1}]', numpy=True) + expected = DataFrame([[1,2], [1,2]], columns=['a', 'b']) + + error_msg = """DataFrame\\.index are different + +DataFrame\\.index values are different \\(100\\.0 %\\) +\\[left\\]: Index\\(\\[u?'a', u?'b'\\], dtype='object'\\) +\\[right\\]: Int64Index\\(\\[0, 1\\], dtype='int64'\\)""" + with tm.assertRaisesRegexp(AssertionError, error_msg): assert_frame_equal(result, expected) result = read_json('[{"a": 1, "b": 2}, {"b":2, "a" :1}]') - expected = DataFrame([[1,2],[1,2]],columns=['a','b']) - assert_frame_equal(result,expected) + expected = DataFrame([[1,2], [1,2]], columns=['a','b']) + assert_frame_equal(result, expected) @network def test_round_trip_exception_(self): @@ -739,3 +747,9 @@ def my_handler_raises(obj): raise TypeError("raisin") self.assertRaises(TypeError, DataFrame({'a': [1, 2, object()]}).to_json, default_handler=my_handler_raises) + + +if __name__ == '__main__': + import nose + nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', + '--pdb-failure', '-s'], exit=False) \ No newline at end of file diff --git a/pandas/src/testing.pyx b/pandas/src/testing.pyx index 4977a80acc936..1abc758559e70 100644 --- a/pandas/src/testing.pyx +++ b/pandas/src/testing.pyx @@ -55,11 +55,39 @@ cpdef assert_dict_equal(a, b, bint compare_keys=True): return True -cpdef assert_almost_equal(a, b, bint check_less_precise=False): +cpdef assert_almost_equal(a, b, bint check_less_precise=False, + obj=None, lobj=None, robj=None): + """Check that left and right objects are almost equal. + + Parameters + ---------- + a : object + b : object + check_less_precise : bool, default False + Specify comparison precision. + 5 digits (False) or 3 digits (True) after decimal points are compared. 
+ obj : str, default None + Specify object name being compared, internally used to show appropriate + assertion message + lobj : str, default None + Specify left object name being compared, internally used to show + appropriate assertion message + robj : str, default None + Specify right object name being compared, internally used to show + appropriate assertion message + """ + cdef: int decimal + double diff = 0.0 Py_ssize_t i, na, nb double fa, fb + bint is_unequal = False + + if lobj is None: + lobj = a + if robj is None: + robj = b if isinstance(a, dict) or isinstance(b, dict): return assert_dict_equal(a, b) @@ -70,33 +98,62 @@ cpdef assert_almost_equal(a, b, bint check_less_precise=False): return True if isiterable(a): - assert isiterable(b), ( - "First object is iterable, second isn't: %r != %r" % (a, b) - ) + + if not isiterable(b): + from pandas.util.testing import raise_assert_detail + if obj is None: + obj = 'Iterable' + msg = "First object is iterable, second isn't" + raise_assert_detail(obj, msg, a, b) + assert has_length(a) and has_length(b), ( "Can't compare objects without length, one or both is invalid: " "(%r, %r)" % (a, b) ) - na, nb = len(a), len(b) - assert na == nb, ( - "Length of two iterators not the same: %r != %r" % (na, nb) - ) if isinstance(a, np.ndarray) and isinstance(b, np.ndarray): + if obj is None: + obj = 'numpy array' + na, nb = a.size, b.size + if a.shape != b.shape: + from pandas.util.testing import raise_assert_detail + raise_assert_detail(obj, '{0} shapes are different'.format(obj), + a.shape, b.shape) try: if np.array_equal(a, b): return True except: pass + else: + if obj is None: + obj = 'Iterable' + na, nb = len(a), len(b) + + if na != nb: + from pandas.util.testing import raise_assert_detail + raise_assert_detail(obj, '{0} length are different'.format(obj), + na, nb) + + for i in xrange(len(a)): + try: + assert_almost_equal(a[i], b[i], check_less_precise) + except AssertionError: + is_unequal = True + diff += 1 - for i in xrange(na): - assert_almost_equal(a[i], b[i], check_less_precise) + if is_unequal: + from pandas.util.testing import raise_assert_detail + msg = '{0} values are different ({1} %)'.format(obj, np.round(diff * 100.0 / na, 5)) + raise_assert_detail(obj, msg, lobj, robj) return True + elif isiterable(b): - assert False, ( - "Second object is iterable, first isn't: %r != %r" % (a, b) - ) + from pandas.util.testing import raise_assert_detail + if obj is None: + obj = 'Iterable' + msg = "Second object is iterable, first isn't" + raise_assert_detail(obj, msg, a, b) if isnull(a): assert isnull(b), ( diff --git a/pandas/tests/test_index.py b/pandas/tests/test_index.py index d6e57e76d0ec9..3c988943301c0 100644 --- a/pandas/tests/test_index.py +++ b/pandas/tests/test_index.py @@ -3371,7 +3371,10 @@ def test_inplace_mutation_resets_values(self): # make sure label setting works too labels2 = [[0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0]] - exp_values = np.array([(long(1), 'a')] * 6, dtype=object) + exp_values = np.empty((6, ), dtype=object) + exp_values[:] = [(long(1), 'a')] * 6 + # must be 1d array of tuples + self.assertEqual(exp_values.shape, (6, )) new_values = mi2.set_labels(labels2).values # not inplace shouldn't change assert_almost_equal(mi2._tuples, vals2) @@ -4772,8 +4775,20 @@ def test_repr_roundtrip(self): mi = MultiIndex.from_product([list('ab'),range(3)],names=['first','second']) str(mi) - tm.assert_index_equal(eval(repr(mi)),mi,exact=True) - + + if compat.PY3: + tm.assert_index_equal(eval(repr(mi)), mi, exact=True) + else: + result = 
eval(repr(mi))
+            # string coerces to unicode
+            tm.assert_index_equal(result, mi, exact=False)
+            self.assertEqual(mi.get_level_values('first').inferred_type, 'string')
+            self.assertEqual(result.get_level_values('first').inferred_type, 'unicode')
+
+            mi_u = MultiIndex.from_product([list(u'ab'),range(3)],names=['first','second'])
+            result = eval(repr(mi_u))
+            tm.assert_index_equal(result, mi_u, exact=True)
+
         # formatting
         if compat.PY3:
             str(mi)
@@ -4783,7 +4798,19 @@ def test_repr_roundtrip(self):
         # long format
         mi = MultiIndex.from_product([list('abcdefg'),range(10)],names=['first','second'])
         result = str(mi)
-        tm.assert_index_equal(eval(repr(mi)),mi,exact=True)
+
+        if compat.PY3:
+            tm.assert_index_equal(eval(repr(mi)), mi, exact=True)
+        else:
+            result = eval(repr(mi))
+            # string coerces to unicode
+            tm.assert_index_equal(result, mi, exact=False)
+            self.assertEqual(mi.get_level_values('first').inferred_type, 'string')
+            self.assertEqual(result.get_level_values('first').inferred_type, 'unicode')
+
+            mi_u = MultiIndex.from_product([list(u'abcdefg'),range(10)],names=['first','second'])
+            result = eval(repr(mi_u))
+            tm.assert_index_equal(result, mi_u, exact=True)

     def test_str(self):
         # tested elsewhere
diff --git a/pandas/tests/test_testing.py b/pandas/tests/test_testing.py
index 668579911d6d5..f4fbc19535107 100644
--- a/pandas/tests/test_testing.py
+++ b/pandas/tests/test_testing.py
@@ -10,7 +10,8 @@
 import pandas.util.testing as tm
 from pandas.util.testing import (
     assert_almost_equal, assertRaisesRegexp, raise_with_traceback,
-    assert_series_equal, assert_frame_equal, RNGContext
+    assert_index_equal, assert_series_equal, assert_frame_equal,
+    assert_numpy_array_equal, assert_isinstance, RNGContext
 )

 # let's get meta.
@@ -132,6 +133,275 @@ def test_raise_with_traceback(self):
         raise_with_traceback(e, traceback)


+class TestAssertNumpyArrayEqual(tm.TestCase):
+
+    def test_numpy_array_equal_message(self):
+
+        expected = """numpy array are different
+
+numpy array shapes are different
+\\[left\\]: \\(2,\\)
+\\[right\\]: \\(3,\\)"""
+        with assertRaisesRegexp(AssertionError, expected):
+            assert_numpy_array_equal(np.array([1, 2]), np.array([3, 4, 5]))
+
+        with assertRaisesRegexp(AssertionError, expected):
+            assert_almost_equal(np.array([1, 2]), np.array([3, 4, 5]))
+
+        # scalar comparison
+        expected = """: 1 != 2"""
+        with assertRaisesRegexp(AssertionError, expected):
+            assert_numpy_array_equal(1, 2)
+        expected = """expected 2\\.00000 but got 1\\.00000, with decimal 5"""
+        with assertRaisesRegexp(AssertionError, expected):
+            assert_almost_equal(1, 2)
+
+        # array / scalar array comparison
+        expected = """(numpy array|Iterable) are different
+
+First object is iterable, second isn't
+\\[left\\]: \\[1\\]
+\\[right\\]: 1"""
+        with assertRaisesRegexp(AssertionError, expected):
+            assert_numpy_array_equal(np.array([1]), 1)
+        with assertRaisesRegexp(AssertionError, expected):
+            assert_almost_equal(np.array([1]), 1)
+
+        # scalar / array comparison
+        expected = """(numpy array|Iterable) are different
+
+Second object is iterable, first isn't
+\\[left\\]: 1
+\\[right\\]: \\[1\\]"""
+        with assertRaisesRegexp(AssertionError, expected):
+            assert_numpy_array_equal(1, np.array([1]))
+        with assertRaisesRegexp(AssertionError, expected):
+            assert_almost_equal(1, np.array([1]))
+
+        expected = """numpy array are different
+
+numpy array values are different \\(66\\.66667 %\\)
+\\[left\\]: \\[nan, 2\\.0, 3\\.0\\]
+\\[right\\]: \\[1\\.0, nan, 3\\.0\\]"""
+        with assertRaisesRegexp(AssertionError, expected):
+
assert_numpy_array_equal(np.array([np.nan, 2, 3]), np.array([1, np.nan, 3])) + with assertRaisesRegexp(AssertionError, expected): + assert_almost_equal(np.array([np.nan, 2, 3]), np.array([1, np.nan, 3])) + + expected = """numpy array are different + +numpy array values are different \\(50\\.0 %\\) +\\[left\\]: \\[1, 2\\] +\\[right\\]: \\[1, 3\\]""" + with assertRaisesRegexp(AssertionError, expected): + assert_numpy_array_equal(np.array([1, 2]), np.array([1, 3])) + with assertRaisesRegexp(AssertionError, expected): + assert_almost_equal(np.array([1, 2]), np.array([1, 3])) + + + expected = """numpy array are different + +numpy array values are different \\(50\\.0 %\\) +\\[left\\]: \\[1\\.1, 2\\.000001\\] +\\[right\\]: \\[1\\.1, 2.0\\]""" + with assertRaisesRegexp(AssertionError, expected): + assert_numpy_array_equal(np.array([1.1, 2.000001]), np.array([1.1, 2.0])) + + # must pass + assert_almost_equal(np.array([1.1, 2.000001]), np.array([1.1, 2.0])) + + expected = """numpy array are different + +numpy array values are different \\(16\\.66667 %\\) +\\[left\\]: \\[\\[1, 2\\], \\[3, 4\\], \\[5, 6\\]\\] +\\[right\\]: \\[\\[1, 3\\], \\[3, 4\\], \\[5, 6\\]\\]""" + with assertRaisesRegexp(AssertionError, expected): + assert_numpy_array_equal(np.array([[1, 2], [3, 4], [5, 6]]), + np.array([[1, 3], [3, 4], [5, 6]])) + with assertRaisesRegexp(AssertionError, expected): + assert_almost_equal(np.array([[1, 2], [3, 4], [5, 6]]), + np.array([[1, 3], [3, 4], [5, 6]])) + + expected = """numpy array are different + +numpy array values are different \\(25\\.0 %\\) +\\[left\\]: \\[\\[1, 2\\], \\[3, 4\\]\\] +\\[right\\]: \\[\\[1, 3\\], \\[3, 4\\]\\]""" + with assertRaisesRegexp(AssertionError, expected): + assert_numpy_array_equal(np.array([[1, 2], [3, 4]]), + np.array([[1, 3], [3, 4]])) + with assertRaisesRegexp(AssertionError, expected): + assert_almost_equal(np.array([[1, 2], [3, 4]]), + np.array([[1, 3], [3, 4]])) + + # allow to overwrite message + expected = """Index are different + +Index shapes are different +\\[left\\]: \\(2,\\) +\\[right\\]: \\(3,\\)""" + with assertRaisesRegexp(AssertionError, expected): + assert_numpy_array_equal(np.array([1, 2]), np.array([3, 4, 5]), + obj='Index') + with assertRaisesRegexp(AssertionError, expected): + assert_almost_equal(np.array([1, 2]), np.array([3, 4, 5]), + obj='Index') + + def test_assert_almost_equal_iterable_message(self): + + expected = """Iterable are different + +Iterable length are different +\\[left\\]: 2 +\\[right\\]: 3""" + with assertRaisesRegexp(AssertionError, expected): + assert_almost_equal([1, 2], [3, 4, 5]) + + expected = """Iterable are different + +Iterable values are different \\(50\\.0 %\\) +\\[left\\]: \\[1, 2\\] +\\[right\\]: \\[1, 3\\]""" + with assertRaisesRegexp(AssertionError, expected): + assert_almost_equal([1, 2], [1, 3]) + + +class TestAssertIndexEqual(unittest.TestCase): + _multiprocess_can_split_ = True + + def test_index_equal_message(self): + + expected = """Index are different + +Index levels are different +\\[left\\]: 1, Int64Index\\(\\[1, 2, 3\\], dtype='int64'\\) +\\[right\\]: 2, MultiIndex\\(levels=\\[\\[u?'A', u?'B'\\], \\[1, 2, 3, 4\\]\\], + labels=\\[\\[0, 0, 1, 1\\], \\[0, 1, 2, 3\\]\\]\\)""" + idx1 = pd.Index([1, 2, 3]) + idx2 = pd.MultiIndex.from_tuples([('A', 1), ('A', 2), ('B', 3), ('B', 4)]) + with assertRaisesRegexp(AssertionError, expected): + assert_index_equal(idx1, idx2) + with assertRaisesRegexp(AssertionError, expected): + assert_index_equal(idx1, idx2, exact=False) + + + expected = """MultiIndex level 
\\[1\\] are different + +MultiIndex level \\[1\\] values are different \\(25\\.0 %\\) +\\[left\\]: Int64Index\\(\\[2, 2, 3, 4\\], dtype='int64'\\) +\\[right\\]: Int64Index\\(\\[1, 2, 3, 4\\], dtype='int64'\\)""" + idx1 = pd.MultiIndex.from_tuples([('A', 2), ('A', 2), ('B', 3), ('B', 4)]) + idx2 = pd.MultiIndex.from_tuples([('A', 1), ('A', 2), ('B', 3), ('B', 4)]) + with assertRaisesRegexp(AssertionError, expected): + assert_index_equal(idx1, idx2) + with assertRaisesRegexp(AssertionError, expected): + assert_index_equal(idx1, idx2, check_exact=False) + + expected = """Index are different + +Index length are different +\\[left\\]: 3, Int64Index\\(\\[1, 2, 3\\], dtype='int64'\\) +\\[right\\]: 4, Int64Index\\(\\[1, 2, 3, 4\\], dtype='int64'\\)""" + idx1 = pd.Index([1, 2, 3]) + idx2 = pd.Index([1, 2, 3, 4]) + with assertRaisesRegexp(AssertionError, expected): + assert_index_equal(idx1, idx2) + with assertRaisesRegexp(AssertionError, expected): + assert_index_equal(idx1, idx2, check_exact=False) + + expected = """Index are different + +Index classes are different +\\[left\\]: Int64Index\\(\\[1, 2, 3\\], dtype='int64'\\) +\\[right\\]: Float64Index\\(\\[1\\.0, 2\\.0, 3\\.0\\], dtype='float64'\\)""" + idx1 = pd.Index([1, 2, 3]) + idx2 = pd.Index([1, 2, 3.0]) + with assertRaisesRegexp(AssertionError, expected): + assert_index_equal(idx1, idx2, exact=True) + with assertRaisesRegexp(AssertionError, expected): + assert_index_equal(idx1, idx2, exact=True, check_exact=False) + + expected = """Index are different + +Index values are different \\(33\\.33333 %\\) +\\[left\\]: Float64Index\\(\\[1.0, 2.0, 3.0], dtype='float64'\\) +\\[right\\]: Float64Index\\(\\[1.0, 2.0, 3.0000000001\\], dtype='float64'\\)""" + idx1 = pd.Index([1, 2, 3.]) + idx2 = pd.Index([1, 2, 3.0000000001]) + with assertRaisesRegexp(AssertionError, expected): + assert_index_equal(idx1, idx2) + + # must success + assert_index_equal(idx1, idx2, check_exact=False) + + expected = """Index are different + +Index values are different \\(33\\.33333 %\\) +\\[left\\]: Float64Index\\(\\[1.0, 2.0, 3.0], dtype='float64'\\) +\\[right\\]: Float64Index\\(\\[1.0, 2.0, 3.0001\\], dtype='float64'\\)""" + idx1 = pd.Index([1, 2, 3.]) + idx2 = pd.Index([1, 2, 3.0001]) + with assertRaisesRegexp(AssertionError, expected): + assert_index_equal(idx1, idx2) + with assertRaisesRegexp(AssertionError, expected): + assert_index_equal(idx1, idx2, check_exact=False) + # must success + assert_index_equal(idx1, idx2, check_exact=False, check_less_precise=True) + + expected = """Index are different + +Index values are different \\(33\\.33333 %\\) +\\[left\\]: Int64Index\\(\\[1, 2, 3\\], dtype='int64'\\) +\\[right\\]: Int64Index\\(\\[1, 2, 4\\], dtype='int64'\\)""" + idx1 = pd.Index([1, 2, 3]) + idx2 = pd.Index([1, 2, 4]) + with assertRaisesRegexp(AssertionError, expected): + assert_index_equal(idx1, idx2) + with assertRaisesRegexp(AssertionError, expected): + assert_index_equal(idx1, idx2, check_less_precise=True) + + expected = """MultiIndex level \\[1\\] are different + +MultiIndex level \\[1\\] values are different \\(25\\.0 %\\) +\\[left\\]: Int64Index\\(\\[2, 2, 3, 4\\], dtype='int64'\\) +\\[right\\]: Int64Index\\(\\[1, 2, 3, 4\\], dtype='int64'\\)""" + idx1 = pd.MultiIndex.from_tuples([('A', 2), ('A', 2), ('B', 3), ('B', 4)]) + idx2 = pd.MultiIndex.from_tuples([('A', 1), ('A', 2), ('B', 3), ('B', 4)]) + with assertRaisesRegexp(AssertionError, expected): + assert_index_equal(idx1, idx2) + with assertRaisesRegexp(AssertionError, expected): + assert_index_equal(idx1, 
idx2, check_exact=False) + + def test_index_equal_metadata_message(self): + + expected = """Index are different + +Attribute "names" are different +\\[left\\]: \\[None\\] +\\[right\\]: \\[u?'x'\\]""" + idx1 = pd.Index([1, 2, 3]) + idx2 = pd.Index([1, 2, 3], name='x') + with assertRaisesRegexp(AssertionError, expected): + assert_index_equal(idx1, idx2) + + # same name, should pass + assert_index_equal(pd.Index([1, 2, 3], name=np.nan), + pd.Index([1, 2, 3], name=np.nan)) + assert_index_equal(pd.Index([1, 2, 3], name=pd.NaT), + pd.Index([1, 2, 3], name=pd.NaT)) + + + expected = """Index are different + +Attribute "names" are different +\\[left\\]: \\[nan\\] +\\[right\\]: \\[NaT\\]""" + idx1 = pd.Index([1, 2, 3], name=np.nan) + idx2 = pd.Index([1, 2, 3], name=pd.NaT) + with assertRaisesRegexp(AssertionError, expected): + assert_index_equal(idx1, idx2) + + class TestAssertSeriesEqual(tm.TestCase): _multiprocess_can_split_ = True @@ -191,6 +461,28 @@ def test_multiindex_dtype(self): {'a':[1.0,2.0],'b':[2.1,1.5],'c':['l1','l2']}, index=['a','b']) self._assert_not_equal(df1.c, df2.c, check_index_type=True) + def test_series_equal_message(self): + + expected = """Series are different + +Series length are different +\\[left\\]: 3, Int64Index\\(\\[0, 1, 2\\], dtype='int64'\\) +\\[right\\]: 4, Int64Index\\(\\[0, 1, 2, 3\\], dtype='int64'\\)""" + with assertRaisesRegexp(AssertionError, expected): + assert_series_equal(pd.Series([1, 2, 3]), pd.Series([1, 2, 3, 4])) + + + expected = """Series are different + +Series values are different \\(33\\.33333 %\\) +\\[left\\]: \\[1, 2, 3\\] +\\[right\\]: \\[1, 2, 4\\]""" + with assertRaisesRegexp(AssertionError, expected): + assert_series_equal(pd.Series([1, 2, 3]), pd.Series([1, 2, 4])) + with assertRaisesRegexp(AssertionError, expected): + assert_series_equal(pd.Series([1, 2, 3]), pd.Series([1, 2, 4]), + check_less_precise=True) + class TestAssertFrameEqual(tm.TestCase): _multiprocess_can_split_ = True @@ -224,6 +516,65 @@ def test_empty_dtypes(self): self._assert_equal(df1, df2, check_dtype=False) self._assert_not_equal(df1, df2, check_dtype=True) + def test_frame_equal_message(self): + + expected = """DataFrame are different + +DataFrame shape \\(number of rows\\) are different +\\[left\\]: 3, Int64Index\\(\\[0, 1, 2\\], dtype='int64'\\) +\\[right\\]: 4, Int64Index\\(\\[0, 1, 2, 3\\], dtype='int64'\\)""" + with assertRaisesRegexp(AssertionError, expected): + assert_frame_equal(pd.DataFrame({'A':[1, 2, 3]}), + pd.DataFrame({'A':[1, 2, 3, 4]})) + + + expected = """DataFrame are different + +DataFrame shape \\(number of columns\\) are different +\\[left\\]: 2, Index\\(\\[u?'A', u?'B'\\], dtype='object'\\) +\\[right\\]: 1, Index\\(\\[u?'A'\\], dtype='object'\\)""" + with assertRaisesRegexp(AssertionError, expected): + assert_frame_equal(pd.DataFrame({'A':[1, 2, 3], 'B':[4, 5, 6]}), + pd.DataFrame({'A':[1, 2, 3]})) + + + expected = """DataFrame\\.index are different + +DataFrame\\.index values are different \\(33\\.33333 %\\) +\\[left\\]: Index\\(\\[u?'a', u?'b', u?'c'\\], dtype='object'\\) +\\[right\\]: Index\\(\\[u?'a', u?'b', u?'d'\\], dtype='object'\\)""" + with assertRaisesRegexp(AssertionError, expected): + assert_frame_equal(pd.DataFrame({'A':[1, 2, 3], 'B':[4, 5, 6]}, + index=['a', 'b', 'c']), + pd.DataFrame({'A':[1, 2, 3], 'B':[4, 5, 6]}, + index=['a', 'b', 'd'])) + + expected = """DataFrame\\.columns are different + +DataFrame\\.columns values are different \\(50\\.0 %\\) +\\[left\\]: Index\\(\\[u?'A', u?'B'\\], dtype='object'\\) +\\[right\\]: 
Index\\(\\[u?'A', u?'b'\\], dtype='object'\\)""" + with assertRaisesRegexp(AssertionError, expected): + assert_frame_equal(pd.DataFrame({'A':[1, 2, 3], 'B':[4, 5, 6]}, + index=['a', 'b', 'c']), + pd.DataFrame({'A':[1, 2, 3], 'b':[4, 5, 6]}, + index=['a', 'b', 'c'])) + + + expected = """DataFrame\\.iloc\\[:, 1\\] are different + +DataFrame\\.iloc\\[:, 1\\] values are different \\(33\\.33333 %\\) +\\[left\\]: \\[4, 5, 6\\] +\\[right\\]: \\[4, 5, 7\\]""" + with assertRaisesRegexp(AssertionError, expected): + assert_frame_equal(pd.DataFrame({'A':[1, 2, 3], 'B':[4, 5, 6]}), + pd.DataFrame({'A':[1, 2, 3], 'B':[4, 5, 7]})) + + with assertRaisesRegexp(AssertionError, expected): + assert_frame_equal(pd.DataFrame({'A':[1, 2, 3], 'B':[4, 5, 6]}), + pd.DataFrame({'A':[1, 2, 3], 'B':[4, 5, 7]}), + by_blocks=True) + class TestRNGContext(unittest.TestCase): diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 979ac007c7500..4b7c8d4540e0f 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -23,8 +23,9 @@ import numpy as np import pandas as pd -from pandas.core.common import (is_sequence, array_equivalent, is_list_like, is_number, - is_datetimelike_v_numeric, is_datetimelike_v_object) +from pandas.core.common import (is_sequence, array_equivalent, is_list_like, + is_datetimelike_v_numeric, is_datetimelike_v_object, + is_number, pprint_thing, take_1d) import pandas.compat as compat from pandas.compat import( filter, map, zip, range, unichr, lrange, lmap, lzip, u, callable, Counter, @@ -536,23 +537,128 @@ def assert_equal(a, b, msg=""): assert a == b, "%s: %r != %r" % (msg.format(a,b), a, b) -def assert_index_equal(left, right, exact=False, check_names=True): +def assert_index_equal(left, right, exact=False, check_names=True, + check_less_precise=False, check_exact=True, obj='Index'): + """Check that left and right Index are equal. + + Parameters + ---------- + left : Index + right : Index + exact : bool, default False + Whether to check the Index class, dtype and inferred_type are identical. + check_names : bool, default True + Whether to check the names attribute. + check_less_precise : bool, default False + Specify comparison precision. Only used when check_exact is False. + 5 digits (False) or 3 digits (True) after decimal points are compared. + check_exact : bool, default True + Whether to compare number exactly. 
+ obj : str, default 'Index' + Specify object name being compared, internally used to show appropriate + assertion message + """ + + def _check_types(l, r, obj='Index'): + if exact: + if type(l) != type(r): + msg = '{0} classes are different'.format(obj) + raise_assert_detail(obj, msg, l, r) + assert_attr_equal('dtype', l, r, obj=obj) + assert_attr_equal('inferred_type', l, r, obj=obj) + + def _get_ilevel_values(index, level): + # accept level number only + unique = index.levels[level] + labels = index.labels[level] + filled = take_1d(unique.values, labels, fill_value=unique._na_value) + values = unique._simple_new(filled, index.names[level], + freq=getattr(unique, 'freq', None), + tz=getattr(unique, 'tz', None)) + return values + + # instance validation assertIsInstance(left, Index, '[index] ') assertIsInstance(right, Index, '[index] ') - if not left.equals(right) or (exact and type(left) != type(right)): - raise AssertionError("[index] left [{0} {1}], right [{2} {3}]".format(left.dtype, - left, - right, - right.dtype)) + + # class / dtype comparison + _check_types(left, right) + + # level comparison + if left.nlevels != right.nlevels: + raise_assert_detail(obj, '{0} levels are different'.format(obj), + '{0}, {1}'.format(left.nlevels, left), + '{0}, {1}'.format(right.nlevels, right)) + + # length comparison + if len(left) != len(right): + raise_assert_detail(obj, '{0} length are different'.format(obj), + '{0}, {1}'.format(len(left), left), + '{0}, {1}'.format(len(right), right)) + + # MultiIndex special comparison for little-friendly error messages + if left.nlevels > 1: + for level in range(left.nlevels): + # cannot use get_level_values here because it can change dtype + llevel = _get_ilevel_values(left, level) + rlevel = _get_ilevel_values(right, level) + + lobj = 'MultiIndex level [{0}]'.format(level) + assert_index_equal(llevel, rlevel, + exact=exact, check_names=check_names, + check_less_precise=check_less_precise, + check_exact=check_exact, obj=lobj) + # get_level_values may change dtype + _check_types(left.levels[level], right.levels[level], obj=obj) + + if check_exact: + if not left.equals(right): + diff = np.sum((left.values != right.values).astype(int)) * 100.0 / len(left) + msg = '{0} values are different ({1} %)'.format(obj, np.round(diff, 5)) + raise_assert_detail(obj, msg, left, right) + else: + assert_almost_equal(left.values, right.values, + check_less_precise=check_less_precise, + obj=obj, lobj=left, robj=right) + + # metadata comparison if check_names: - assert_attr_equal('names', left, right) + assert_attr_equal('names', left, right, obj=obj) + +def assert_attr_equal(attr, left, right, obj='Attributes'): + """checks attributes are equal. Both objects must have attribute. + + Parameters + ---------- + attr : str + Attribute name being compared. + left : object + right : object + obj : str, default 'Attributes' + Specify object name being compared, internally used to show appropriate + assertion message + """ -def assert_attr_equal(attr, left, right): - """checks attributes are equal. 
Both objects must have attribute.""" left_attr = getattr(left, attr) right_attr = getattr(right, attr) - assert_equal(left_attr,right_attr,"attr is not equal [{0}]" .format(attr)) + + if left_attr is right_attr: + return True + elif (is_number(left_attr) and np.isnan(left_attr) and + is_number(right_attr) and np.isnan(right_attr)): + # np.nan + return True + + result = left_attr == right_attr + if not isinstance(result, bool): + result = result.all() + + if result: + return True + else: + raise_assert_detail(obj, 'Attribute "{0}" are different'.format(attr), + left_attr, right_attr) def isiterable(obj): @@ -607,6 +713,7 @@ def assertIsInstance(obj, cls, msg=''): def assert_isinstance(obj, class_type_or_tuple, msg=''): return deprecate('assert_isinstance', assertIsInstance)(obj, class_type_or_tuple, msg=msg) + def assertNotIsInstance(obj, cls, msg=''): """Test that obj is not an instance of cls (which can be a class or a tuple of classes, @@ -630,8 +737,23 @@ def assert_categorical_equal(res, exp): raise AssertionError("ordered not the same") -def assert_numpy_array_equal(np_array, assert_equal, - strict_nan=False, err_msg=None): +def raise_assert_detail(obj, message, left, right): + if isinstance(left, np.ndarray): + left = pprint_thing(left) + if isinstance(right, np.ndarray): + right = pprint_thing(right) + + msg = """{0} are different + +{1} +[left]: {2} +[right]: {3}""".format(obj, message, left, right) + raise AssertionError(msg) + + +def assert_numpy_array_equal(left, right, + strict_nan=False, err_msg=None, + obj='numpy array'): """Checks that 'np_array' is equivalent to 'assert_equal'. This is similar to ``numpy.testing.assert_array_equal``, but can @@ -639,10 +761,42 @@ def assert_numpy_array_equal(np_array, assert_equal, equivalent if the arrays have equal non-NaN elements, and `np.nan` in corresponding locations. """ - if array_equivalent(np_array, assert_equal, strict_nan=strict_nan): + + # compare shape and values + if array_equivalent(left, right, strict_nan=strict_nan): return + if err_msg is None: - err_msg = '{0} is not equivalent to {1}.'.format(np_array, assert_equal) + # show detailed error + + if np.isscalar(left) and np.isscalar(right): + # show scalar comparison error + assert_equal(left, right) + elif is_list_like(left) and is_list_like(right): + # some test cases pass list + left = np.asarray(left) + right = np.array(right) + + if left.shape != right.shape: + raise_assert_detail(obj, '{0} shapes are different'.format(obj), + left.shape, right.shape) + + diff = 0 + for l, r in zip(left, right): + # count up differences + if not array_equivalent(l, r, strict_nan=strict_nan): + diff += 1 + + diff = diff * 100.0 / left.size + msg = '{0} values are different ({1} %)'.format(obj, np.round(diff, 5)) + raise_assert_detail(obj, msg, left, right) + elif is_list_like(left): + msg = "First object is iterable, second isn't" + raise_assert_detail(obj, msg, left, right) + else: + msg = "Second object is iterable, first isn't" + raise_assert_detail(obj, msg, left, right) + raise AssertionError(err_msg) @@ -651,17 +805,62 @@ def assert_series_equal(left, right, check_dtype=True, check_index_type=False, check_series_type=False, check_less_precise=False, - check_exact=False, check_names=True, - check_datetimelike_compat=False): + check_exact=False, + check_datetimelike_compat=False, + obj='Series'): + + """Check that left and right Series are equal. 
+
+    Parameters
+    ----------
+    left : Series
+    right : Series
+    check_dtype : bool, default True
+        Whether to check the Series dtype is identical.
+    check_index_type : bool, default False
+        Whether to check the Index class, dtype and inferred_type are identical.
+    check_series_type : bool, default False
+        Whether to check the Series class is identical.
+    check_less_precise : bool, default False
+        Specify comparison precision. Only used when check_exact is False.
+        5 digits (False) or 3 digits (True) after decimal points are compared.
+    check_exact : bool, default False
+        Whether to compare number exactly.
+    check_names : bool, default True
+        Whether to check the Series and Index names attribute.
+    check_datetimelike_compat : bool, default False
+        Compare datetime-like which is comparable ignoring dtype.
+    obj : str, default 'Series'
+        Specify object name being compared, internally used to show appropriate
+        assertion message
+    """
+
+    # instance validation
+    assertIsInstance(left, Series, '[Series] ')
+    assertIsInstance(right, Series, '[Series] ')
+
     if check_series_type:
         assertIsInstance(left, type(right))
+
+    # length comparison
+    if len(left) != len(right):
+        raise_assert_detail(obj, 'Series length are different',
+                            '{0}, {1}'.format(len(left), left.index),
+                            '{0}, {1}'.format(len(right), right.index))
+
+    # index comparison
+    assert_index_equal(left.index, right.index, exact=check_index_type,
+                       check_names=check_names,
+                       check_less_precise=check_less_precise, check_exact=check_exact,
+                       obj='{0}.index'.format(obj))
+
     if check_dtype:
         assert_attr_equal('dtype', left, right)
+
     if check_exact:
-        if not np.array_equal(left.values, right.values):
-            raise AssertionError('{0} is not equal to {1}.'.format(left.values,
-                                                                   right.values))
+        assert_numpy_array_equal(left.get_values(), right.get_values(),
+                                 obj='{0}'.format(obj))
     elif check_datetimelike_compat:
         # we want to check only if we have compat dtypes
         # e.g. integer and M|m are NOT compat, but we can simply check the values in that case
@@ -675,27 +874,12 @@ def assert_series_equal(left, right, check_dtype=True,
         else:
             assert_numpy_array_equal(left.values, right.values)
     else:
-        assert_almost_equal(left.values, right.values, check_less_precise)
-    if check_less_precise:
-        assert_almost_equal(
-            left.index.values, right.index.values, check_less_precise)
-    else:
-        assert_index_equal(left.index, right.index, check_names=check_names)
-    if check_index_type:
-        for level in range(left.index.nlevels):
-            lindex = left.index.get_level_values(level)
-            rindex = right.index.get_level_values(level)
-            assertIsInstance(lindex, type(rindex))
-            assert_attr_equal('dtype', lindex, rindex)
-            assert_attr_equal('inferred_type', lindex, rindex)
+        assert_almost_equal(left.get_values(), right.get_values(),
+                            check_less_precise, obj='{0}'.format(obj))
+
+    # metadata comparison
     if check_names:
-        if is_number(left.name) and np.isnan(left.name):
-            # Series.name can be np.nan in some test cases
-            assert is_number(right.name) and np.isnan(right.name)
-        elif left.name is pd.NaT:
-            assert right.name is pd.NaT
-        else:
-            assert_attr_equal('name', left, right)
+        assert_attr_equal('name', left, right, obj=obj)


 # This could be refactored to use the NDFrame.equals method
@@ -707,19 +891,69 @@ def assert_frame_equal(left, right, check_dtype=True,
                        check_names=True,
                        by_blocks=False,
                        check_exact=False,
-                       check_datetimelike_compat=False):
+                       check_datetimelike_compat=False,
+                       obj='DataFrame'):
+
+    """Check that left and right DataFrame are equal.
+
+    Parameters
+    ----------
+    left : DataFrame
+    right : DataFrame
+    check_dtype : bool, default True
+        Whether to check the DataFrame dtype is identical.
+    check_index_type : bool, default False
+        Whether to check the Index class, dtype and inferred_type are identical.
+    check_column_type : bool, default False
+        Whether to check the columns class, dtype and inferred_type are identical.
+    check_frame_type : bool, default False
+        Whether to check the DataFrame class is identical.
+    check_less_precise : bool, default False
+        Specify comparison precision. Only used when check_exact is False.
+        5 digits (False) or 3 digits (True) after decimal points are compared.
+    check_names : bool, default True
+        Whether to check the Index names attribute.
+    by_blocks : bool, default False
+        Specify how to compare internal data. If False, compare by columns.
+        If True, compare by blocks.
+    check_exact : bool, default False
+        Whether to compare number exactly.
+    check_datetimelike_compat : bool, default False
+        Compare datetime-like which is comparable ignoring dtype.
+    obj : str, default 'DataFrame'
+        Specify object name being compared, internally used to show appropriate
+        assertion message
+    """
+
+    # instance validation
+    assertIsInstance(left, DataFrame, '[DataFrame] ')
+    assertIsInstance(right, DataFrame, '[DataFrame] ')
+
     if check_frame_type:
         assertIsInstance(left, type(right))

-    assertIsInstance(left, DataFrame)
-    assertIsInstance(right, DataFrame)
-
-    if check_less_precise:
-        if not by_blocks:
-            assert_almost_equal(left.columns, right.columns)
-        assert_almost_equal(left.index, right.index)
-    else:
-        if not by_blocks:
-            assert_index_equal(left.columns, right.columns, check_names=check_names)
+    # shape comparison (row)
+    if left.shape[0] != right.shape[0]:
+        raise_assert_detail(obj, 'DataFrame shape (number of rows) are different',
+                            '{0}, {1}'.format(left.shape[0], left.index),
+                            '{0}, {1}'.format(right.shape[0], right.index))
+
+    # shape comparison (columns)
+    if left.shape[1] != right.shape[1]:
+        raise_assert_detail(obj, 'DataFrame shape (number of columns) are different',
+                            '{0}, {1}'.format(left.shape[1], left.columns),
+                            '{0}, {1}'.format(right.shape[1], right.columns))
+
+    # index comparison
+    assert_index_equal(left.index, right.index, exact=check_index_type,
+                       check_names=check_names,
+                       check_less_precise=check_less_precise, check_exact=check_exact,
+                       obj='{0}.index'.format(obj))
+
+    # column comparison
+    assert_index_equal(left.columns, right.columns, exact=check_column_type,
+                       check_names=check_names,
+                       check_less_precise=check_less_precise, check_exact=check_exact,
+                       obj='{0}.columns'.format(obj))

     # compare by blocks
     if by_blocks:
@@ -728,7 +962,8 @@ def assert_frame_equal(left, right, check_dtype=True,
         for dtype in list(set(list(lblocks.keys()) + list(rblocks.keys()))):
             assert dtype in lblocks
             assert dtype in rblocks
-            assert_frame_equal(lblocks[dtype],rblocks[dtype], check_dtype=check_dtype)
+            assert_frame_equal(lblocks[dtype], rblocks[dtype],
+                               check_dtype=check_dtype, obj='DataFrame.blocks')

     # compare by columns
     else:
@@ -742,22 +977,8 @@ def assert_frame_equal(left, right, check_dtype=True,
                                check_less_precise=check_less_precise,
                                check_exact=check_exact, check_names=check_names,
-                               check_datetimelike_compat=check_datetimelike_compat)
-
-    if check_index_type:
-        for level in range(left.index.nlevels):
-            lindex = left.index.get_level_values(level)
-            rindex = right.index.get_level_values(level)
-            assertIsInstance(lindex, type(rindex))
-            assert_attr_equal('dtype', lindex, rindex)
- if check_column_type:
- assertIsInstance(left.columns, type(right.columns))
- assert_attr_equal('dtype', left.columns, right.columns)
- assert_attr_equal('inferred_type', left.columns, right.columns)
- if check_names:
- assert_attr_equal('names', left.index, right.index)
- assert_attr_equal('names', left.columns, right.columns)
+ check_datetimelike_compat=check_datetimelike_compat,
+ obj='DataFrame.iloc[:, {0}]'.format(i))

 def assert_panelnd_equal(left, right,

From 2f2c5744d895433b2096d884228236b01113123a Mon Sep 17 00:00:00 2001
From: ganego
Date: Mon, 10 Aug 2015 14:49:43 +0200
Subject: [PATCH 06/12] Update install.rst

- Added hint regarding pip install on low-memory machines.
- Added hint about the Python 3 version of pandas in distribution repos.
---
 doc/source/install.rst | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/doc/source/install.rst b/doc/source/install.rst
index aaa39dd383e2e..42cfd95becabb 100644
--- a/doc/source/install.rst
+++ b/doc/source/install.rst
@@ -153,7 +153,8 @@ and can take a few minutes to complete.

 Installing using your Linux distribution's package manager.
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
+The commands in this table will install pandas for Python 2 from your distribution.
+To install pandas for Python 3, you may need to use the package ``python3-pandas``.

 .. csv-table::
 :header: "Distribution", "Status", "Download / Repository Link", "Install method"

From 17917cbf964f7b62918ffb1b1eb4b6e095b61958 Mon Sep 17 00:00:00 2001
From: Gianluca Rossi
Date: Tue, 11 Aug 2015 21:56:14 +0100
Subject: [PATCH 07/12] BUG: Allow 'read_sql_table' to read from views. Solves #10750.
---
 doc/source/whatsnew/v0.17.0.txt | 13 +------------
 pandas/io/tests/test_sql.py | 3 +++
 2 files changed, 4 insertions(+), 12 deletions(-)

diff --git a/doc/source/whatsnew/v0.17.0.txt b/doc/source/whatsnew/v0.17.0.txt
index 70d616ca72c1b..142bb6b4e8f9e 100644
--- a/doc/source/whatsnew/v0.17.0.txt
+++ b/doc/source/whatsnew/v0.17.0.txt
@@ -142,15 +142,8 @@ Other enhancements

- ``pd.merge`` will now allow duplicate column names if they are not merged upon (:issue:`10639`).
- ``pd.pivot`` will now allow passing index as ``None`` (:issue:`3962`).

-- ``drop_duplicates`` and ``duplicated`` now accept ``keep`` keyword to target first, last, and all duplicates. ``take_last`` keyword is deprecated, see :ref:`deprecations ` (:issue:`6511`, :issue:`8505`)
-
-.. ipython :: python
-
- s = pd.Series(['A', 'B', 'C', 'A', 'B', 'D'])
- s.drop_duplicates()
- s.drop_duplicates(keep='last')
- s.drop_duplicates(keep=False)

+- ``read_sql_table`` will now allow reading from views (:issue:`10750`).

 .. _whatsnew_0170.api:

@@ -529,7 +522,6 @@ Deprecations
 ===================== =================================

- ``Categorical.name`` was deprecated to make ``Categorical`` more ``numpy.ndarray`` like. Use ``Series(cat, name="whatever")`` instead (:issue:`10482`).
-- ``drop_duplicates`` and ``duplicated``'s ``take_last`` keyword was removed in favor of ``keep``. (:issue:`6511`, :issue:`8505`)

 .. _whatsnew_0170.prior_deprecations:

@@ -616,9 +608,6 @@ Bug Fixes

 - Bug in ``read_stata`` when reading a file with a different order set in ``columns`` (:issue:`10757`)

-- Bug in ``Categorical`` may not representing properly when category contains ``tz`` or ``Period`` (:issue:`10713`)
-- Bug in ``Categorical.__iter__`` may not returning correct ``datetime`` and ``Period`` (:issue:`10713`)
-
 - Reading "famafrench" data via ``DataReader`` results in HTTP 404 error because of the website url is changed (:issue:`10591`).

 - Bug in ``read_msgpack`` where DataFrame to decode has duplicate column names (:issue:`9618`)

diff --git a/pandas/io/tests/test_sql.py b/pandas/io/tests/test_sql.py
index 434f8c4b71e85..5ac7f84c6da1f 100644
--- a/pandas/io/tests/test_sql.py
+++ b/pandas/io/tests/test_sql.py
@@ -166,6 +166,7 @@
 'sqlite': """
 CREATE VIEW iris_view AS
 SELECT * FROM iris;
+<<<<<<< HEAD
 """,
 'mysql': """
 CREATE VIEW iris_view AS
 SELECT * FROM iris;
 """,
 'postgresql': """
 CREATE VIEW iris_view AS
 SELECT * FROM iris;
+=======
+>>>>>>> BUG: Add ability to 'read_sql_table' to read views and implement unit test to check behaviour. Closes #10750.
 """
 }
 }

From 5939fa409e4462e6fe9649ec1bf0d29759a3e1cd Mon Sep 17 00:00:00 2001
From: Gianluca Rossi
Date: Tue, 11 Aug 2015 22:27:25 +0100
Subject: [PATCH 08/12] BUG: Allow 'read_sql_table' to read from views. Solves #10750.
---
 doc/source/whatsnew/v0.17.0.txt | 6 ++----
 pandas/io/tests/test_sql.py | 6 ++++++
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/doc/source/whatsnew/v0.17.0.txt b/doc/source/whatsnew/v0.17.0.txt
index 843cc32a8ab9b..e50deb689214a 100644
--- a/doc/source/whatsnew/v0.17.0.txt
+++ b/doc/source/whatsnew/v0.17.0.txt
@@ -143,6 +143,7 @@ Other enhancements

- ``pd.merge`` will now allow duplicate column names if they are not merged upon (:issue:`10639`).
- ``pd.pivot`` will now allow passing index as ``None`` (:issue:`3962`).
+
- ``drop_duplicates`` and ``duplicated`` now accept ``keep`` keyword to target first, last, and all duplicates. ``take_last`` keyword is deprecated, see :ref:`deprecations ` (:issue:`6511`, :issue:`8505`)

 .. ipython :: python

 s = pd.Series(['A', 'B', 'C', 'A', 'B', 'D'])
 s.drop_duplicates()
 s.drop_duplicates(keep='last')
 s.drop_duplicates(keep=False)

+- ``read_sql_table`` will now allow reading from views (:issue:`10750`).

- ``concat`` will now inherit the existing series names (even when some are missing), if new ones are not provided through the ``keys`` argument (:issue:`10698`).

@@ -554,7 +556,6 @@ Deprecations
 ===================== =================================

- ``Categorical.name`` was deprecated to make ``Categorical`` more ``numpy.ndarray`` like. Use ``Series(cat, name="whatever")`` instead (:issue:`10482`).
-- ``drop_duplicates`` and ``duplicated``'s ``take_last`` keyword was removed in favor of ``keep``. (:issue:`6511`, :issue:`8505`)

 .. _whatsnew_0170.prior_deprecations:

@@ -641,9 +642,6 @@ Bug Fixes

 - Bug in ``read_stata`` when reading a file with a different order set in ``columns`` (:issue:`10757`)

-- Bug in ``Categorical`` may not representing properly when category contains ``tz`` or ``Period`` (:issue:`10713`)
-- Bug in ``Categorical.__iter__`` may not returning correct ``datetime`` and ``Period`` (:issue:`10713`)
-
 - Reading "famafrench" data via ``DataReader`` results in HTTP 404 error because of the website url is changed (:issue:`10591`).
- Bug in ``read_msgpack`` where DataFrame to decode has duplicate column names (:issue:`9618`) diff --git a/pandas/io/tests/test_sql.py b/pandas/io/tests/test_sql.py index 859c6d3250121..402a7af9b6c62 100644 --- a/pandas/io/tests/test_sql.py +++ b/pandas/io/tests/test_sql.py @@ -161,6 +161,12 @@ SELECT * FROM iris WHERE "Name"=%(name)s AND "SepalLength"=%(length)s """ + }, + 'create_view': { + 'sqlite': """ + CREATE VIEW iris_view AS + SELECT * FROM iris; + """ } } From 36acb79400ab755b271bc01d630eb2d38f49ea66 Mon Sep 17 00:00:00 2001 From: Gianluca Rossi Date: Tue, 11 Aug 2015 22:46:30 +0100 Subject: [PATCH 09/12] # This is a combination of 5 commits. # The first commit's message is: Merge # This is the 2nd commit message: BUG: Categorical doesn't show tzinfo properly # This is the 3rd commit message: ENH: duplicated and drop_duplicates now accept take=all kw # This is the 4th commit message: TST: make assertion messages more understandable # This is the 5th commit message: Update install.rst - Added hint regarding pip install on low memory machines. - Added hint to python 3 version of pandas from distributon repos. --- doc/source/whatsnew/v0.17.0.txt | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/doc/source/whatsnew/v0.17.0.txt b/doc/source/whatsnew/v0.17.0.txt index 2344f81f291b4..4ac17367c7c2d 100644 --- a/doc/source/whatsnew/v0.17.0.txt +++ b/doc/source/whatsnew/v0.17.0.txt @@ -143,6 +143,15 @@ Other enhancements - ``pd.merge`` will now allow duplicate column names if they are not merged upon (:issue:`10639`). - ``pd.pivot`` will now allow passing index as ``None`` (:issue:`3962`). +- ``drop_duplicates`` and ``duplicated`` now accept ``keep`` keyword to target first, last, and all duplicates. ``take_last`` keyword is deprecated, see :ref:`deprecations ` (:issue:`6511`, :issue:`8505`) + +.. ipython :: python + + s = pd.Series(['A', 'B', 'C', 'A', 'B', 'D']) + s.drop_duplicates() + s.drop_duplicates(keep='last') + s.drop_duplicates(keep=False) + - ``drop_duplicates`` and ``duplicated`` now accept ``keep`` keyword to target first, last, and all duplicates. ``take_last`` keyword is deprecated, see :ref:`deprecations ` (:issue:`6511`, :issue:`8505`) @@ -558,6 +567,7 @@ Deprecations ===================== ================================= - ``Categorical.name`` was deprecated to make ``Categorical`` more ``numpy.ndarray`` like. Use ``Series(cat, name="whatever")`` instead (:issue:`10482`). +- ``drop_duplicates`` and ``duplicated``'s ``take_last`` keyword was removed in favor of ``keep``. (:issue:`6511`, :issue:`8505`) .. _whatsnew_0170.prior_deprecations: @@ -644,6 +654,9 @@ Bug Fixes - Bug in ``read_stata`` when reading a file with a different order set in ``columns`` (:issue:`10757`) +- Bug in ``Categorical`` may not representing properly when category contains ``tz`` or ``Period`` (:issue:`10713`) +- Bug in ``Categorical.__iter__`` may not returning correct ``datetime`` and ``Period`` (:issue:`10713`) + - Reading "famafrench" data via ``DataReader`` results in HTTP 404 error because of the website url is changed (:issue:`10591`). 
- Bug in ``read_msgpack`` where DataFrame to decode has duplicate column names (:issue:`9618`) From 1af51797b4c5b81db57d6f7d1878b4c55dd4bacc Mon Sep 17 00:00:00 2001 From: Gianluca Rossi Date: Tue, 11 Aug 2015 22:49:01 +0100 Subject: [PATCH 10/12] Add whatsnew note --- doc/source/whatsnew/v0.17.0.txt | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/doc/source/whatsnew/v0.17.0.txt b/doc/source/whatsnew/v0.17.0.txt index 4ac17367c7c2d..29895867740ae 100644 --- a/doc/source/whatsnew/v0.17.0.txt +++ b/doc/source/whatsnew/v0.17.0.txt @@ -143,15 +143,8 @@ Other enhancements - ``pd.merge`` will now allow duplicate column names if they are not merged upon (:issue:`10639`). - ``pd.pivot`` will now allow passing index as ``None`` (:issue:`3962`). -- ``drop_duplicates`` and ``duplicated`` now accept ``keep`` keyword to target first, last, and all duplicates. ``take_last`` keyword is deprecated, see :ref:`deprecations ` (:issue:`6511`, :issue:`8505`) - -.. ipython :: python - - s = pd.Series(['A', 'B', 'C', 'A', 'B', 'D']) - s.drop_duplicates() - s.drop_duplicates(keep='last') - s.drop_duplicates(keep=False) +- ``read_sql_table`` will now allow reading from views (:issue:`10750`). - ``drop_duplicates`` and ``duplicated`` now accept ``keep`` keyword to target first, last, and all duplicates. ``take_last`` keyword is deprecated, see :ref:`deprecations ` (:issue:`6511`, :issue:`8505`) @@ -567,7 +560,6 @@ Deprecations ===================== ================================= - ``Categorical.name`` was deprecated to make ``Categorical`` more ``numpy.ndarray`` like. Use ``Series(cat, name="whatever")`` instead (:issue:`10482`). -- ``drop_duplicates`` and ``duplicated``'s ``take_last`` keyword was removed in favor of ``keep``. (:issue:`6511`, :issue:`8505`) .. _whatsnew_0170.prior_deprecations: @@ -654,9 +646,6 @@ Bug Fixes - Bug in ``read_stata`` when reading a file with a different order set in ``columns`` (:issue:`10757`) -- Bug in ``Categorical`` may not representing properly when category contains ``tz`` or ``Period`` (:issue:`10713`) -- Bug in ``Categorical.__iter__`` may not returning correct ``datetime`` and ``Period`` (:issue:`10713`) - - Reading "famafrench" data via ``DataReader`` results in HTTP 404 error because of the website url is changed (:issue:`10591`). 
- Bug in ``read_msgpack`` where DataFrame to decode has duplicate column names (:issue:`9618`)

From 942cec2ab2623403b98c5731aa44ceca58acdea9 Mon Sep 17 00:00:00 2001
From: Gianluca Rossi
Date: Sat, 8 Aug 2015 22:14:34 +0100
Subject: [PATCH 11/12] Add ability to 'read_sql_table' to read views and implement unit test to check behaviour

---
 pandas/io/sql.py | 2 +-
 pandas/io/tests/test_sql.py | 24 ++++++++++++++++++++++++
 2 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/pandas/io/sql.py b/pandas/io/sql.py
index 8eefe4ba98876..b587ec128c016 100644
--- a/pandas/io/sql.py
+++ b/pandas/io/sql.py
@@ -337,7 +337,7 @@ def read_sql_table(table_name, con, schema=None, index_col=None,
 from sqlalchemy.schema import MetaData
 meta = MetaData(con, schema=schema)
 try:
- meta.reflect(only=[table_name])
+ meta.reflect(only=[table_name], views=True)
 except sqlalchemy.exc.InvalidRequestError:
 raise ValueError("Table %s not found" % table_name)

diff --git a/pandas/io/tests/test_sql.py b/pandas/io/tests/test_sql.py
index 859c6d3250121..434f8c4b71e85 100644
--- a/pandas/io/tests/test_sql.py
+++ b/pandas/io/tests/test_sql.py
@@ -161,6 +161,20 @@
 SELECT * FROM iris
 WHERE "Name"=%(name)s AND "SepalLength"=%(length)s
 """
+ },
+ 'create_view': {
+ 'sqlite': """
+ CREATE VIEW iris_view AS
+ SELECT * FROM iris;
+ """,
+ 'mysql': """
+ CREATE VIEW iris_view AS
+ SELECT * FROM iris;
+ """,
+ 'postgresql': """
+ CREATE VIEW iris_view AS
+ SELECT * FROM iris;
+ """
 }
 }

@@ -244,6 +258,10 @@ def _load_iris_data(self):
 for row in r:
 self._get_exec().execute(ins, row)

+ def _load_iris_view(self):
+ self.drop_table('iris_view')
+ self._get_exec().execute(SQL_STRINGS['create_view'][self.flavor])
+
 def _check_iris_loaded_frame(self, iris_frame):
 pytype = iris_frame.dtypes[0].type
 row = iris_frame.iloc[0]

@@ -482,6 +500,7 @@ class _TestSQLApi(PandasSQLTest):
 def setUp(self):
 self.conn = self.connect()
 self._load_iris_data()
+ self._load_iris_view()
 self._load_test1_data()
 self._load_test2_data()
 self._load_test3_data()

@@ -492,6 +511,11 @@ def test_read_sql_iris(self):
 "SELECT * FROM iris", self.conn)
 self._check_iris_loaded_frame(iris_frame)

+ def test_read_sql_view(self):
+ iris_frame = sql.read_sql_query(
+ "SELECT * FROM iris_view", self.conn)
+ self._check_iris_loaded_frame(iris_frame)
+
 def test_legacy_read_frame(self):
 with tm.assert_produces_warning(FutureWarning):
 iris_frame = sql.read_frame(

From 3d342785b470d8d00ca23b4d171787179bca146c Mon Sep 17 00:00:00 2001
From: Gianluca Rossi
Date: Wed, 12 Aug 2015 00:33:50 +0100
Subject: [PATCH 12/12] Add whatsnew note and remove redundant tests

---
 doc/source/whatsnew/v0.17.0.txt | 3 +++
 pandas/io/tests/test_sql.py | 8 --------
 2 files changed, 3 insertions(+), 8 deletions(-)

diff --git a/doc/source/whatsnew/v0.17.0.txt b/doc/source/whatsnew/v0.17.0.txt
index 70d616ca72c1b..0b65d4651d133 100644
--- a/doc/source/whatsnew/v0.17.0.txt
+++ b/doc/source/whatsnew/v0.17.0.txt
@@ -137,6 +137,7 @@ Other enhancements

 - ``.as_blocks`` will now take a ``copy`` optional argument to return a copy of the data, default is to copy (no change in behavior from prior versions), (:issue:`9607`)
 - ``regex`` argument to ``DataFrame.filter`` now handles numeric column names instead of raising ``ValueError`` (:issue:`10384`).
+
 - ``pd.read_stata`` will now read Stata 118 type files. (:issue:`9882`)

 - ``pd.merge`` will now allow duplicate column names if they are not merged upon (:issue:`10639`).
@@ -152,6 +153,8 @@ Other enhancements s.drop_duplicates(keep=False) +- ``read_sql_table`` will now allow reading from views (:issue:`10750`). + .. _whatsnew_0170.api: .. _whatsnew_0170.api_breaking: diff --git a/pandas/io/tests/test_sql.py b/pandas/io/tests/test_sql.py index 434f8c4b71e85..c78d193124b76 100644 --- a/pandas/io/tests/test_sql.py +++ b/pandas/io/tests/test_sql.py @@ -164,14 +164,6 @@ }, 'create_view': { 'sqlite': """ - CREATE VIEW iris_view AS - SELECT * FROM iris; - """, - 'mysql': """ - CREATE VIEW iris_view AS - SELECT * FROM iris; - """, - 'postgresql': """ CREATE VIEW iris_view AS SELECT * FROM iris; """
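
Taken together, this series changes ``read_sql_table`` so that SQL views reflect and read like tables. A minimal sketch of the resulting behaviour, assuming an in-memory SQLite database and the SQLAlchemy 1.x-era ``engine.execute`` call (both illustrative assumptions, not part of the patches):

    import pandas as pd
    from sqlalchemy import create_engine

    # Throwaway in-memory database holding a table and a view over it.
    engine = create_engine('sqlite://')
    pd.DataFrame({'SepalLength': [5.1, 4.9],
                  'Name': ['Iris-setosa', 'Iris-setosa']}).to_sql(
        'iris', engine, index=False)
    engine.execute('CREATE VIEW iris_view AS SELECT * FROM iris')

    # Before the patch, meta.reflect(only=['iris_view']) raised
    # InvalidRequestError, surfaced as "ValueError: Table iris_view not found".
    # With views=True passed to meta.reflect, the view is read like a table.
    frame = pd.read_sql_table('iris_view', engine)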