From 432c67254edc09a3ef456047156c814e4b43fed1 Mon Sep 17 00:00:00 2001
From: jreback
Date: Mon, 29 Apr 2013 12:04:14 -0400
Subject: [PATCH 1/5] BUG: GH3468 Fix assigning a new index to a duplicate
 index in a DataFrame would fail

---
 RELEASE.rst                |  2 ++
 pandas/core/internals.py   | 15 ++++++++++-----
 pandas/tests/test_frame.py | 15 +++++++++++++++
 3 files changed, 27 insertions(+), 5 deletions(-)

diff --git a/RELEASE.rst b/RELEASE.rst
index f3fb98535cb61..e368b70b721ce 100644
--- a/RELEASE.rst
+++ b/RELEASE.rst
@@ -61,6 +61,7 @@ pandas 0.11.1
   - Fix regression in a DataFrame apply with axis=1, objects were not being
     converted back to base dtypes correctly (GH3480_)
   - Fix issue when storing uint dtypes in an HDFStore. (GH3493_)
+  - Fix assigning a new index to a duplicate index in a DataFrame would fail (GH3468_)
 
 .. _GH3164: https://github.com/pydata/pandas/issues/3164
 .. _GH3251: https://github.com/pydata/pandas/issues/3251
@@ -75,6 +76,7 @@ pandas 0.11.1
 .. _GH3455: https://github.com/pydata/pandas/issues/3455
 .. _GH3457: https://github.com/pydata/pandas/issues/3457
 .. _GH3461: https://github.com/pydata/pandas/issues/3461
+.. _GH3468: https://github.com/pydata/pandas/issues/3468
 .. _GH3448: https://github.com/pydata/pandas/issues/3448
 .. _GH3449: https://github.com/pydata/pandas/issues/3449
 .. _GH3493: https://github.com/pydata/pandas/issues/3493
diff --git a/pandas/core/internals.py b/pandas/core/internals.py
index 03cfd18f5afe5..2052b269a8165 100644
--- a/pandas/core/internals.py
+++ b/pandas/core/internals.py
@@ -56,11 +56,16 @@ def _gi(self, arg):
     @property
     def ref_locs(self):
         if self._ref_locs is None:
-            indexer = self.ref_items.get_indexer(self.items)
-            indexer = com._ensure_platform_int(indexer)
-            if (indexer == -1).any():
-                raise AssertionError('Some block items were not in block '
-                                     'ref_items')
+            ri = self.ref_items
+            if ri.is_unique:
+                indexer = ri.get_indexer(self.items)
+                indexer = com._ensure_platform_int(indexer)
+                if (indexer == -1).any():
+                    raise AssertionError('Some block items were not in block '
+                                         'ref_items')
+            else:
+                indexer = np.arange(len(ri))
+
             self._ref_locs = indexer
         return self._ref_locs
diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py
index 7bafed216b9b9..6b69de604818f 100644
--- a/pandas/tests/test_frame.py
+++ b/pandas/tests/test_frame.py
@@ -9201,6 +9201,21 @@ def test_assign_columns(self):
         assert_series_equal(self.frame['C'], frame['baz'])
         assert_series_equal(self.frame['hi'], frame['foo2'])
 
+    def test_assign_columns_with_dups(self):
+
+        # GH 3468 related
+        df = DataFrame([[1,2]], columns=['a','a'])
+        df.columns = ['a','a.1']
+
+        expected = DataFrame([[1,2]], columns=['a','a.1'])
+        assert_frame_equal(df, expected)
+
+        df = DataFrame([[1,2]], columns=['a','a'])
+        df.columns = ['b','b']
+
+        expected = DataFrame([[1,2]], columns=['b','b'])
+        assert_frame_equal(df, expected)
+
     def test_cast_internals(self):
         casted = DataFrame(self.frame._data, dtype=int)
         expected = DataFrame(self.frame._series, dtype=int)
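A quick illustration of what this first patch fixes, as a minimal sketch (assumes a pandas build with the patch applied; it mirrors the new test above):

from pandas import DataFrame

# a frame with duplicate column labels; re-assigning df.columns previously
# raised AssertionError('Some block items were not in block ref_items')
df = DataFrame([[1, 2]], columns=['a', 'a'])
df.columns = ['a', 'a.1']   # with the fix, labels are taken positionally
assert list(df.columns) == ['a', 'a.1']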
From 4c756e207ac8b6fe4411176bda30e089b8b3c9cc Mon Sep 17 00:00:00 2001
From: jreback
Date: Mon, 29 Apr 2013 16:09:03 -0400
Subject: [PATCH 2/5] ENH: support for having duplicative indices across
 blocks (dtypes)

BUG: fix construction of a DataFrame with duplicative indices
---
 RELEASE.rst                    |   6 ++
 pandas/core/internals.py       | 138 +++++++++++++++++++++----------
 pandas/tests/test_frame.py     |  34 +++++++-
 pandas/tests/test_indexing.py  |   7 ++
 pandas/tests/test_internals.py |   2 +-
 5 files changed, 145 insertions(+), 42 deletions(-)

diff --git a/RELEASE.rst b/RELEASE.rst
index e368b70b721ce..feb94053b5f73 100644
--- a/RELEASE.rst
+++ b/RELEASE.rst
@@ -62,6 +62,12 @@ pandas 0.11.1
     to base dtypes correctly (GH3480_)
   - Fix issue when storing uint dtypes in an HDFStore. (GH3493_)
   - Fix assigning a new index to a duplicate index in a DataFrame would fail (GH3468_)
+  - ref_locs support to allow duplicative indices across dtypes (GH3468_)
+  - Non-unique index support clarified (GH3468_)
+
+    - Fix assigning a new index to a duplicate index in a DataFrame would fail
+    - Fix construction of a DataFrame with a duplicate index
+    - ref_locs support to allow duplicative indices across dtypes
 
 .. _GH3164: https://github.com/pydata/pandas/issues/3164
 .. _GH3251: https://github.com/pydata/pandas/issues/3251
diff --git a/pandas/core/internals.py b/pandas/core/internals.py
index 2052b269a8165..5b690869708dd 100644
--- a/pandas/core/internals.py
+++ b/pandas/core/internals.py
@@ -56,15 +56,11 @@ def _gi(self, arg):
     @property
     def ref_locs(self):
         if self._ref_locs is None:
-            ri = self.ref_items
-            if ri.is_unique:
-                indexer = ri.get_indexer(self.items)
-                indexer = com._ensure_platform_int(indexer)
-                if (indexer == -1).any():
-                    raise AssertionError('Some block items were not in block '
-                                         'ref_items')
-            else:
-                indexer = np.arange(len(ri))
+            indexer = self.ref_items.get_indexer(self.items)
+            indexer = com._ensure_platform_int(indexer)
+            if (indexer == -1).any():
+                raise AssertionError('Some block items were not in block '
+                                     'ref_items')
 
             self._ref_locs = indexer
         return self._ref_locs
@@ -884,7 +880,7 @@ class BlockManager(object):
     -----
     This is *not* a public API class
     """
-    __slots__ = ['axes', 'blocks', '_known_consolidated', '_is_consolidated']
+    __slots__ = ['axes', 'blocks', '_known_consolidated', '_is_consolidated', '_ref_locs']
 
     def __init__(self, blocks, axes, do_integrity_check=True):
         self.axes = [_ensure_index(ax) for ax in axes]
@@ -920,11 +916,83 @@ def set_axis(self, axis, value):
         if len(value) != len(cur_axis):
             raise Exception('Length mismatch (%d vs %d)'
                             % (len(value), len(cur_axis)))
+
         self.axes[axis] = value
 
         if axis == 0:
-            for block in self.blocks:
-                block.set_ref_items(self.items, maybe_rename=True)
+
+            # unique, we can take
+            if cur_axis.is_unique:
+                for block in self.blocks:
+                    block.set_ref_items(self.items, maybe_rename=True)
+
+            # compute a duplicate indexer that we can use to take
+            # the new items from ref_items (in place of _ref_items)
+            else:
+                self.set_ref_locs(cur_axis)
+                for block in self.blocks:
+                    block.set_ref_items(self.items, maybe_rename=True)
+
+    def set_ref_locs(self, labels = None):
+        # if we have a non-unique index on this axis, set the indexers
+        # we need to set an absolute indexer for the blocks
+        # return the indexer if we are not unique
+        if labels is None:
+            labels = self.items
+
+        if labels.is_unique:
+            return None
+
+        #### THIS IS POTENTIALLY VERY SLOW #####
+
+        # if we are already computed, then we are done
+        if getattr(self,'_ref_locs',None) is not None:
+            return self._ref_locs
+
+        blocks = self.blocks
+
+        # initialize
+        blockmap = dict()
+        for b in blocks:
+            arr = np.empty(len(b.items),dtype='int64')
+            arr.fill(-1)
+            b._ref_locs = arr
+
+            # add this block to the blockmap for each
+            # of the items in the block
+            for item in b.items:
+                if item not in blockmap:
+                    blockmap[item] = []
+                blockmap[item].append(b)
+
+        rl = np.empty(len(labels),dtype=object)
+        for i, item in enumerate(labels.values):
+
+            try:
+                block = blockmap[item].pop(0)
+            except:
+                raise Exception("not enough items in set_ref_locs")
+
+            indexer = np.arange(len(block.items))
+            mask = (block.items == item) & (block._ref_locs == -1)
+            if not mask.any():
+
+                # this case will catch a comparison of an index of tuples
+                mask = np.empty(len(block.items),dtype=bool)
+                mask.fill(False)
+                for j, (bitem, brl) in enumerate(zip(block.items,block._ref_locs)):
+                    mask[j] = bitem == item and brl == -1
+
+            indices = indexer[mask]
+            if len(indices):
+                idx = indices[0]
+            else:
+                raise Exception("already set too many items in set_ref_locs")
+
+            block._ref_locs[idx] = i
+            rl[i] = (block,idx)
+
+        self._ref_locs = rl
+        return rl
 
     # make items read only for now
     def _get_items(self):
@@ -1392,26 +1460,11 @@ def iget(self, i):
         item = self.items[i]
         if self.items.is_unique:
             return self.get(item)
-        else:
-            # ugh
-            try:
-                inds, = (self.items == item).nonzero()
-            except AttributeError:  # MultiIndex
-                inds, = self.items.map(lambda x: x == item).nonzero()
-
-            _, block = self._find_block(item)
-
-            try:
-                binds, = (block.items == item).nonzero()
-            except AttributeError:  # MultiIndex
-                binds, = block.items.map(lambda x: x == item).nonzero()
-
-            for j, (k, b) in enumerate(zip(inds, binds)):
-                if i == k:
-                    return block.values[b]
-
-            raise Exception('Cannot have duplicate column names '
-                            'split across dtypes')
+
+        # compute the duplicative indexer if needed
+        ref_locs = self.set_ref_locs()
+        b, loc = ref_locs[i]
+        return b.values[loc]
 
     def get_scalar(self, tup):
         """
@@ -1587,6 +1640,8 @@ def _reindex_indexer_items(self, new_items, indexer, fill_value):
         # keep track of what items aren't found anywhere
         mask = np.zeros(len(item_order), dtype=bool)
 
+        new_axes = [new_items] + self.axes[1:]
+
         new_blocks = []
         for blk in self.blocks:
             blk_indexer = blk.items.get_indexer(item_order)
@@ -1610,7 +1665,7 @@ def _reindex_indexer_items(self, new_items, indexer, fill_value):
             new_blocks.append(na_block)
 
         new_blocks = _consolidate(new_blocks, new_items)
-        return BlockManager(new_blocks, [new_items] + self.axes[1:])
+        return BlockManager(new_blocks, new_axes)
 
     def reindex_items(self, new_items, copy=True, fill_value=np.nan):
         """
@@ -1624,6 +1679,7 @@ def reindex_items(self, new_items, copy=True, fill_value=np.nan):
 
         # TODO: this part could be faster (!)
         new_items, indexer = self.items.reindex(new_items)
+        new_axes = [new_items] + self.axes[1:]
 
         # could have some pathological (MultiIndex) issues here
         new_blocks = []
@@ -1648,7 +1704,7 @@ def reindex_items(self, new_items, copy=True, fill_value=np.nan):
             new_blocks.append(na_block)
 
         new_blocks = _consolidate(new_blocks, new_items)
-        return BlockManager(new_blocks, [new_items] + self.axes[1:])
+        return BlockManager(new_blocks, new_axes)
 
     def _make_na_block(self, items, ref_items, fill_value=np.nan):
         # TODO: infer dtypes other than float64 from fill_value
@@ -1690,11 +1746,11 @@ def merge(self, other, lsuffix=None, rsuffix=None):
         this, other = self._maybe_rename_join(other, lsuffix, rsuffix)
 
         cons_items = this.items + other.items
-        consolidated = _consolidate(this.blocks + other.blocks, cons_items)
-
         new_axes = list(this.axes)
         new_axes[0] = cons_items
 
+        consolidated = _consolidate(this.blocks + other.blocks, cons_items)
+
         return BlockManager(consolidated, new_axes)
 
     def _maybe_rename_join(self, other, lsuffix, rsuffix, copydata=True):
@@ -1907,7 +1963,6 @@ def form_blocks(arrays, names, axes):
             na_block = make_block(block_values, extra_items, items)
             blocks.append(na_block)
 
-    blocks = _consolidate(blocks, items)
 
     return blocks
 
@@ -1958,9 +2013,6 @@ def _shape_compat(x):
 
     names, arrays = zip(*tuples)
 
-    # index may box values
-    items = ref_items[ref_items.isin(names)]
-
     first = arrays[0]
     shape = (len(arrays),) + _shape_compat(first)
 
@@ -1968,6 +2020,14 @@ def _shape_compat(x):
     for i, arr in enumerate(arrays):
         stacked[i] = _asarray_compat(arr)
 
+    # index may box values
+    if ref_items.is_unique:
+        items = ref_items[ref_items.isin(names)]
+    else:
+        items = _ensure_index([ n for n in names if n in ref_items ])
+        if len(items) != len(stacked):
+            raise Exception("invalid names passed _stack_arrays")
+
     return items, stacked
diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py
index 6b69de604818f..ee409c4a83256 100644
--- a/pandas/tests/test_frame.py
+++ b/pandas/tests/test_frame.py
@@ -9204,18 +9204,48 @@ def test_assign_columns(self):
     def test_assign_columns_with_dups(self):
 
         # GH 3468 related
+
+        # basic
         df = DataFrame([[1,2]], columns=['a','a'])
         df.columns = ['a','a.1']
-
+        str(df)
         expected = DataFrame([[1,2]], columns=['a','a.1'])
         assert_frame_equal(df, expected)
 
+        df = DataFrame([[1,2,3]], columns=['b','a','a'])
+        df.columns = ['b','a','a.1']
+        str(df)
+        expected = DataFrame([[1,2,3]], columns=['b','a','a.1'])
+        assert_frame_equal(df, expected)
+
+        # with a dup index
         df = DataFrame([[1,2]], columns=['a','a'])
         df.columns = ['b','b']
-
+        str(df)
         expected = DataFrame([[1,2]], columns=['b','b'])
         assert_frame_equal(df, expected)
 
+        # multi-dtype
+        df = DataFrame([[1,2,1.,2.,3.,'foo','bar']], columns=['a','a','b','b','d','c','c'])
+        df.columns = list('ABCDEFG')
+        str(df)
+        expected = DataFrame([[1,2,1.,2.,3.,'foo','bar']], columns=list('ABCDEFG'))
+        assert_frame_equal(df, expected)
+
+        # this is an error because we cannot disambiguate the dup columns
+        self.assertRaises(Exception, lambda: DataFrame([[1,2,'foo','bar']], columns=['a','a','a','a']))
+
+        # dups across blocks
+        df_float = DataFrame(np.random.randn(10, 3),dtype='float64')
+        df_int = DataFrame(np.random.randn(10, 3),dtype='int64')
+        df_bool = DataFrame(True,index=df_float.index,columns=df_float.columns)
+        df_object = DataFrame('foo',index=df_float.index,columns=df_float.columns)
+        df_dt = DataFrame(Timestamp('20010101'),index=df_float.index,columns=df_float.columns)
+        df = pan.concat([ df_float, df_int, df_bool, df_object, df_dt ], axis=1)
+
+        result = df._data.set_ref_locs()
+        self.assert_(len(result) == len(df.columns))
+
     def test_cast_internals(self):
         casted = DataFrame(self.frame._data, dtype=int)
         expected = DataFrame(self.frame._series, dtype=int)
diff --git a/pandas/tests/test_indexing.py b/pandas/tests/test_indexing.py
index 86cd0ef524b35..8e1ea569973a6 100644
--- a/pandas/tests/test_indexing.py
+++ b/pandas/tests/test_indexing.py
@@ -772,6 +772,13 @@ def test_dups_fancy_indexing(self):
         expected = Index(['b','a','a'])
         self.assert_(result.equals(expected))
 
+        # across dtypes
+        df = DataFrame([[1,2,1.,2.,3.,'foo','bar']], columns=list('aaaaaaa'))
+        result = DataFrame([[1,2,1.,2.,3.,'foo','bar']])
+        result.columns = list('aaaaaaa')
+        assert_frame_equal(df,result)
+
+
 if __name__ == '__main__':
     import nose
     nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
diff --git a/pandas/tests/test_internals.py b/pandas/tests/test_internals.py
index eec5f5632d36b..e25bd0de769a7 100644
--- a/pandas/tests/test_internals.py
+++ b/pandas/tests/test_internals.py
@@ -268,7 +268,7 @@ def test_duplicate_item_failure(self):
             b.ref_items = items
 
         mgr = BlockManager(blocks, [items, np.arange(N)])
-        self.assertRaises(Exception, mgr.iget, 1)
+        mgr.iget(1)
 
     def test_contains(self):
         self.assert_('a' in self.mgr)
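A sketch of what patch 2 enables end-to-end (assumes the patched build; `pd.concat` stands in for the tests' `pan.concat` alias):

import numpy as np
import pandas as pd

# duplicate labels that live in *different* blocks (float, int, object);
# positional access previously raised
# 'Cannot have duplicate column names split across dtypes'
df = pd.concat([pd.DataFrame({'a': np.arange(3.0)}),       # float64 block
                pd.DataFrame({'a': np.arange(3)}),         # int64 block
                pd.DataFrame({'a': ['x', 'y', 'z']})],     # object block
               axis=1)

# iget/iloc now resolve each position through the (block, offset) ref_locs map
for i in range(len(df.columns)):
    df.iloc[:, i]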
From b4677c195f337224cd960fcb2d856ca122a98b5c Mon Sep 17 00:00:00 2001
From: jreback
Date: Tue, 30 Apr 2013 13:13:30 -0400
Subject: [PATCH 3/5] BUG: enabled applymap to work (and updated
 internals/convert to use iget) when using a non-unique index (GH2786 for
 the warning and GH3230 for applymap)

TST: test for GH2194 (which is fixed)
---
 RELEASE.rst                |  6 ++++++
 pandas/core/frame.py       |  3 ---
 pandas/core/internals.py   | 25 +++++++++++++------------
 pandas/tests/test_frame.py | 28 +++++++++++++++++++++-------
 4 files changed, 40 insertions(+), 22 deletions(-)

diff --git a/RELEASE.rst b/RELEASE.rst
index feb94053b5f73..38298fde12ff0 100644
--- a/RELEASE.rst
+++ b/RELEASE.rst
@@ -68,8 +68,14 @@ pandas 0.11.1
     - Fix assigning a new index to a duplicate index in a DataFrame would fail
     - Fix construction of a DataFrame with a duplicate index
     - ref_locs support to allow duplicative indices across dtypes
+      (GH2194_)
+  - applymap on a DataFrame with a non-unique index now works
+    (removed warning) (GH2786_), and fix (GH3230_)
 
 .. _GH3164: https://github.com/pydata/pandas/issues/3164
+.. _GH2786: https://github.com/pydata/pandas/issues/2786
+.. _GH2194: https://github.com/pydata/pandas/issues/2194
+.. _GH3230: https://github.com/pydata/pandas/issues/3230
 .. _GH3251: https://github.com/pydata/pandas/issues/3251
 .. _GH3379: https://github.com/pydata/pandas/issues/3379
 .. _GH3480: https://github.com/pydata/pandas/issues/3480
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 2cb7608c7aba6..8bfdee3b75170 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -4261,9 +4261,6 @@ def infer(x):
             if com.is_datetime64_dtype(x):
                 x = lib.map_infer(x, lib.Timestamp)
             return lib.map_infer(x, func)
-        #GH2786
-        if not self.columns.is_unique:
-            raise ValueError("applymap does not support dataframes having duplicate column labels")
         return self.apply(infer)
 
     #----------------------------------------------------------------------
diff --git a/pandas/core/internals.py b/pandas/core/internals.py
index 5b690869708dd..c874b061dd63d 100644
--- a/pandas/core/internals.py
+++ b/pandas/core/internals.py
@@ -165,6 +165,9 @@ def get(self, item):
         loc = self.items.get_loc(item)
         return self.values[loc]
 
+    def iget(self, i):
+        return self.values[i]
+
     def set(self, item, value):
         """
         Modify Block in-place with new item value
@@ -711,7 +714,7 @@ def convert(self, convert_dates = True, convert_numeric = True, copy = True):
         # attempt to create new type blocks
         blocks = []
         for i, c in enumerate(self.items):
-            values = self.get(c)
+            values = self.iget(i)
 
             values = com._possibly_convert_objects(values, convert_dates=convert_dates, convert_numeric=convert_numeric)
             values = _block_shape(values)
@@ -920,17 +923,14 @@ def set_axis(self, axis, value):
 
         self.axes[axis] = value
 
         if axis == 0:
 
-            # unique, we can take
-            if cur_axis.is_unique:
-                for block in self.blocks:
-                    block.set_ref_items(self.items, maybe_rename=True)
+            # we have a non-unique index, so set up the ref_locs
+            if not cur_axis.is_unique:
+                self.set_ref_locs(cur_axis)
 
-            # compute a duplicate indexer that we can use to take
-            # the new items from ref_items (in place of _ref_items)
-            else:
-                self.set_ref_locs(cur_axis)
-                for block in self.blocks:
-                    block.set_ref_items(self.items, maybe_rename=True)
+            # take via ref_locs
+            for block in self.blocks:
+                block.set_ref_items(self.items, maybe_rename=True)
 
     def set_ref_locs(self, labels = None):
         # if we have a non-unique index on this axis, set the indexers
@@ -945,8 +945,9 @@ def set_ref_locs(self, labels = None):
         #### THIS IS POTENTIALLY VERY SLOW #####
 
         # if we are already computed, then we are done
-        if getattr(self,'_ref_locs',None) is not None:
-            return self._ref_locs
+        rl = getattr(self,'_ref_locs',None)
+        if rl is not None:
+            return rl
 
         blocks = self.blocks
 
diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py
index ee409c4a83256..cb3799c28d0cf 100644
--- a/pandas/tests/test_frame.py
+++ b/pandas/tests/test_frame.py
@@ -7492,12 +7492,15 @@ def test_applymap(self):
         self.assert_(result.dtypes[0] == object)
 
         # GH2786
-        df = DataFrame(np.random.random((3,4)))
-        df.columns = ['a','a','a','a']
-        try:
-            df.applymap(str)
-        except ValueError as e:
-            self.assertTrue("support" in str(e))
+        df = DataFrame(np.random.random((3,4)))
+        df2 = df.copy()
+        cols = ['a','a','a','a']
+        df.columns = cols
+
+        expected = df2.applymap(str)
+        expected.columns = cols
+        result = df.applymap(str)
+        assert_frame_equal(result,expected)
 
     def test_filter(self):
         # items
@@ -9201,7 +9204,7 @@ def test_assign_columns(self):
         assert_series_equal(self.frame['C'], frame['baz'])
         assert_series_equal(self.frame['hi'], frame['foo2'])
 
-    def test_assign_columns_with_dups(self):
+    def test_columns_with_dups(self):
 
         # GH 3468 related
 
@@ -9246,6 +9249,17 @@ def test_columns_with_dups(self):
         result = df._data.set_ref_locs()
         self.assert_(len(result) == len(df.columns))
 
+        # testing iget
+        for i in range(len(df.columns)):
+            df.iloc[:,i]
+
+        # dup columns across dtype GH 2079/2194
+        vals = [[1, -1, 2.], [2, -2, 3.]]
+        rs = DataFrame(vals, columns=['A', 'A', 'B'])
+        xp = DataFrame(vals)
+        xp.columns = ['A', 'A', 'B']
+        assert_frame_equal(rs, xp)
+
     def test_cast_internals(self):
         casted = DataFrame(self.frame._data, dtype=int)
         expected = DataFrame(self.frame._series, dtype=int)
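What patch 3 unlocks, as a short sketch (assumes the patched build; this is essentially the rewritten GH2786 test, which previously expected a ValueError):

import numpy as np
from pandas import DataFrame

df = DataFrame(np.random.random((3, 4)))
df.columns = ['a', 'a', 'a', 'a']

# previously: ValueError("applymap does not support dataframes having
# duplicate column labels"); now each cell is mapped positionally, since
# convert() walks blocks by position (iget) instead of by label (get)
result = df.applymap(str)
assert (result.dtypes == object).all()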
From b8382a3ca71f1c06d453b909005024a0ff7cab93 Mon Sep 17 00:00:00 2001
From: jreback
Date: Wed, 1 May 2013 20:23:43 -0400
Subject: [PATCH 4/5] BUG: GH3495 change core/format/CSVFormatter.save to
 allow generic way of dealing with columns duplicate or not

---
 RELEASE.rst                   |  10 +-
 pandas/core/format.py         |  35 ++---
 pandas/core/internals.py      | 232 ++++++++++++++++++++++------------
 pandas/tests/test_frame.py    |  34 +++--
 pandas/tests/test_indexing.py |   6 +
 5 files changed, 198 insertions(+), 119 deletions(-)

diff --git a/RELEASE.rst b/RELEASE.rst
index 38298fde12ff0..1a86ac02b2f7e 100644
--- a/RELEASE.rst
+++ b/RELEASE.rst
@@ -61,16 +61,15 @@ pandas 0.11.1
   - Fix regression in a DataFrame apply with axis=1, objects were not being
     converted back to base dtypes correctly (GH3480_)
   - Fix issue when storing uint dtypes in an HDFStore. (GH3493_)
-  - Fix assigning a new index to a duplicate index in a DataFrame would fail (GH3468_)
-  - ref_locs support to allow duplicative indices across dtypes (GH3468_)
   - Non-unique index support clarified (GH3468_)
 
-    - Fix assigning a new index to a duplicate index in a DataFrame would fail
+    - Fix assigning a new index to a duplicate index in a DataFrame would fail (GH3468_)
     - Fix construction of a DataFrame with a duplicate index
-    - ref_locs support to allow duplicative indices across dtypes
-      (GH2194_)
-  - applymap on a DataFrame with a non-unique index now works
-    (removed warning) (GH2786_), and fix (GH3230_)
+    - ref_locs support to allow duplicative indices across dtypes,
+      allows iget support to always find the index (even across dtypes) (GH2194_)
+    - applymap on a DataFrame with a non-unique index now works
+      (removed warning) (GH2786_), and fix (GH3230_)
+    - Fix to_csv to handle non-unique columns (GH3495_)
 
 .. _GH3164: https://github.com/pydata/pandas/issues/3164
 .. _GH2786: https://github.com/pydata/pandas/issues/2786
@@ -91,6 +90,7 @@ pandas 0.11.1
 .. _GH3468: https://github.com/pydata/pandas/issues/3468
 .. _GH3448: https://github.com/pydata/pandas/issues/3448
 .. _GH3449: https://github.com/pydata/pandas/issues/3449
+.. _GH3495: https://github.com/pydata/pandas/issues/3495
 .. _GH3493: https://github.com/pydata/pandas/issues/3493
diff --git a/pandas/core/format.py b/pandas/core/format.py
index 5b68b26a41b77..fa2135bb4310c 100644
--- a/pandas/core/format.py
+++ b/pandas/core/format.py
@@ -820,21 +820,7 @@ def __init__(self, obj, path_or_buf, sep=",", na_rep='', float_format=None,
         self.blocks = self.obj._data.blocks
         ncols = sum(len(b.items) for b in self.blocks)
         self.data = [None] * ncols
-
-        if self.obj.columns.is_unique:
-            self.colname_map = dict((k,i) for i,k in enumerate(self.obj.columns))
-        else:
-            ks = [set(x.items) for x in self.blocks]
-            u = len(reduce(lambda a,x: a.union(x),ks,set()))
-            t = sum(map(len,ks))
-            if u != t:
-                if len(set(self.cols)) != len(self.cols):
-                    raise NotImplementedError("duplicate columns with differing dtypes are unsupported")
-                else:
-                    # if columns are not unique and we acces this,
-                    # we're doing it wrong
-                    pass
+        self.column_map = self.obj._data.get_items_map()
 
         if chunksize is None:
             chunksize = (100000/ (len(self.cols) or 1)) or 1
@@ -1034,18 +1020,13 @@ def _save_chunk(self, start_i, end_i):
 
         # create the data for a chunk
         slicer = slice(start_i,end_i)
-        if self.obj.columns.is_unique:
-            for i in range(len(self.blocks)):
-                b = self.blocks[i]
-                d = b.to_native_types(slicer=slicer, na_rep=self.na_rep, float_format=self.float_format)
-                for j, k in enumerate(b.items):
-                    # self.data is a preallocated list
-                    self.data[self.colname_map[k]] = d[j]
-        else:
-            # self.obj should contain a proper view of the dataframes
-            # with the specified ordering of cols if cols was specified
-            for i in range(len(self.obj.columns)):
-                self.data[i] = self.obj.icol(i).values[slicer].tolist()
+        for i in range(len(self.blocks)):
+            b = self.blocks[i]
+            d = b.to_native_types(slicer=slicer, na_rep=self.na_rep, float_format=self.float_format)
+            for j, item in enumerate(b.items):
+
+                # self.data is a preallocated list
+                self.data[self.column_map[b][j]] = d[j]
 
         ix = data_index.to_native_types(slicer=slicer, na_rep=self.na_rep, float_format=self.float_format)
 
diff --git a/pandas/core/internals.py b/pandas/core/internals.py
index c874b061dd63d..5c0f9253beb62 100644
--- a/pandas/core/internals.py
+++ b/pandas/core/internals.py
@@ -65,6 +65,11 @@ def ref_locs(self):
             self._ref_locs = indexer
         return self._ref_locs
 
+    def set_ref_locs(self, placement):
+        """ explicitly set the ref_locs indexer, only necessary for duplicate indices """
+        if placement is not None:
+            self._ref_locs = np.array(placement,dtype='int64')
+
     def set_ref_items(self, ref_items, maybe_rename=True):
         """
         If maybe_rename=True, need to set the items for this guy
@@ -883,7 +888,7 @@ class BlockManager(object):
     -----
     This is *not* a public API class
     """
-    __slots__ = ['axes', 'blocks', '_known_consolidated', '_is_consolidated', '_ref_locs']
+    __slots__ = ['axes', 'blocks', '_known_consolidated', '_is_consolidated', '_ref_locs', '_items_map']
 
     def __init__(self, blocks, axes, do_integrity_check=True):
         self.axes = [_ensure_index(ax) for ax in axes]
@@ -901,6 +906,10 @@ def __init__(self, blocks, axes, do_integrity_check=True):
 
         self._consolidate_check()
 
+        # we have a duplicate items index, set up the block maps
+        if not self.items.is_unique:
+            self._set_ref_locs(do_refs=True)
+
     @classmethod
     def make_empty(self):
         return BlockManager([], [[], []])
@@ -924,76 +933,135 @@ def set_axis(self, axis, value):
 
         if axis == 0:
 
-            # we have a non-unique index, so set up the ref_locs
-            if not cur_axis.is_unique:
-                self.set_ref_locs(cur_axis)
+            # set/reset ref_locs based on the current index
+            # and map the new index if needed
+            self._set_ref_locs(labels=cur_axis)
 
             # take via ref_locs
             for block in self.blocks:
                 block.set_ref_items(self.items, maybe_rename=True)
 
-    def set_ref_locs(self, labels = None):
-        # if we have a non-unique index on this axis, set the indexers
-        # we need to set an absolute indexer for the blocks
-        # return the indexer if we are not unique
+            # set/reset ref_locs based on the new index
+            self._set_ref_locs(labels=value, do_refs=True)
+
+    def _set_ref_locs(self, labels=None, do_refs=False):
+        """
+        if we have a non-unique index on this axis, set the indexers
+        we need to set an absolute indexer for the blocks
+        return the indexer if we are not unique
+
+        labels : the (new) labels for this manager
+        do_refs : boolean, whether to reset the labels (on a 1-1 mapping)
+
+        """
+
+        im = None
         if labels is None:
             labels = self.items
+        else:
+            _ensure_index(labels)
 
-        if labels.is_unique:
-            return None
+        # we are unique, and coming from a unique
+        if labels.is_unique and not do_refs:
 
-        #### THIS IS POTENTIALLY VERY SLOW #####
+            # reset our ref locs
+            self._ref_locs = None
+            for b in self.blocks:
+                b._ref_locs = None
 
-        # if we are already computed, then we are done
-        rl = getattr(self,'_ref_locs',None)
-        if rl is not None:
-            return rl
+            return None
 
-        blocks = self.blocks
+        # we are going to a non-unique index
+        # we have ref_locs on the block at this point
+        # or if ref_locs are not set, then we must assume a block
+        # ordering
+        if not labels.is_unique and do_refs:
+
+            # create the items map
+            im = getattr(self,'_items_map',None)
+            if im is None:
+
+                im = dict()
+
+                def maybe_create_block(block):
+                    try:
+                        return im[block]
+                    except:
+                        im[block] = l = [ None ] * len(block.items)
+                        return l
+
+                count_items = 0
+                for block in self.blocks:
+
+                    # if we have a duplicate index but
+                    # _ref_locs have not been set....then
+                    # have to assume ordered blocks are passed
+                    num_items = len(block.items)
+                    try:
+                        rl = block.ref_locs
+                    except:
+                        rl = np.arange(num_items) + count_items
+
+                    m = maybe_create_block(block)
+                    for i, item in enumerate(block.items):
+                        m[i] = rl[i]
+                    count_items += num_items
+
+                self._items_map = im
+
+            # create the _ref_loc map here
+            rl = np.empty(len(labels),dtype=object)
+            for block, items in im.items():
+                for i, loc in enumerate(items):
+                    rl[loc] = (block,i)
+            self._ref_locs = rl
+            return rl
 
-        # initialize
-        blockmap = dict()
-        for b in blocks:
-            arr = np.empty(len(b.items),dtype='int64')
-            arr.fill(-1)
-            b._ref_locs = arr
+        # return our cached _ref_locs (or will compute again
+        # when we recreate the block manager if needed)
+        return getattr(self,'_ref_locs',None)
 
-            # add this block to the blockmap for each
-            # of the items in the block
-            for item in b.items:
-                if item not in blockmap:
-                    blockmap[item] = []
-                blockmap[item].append(b)
+    def get_items_map(self):
+        """
+        return an inverted ref_loc map for an item index
+        block -> item (in that block) location -> column location
+        """
 
-        rl = np.empty(len(labels),dtype=object)
-        for i, item in enumerate(labels.values):
+        # cache check
+        im = getattr(self,'_items_map',None)
+        if im is not None:
+            return im
+
+        im = dict()
+        rl = self._set_ref_locs()
 
+        def maybe_create_block(block):
             try:
-                block = blockmap[item].pop(0)
+                return im[block]
             except:
-                raise Exception("not enough items in set_ref_locs")
+                im[block] = l = [ None ] * len(block.items)
+                return l
 
-            indexer = np.arange(len(block.items))
-            mask = (block.items == item) & (block._ref_locs == -1)
-            if not mask.any():
+        # we have a non-duplicative index
+        if rl is None:
 
-                # this case will catch a comparison of an index of tuples
-                mask = np.empty(len(block.items),dtype=bool)
-                mask.fill(False)
-                for j, (bitem, brl) in enumerate(zip(block.items,block._ref_locs)):
-                    mask[j] = bitem == item and brl == -1
+            axis = self.axes[0]
+            for block in self.blocks:
 
-            indices = indexer[mask]
-            if len(indices):
-                idx = indices[0]
-            else:
-                raise Exception("already set too many items in set_ref_locs")
+                m = maybe_create_block(block)
+                for i, item in enumerate(block.items):
+                    m[i] = axis.get_loc(item)
 
-            block._ref_locs[idx] = i
-            rl[i] = (block,idx)
+        # use the ref_locs to construct the map
+        else:
 
-        self._ref_locs = rl
-        return rl
+            for i, (block, idx) in enumerate(rl):
+
+                m = maybe_create_block(block)
+                m[idx] = i
+
+        self._items_map = im
+        return im
 
     # make items read only for now
     def _get_items(self):
@@ -1259,13 +1327,16 @@ def get_slice(self, slobj, axis=0, raise_on_error=False):
                                   new_items,
                                   klass=blk.__class__,
                                   fastpath=True)
+                newb.set_ref_locs(blk._ref_locs)
                 new_blocks = [newb]
             else:
                 return self.reindex_items(new_items)
         else:
             new_blocks = self._slice_blocks(slobj, axis)
 
-        return BlockManager(new_blocks, new_axes, do_integrity_check=False)
+        bm = BlockManager(new_blocks, new_axes, do_integrity_check=False)
+        bm._consolidate_inplace()
+        return bm
 
     def _slice_blocks(self, slobj, axis):
         new_blocks = []
@@ -1280,6 +1351,7 @@ def _slice_blocks(self, slobj, axis):
                               block.ref_items,
                               klass=block.__class__,
                               fastpath=True)
+            newb.set_ref_locs(block._ref_locs)
             new_blocks.append(newb)
 
         return new_blocks
@@ -1463,9 +1535,9 @@ def iget(self, i):
             return self.get(item)
 
         # compute the duplicative indexer if needed
-        ref_locs = self.set_ref_locs()
+        ref_locs = self._set_ref_locs()
         b, loc = ref_locs[i]
-        return b.values[loc]
+        return b.iget(loc)
 
     def get_scalar(self, tup):
         """
@@ -1904,54 +1976,55 @@ def form_blocks(arrays, names, axes):
     bool_items = []
     object_items = []
     datetime_items = []
-    for k, v in zip(names, arrays):
+    for i, (k, v) in enumerate(zip(names, arrays)):
         if issubclass(v.dtype.type, np.floating):
-            float_items.append((k, v))
+            float_items.append((i, k, v))
         elif issubclass(v.dtype.type, np.complexfloating):
-            complex_items.append((k, v))
+            complex_items.append((i, k, v))
         elif issubclass(v.dtype.type, np.datetime64):
             if v.dtype != _NS_DTYPE:
                 v = tslib.cast_to_nanoseconds(v)
             if hasattr(v, 'tz') and v.tz is not None:
-                object_items.append((k, v))
+                object_items.append((i, k, v))
             else:
-                datetime_items.append((k, v))
+                datetime_items.append((i, k, v))
         elif issubclass(v.dtype.type, np.integer):
             if v.dtype == np.uint64:
                 # HACK #2355 definite overflow
                 if (v > 2 ** 63 - 1).any():
-                    object_items.append((k, v))
+                    object_items.append((i, k, v))
                     continue
-            int_items.append((k, v))
+            int_items.append((i, k, v))
         elif v.dtype == np.bool_:
-            bool_items.append((k, v))
+            bool_items.append((i, k, v))
         else:
-            object_items.append((k, v))
+            object_items.append((i, k, v))
 
+    is_unique = items.is_unique
     blocks = []
     if len(float_items):
-        float_blocks = _multi_blockify(float_items, items)
+        float_blocks = _multi_blockify(float_items, items, is_unique=is_unique)
         blocks.extend(float_blocks)
 
     if len(complex_items):
-        complex_blocks = _simple_blockify(complex_items, items, np.complex128)
+        complex_blocks = _simple_blockify(complex_items, items, np.complex128, is_unique=is_unique)
         blocks.extend(complex_blocks)
 
     if len(int_items):
-        int_blocks = _multi_blockify(int_items, items)
+        int_blocks = _multi_blockify(int_items, items, is_unique=is_unique)
        blocks.extend(int_blocks)
 
     if len(datetime_items):
-        datetime_blocks = _simple_blockify(datetime_items, items, _NS_DTYPE)
+        datetime_blocks = _simple_blockify(datetime_items, items, _NS_DTYPE, is_unique=is_unique)
         blocks.extend(datetime_blocks)
 
     if len(bool_items):
-        bool_blocks = _simple_blockify(bool_items, items, np.bool_)
+        bool_blocks = _simple_blockify(bool_items, items, np.bool_, is_unique=is_unique)
         blocks.extend(bool_blocks)
 
     if len(object_items) > 0:
-        object_blocks = _simple_blockify(object_items, items, np.object_)
+        object_blocks = _simple_blockify(object_items, items, np.object_, is_unique=is_unique)
         blocks.extend(object_blocks)
 
     if len(extra_items):
@@ -1959,7 +2032,6 @@ def form_blocks(arrays, names, axes):
 
         # empty items -> dtype object
         block_values = np.empty(shape, dtype=object)
-
         block_values.fill(nan)
 
         na_block = make_block(block_values, extra_items, items)
@@ -1968,28 +2040,32 @@ def form_blocks(arrays, names, axes):
 
     return blocks
 
-def _simple_blockify(tuples, ref_items, dtype):
+def _simple_blockify(tuples, ref_items, dtype, is_unique=True):
     """ return a single array of a block that has a single dtype; if dtype is not None, coerce to this dtype """
-    block_items, values = _stack_arrays(tuples, ref_items, dtype)
+    block_items, values, placement = _stack_arrays(tuples, ref_items, dtype)
 
     # CHECK DTYPE?
     if dtype is not None and values.dtype != dtype:  # pragma: no cover
         values = values.astype(dtype)
 
-    return [ make_block(values, block_items, ref_items) ]
-
+    block = make_block(values, block_items, ref_items)
+    if not is_unique:
+        block.set_ref_locs(placement)
+    return [ block ]
 
-def _multi_blockify(tuples, ref_items, dtype = None):
+def _multi_blockify(tuples, ref_items, dtype = None, is_unique=True):
     """ return an array of blocks that potentially have different dtypes """
 
     # group by dtype
-    grouper = itertools.groupby(tuples, lambda x: x[1].dtype)
+    grouper = itertools.groupby(tuples, lambda x: x[2].dtype)
 
     new_blocks = []
     for dtype, tup_block in grouper:
 
-        block_items, values = _stack_arrays(list(tup_block), ref_items, dtype)
+        block_items, values, placement = _stack_arrays(list(tup_block), ref_items, dtype)
         block = make_block(values, block_items, ref_items)
+        if not is_unique:
+            block.set_ref_locs(placement)
         new_blocks.append(block)
 
     return new_blocks
@@ -2012,7 +2088,7 @@ def _shape_compat(x):
     else:
         return x.shape
 
-    names, arrays = zip(*tuples)
+    placement, names, arrays = zip(*tuples)
 
     first = arrays[0]
     shape = (len(arrays),) + _shape_compat(first)
@@ -2029,7 +2105,7 @@ def _shape_compat(x):
     if len(items) != len(stacked):
         raise Exception("invalid names passed _stack_arrays")
 
-    return items, stacked
+    return items, stacked, placement
 
 
 def _blocks_to_series_dict(blocks, index=None):
diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py
index cb3799c28d0cf..69225c40e36df 100644
--- a/pandas/tests/test_frame.py
+++ b/pandas/tests/test_frame.py
@@ -4973,17 +4973,33 @@ def test_to_csv_dups_cols(self):
         with ensure_clean() as filename:
             df.to_csv(filename) # single dtype, fine
+            result = read_csv(filename,index_col=0)
+            result.columns = df.columns
+            assert_frame_equal(result,df)
 
-        df_float = DataFrame(np.random.randn(1000, 3),dtype='float64')
         df_int = DataFrame(np.random.randn(1000, 3),dtype='int64')
         df_bool = DataFrame(True,index=df_float.index,columns=range(3))
         df_object = DataFrame('foo',index=df_float.index,columns=range(3))
         df_dt = DataFrame(Timestamp('20010101'),index=df_float.index,columns=range(3))
         df = pan.concat([ df_float, df_int, df_bool, df_object, df_dt ], axis=1, ignore_index=True)
 
-        #### this raises because we have duplicate column names across dtypes ####
+        cols = []
+        for i in range(5):
+            cols.extend([0,1,2])
+        df.columns = cols
+
+        from pandas import to_datetime
         with ensure_clean() as filename:
-            self.assertRaises(Exception, df.to_csv, filename)
+            df.to_csv(filename)
+            result = read_csv(filename,index_col=0)
+
+            # date cols
+            for i in ['0.4','1.4','2.4']:
+                result[i] = to_datetime(result[i])
+
+            result.columns = df.columns
+            assert_frame_equal(result,df)
 
         # GH3457
         from pandas.util.testing import makeCustomDataframe as mkdf
@@ -9246,7 +9262,7 @@ def test_columns_with_dups(self):
         df_dt = DataFrame(Timestamp('20010101'),index=df_float.index,columns=df_float.columns)
         df = pan.concat([ df_float, df_int, df_bool, df_object, df_dt ], axis=1)
 
-        result = df._data.set_ref_locs()
+        result = df._data._set_ref_locs()
         self.assert_(len(result) == len(df.columns))
 
         # testing iget
diff --git a/pandas/tests/test_indexing.py b/pandas/tests/test_indexing.py
index 8e1ea569973a6..ae71ec8b35422 100644
--- a/pandas/tests/test_indexing.py
+++ b/pandas/tests/test_indexing.py
@@ -774,8 +774,14 @@ def test_dups_fancy_indexing(self):
         # across dtypes
         df = DataFrame([[1,2,1.,2.,3.,'foo','bar']], columns=list('aaaaaaa'))
+        df.head()
+        str(df)
         result = DataFrame([[1,2,1.,2.,3.,'foo','bar']])
         result.columns = list('aaaaaaa')
+
+        df_v = df.iloc[:,4]
+        res_v = result.iloc[:,4]
+
         assert_frame_equal(df,result)
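The to_csv side of patch 4 as a round-trip sketch (hedged: the file name and the testing helper import are illustrative, and read_csv mangles duplicate headers, so the columns are restored by hand before comparing, just as the rewritten test does):

import pandas as pd

df = pd.DataFrame([[1, 2, 'foo'], [3, 4, 'bar']], columns=['a', 'a', 'b'])

# the writer now scatters each block's values through the
# block -> column-position map (get_items_map) rather than a
# name-keyed dict, so duplicate names across dtypes serialize in order
df.to_csv('dups.csv')

result = pd.read_csv('dups.csv', index_col=0)   # comes back as 'a', 'a.1', 'b'
result.columns = df.columns
pd.testing.assert_frame_equal(result, df)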
From 8c08acaef77e3b901dcbe09b612de785b0c5e782 Mon Sep 17 00:00:00 2001
From: jreback
Date: Thu, 2 May 2013 10:11:09 -0400
Subject: [PATCH 5/5] PERF: allow a cache_readonly to be 'set' if
 allow_setting is passed on the decoration

useful when specifying an index that is **known** to be unique
(e.g. in the case of a default range index)
---
 pandas/core/common.py     |  1 +
 pandas/core/index.py      |  2 +-
 pandas/src/properties.pyx | 35 +++++++++++++++++++++----------
 3 files changed, 27 insertions(+), 11 deletions(-)

diff --git a/pandas/core/common.py b/pandas/core/common.py
index e6ce9fc5fc925..490f269c8c104 100644
--- a/pandas/core/common.py
+++ b/pandas/core/common.py
@@ -1156,6 +1156,7 @@ def _default_index(n):
         values = np.arange(n, dtype=np.int64)
         result = values.view(Int64Index)
         result.name = None
+        result.is_unique = True
         return result
 
diff --git a/pandas/core/index.py b/pandas/core/index.py
index 34edd26a49617..101b69ffc3c7e 100644
--- a/pandas/core/index.py
+++ b/pandas/core/index.py
@@ -278,7 +278,7 @@ def is_monotonic(self):
     def is_lexsorted_for_tuple(self, tup):
         return True
 
-    @cache_readonly
+    @cache_readonly(allow_setting=True)
     def is_unique(self):
         return self._engine.is_unique
 
diff --git a/pandas/src/properties.pyx b/pandas/src/properties.pyx
index 53bb561ef9110..1df11cecf7b94 100644
--- a/pandas/src/properties.pyx
+++ b/pandas/src/properties.pyx
@@ -4,16 +4,20 @@ from cpython cimport PyDict_Contains, PyDict_GetItem, PyDict_SetItem
 
 cdef class cache_readonly(object):
 
     cdef readonly:
-        object fget, name
+        object func, name, allow_setting
 
-    def __init__(self, func):
-        self.fget = func
-        self.name = func.__name__
+    def __init__(self, func=None, allow_setting=False):
+        if func is not None:
+            self.func = func
+            self.name = func.__name__
+        self.allow_setting = allow_setting
 
-    def __get__(self, obj, type):
-        if obj is None:
-            return self.fget
+    def __call__(self, func, doc=None):
+        self.func = func
+        self.name = func.__name__
+        return self
 
+    def __get__(self, obj, typ):
         # Get the cache or set a default one if needed
         cache = getattr(obj, '_cache', None)
@@ -23,12 +27,23 @@ cdef class cache_readonly(object):
         if PyDict_Contains(cache, self.name):
             # not necessary to Py_INCREF
             val = PyDict_GetItem(cache, self.name)
-            return val
         else:
-            val = self.fget(obj)
+            val = self.func(obj)
             PyDict_SetItem(cache, self.name, val)
-            return val
+        return val
+
+    def __set__(self, obj, value):
+
+        if not self.allow_setting:
+            raise Exception("cannot set values for [%s]" % self.name)
+
+        # Get the cache or set a default one if needed
+        cache = getattr(obj, '_cache', None)
+        if cache is None:
+            cache = obj._cache = {}
+        PyDict_SetItem(cache, self.name, value)
+
 cdef class AxisProperty(object):
 
     cdef:
         Py_ssize_t axis
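For reference, a pure-Python sketch of the descriptor behavior this last patch adds (the real implementation above is Cython; the semantics follow properties.pyx):

class cache_readonly(object):

    def __init__(self, func=None, allow_setting=False):
        if func is not None:
            self.func = func
            self.name = func.__name__
        self.allow_setting = allow_setting

    def __call__(self, func, doc=None):
        # supports the @cache_readonly(allow_setting=True) decorator form
        self.func = func
        self.name = func.__name__
        return self

    def __get__(self, obj, typ):
        if obj is None:
            return self
        cache = getattr(obj, '_cache', None)
        if cache is None:
            cache = obj._cache = {}
        if self.name not in cache:
            cache[self.name] = self.func(obj)
        return cache[self.name]

    def __set__(self, obj, value):
        if not self.allow_setting:
            raise Exception("cannot set values for [%s]" % self.name)
        cache = getattr(obj, '_cache', None)
        if cache is None:
            cache = obj._cache = {}
        cache[self.name] = value

This is why _default_index can pre-seed `result.is_unique = True` above: the value is dropped straight into the instance cache, so the O(n) engine uniqueness check never runs for the default integer index.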