From 164b1ce1e051385efa654ac997bb887899964c94 Mon Sep 17 00:00:00 2001 From: Andy Hayden Date: Sat, 6 Jul 2013 13:46:45 +0100 Subject: [PATCH 1/3] ENH col_level argument to melt --- pandas/core/reshape.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pandas/core/reshape.py b/pandas/core/reshape.py index e9d5fe124fc74..067398742912d 100644 --- a/pandas/core/reshape.py +++ b/pandas/core/reshape.py @@ -601,7 +601,7 @@ def _stack_multi_columns(frame, level=-1, dropna=True): def melt(frame, id_vars=None, value_vars=None, - var_name=None, value_name='value'): + var_name=None, value_name='value', col_level=None): """ "Unpivots" a DataFrame from wide format to long format, optionally leaving id variables set @@ -613,6 +613,7 @@ def melt(frame, id_vars=None, value_vars=None, value_vars : tuple, list, or ndarray var_name : scalar, if None uses frame.column.name or 'variable' value_name : scalar, default 'value' + col_level : scalar, if columns are a MultiIndex then use this level to melt Examples -------- @@ -652,6 +653,9 @@ def melt(frame, id_vars=None, value_vars=None, else: frame = frame.copy() + if col_level: # allow list? + frame.columns = frame.columns.get_level_values(col_level) # frame is a copy + if var_name is None: var_name = frame.columns.name if frame.columns.name is not None else 'variable' From 02f2c420a68010e74f38a35fe8dcb0cef5a39ba8 Mon Sep 17 00:00:00 2001 From: Andy Hayden Date: Sat, 6 Jul 2013 17:13:30 +0100 Subject: [PATCH 2/3] ENH MultiIndex columns with melt --- doc/source/release.rst | 1 + pandas/core/index.py | 4 ++- pandas/core/reshape.py | 65 ++++++++++++++++++++++++++---------- pandas/tests/test_index.py | 2 ++ pandas/tests/test_reshape.py | 18 ++++++++++ 5 files changed, 72 insertions(+), 18 deletions(-) diff --git a/doc/source/release.rst b/doc/source/release.rst index f4d61e70e94b3..b827af2173412 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -77,6 +77,7 @@ pandas 0.12 to specify custom column names of the returned DataFrame (:issue:`3649`), thanks @hoechenberger. If ``var_name`` is not specified and ``dataframe.columns.name`` is not None, then this will be used as the ``var_name`` (:issue:`4144`). + Also support for MultiIndex columns. - clipboard functions use pyperclip (no dependencies on Windows, alternative dependencies offered for Linux) (:issue:`3837`). - Plotting functions now raise a ``TypeError`` before trying to plot anything diff --git a/pandas/core/index.py b/pandas/core/index.py index a3aa0804bcfe2..7b20d791c6593 100644 --- a/pandas/core/index.py +++ b/pandas/core/index.py @@ -1653,7 +1653,9 @@ def get_level_values(self, level): num = self._get_level_number(level) unique_vals = self.levels[num] # .values labels = self.labels[num] - return unique_vals.take(labels) + values = unique_vals.take(labels) + values.name = self.names[num] + return values def format(self, space=2, sparsify=None, adjoin=True, names=False, na_rep='NaN', formatter=None): diff --git a/pandas/core/reshape.py b/pandas/core/reshape.py index 067398742912d..b6ab308ccfa2f 100644 --- a/pandas/core/reshape.py +++ b/pandas/core/reshape.py @@ -617,24 +617,42 @@ def melt(frame, id_vars=None, value_vars=None, Examples -------- + >>> df = pd.DataFrame({'A': {0: 'a', 1: 'b', 2: 'c'}, + 'B': {0: 1, 1: 3, 2: 5}, + 'C': {0: 2, 1: 4, 2: 6}}) + >>> df - A B C - a 1 2 - b 3 4 - c 5 6 + A B C + 0 a 1 2 + 1 b 3 4 + 2 c 5 6 >>> melt(df, id_vars=['A'], value_vars=['B']) - A variable value - a B 1 - b B 3 - c B 5 + A variable value + 0 a B 1 + 1 b B 3 + 2 c B 5 >>> melt(df, id_vars=['A'], value_vars=['B'], ... var_name='myVarname', value_name='myValname') - A myVarname myValname - a B 1 - b B 3 - c B 5 + A myVarname myValname + 0 a B 1 + 1 b B 3 + 2 c B 5 + + >>> df.columns = [list('ABC'), list('DEF')] + + >>> melt(df, col_level=0, id_vars=['A'], value_vars=['B']) + A variable value + 0 a B 1 + 1 b B 3 + 2 c B 5 + + >>> melt(df, id_vars=[('A', 'D')], value_vars=[('B', 'E')]) + (A, D) variable_0 variable_1 value + 0 a B E 1 + 1 b B E 3 + 2 c B E 5 """ # TODO: what about the existing index? @@ -653,11 +671,17 @@ def melt(frame, id_vars=None, value_vars=None, else: frame = frame.copy() - if col_level: # allow list? + if col_level is not None: # allow list or other? frame.columns = frame.columns.get_level_values(col_level) # frame is a copy if var_name is None: - var_name = frame.columns.name if frame.columns.name is not None else 'variable' + if isinstance(frame.columns, MultiIndex): + if len(frame.columns.names) == len(set(frame.columns.names)): + var_name = frame.columns.names + else: + var_name = ['variable_%s' % i for i in range(len(frame.columns.names))] + else: + var_name = frame.columns.name if frame.columns.name is not None else 'variable' N, K = frame.shape K -= len(id_vars) @@ -666,11 +690,18 @@ def melt(frame, id_vars=None, value_vars=None, for col in id_vars: mdata[col] = np.tile(frame.pop(col).values, K) - mcolumns = id_vars + [var_name, value_name] + if isinstance(var_name, list): + mcolumns = id_vars + var_name + [value_name] + else: + mcolumns = id_vars + [var_name, value_name] mdata[value_name] = frame.values.ravel('F') - mdata[var_name] = np.asarray(frame.columns).repeat(N) - + if isinstance(frame.columns, MultiIndex): + for i, col in enumerate(var_name): + mdata[col] = np.asarray(frame.columns.get_level_values(i)).repeat(N) + else: # assume isinstance(frame.columns, Index): + mdata[var_name] = np.asarray(frame.columns).repeat(N) + return DataFrame(mdata, columns=mcolumns) diff --git a/pandas/tests/test_index.py b/pandas/tests/test_index.py index d9808ab48ca41..33533104919db 100644 --- a/pandas/tests/test_index.py +++ b/pandas/tests/test_index.py @@ -1029,6 +1029,8 @@ def test_get_level_values(self): expected = ['foo', 'foo', 'bar', 'baz', 'qux', 'qux'] self.assert_(np.array_equal(result, expected)) + self.assertEquals(result.name, 'first') + result = self.index.get_level_values('first') expected = self.index.get_level_values(0) self.assert_(np.array_equal(result, expected)) diff --git a/pandas/tests/test_reshape.py b/pandas/tests/test_reshape.py index 09c63746c8d4b..b24e097238a70 100644 --- a/pandas/tests/test_reshape.py +++ b/pandas/tests/test_reshape.py @@ -10,6 +10,7 @@ import nose from pandas import DataFrame +import pandas as pd from numpy import nan import numpy as np @@ -30,6 +31,12 @@ def setUp(self): self.var_name = 'var' self.value_name = 'val' + self.df1 = pd.DataFrame([[ 1.067683, -1.110463, 0.20867 ], + [-1.321405, 0.368915, -1.055342], + [-0.807333, 0.08298 , -0.873361]]) + self.df1.columns = [list('ABC'), list('abc')] + self.df1.columns.names = ['CAP', 'low'] + def test_default_col_names(self): result = melt(self.df) self.assertEqual(result.columns.tolist(), ['variable', 'value']) @@ -128,6 +135,17 @@ def test_custom_var_and_value_name(self): result20 = melt(self.df) self.assertEqual(result20.columns.tolist(), ['foo', 'value']) + def test_col_level(self): + res1 = melt(self.df1, col_level=0) + res2 = melt(self.df1, col_level='CAP') + self.assertEqual(res1.columns.tolist(), ['CAP', 'value']) + self.assertEqual(res1.columns.tolist(), ['CAP', 'value']) + + def test_multiindex(self): + res = pd.melt(self.df1) + self.assertEqual(res.columns.tolist(), ['CAP', 'low', 'value']) + + class TestConvertDummies(unittest.TestCase): def test_convert_dummies(self): df = DataFrame({'A': ['foo', 'bar', 'foo', 'bar', From 92fdeff679185b49591e3cf1ac1bbd2a88b93339 Mon Sep 17 00:00:00 2001 From: Phillip Cloud Date: Thu, 11 Jul 2013 12:06:27 -0400 Subject: [PATCH 3/3] CLN/ENH: clean up docstrings remove some cruft and generalize --- pandas/core/reshape.py | 65 ++++++++++++++++++++++++------------------ 1 file changed, 37 insertions(+), 28 deletions(-) diff --git a/pandas/core/reshape.py b/pandas/core/reshape.py index b6ab308ccfa2f..1b3aa0f962e10 100644 --- a/pandas/core/reshape.py +++ b/pandas/core/reshape.py @@ -5,19 +5,20 @@ import numpy as np +import six + from pandas.core.series import Series from pandas.core.frame import DataFrame from pandas.core.categorical import Categorical from pandas.core.common import (notnull, _ensure_platform_int, _maybe_promote, - _maybe_upcast, isnull) + isnull) from pandas.core.groupby import (get_group_index, _compress_group_index, decons_group_index) import pandas.core.common as com import pandas.algos as algos -from pandas import lib -from pandas.core.index import MultiIndex, Index +from pandas.core.index import MultiIndex class ReshapeError(Exception): @@ -35,21 +36,26 @@ class _Unstacker(object): Examples -------- + >>> import pandas as pd + >>> index = pd.MultiIndex.from_tuples([('one', 'a'), ('one', 'b'), + ... ('two', 'a'), ('two', 'b')]) + >>> s = pd.Series(np.arange(1.0, 5.0), index=index) >>> s - one a 1. - one b 2. - two a 3. - two b 4. + one a 1 + b 2 + two a 3 + b 4 + dtype: float64 >>> s.unstack(level=-1) a b - one 1. 2. - two 3. 4. + one 1 2 + two 3 4 >>> s.unstack(level=0) one two - a 1. 2. - b 3. 4. + a 1 2 + b 3 4 Returns ------- @@ -159,7 +165,7 @@ def get_result(self): values[j] = orig_values[i] else: index = index.take(self.unique_groups) - + return DataFrame(values, index=index, columns=columns) def get_new_values(self): @@ -617,9 +623,10 @@ def melt(frame, id_vars=None, value_vars=None, Examples -------- + >>> import pandas as pd >>> df = pd.DataFrame({'A': {0: 'a', 1: 'b', 2: 'c'}, - 'B': {0: 1, 1: 3, 2: 5}, - 'C': {0: 2, 1: 4, 2: 6}}) + ... 'B': {0: 1, 1: 3, 2: 5}, + ... 'C': {0: 2, 1: 4, 2: 6}}) >>> df A B C @@ -632,7 +639,7 @@ def melt(frame, id_vars=None, value_vars=None, 0 a B 1 1 b B 3 2 c B 5 - + >>> melt(df, id_vars=['A'], value_vars=['B'], ... var_name='myVarname', value_name='myValname') A myVarname myValname @@ -679,9 +686,13 @@ def melt(frame, id_vars=None, value_vars=None, if len(frame.columns.names) == len(set(frame.columns.names)): var_name = frame.columns.names else: - var_name = ['variable_%s' % i for i in range(len(frame.columns.names))] + var_name = ['variable_%s' % i for i in + xrange(len(frame.columns.names))] else: - var_name = frame.columns.name if frame.columns.name is not None else 'variable' + var_name = [frame.columns.name if frame.columns.name is not None + else 'variable'] + if isinstance(var_name, six.string_types): + var_name = [var_name] N, K = frame.shape K -= len(id_vars) @@ -690,17 +701,12 @@ def melt(frame, id_vars=None, value_vars=None, for col in id_vars: mdata[col] = np.tile(frame.pop(col).values, K) - if isinstance(var_name, list): - mcolumns = id_vars + var_name + [value_name] - else: - mcolumns = id_vars + [var_name, value_name] + mcolumns = id_vars + var_name + [value_name] mdata[value_name] = frame.values.ravel('F') - if isinstance(frame.columns, MultiIndex): - for i, col in enumerate(var_name): - mdata[col] = np.asarray(frame.columns.get_level_values(i)).repeat(N) - else: # assume isinstance(frame.columns, Index): - mdata[var_name] = np.asarray(frame.columns).repeat(N) + for i, col in enumerate(var_name): + # asanyarray will keep the columns as an Index + mdata[col] = np.asanyarray(frame.columns.get_level_values(i)).repeat(N) return DataFrame(mdata, columns=mcolumns) @@ -718,13 +724,16 @@ def lreshape(data, groups, dropna=True, label=None): Examples -------- + >>> import pandas as pd + >>> data = pd.DataFrame({'hr1': [514, 573], 'hr2': [545, 526], + ... 'team': ['Red Sox', 'Yankees'], + ... 'year1': [2007, 2008], 'year2': [2008, 2008]}) >>> data hr1 hr2 team year1 year2 0 514 545 Red Sox 2007 2008 1 573 526 Yankees 2007 2008 - >>> pd.lreshape(data, {'year': ['year1', 'year2'], - 'hr': ['hr1', 'hr2']}) + >>> pd.lreshape(data, {'year': ['year1', 'year2'], 'hr': ['hr1', 'hr2']}) team hr year 0 Red Sox 514 2007 1 Yankees 573 2007