diff --git a/doc/source/whatsnew/v0.12.0.txt b/doc/source/whatsnew/v0.12.0.txt index c4188898bdf71..27aa47a6bb097 100644 --- a/doc/source/whatsnew/v0.12.0.txt +++ b/doc/source/whatsnew/v0.12.0.txt @@ -236,10 +236,10 @@ I/O Enhancements .. ipython:: python from pandas.util.testing import makeCustomDataframe as mkdf - df = mkdf(5,3,r_idx_nlevels=2,c_idx_nlevels=4) - df.to_csv('mi.csv',tupleize_cols=False) + df = mkdf(5, 3, r_idx_nlevels=2, c_idx_nlevels=4) + df.to_csv('mi.csv') print(open('mi.csv').read()) - pd.read_csv('mi.csv',header=[0,1,2,3],index_col=[0,1],tupleize_cols=False) + pd.read_csv('mi.csv', header=[0,1,2,3], index_col=[0,1]) .. ipython:: python :suppress: diff --git a/doc/source/whatsnew/v0.16.0.txt b/doc/source/whatsnew/v0.16.0.txt index 4d43660960597..8238cc32d7bb0 100644 --- a/doc/source/whatsnew/v0.16.0.txt +++ b/doc/source/whatsnew/v0.16.0.txt @@ -380,12 +380,29 @@ New Behavior For ease of creation of series of categorical data, we have added the ability to pass keywords when calling ``.astype()``. These are passed directly to the constructor. -.. ipython:: python - - s = Series(["a","b","c","a"]).astype('category',ordered=True) - s - s = Series(["a","b","c","a"]).astype('category',categories=list('abcdef'),ordered=False) - s +.. code-block:: python + + In [54]: s = Series(["a","b","c","a"]).astype('category',ordered=True) + + In [55]: s + Out[55]: + 0 a + 1 b + 2 c + 3 a + dtype: category + Categories (3, object): [a < b < c] + + In [56]: s = Series(["a","b","c","a"]).astype('category',categories=list('abcdef'),ordered=False) + + In [57]: s + Out[57]: + 0 a + 1 b + 2 c + 3 a + dtype: category + Categories (6, object): [a, b, c, d, e, f] .. _whatsnew_0160.api_breaking.other: diff --git a/doc/source/whatsnew/v0.16.1.txt b/doc/source/whatsnew/v0.16.1.txt index 1a3b8319aeb59..b1e8aa10457f8 100644 --- a/doc/source/whatsnew/v0.16.1.txt +++ b/doc/source/whatsnew/v0.16.1.txt @@ -41,48 +41,94 @@ indexing with duplicates. 
This is a container around a ``Categorical`` (introduc and allows efficient indexing and storage of an index with a large number of duplicated elements. Prior to 0.16.1, setting the index of a ``DataFrame/Series`` with a ``category`` dtype would convert this to regular object-based ``Index``. -.. ipython :: python +.. code-block:: ipython + + In [1]: df = DataFrame({'A' : np.arange(6), + ...: 'B' : Series(list('aabbca')).astype('category', + ...: categories=list('cab')) + ...: }) + ...: + + In [2]: df + Out[2]: + A B + 0 0 a + 1 1 a + 2 2 b + 3 3 b + 4 4 c + 5 5 a + + In [3]: df.dtypes + Out[3]: + A int64 + B category + dtype: object + + In [4]: df.B.cat.categories + Out[4]: Index(['c', 'a', 'b'], dtype='object') - df = DataFrame({'A' : np.arange(6), - 'B' : Series(list('aabbca')).astype('category', - categories=list('cab')) - }) - df - df.dtypes - df.B.cat.categories setting the index, will create create a ``CategoricalIndex`` -.. ipython :: python +.. code-block:: ipython + + In [5]: df2 = df.set_index('B') - df2 = df.set_index('B') - df2.index + In [6]: df2.index + Out[6]: CategoricalIndex(['a', 'a', 'b', 'b', 'c', 'a'], categories=['c', 'a', 'b'], ordered=False, name='B', dtype='category') indexing with ``__getitem__/.iloc/.loc/.ix`` works similarly to an Index with duplicates. The indexers MUST be in the category or the operation will raise. -.. ipython :: python +.. code-block:: ipython - df2.loc['a'] + In [7]: df2.loc['a'] + Out[7]: + A + B + a 0 + a 1 + a 5 and preserves the ``CategoricalIndex`` -.. ipython :: python +.. code-block:: ipython + + In [8]: df2.loc['a'].index + Out[8]: CategoricalIndex(['a', 'a', 'a'], categories=['c', 'a', 'b'], ordered=False, name='B', dtype='category') - df2.loc['a'].index sorting will order by the order of the categories -.. ipython :: python +.. 
code-block:: ipython - df2.sort_index() + In [9]: df2.sort_index() + Out[9]: + A + B + c 4 + a 0 + a 1 + a 5 + b 2 + b 3 groupby operations on the index will preserve the index nature as well -.. ipython :: python +.. code-block:: ipython + + In [10]: df2.groupby(level=0).sum() + Out[10]: + A + B + c 4 + a 6 + b 5 + + In [11]: df2.groupby(level=0).sum().index + Out[11]: CategoricalIndex(['c', 'a', 'b'], categories=['c', 'a', 'b'], ordered=False, name='B', dtype='category') - df2.groupby(level=0).sum() - df2.groupby(level=0).sum().index reindexing operations, will return a resulting index based on the type of the passed indexer, meaning that passing a list will return a plain-old-``Index``; indexing with @@ -90,12 +136,31 @@ a ``Categorical`` will return a ``CategoricalIndex``, indexed according to the c of the PASSED ``Categorical`` dtype. This allows one to arbitrarly index these even with values NOT in the categories, similarly to how you can reindex ANY pandas index. -.. ipython :: python +.. code-block:: ipython - df2.reindex(['a','e']) - df2.reindex(['a','e']).index - df2.reindex(pd.Categorical(['a','e'],categories=list('abcde'))) - df2.reindex(pd.Categorical(['a','e'],categories=list('abcde'))).index + In [12]: df2.reindex(['a','e']) + Out[12]: + A + B + a 0.0 + a 1.0 + a 5.0 + e NaN + + In [13]: df2.reindex(['a','e']).index + Out[13]: Index(['a', 'a', 'a', 'e'], dtype='object', name='B') + + In [14]: df2.reindex(pd.Categorical(['a','e'],categories=list('abcde'))) + Out[14]: + A + B + a 0.0 + a 1.0 + a 5.0 + e NaN + + In [15]: df2.reindex(pd.Categorical(['a','e'],categories=list('abcde'))).index + Out[15]: CategoricalIndex(['a', 'a', 'a', 'e'], categories=['a', 'b', 'c', 'd', 'e'], ordered=False, name='B', dtype='category') See the :ref:`documentation ` for more. 
(:issue:`7629`, :issue:`10038`, :issue:`10039`) diff --git a/doc/source/whatsnew/v0.18.1.txt b/doc/source/whatsnew/v0.18.1.txt index 7f74d8a769e4b..ca386da03295d 100644 --- a/doc/source/whatsnew/v0.18.1.txt +++ b/doc/source/whatsnew/v0.18.1.txt @@ -440,13 +440,23 @@ Previous behavior: New Behavior: -.. ipython:: python +.. code-block:: ipython # Output is a Series - df.groupby(pd.TimeGrouper(key='date', freq='M')).apply(lambda x: x.value.sum()) + In [55]: df.groupby(pd.TimeGrouper(key='date', freq='M')).apply(lambda x: x.value.sum()) + Out[55]: + date + 2000-10-31 10 + 2000-11-30 13 + Freq: M, dtype: int64 # Output is a DataFrame - df.groupby(pd.TimeGrouper(key='date', freq='M')).apply(lambda x: x[['value']].sum()) + In [56]: df.groupby(pd.TimeGrouper(key='date', freq='M')).apply(lambda x: x[['value']].sum()) + Out[56]: + value + date + 2000-10-31 10 + 2000-11-30 13 .. _whatsnew_0181.read_csv_exceptions: