diff --git a/doc/source/whatsnew/v0.14.0.rst b/doc/source/whatsnew/v0.14.0.rst
index 6ef2a61228ad2..ecb062c7d3680 100644
--- a/doc/source/whatsnew/v0.14.0.rst
+++ b/doc/source/whatsnew/v0.14.0.rst
@@ -5,11 +5,6 @@ v0.14.0 (May 31 , 2014)
 
 {{ header }}
 
-.. ipython:: python
-   :suppress:
-
-   from pandas import * # noqa F401, F403
-
 This is a major release from 0.13.1 and includes a small number of API
 changes, several new features, enhancements, and performance improvements
 along with a large number of bug fixes. We recommend that all
@@ -64,20 +59,20 @@ API changes
 
   .. ipython:: python
 
-     dfl = DataFrame(np.random.randn(5,2),columns=list('AB'))
+     dfl = pd.DataFrame(np.random.randn(5, 2), columns=list('AB'))
      dfl
-     dfl.iloc[:,2:3]
-     dfl.iloc[:,1:3]
+     dfl.iloc[:, 2:3]
+     dfl.iloc[:, 1:3]
      dfl.iloc[4:6]
 
   These are out-of-bounds selections
 
   .. code-block:: python
 
-     dfl.iloc[[4,5,6]]
+     >>> dfl.iloc[[4, 5, 6]]
      IndexError: positional indexers are out-of-bounds
 
-     dfl.iloc[:,4]
+     >>> dfl.iloc[:, 4]
      IndexError: single positional indexer is out-of-bounds
 
 - Slicing with negative start, stop & step values handles corner cases better (:issue:`6531`):
@@ -120,8 +115,8 @@ API changes
 
   .. ipython:: python
 
-     i = pd.Index([1, 2, 3, 'a' , 'b', 'c'])
-     i[[0,1,2]]
+     i = pd.Index([1, 2, 3, 'a', 'b', 'c'])
+     i[[0, 1, 2]]
      i.drop(['a', 'b', 'c'])
 
   Previously, the above operation would return ``Int64Index``. If you'd like
@@ -129,7 +124,7 @@ API changes
 
   .. ipython:: python
 
-     i[[0,1,2]].astype(np.int_)
+     i[[0, 1, 2]].astype(np.int_)
 
 - ``set_index`` no longer converts MultiIndexes to an Index of tuples. For example,
   the old behavior returned an Index in this case (:issue:`6459`):
@@ -140,9 +135,9 @@ API changes
      np.random.seed(1234)
      from itertools import product
      tuples = list(product(('a', 'b'), ('c', 'd')))
-     mi = MultiIndex.from_tuples(tuples)
-     df_multi = DataFrame(np.random.randn(4, 2), index=mi)
-     tuple_ind = pd.Index(tuples,tupleize_cols=False)
+     mi = pd.MultiIndex.from_tuples(tuples)
+     df_multi = pd.DataFrame(np.random.randn(4, 2), index=mi)
+     tuple_ind = pd.Index(tuples, tupleize_cols=False)
      df_multi.index
 
   .. ipython:: python
@@ -180,9 +175,13 @@ API changes
 
   .. code-block:: ipython
 
-     In [1]: df = DataFrame(np.random.randn(10,4),columns=list('ABCD'))
+     In [1]: df = pd.DataFrame(np.random.randn(10, 4), columns=list('ABCD'))
+
+     In [4]: covs = pd.rolling_cov(df[['A', 'B', 'C']],
+        ....:                      df[['B', 'C', 'D']],
+        ....:                      5,
+        ....:                      pairwise=True)
 
-     In [4]: covs = pd.rolling_cov(df[['A','B','C']], df[['B','C','D']], 5, pairwise=True)
     In [5]: covs[df.index[-1]]
     Out[5]:
@@ -214,11 +213,12 @@ API changes
 
   .. code-block:: python
 
-     x = pd.Series(np.random.rand(10) > 0.5)
-     y = True
-     x + y # warning generated: should do x | y instead
-     x / y # this raises because it doesn't make sense
-
+     >>> x = pd.Series(np.random.rand(10) > 0.5)
+     >>> y = True
+     >>> x + y  # warning generated: should do x | y instead
+     UserWarning: evaluating in Python space because the '+' operator is not
+     supported by numexpr for the bool dtype, use '|' instead
+     >>> x / y  # this raises because it doesn't make sense
      NotImplementedError: operator '/' not implemented for bool dtypes
 
 - In ``HDFStore``, ``select_as_multiple`` will always raise a ``KeyError``, when a
   key or the selector is not found (:issue:`6177`)
@@ -274,17 +274,19 @@ Display Changes
 
   .. ipython:: python
 
-     dfd = pd.DataFrame(np.arange(25).reshape(-1,5), index=[0,1,2,3,4], columns=[0,1,2,3,4])
+     dfd = pd.DataFrame(np.arange(25).reshape(-1, 5),
+                        index=[0, 1, 2, 3, 4],
+                        columns=[0, 1, 2, 3, 4])
 
      # show dimensions since this is truncated
      with pd.option_context('display.max_rows', 2, 'display.max_columns', 2,
                             'display.show_dimensions', 'truncate'):
-        print(dfd)
+         print(dfd)
 
      # will not show dimensions since it is not truncated
      with pd.option_context('display.max_rows', 10, 'display.max_columns', 40,
                             'display.show_dimensions', 'truncate'):
-        print(dfd)
+         print(dfd)
 
 - Regression in the display of a MultiIndexed Series with ``display.max_rows`` is less
   than the length of the series (:issue:`7101`)
@@ -347,7 +349,7 @@ More consistent behaviour for some groupby methods:
 
   .. ipython:: python
 
-     df = DataFrame([[1, np.nan], [1, 4], [5, 6]], columns=['A', 'B'])
+     df = pd.DataFrame([[1, np.nan], [1, 4], [5, 6]], columns=['A', 'B'])
      g = df.groupby('A')
      g.nth(0)
 
@@ -361,7 +363,7 @@ More consistent behaviour for some groupby methods:
 
   .. ipython:: python
 
-     gf = df.groupby('A',as_index=False)
+     gf = df.groupby('A', as_index=False)
      gf.nth(0)
      gf.nth(0, dropna='any')
 
@@ -370,7 +372,7 @@ More consistent behaviour for some groupby methods:
 
   .. ipython:: python
 
-     df = DataFrame([[1, np.nan], [1, 4], [5, 6], [5, 8]], columns=['A', 'B'])
+     df = pd.DataFrame([[1, np.nan], [1, 4], [5, 6], [5, 8]], columns=['A', 'B'])
      g = df.groupby('A')
      g.count()
      g.describe()
@@ -379,8 +381,8 @@ More consistent behaviour for some groupby methods:
 
   .. ipython:: python
 
-     df = DataFrame([[1, np.nan], [1, 4], [5, 6], [5, 8]], columns=['A', 'B'])
-     g = df.groupby('A',as_index=False)
+     df = pd.DataFrame([[1, np.nan], [1, 4], [5, 6], [5, 8]], columns=['A', 'B'])
+     g = df.groupby('A', as_index=False)
      g.count()
      g.describe()
 
@@ -434,7 +436,7 @@ This ``engine`` can then be used to write or read data to/from this database:
 
 .. ipython:: python
 
-   df = pd.DataFrame({'A': [1,2,3], 'B': ['a', 'b', 'c']})
+   df = pd.DataFrame({'A': [1, 2, 3], 'B': ['a', 'b', 'c']})
    df.to_sql('db_table', engine, index=False)
 
 You can read data from a database by specifying the table name:
@@ -496,15 +498,15 @@ See also issues (:issue:`6134`, :issue:`4036`, :issue:`3057`, :issue:`2598`, :is
 
    You should do this:
 
-     .. code-block:: python
+   .. code-block:: python
 
-        df.loc[(slice('A1','A3'),.....),:]
+      >>> df.loc[(slice('A1', 'A3'), ...), :]  # noqa: E901
 
    rather than this:
 
-     .. code-block:: python
+   .. code-block:: python
 
-        df.loc[(slice('A1','A3'),.....)]
+      >>> df.loc[(slice('A1', 'A3'), ...)]  # noqa: E901
 
 .. warning::
@@ -512,62 +514,63 @@ See also issues (:issue:`6134`, :issue:`4036`, :issue:`3057`, :issue:`2598`, :is
 
 .. ipython:: python
 
-   def mklbl(prefix,n):
-       return ["%s%s" % (prefix,i) for i in range(n)]
-
-   index = MultiIndex.from_product([mklbl('A',4),
-                                    mklbl('B',2),
-                                    mklbl('C',4),
-                                    mklbl('D',2)])
-   columns = MultiIndex.from_tuples([('a','foo'),('a','bar'),
-                                     ('b','foo'),('b','bah')],
-                                     names=['lvl0', 'lvl1'])
-   df = DataFrame(np.arange(len(index)*len(columns)).reshape((len(index),len(columns))),
-                  index=index,
-                  columns=columns).sort_index().sort_index(axis=1)
+   def mklbl(prefix, n):
+       return ["%s%s" % (prefix, i) for i in range(n)]
+
+   index = pd.MultiIndex.from_product([mklbl('A', 4),
+                                       mklbl('B', 2),
+                                       mklbl('C', 4),
+                                       mklbl('D', 2)])
+   columns = pd.MultiIndex.from_tuples([('a', 'foo'), ('a', 'bar'),
+                                        ('b', 'foo'), ('b', 'bah')],
+                                       names=['lvl0', 'lvl1'])
+   df = pd.DataFrame(np.arange(len(index) * len(columns)).reshape((len(index),
+                                                                   len(columns))),
+                     index=index,
+                     columns=columns).sort_index().sort_index(axis=1)
    df
 
 Basic MultiIndex slicing using slices, lists, and labels.
 
 .. ipython:: python
 
-   df.loc[(slice('A1','A3'),slice(None), ['C1','C3']),:]
+   df.loc[(slice('A1', 'A3'), slice(None), ['C1', 'C3']), :]
 
 You can use a ``pd.IndexSlice`` to shortcut the creation of these slices
 
 .. ipython:: python
 
    idx = pd.IndexSlice
-   df.loc[idx[:,:,['C1','C3']],idx[:,'foo']]
+   df.loc[idx[:, :, ['C1', 'C3']], idx[:, 'foo']]
 
 It is possible to perform quite complicated selections using this method on multiple
 axes at the same time.
 
 .. ipython:: python
 
-   df.loc['A1',(slice(None),'foo')]
-   df.loc[idx[:,:,['C1','C3']],idx[:,'foo']]
+   df.loc['A1', (slice(None), 'foo')]
+   df.loc[idx[:, :, ['C1', 'C3']], idx[:, 'foo']]
 
 Using a boolean indexer you can provide selection related to the *values*.
 
 .. ipython:: python
 
-   mask = df[('a','foo')]>200
-   df.loc[idx[mask,:,['C1','C3']],idx[:,'foo']]
+   mask = df[('a', 'foo')] > 200
+   df.loc[idx[mask, :, ['C1', 'C3']], idx[:, 'foo']]
 
 You can also specify the ``axis`` argument to ``.loc`` to interpret the passed
 slicers on a single axis.
 
 .. ipython:: python
 
-   df.loc(axis=0)[:,:,['C1','C3']]
+   df.loc(axis=0)[:, :, ['C1', 'C3']]
 
 Furthermore you can *set* the values using these methods
 
 .. ipython:: python
 
    df2 = df.copy()
-   df2.loc(axis=0)[:,:,['C1','C3']] = -10
+   df2.loc(axis=0)[:, :, ['C1', 'C3']] = -10
    df2
 
 You can use a right-hand-side of an alignable object as well.
@@ -575,7 +578,7 @@ You can use a right-hand-side of an alignable object as well.
 
 .. ipython:: python
 
    df2 = df.copy()
-   df2.loc[idx[:,:,['C1','C3']],:] = df2*1000
+   df2.loc[idx[:, :, ['C1', 'C3']], :] = df2 * 1000
    df2
 
 .. _whatsnew_0140.plotting:
@@ -672,25 +675,25 @@ Deprecations
 
   .. code-block:: ipython
 
     # non-floating point indexes can only be indexed by integers / labels
-    In [1]: Series(1,np.arange(5))[3.0]
+    In [1]: pd.Series(1, np.arange(5))[3.0]
    pandas/core/index.py:469: FutureWarning: scalar indexers for index type Int64Index should be integers and not floating point
    Out[1]: 1
 
-    In [2]: Series(1,np.arange(5)).iloc[3.0]
+    In [2]: pd.Series(1, np.arange(5)).iloc[3.0]
    pandas/core/index.py:469: FutureWarning: scalar indexers for index type Int64Index should be integers and not floating point
    Out[2]: 1
 
-    In [3]: Series(1,np.arange(5)).iloc[3.0:4]
+    In [3]: pd.Series(1, np.arange(5)).iloc[3.0:4]
    pandas/core/index.py:527: FutureWarning: slice indexers when using iloc should be integers and not floating point
    Out[3]:
    3    1
    dtype: int64
 
    # these are Float64Indexes, so integer or floating point is acceptable
-    In [4]: Series(1,np.arange(5.))[3]
+    In [4]: pd.Series(1, np.arange(5.))[3]
    Out[4]: 1
-    In [5]: Series(1,np.arange(5.))[3.0]
+    In [5]: pd.Series(1, np.arange(5.))[3.0]
    Out[6]: 1
 
 - Numpy 1.9 compat w.r.t. deprecation warnings (:issue:`6960`)
@@ -743,13 +746,13 @@ Enhancements
 
   .. ipython:: python
 
-     Series({('a', 'b'): 1, ('a', 'a'): 0,
-             ('a', 'c'): 2, ('b', 'a'): 3, ('b', 'b'): 4})
-     DataFrame({('a', 'b'): {('A', 'B'): 1, ('A', 'C'): 2},
-                ('a', 'a'): {('A', 'C'): 3, ('A', 'B'): 4},
-                ('a', 'c'): {('A', 'B'): 5, ('A', 'C'): 6},
-                ('b', 'a'): {('A', 'C'): 7, ('A', 'B'): 8},
-                ('b', 'b'): {('A', 'D'): 9, ('A', 'B'): 10}})
+     pd.Series({('a', 'b'): 1, ('a', 'a'): 0,
+                ('a', 'c'): 2, ('b', 'a'): 3, ('b', 'b'): 4})
+     pd.DataFrame({('a', 'b'): {('A', 'B'): 1, ('A', 'C'): 2},
+                   ('a', 'a'): {('A', 'C'): 3, ('A', 'B'): 4},
+                   ('a', 'c'): {('A', 'B'): 5, ('A', 'C'): 6},
+                   ('b', 'a'): {('A', 'C'): 7, ('A', 'B'): 8},
+                   ('b', 'b'): {('A', 'D'): 9, ('A', 'B'): 10}})
 
 - Added the ``sym_diff`` method to ``Index`` (:issue:`5543`)
 - ``DataFrame.to_latex`` now takes a longtable keyword, which if True will return
   a table in a longtable environment. (:issue:`6617`)
@@ -762,20 +765,32 @@ Enhancements
 
   .. ipython:: python
 
-     household = DataFrame(dict(household_id = [1,2,3],
-                                male = [0,1,0],
-                                wealth = [196087.3,316478.7,294750]),
-                           columns = ['household_id','male','wealth']
-                           ).set_index('household_id')
+     household = pd.DataFrame({'household_id': [1, 2, 3],
+                               'male': [0, 1, 0],
+                               'wealth': [196087.3, 316478.7, 294750]
+                               },
+                              columns=['household_id', 'male', 'wealth']
+                              ).set_index('household_id')
      household
-     portfolio = DataFrame(dict(household_id = [1,2,2,3,3,3,4],
-                                asset_id = ["nl0000301109","nl0000289783","gb00b03mlx29",
-                                            "gb00b03mlx29","lu0197800237","nl0000289965",np.nan],
-                                name = ["ABN Amro","Robeco","Royal Dutch Shell","Royal Dutch Shell",
-                                        "AAB Eastern Europe Equity Fund","Postbank BioTech Fonds",np.nan],
-                                share = [1.0,0.4,0.6,0.15,0.6,0.25,1.0]),
-                           columns = ['household_id','asset_id','name','share']
-                           ).set_index(['household_id','asset_id'])
+     portfolio = pd.DataFrame({'household_id': [1, 2, 2, 3, 3, 3, 4],
+                               'asset_id': ["nl0000301109",
+                                            "nl0000289783",
+                                            "gb00b03mlx29",
+                                            "gb00b03mlx29",
+                                            "lu0197800237",
+                                            "nl0000289965",
+                                            np.nan],
+                               'name': ["ABN Amro",
+                                        "Robeco",
+                                        "Royal Dutch Shell",
+                                        "Royal Dutch Shell",
+                                        "AAB Eastern Europe Equity Fund",
+                                        "Postbank BioTech Fonds",
+                                        np.nan],
+                               'share': [1.0, 0.4, 0.6, 0.15, 0.6, 0.25, 1.0]
+                               },
+                              columns=['household_id', 'asset_id', 'name', 'share']
+                              ).set_index(['household_id', 'asset_id'])
      portfolio
 
      household.join(portfolio, how='inner')
@@ -810,7 +825,7 @@ Enhancements
   .. ipython:: python
 
      import datetime
-     df = DataFrame({
+     df = pd.DataFrame({
          'Branch' : 'A A A A A B'.split(),
          'Buyer': 'Carl Mark Carl Carl Joe Joe'.split(),
          'Quantity': [1, 3, 5, 1, 8, 1],
@@ -833,8 +848,8 @@ Enhancements
 
   .. ipython:: python
 
-     prng = period_range('2013-01-01 09:00', periods=100, freq='H')
-     ps = Series(np.random.randn(len(prng)), index=prng)
+     prng = pd.period_range('2013-01-01 09:00', periods=100, freq='H')
+     ps = pd.Series(np.random.randn(len(prng)), index=prng)
      ps
      ps['2013-01-02']
@@ -1062,4 +1077,4 @@ Bug Fixes
 Contributors
 ~~~~~~~~~~~~
 
-.. contributors:: v0.13.1..v0.14.0
+.. contributors:: v0.13.1..v0.14.0
\ No newline at end of file
diff --git a/doc/source/whatsnew/v0.14.1.rst b/doc/source/whatsnew/v0.14.1.rst
index 3c2a5c60209db..c47570a50b9c5 100644
--- a/doc/source/whatsnew/v0.14.1.rst
+++ b/doc/source/whatsnew/v0.14.1.rst
@@ -5,11 +5,6 @@ v0.14.1 (July 11, 2014)
 
 {{ header }}
 
-.. ipython:: python
-   :suppress:
-
-   from pandas import * # noqa F401, F403
-
 This is a minor release from 0.14.0 and includes a small number of API
 changes, several new features, enhancements, and performance improvements
 along with a large number of bug fixes. We recommend that all
@@ -58,13 +53,13 @@ API changes
 
   .. code-block:: ipython
 
-      In [6]: from pandas.tseries import offsets
+     In [6]: from pandas.tseries import offsets
 
-      In [7]: d = pd.Timestamp('2014-01-01 09:00')
+     In [7]: d = pd.Timestamp('2014-01-01 09:00')
 
-      # old behaviour < 0.14.1
-      In [8]: d + offsets.MonthEnd()
-      Out[8]: Timestamp('2014-01-31 00:00:00')
+     # old behaviour < 0.14.1
+     In [8]: d + offsets.MonthEnd()
+     Out[8]: pd.Timestamp('2014-01-31 00:00:00')
 
   Starting from 0.14.1 all offsets preserve time by default. The old behaviour can be
   obtained with ``normalize=True``
@@ -105,10 +100,10 @@ Enhancements
 
      import pandas.tseries.offsets as offsets
      day = offsets.Day()
-     day.apply(Timestamp('2014-01-01 09:00'))
+     day.apply(pd.Timestamp('2014-01-01 09:00'))
 
     day = offsets.Day(normalize=True)
-     day.apply(Timestamp('2014-01-01 09:00'))
+     day.apply(pd.Timestamp('2014-01-01 09:00'))
 
 - ``PeriodIndex`` is represented as the same format as ``DatetimeIndex`` (:issue:`7601`)
 - ``StringMethods`` now work on empty Series (:issue:`7242`)
@@ -127,9 +122,8 @@ Enhancements
   pytz timezones across pandas. (:issue:`4688`)
 
   .. ipython:: python
-
-     rng = date_range('3/6/2012 00:00', periods=10, freq='D',
-                      tz='dateutil/Europe/London')
+     rng = pd.date_range('3/6/2012 00:00', periods=10, freq='D',
+                         tz='dateutil/Europe/London')
      rng.tz
 
   See :ref:`the docs `.
diff --git a/setup.cfg b/setup.cfg
index 30b4d13bd0a66..cc606eabada4e 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -56,8 +56,6 @@ exclude =
     doc/source/whatsnew/v0.12.0.rst
     doc/source/whatsnew/v0.13.0.rst
    doc/source/whatsnew/v0.13.1.rst
-    doc/source/whatsnew/v0.14.0.rst
-    doc/source/whatsnew/v0.14.1.rst
     doc/source/whatsnew/v0.15.0.rst
    doc/source/whatsnew/v0.15.1.rst
    doc/source/whatsnew/v0.15.2.rst
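As a quick illustration of the convention these hunks apply (a sketch, not part of the patch itself): with the suppressed ``from pandas import *`` block removed from each whatsnew page, every example reaches pandas and NumPy through explicitly imported ``pd``/``np`` namespaces and uses PEP8 spacing, which is also what lets these files be re-enabled for linting in ``setup.cfg``.

.. code-block:: python

   # Minimal sketch of the namespace-qualified style used in the '+' lines above;
   # the variable name (dfl) mirrors one of the examples in the diff.
   import numpy as np
   import pandas as pd

   # Formerly written as DataFrame(np.random.randn(5, 2), columns=list('AB'))
   # under the star import; the qualified call needs no star import at all.
   dfl = pd.DataFrame(np.random.randn(5, 2), columns=list('AB'))
   dfl.iloc[:, 1:3]  # PEP8-spaced positional indexing, as in the updated examples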